update WF-100 Petersen for special case

2026-01-15 08:36:49 +01:00 · 2026-01-15 08:36:49 +01:00 · 3557ab0687
commit 3557ab0687
parent ab4eb1cbac
1 changed file with 29 additions and 9 deletions
--- a/data_analysis/02-3_oracle_workflow_test.py
+++ b/data_analysis/02-3_oracle_workflow_test.py
@@ -39,10 +39,10 @@ USER_NAME = CFG["user"]["name"]
 USER_PASS = CFG["user"]["pass"]
 # %%
 # !! init thick mode
-p_oracle_client = Path(r"C:\Databases\Oracle\instantclient_19_29")
+# p_oracle_client = Path(r"C:\Databases\Oracle\instantclient_19_29")
-assert p_oracle_client.exists()
+# assert p_oracle_client.exists()
-assert p_oracle_client.is_dir()
+# assert p_oracle_client.is_dir()
-oracledb.init_oracle_client(lib_dir=str(p_oracle_client))
+# oracledb.init_oracle_client(lib_dir=str(p_oracle_client))
 # %%
 conn_string = (
    f"oracle+oracledb://{USER_NAME}:{USER_PASS}@{HOST}:{PORT}?service_name={SERVICE}"
@@ -264,19 +264,21 @@ df.with_columns(
 )
 # %%
 # ** CHECK: distribution of MELDENUMMER
 temp = df.filter(pl.col.BEDP_MAN.is_in((1, 90)))
 sum_entries = len(temp)
 temp = (
-    df.group_by("MELDENUMMER")
+    temp.group_by("MELDENUMMER")
    .agg(pl.col("MELDENUMMER").len().alias("count"))
    .sort("count", descending=True)
 )
-sum_entries = len(df)
+
 temp = temp.with_columns((pl.col.count / sum_entries).alias("proportion"))
 temp = temp.with_columns(pl.col.proportion.cum_sum().alias("cum"))
 temp
 # df.filter(pl.col("MELDENUMMER").is_not_null() & pl.col("MELDENUMMER").is_in((17, 18))).select(
 #     pl.len()
 # )
-# p_save = Path.cwd() / "meldenummer_anteile_20260114-1.xlsx"
+# p_save = Path.cwd() / "meldenummer_anteile_20260114-2.xlsx"
 # temp.write_excel(p_save)
 # %%
 # ** CHECK: differences MANDANT in BEDP and in TINFO
@@ -703,7 +705,6 @@ def workflow_100_petersen(
    filter_mandant = pl.col(MANDANT_CRITERION) == 90
    filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070))
    filter_number_vm = pl.col(vm_criterion) == 0
    res = _apply_several_filters(
        pipe_result.open,
        (
@@ -742,10 +743,29 @@ def workflow_100_petersen(
    pipe_result.merge_pipeline(wdb_sub_pipe)
    # // other branch
    # show always entries with #VM > 1
    filter_meldenummer = pl.col("MELDENUMMER") == 18
    filter_mandant = pl.col(MANDANT_CRITERION) == 90
    filter_number_vm = pl.col(vm_criterion) > 1
    res = _apply_several_filters(
        pipe_result.open,
        (
            filter_meldenummer,
            filter_mandant,
            filter_number_vm,
        ),
    )
    pipe_result.write_results(
        data=res.in_,
        vorlage=True,
        wf_id=100,
        freigabe_auto=types.Freigabe.WF_100,
        is_out=False,
    )
    filter_meldenummer = pl.col("MELDENUMMER") == 18
    filter_mandant = pl.col(MANDANT_CRITERION) == 90
    filter_number_vm = pl.col(vm_criterion) > 0
    res = _apply_several_filters(
        pipe_result.open,
        (