diff --git a/data_analysis/02-3_oracle_workflow_test.py b/data_analysis/02-3_oracle_workflow_test.py index 5c686eb..11f2093 100644 --- a/data_analysis/02-3_oracle_workflow_test.py +++ b/data_analysis/02-3_oracle_workflow_test.py @@ -39,10 +39,10 @@ USER_NAME = CFG["user"]["name"] USER_PASS = CFG["user"]["pass"] # %% # !! init thick mode -p_oracle_client = Path(r"C:\Databases\Oracle\instantclient_19_29") -assert p_oracle_client.exists() -assert p_oracle_client.is_dir() -oracledb.init_oracle_client(lib_dir=str(p_oracle_client)) +# p_oracle_client = Path(r"C:\Databases\Oracle\instantclient_19_29") +# assert p_oracle_client.exists() +# assert p_oracle_client.is_dir() +# oracledb.init_oracle_client(lib_dir=str(p_oracle_client)) # %% conn_string = ( f"oracle+oracledb://{USER_NAME}:{USER_PASS}@{HOST}:{PORT}?service_name={SERVICE}" @@ -264,19 +264,21 @@ df.with_columns( ) # %% # ** CHECK: distribution of MELDENUMMER +temp = df.filter(pl.col.BEDP_MAN.is_in((1, 90))) +sum_entries = len(temp) temp = ( - df.group_by("MELDENUMMER") + temp.group_by("MELDENUMMER") .agg(pl.col("MELDENUMMER").len().alias("count")) .sort("count", descending=True) ) -sum_entries = len(df) + temp = temp.with_columns((pl.col.count / sum_entries).alias("proportion")) temp = temp.with_columns(pl.col.proportion.cum_sum().alias("cum")) temp # df.filter(pl.col("MELDENUMMER").is_not_null() & pl.col("MELDENUMMER").is_in((17, 18))).select( # pl.len() # ) -# p_save = Path.cwd() / "meldenummer_anteile_20260114-1.xlsx" +# p_save = Path.cwd() / "meldenummer_anteile_20260114-2.xlsx" # temp.write_excel(p_save) # %% # ** CHECK: differences MANDANT in BEDP and in TINFO @@ -703,7 +705,6 @@ def workflow_100_petersen( filter_mandant = pl.col(MANDANT_CRITERION) == 90 filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070)) filter_number_vm = pl.col(vm_criterion) == 0 - res = _apply_several_filters( pipe_result.open, ( @@ -742,10 +743,29 @@ def workflow_100_petersen( pipe_result.merge_pipeline(wdb_sub_pipe) # // other branch + # show always entries with #VM > 1 + filter_meldenummer = pl.col("MELDENUMMER") == 18 + filter_mandant = pl.col(MANDANT_CRITERION) == 90 + filter_number_vm = pl.col(vm_criterion) > 1 + res = _apply_several_filters( + pipe_result.open, + ( + filter_meldenummer, + filter_mandant, + filter_number_vm, + ), + ) + pipe_result.write_results( + data=res.in_, + vorlage=True, + wf_id=100, + freigabe_auto=types.Freigabe.WF_100, + is_out=False, + ) + filter_meldenummer = pl.col("MELDENUMMER") == 18 filter_mandant = pl.col(MANDANT_CRITERION) == 90 filter_number_vm = pl.col(vm_criterion) > 0 - res = _apply_several_filters( pipe_result.open, (