diff --git a/data_analysis/02-3_oracle_workflow_test.py b/data_analysis/02-3_oracle_workflow_test.py
index 51b2f73..b40fe7b 100644
--- a/data_analysis/02-3_oracle_workflow_test.py
+++ b/data_analysis/02-3_oracle_workflow_test.py
@@ -270,7 +270,7 @@ temp.filter(pl.col.BEDP_MENGE_BEDARF_VM < 0)
 # // NO LIVE DATA NEEDED
 # SAVING/LOADING
 # p_save = Path.cwd() / "raw_data_from_sql_query_20260115-altered_query.arrow"
-p_save = Path.cwd() / "raw_data_from_sql_query_20260116-1.arrow"
+p_save = Path.cwd() / "raw_data_from_sql_query_20260303-1.arrow"
 # df.write_ipc(p_save)
 df = pl.read_ipc(p_save)
 # %%
@@ -897,7 +897,7 @@ def _wf100_sub1_umbreit(
     )
 
     if SAVE_TMP_FILES:
-        save_tmp_file(relevant_titles, TMPFILE_WF200_SUB1)
+        save_tmp_file(relevant_titles, TMPFILE_WF100_SUB1_UMBREIT)
 
     relevant_titles = relevant_titles.filter(pl.col.CUSTOMER_COUNT >= 3)
     entries_to_show = pipe_result.open.filter(
@@ -1236,7 +1236,7 @@ def _wf200_sub1(
 # SAVING/LOADING
 READ_DATABASE = False
 OVERWRITE = True
-FILENAME = "raw_data_from_sql_query_20260202-1.arrow"
+FILENAME = "raw_data_from_sql_query_20260303-2.arrow"
 p_save = Path.cwd() / FILENAME
 if READ_DATABASE:
     df = get_raw_data()
@@ -1308,30 +1308,42 @@ pipe_res.results.height
 all_tmps = load_all_tmp_files()
 print(len(all_tmps))
 
+# %%
+all_tmps
+
 # %%
 def prepare_tmp_data() -> list[pl.DataFrame]:
     all_tmps = load_all_tmp_files()
 
-    WF_100_TMP_RENAME = {"COUNT": "WF-100_WDB_Anz-Best-Petersen_verg_6_Monate"}
+    WF_100_TMP_WDB_RENAME = {"COUNT": "WF-100_WDB_Anz-Best-Petersen_verg_6_Monate"}
+    WF_100_TMP_UMBREIT_RENAME = {
+        "COUNT": "WF-100_Umbreit_Anz-Best-Kunde_verg_3_Monate",
+        "CUSTOMER_COUNT": "WF-100_Umbreit_Anz-Kunden_verg_3_Monate",
+    }
     WF_200_TMP_RENAME = {
         "COUNT": "WF-200_Anz-Best-Kunde_verg_3_Monate",
        "CUSTOMER_COUNT": "WF-200_Anz-Kunden_verg_3_Monate",
     }
 
-    WF_100: list[pl.DataFrame] = []
+    WF_100_UMBREIT: list[pl.DataFrame] = []
+    WF_100_WDB: list[pl.DataFrame] = []
     WF_200: list[pl.DataFrame] = []
 
     for name, df in all_tmps.items():
-        if TMPFILE_WF100_SUB1_WDB in name:
-            rename_schema = WF_100_TMP_RENAME
+        if TMPFILE_WF100_SUB1_UMBREIT in name:
+            rename_schema = WF_100_TMP_UMBREIT_RENAME
             df = df.rename(rename_schema)
-            WF_100.append(df)
+            WF_100_UMBREIT.append(df)
+        elif TMPFILE_WF100_SUB1_WDB in name:
+            rename_schema = WF_100_TMP_WDB_RENAME
+            df = df.rename(rename_schema)
+            WF_100_WDB.append(df)
         elif TMPFILE_WF200_SUB1 in name:
             rename_schema = WF_200_TMP_RENAME
             df = df.rename(rename_schema)
             WF_200.append(df)
 
-    tmp_WF_collects = (WF_100, WF_200)
+    tmp_WF_collects = (WF_100_UMBREIT, WF_100_WDB, WF_200)
 
     all_tmps_preproc: list[pl.DataFrame] = []
     for collect in tmp_WF_collects:
@@ -1394,6 +1406,29 @@ def generate_test_result_data(
 # %%
 test_results = generate_test_result_data(raw_data, pipe_res)
 test_results.head()
+# %%
+col = "WF-100_Umbreit_Anz-Best-Kunde_verg_3_Monate"
+# col = "WF-100_Umbreit_Anz-Kunden_verg_3_Monate"
+# test_results.filter(pl.col(col) >= 3)
+
+# %%
+# RELEVANT_DATE = get_starting_date(60)
+#
+# title_no = 7753822
+# title_no = 5383912
+# filter_ = sql.and_(
+#     db.EXT_AUFPAUF.c.TITELNR == title_no,
+#     db.EXT_AUFPAUF.c.AUFTRAGS_DATUM >= RELEVANT_DATE,
+#     db.EXT_AUFPAUF.c.AUFP_VORMERKUNG == "J",
+# )
+# stmt = sql.select(
+#     db.EXT_AUFPAUF,
+# ).where(filter_)
+# relevant_titles = pl.read_database(
+#     stmt,
+#     engine,
+# )
+# relevant_titles
 
 # %%