From 4af438513dda110a86fda99bdb11580765425ff5 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Dec 2025 11:30:48 +0100 Subject: [PATCH] adapted queries in preparation of regular meeting --- data_analysis/02-3_oracle_workflow_test.py | 39 ++-- .../not_in_title_table_20251211-1.json | 175 ++++++++++++++++++ data_analysis/queries.sql | 100 +++++++++- 3 files changed, 300 insertions(+), 14 deletions(-) create mode 100644 data_analysis/not_in_title_table_20251211-1.json diff --git a/data_analysis/02-3_oracle_workflow_test.py b/data_analysis/02-3_oracle_workflow_test.py index 265fad3..2f34710 100644 --- a/data_analysis/02-3_oracle_workflow_test.py +++ b/data_analysis/02-3_oracle_workflow_test.py @@ -92,6 +92,9 @@ join_condition = sql.and_( db.ext_bedpbed.c.BEDP_TITELNR == db.EXT_AUFPAUF.c.TITELNR, db.ext_bedpbed.c.BEDP_MAN == db.EXT_AUFPAUF.c.MANDANT, ) +join_condition = sql.and_( + db.ext_bedpbed.c.BEDP_TITELNR == db.EXT_AUFPAUF.c.TITELNR, +) where_condition = sql.and_( db.EXT_AUFPAUF.c.AUFTRAGS_DATUM > start_date, db.EXT_AUFPAUF.c.KUNDE_RECHNUNG.not_in(filter_K_rech), @@ -214,13 +217,23 @@ df.head() # %% # // NO LIVE DATA NEEDED # SAVING/LOADING -p_save = Path.cwd() / "raw_data_from_sql_query_20251203-3.arrow" +p_save = Path.cwd() / "raw_data_from_sql_query_20251211-1.arrow" # df.write_ipc(p_save) df = pl.read_ipc(p_save) # %% print(len(df)) df.head() # %% +# ** CHECK: unique title number? +df.with_columns(titlenumber_count=pl.col("BEDP_TITELNR").count().over("BEDP_TITELNR")).select( + ["BEDP_TITELNR", "titlenumber_count"] +).unique().filter(pl.col("titlenumber_count") > 1) +# %% +# ** CHECK: distribution of MELDENUMMER +df.filter(pl.col("MELDENUMMER").is_not_null() & pl.col("MELDENUMMER").is_in((17, 18))).select( + pl.len() +) +# %% # ** CHECK: differences MANDANT in BEDP and in TINFO # 4591588: in title database with different MANDANT (are MANDANTFUEHR and BEDP_MAN feasible for matching?) df.filter(pl.col("BEDP_MAN") != pl.col("MANDFUEHR")).select(pl.col("BEDP_MAN").unique()) @@ -253,7 +266,8 @@ df.filter(pl.col("BEDP_MAN") == 60).filter(pl.col("MANDFUEHR").is_null()) # ).null_count() # ) # print("Unique value counts: ", df.select(pl.col("BEDP_MAN").value_counts())) - +# %% +df.filter(pl.col("MELDENUMMER").is_null()).filter(pl.col("MANDFUEHR").is_not_null()) # %% # ** PREFILTER # always needed, entries filtered out are to be disposed @@ -286,7 +300,7 @@ agg_t = ( # .filter(pl.col("count_customer") >= 0) # !! should be 3 ) # .filter(pl.col("MELDENUMMER") == 18) agg_t - +# %% df.filter(pl.col("MELDENUMMER") == 18).select(pl.col("MENGE_VORMERKER").is_null().sum()) # %% @@ -302,11 +316,15 @@ df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER")) not_in_title_table = df.filter(pl.col("MELDENUMMER").is_null()) EXPORT_FEAT = "BEDP_TITELNR" to_save = {EXPORT_FEAT: not_in_title_table.select(EXPORT_FEAT).to_series().to_list()} -p_save_not_in_title_table = Path.cwd() / "not_in_title_table_20251203-2.json" +p_save_not_in_title_table = Path.cwd() / "not_in_title_table_20251211-1.json" print(to_save) # with open(p_save_not_in_title_table, "w") as file: # json.dump(to_save, file, indent=4) # %% +df.group_by("BEDP_MAN").agg(pl.len()) +# %% +df.filter(pl.col("MELDENUMMER").is_null()).group_by("BEDP_MAN").agg(pl.len().alias("count")) +# %% print(len(df.filter(pl.col("MELDENUMMER") == 18))) # df.filter(pl.col("MELDENUMMER") == 18).filter((pl.col("BEDP_MENGE_BEDARF_VM").is_not_null()) & (pl.col("BEDP_MENGE_BEDARF_VM") > 0)) # %% @@ -452,7 +470,7 @@ def workflow_900( def workflow_910( pipe_result: types.PipelineResult, ) -> types.PipelineResult: - filter_mandant = pl.col("BEDP_MAN").is_in((1, 90)) + filter_mandant = pl.col("MANDFUEHR").is_in((1, 90)) filter_ignore_MNR26 = pl.col("MELDENUMMER") != 26 res = _apply_several_filters( @@ -483,7 +501,7 @@ def workflow_100_umbreit( vm_criterion: str, ) -> types.PipelineResult: filter_meldenummer = pl.col("MELDENUMMER") == 18 - filter_mandant = pl.col("BEDP_MAN") == 1 + filter_mandant = pl.col("MANDFUEHR") == 1 filter_number_vm = pl.col(vm_criterion) > 0 res = _apply_several_filters( @@ -515,7 +533,7 @@ def workflow_100_petersen( # // WDB branch filter_meldenummer = pl.col("MELDENUMMER") == 18 - filter_mandant = pl.col("BEDP_MAN") == 90 + filter_mandant = pl.col("MANDFUEHR") == 90 filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070)) filter_number_vm = pl.col(vm_criterion) > 0 @@ -540,7 +558,7 @@ def workflow_100_petersen( # order quantity 0, no further action in other WFs filter_meldenummer = pl.col("MELDENUMMER") == 18 - filter_mandant = pl.col("BEDP_MAN") == 90 + filter_mandant = pl.col("MANDFUEHR") == 90 filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070)) filter_number_vm = pl.col(vm_criterion) == 0 @@ -565,7 +583,7 @@ def workflow_100_petersen( # // other branch filter_meldenummer = pl.col("MELDENUMMER") == 18 - filter_mandant = pl.col("BEDP_MAN") == 90 + filter_mandant = pl.col("MANDFUEHR") == 90 filter_number_vm = pl.col(vm_criterion) > 0 res = _apply_several_filters( @@ -591,7 +609,7 @@ def workflow_100_petersen( # %% # SAVING/LOADING -p_save = Path.cwd() / "raw_data_from_sql_query_20251203-3.arrow" +p_save = Path.cwd() / "raw_data_from_sql_query_20251211-1.arrow" df = pl.read_ipc(p_save) print(f"Number of entries: {len(df)}") @@ -648,7 +666,6 @@ pipe_res.results # raw_data.filter(pl.col("BEDARFNR") == 166982).filter(pl.col("BEDP_SEQUENZ") == 1) # %% pipe_res.open.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER")) -# print(f"Base data and pipe result in line: {}") # %% pipe_res = workflow_910(pipe_res) print(f"Length of base data: {len(raw_data):>18}") diff --git a/data_analysis/not_in_title_table_20251211-1.json b/data_analysis/not_in_title_table_20251211-1.json new file mode 100644 index 0000000..e7f62e3 --- /dev/null +++ b/data_analysis/not_in_title_table_20251211-1.json @@ -0,0 +1,175 @@ +{ + "BEDP_TITELNR": [ + 5641810, + 9388245, + 5690882, + 8420618, + 5625063, + 4894841, + 8047302, + 7133112, + 5355081, + 6871073, + 9435273, + 4136531, + 7687300, + 2682366, + 4364686, + 2430598, + 2037163, + 2789480, + 2770577, + 2787037, + 6003708, + 4407203, + 8776286, + 5402902, + 5838480, + 4522891, + 3980696, + 950637, + 4965472, + 4228186, + 4210552, + 5002965, + 5545604, + 5880206, + 2241251, + 6370663, + 7683723, + 7010822, + 5161076, + 4147313, + 5793208, + 7907745, + 4261009, + 2717881, + 6067021, + 4365985, + 8040512, + 8890058, + 1780135, + 7262230, + 4410469, + 9000191, + 6444167, + 4948035, + 252810, + 4976957, + 6135037, + 5989608, + 5729058, + 4395070, + 5625122, + 7888648, + 6110254, + 9787272, + 4336175, + 5497657, + 2793591, + 6893056, + 3030639, + 7010792, + 5491873, + 258070, + 3853173, + 6046715, + 6125576, + 1504007, + 4262953, + 7935360, + 922162, + 1049053, + 9720614, + 5591810, + 2544914, + 2107970, + 2285493, + 8594367, + 7966115, + 7966119, + 7580940, + 6261428, + 3370678, + 6132326, + 8254295, + 8254294, + 6261430, + 4838001, + 8139591, + 8139588, + 8139587, + 8139586, + 8139585, + 8254301, + 3369002, + 4836770, + 4836769, + 4838000, + 6132323, + 6178366, + 6178370, + 6178371, + 4837537, + 6132318, + 6132322, + 5227665, + 5227661, + 2837820, + 4836779, + 3370676, + 9436407, + 4012212, + 4427503, + 4577066, + 9418557, + 2008168, + 7580941, + 6086598, + 6132319, + 8139590, + 8630511, + 7965895, + 3408132, + 5227666, + 5989581, + 5132452, + 4424591, + 7055141, + 2770583, + 2770591, + 9037465, + 5700267, + 4837536, + 4267436, + 4121829, + 717868, + 6633287, + 5335386, + 4836777, + 4154513, + 2770540, + 5730873, + 6160255, + 6939447, + 5545606, + 6178367, + 2010002, + 5494490, + 9206119, + 5227663, + 3369003, + 3030637, + 7414855, + 7945698, + 5514005, + 2537012, + 4263003, + 3408130, + 6924305, + 7966118, + 139058, + 4250548, + 2770562 + ] +} \ No newline at end of file diff --git a/data_analysis/queries.sql b/data_analysis/queries.sql index fefc2d4..7d87821 100644 --- a/data_analysis/queries.sql +++ b/data_analysis/queries.sql @@ -34,6 +34,100 @@ set timing on -- AND bedp.BEDP_MAN = t_info.MANDFUEHR; -- -- WHERE bedp.BEDP_MAN IN (1, 90) AND t_info.MELDENUMMER != 26; -- PROMPT ###################################### +PROMPT ################################################# +-- SELECT COUNT(*) FROM ( +-- SELECT /*+ NO_USE_HASH(bedp t_info) */ +-- view1.BEDP_TITELNR, +-- t_info.MELDENUMMER, +-- t_info.VERLAGSNR, +-- t_info.MANDFUEHR +-- FROM +-- (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 +-- LEFT JOIN EXT_TITEL_INFO t_info +-- ON view1.BEDP_TITELNR = t_info.TI_NUMMER +-- ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18); + + +-- SELECT * FROM ( +-- SELECT * FROM ( +-- SELECT /*+ NO_USE_HASH(bedp t_info) */ +-- view1.BEDP_TITELNR, +-- t_info.MELDENUMMER, +-- t_info.VERLAGSNR, +-- t_info.MANDFUEHR +-- FROM +-- (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 +-- LEFT JOIN EXT_TITEL_INFO t_info +-- ON view1.BEDP_TITELNR = t_info.TI_NUMMER +-- ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) +-- ) titles +-- JOIN EXT_AUFPAUF auf +-- ON titles.BEDP_TITELNR = auf.TITELNR +-- WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-12-05', 'YYYY-MM-DD') +-- FETCH FIRST 20 ROWS ONLY; + +-- DESC EXT_AUFPAUF; + +-- ############################## +SELECT AUFTRAGS_ART, COUNT(*) AS anzahl FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT /*+ NO_USE_HASH(bedp t_info) */ + view1.BEDP_TITELNR, + t_info.MELDENUMMER, + t_info.VERLAGSNR, + t_info.MANDFUEHR + FROM + (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 + LEFT JOIN EXT_TITEL_INFO t_info + ON view1.BEDP_TITELNR = t_info.TI_NUMMER + ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) + ) titles + JOIN EXT_AUFPAUF auf + ON titles.BEDP_TITELNR = auf.TITELNR + WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-09-05', 'YYYY-MM-DD') +) +GROUP BY AUFTRAGS_ART +ORDER BY anzahl DESC; + +PROMPT >> +SELECT EINGANGS_ART, COUNT(*) AS anzahl FROM ( + SELECT * FROM ( + SELECT * FROM ( + SELECT /*+ NO_USE_HASH(bedp t_info) */ + view1.BEDP_TITELNR, + t_info.MELDENUMMER, + t_info.VERLAGSNR, + t_info.MANDFUEHR + FROM + (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 + LEFT JOIN EXT_TITEL_INFO t_info + ON view1.BEDP_TITELNR = t_info.TI_NUMMER + ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) + ) titles + JOIN EXT_AUFPAUF auf + ON titles.BEDP_TITELNR = auf.TITELNR + WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-09-05', 'YYYY-MM-DD') +) +GROUP BY EINGANGS_ART +ORDER BY anzahl DESC; + + + + +-- EXPLAIN PLAN FOR +-- SELECT COUNT(*) FROM ( +-- SELECT /*+gather_plan_statistics*/ /*+ NO_USE_HASH(bedp t_info) */ +-- bedp.BEDP_TITELNR, +-- t_info.MELDENUMMER, +-- t_info.VERLAGSNR, +-- t_info.MANDFUEHR +-- FROM EXT_BEDPBED bedp +-- LEFT JOIN EXT_TITEL_INFO t_info +-- ON bedp.BEDP_TITELNR = t_info.TI_NUMMER +-- ); +-- SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY(format => 'ALL')); + -- SELECT * FROM EXT_TITEL_INFO t_info WHERE t_info.TI_NUMMER = 6132326; -- SELECT * FROM EXT_TITEL_INFO t_info WHERE t_info.TI_NUMMER = 5641810; @@ -52,6 +146,6 @@ set timing on -- FROM EXT_AUFPAUF auf -- WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-11-18', 'YYYY-MM-DD'); -SELECT * -FROM EXT_AUFPAUF auf -WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-11-18', 'YYYY-MM-DD'); \ No newline at end of file +-- SELECT * +-- FROM EXT_AUFPAUF auf +-- WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-11-18', 'YYYY-MM-DD'); \ No newline at end of file