diff --git a/data_analysis/02-3_oracle_workflow_test.py b/data_analysis/02-3_oracle_workflow_test.py index 2f34710..90c3280 100644 --- a/data_analysis/02-3_oracle_workflow_test.py +++ b/data_analysis/02-3_oracle_workflow_test.py @@ -308,7 +308,13 @@ df.filter(pl.col("MELDENUMMER") == 18).select(pl.col("MENGE_VORMERKER").is_null( # ** not known at this point # there are entries where BEDP_MENGE_BEDARF_VM > MENGE_VORMERKER --> # BEDP_MENGE_BEDARF_VM as reference or ground truth not suitable -df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER")) +df_diff_VM_bedp_tinfo = df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER")) +p_save_diff_VM_bedp_tinfo = ( + Path.cwd() / "diff_BEDP-MENGE-BEDARF-VM_TINF-MENGE-VORMERKER_20251211-1.xlsx" +) +from polars.datatypes import classes as pl_dtypes + +df_diff_VM_bedp_tinfo.to_pandas().to_excel(p_save_diff_VM_bedp_tinfo, index=False) # why are there entries where "BEDP_MENGE_BEDARF_VM" > "MENGE_VORMERKER"? # %% # ** CHECK: titles with request where no title information is found diff --git a/data_analysis/queries.sql b/data_analysis/queries.sql index 7d87821..04d17b8 100644 --- a/data_analysis/queries.sql +++ b/data_analysis/queries.sql @@ -19,19 +19,23 @@ set timing on -- AND bedp.BEDP_MAN = t_info.MANDFUEHR; -- PROMPT #################################### --- PROMPT All allowed --- SELECT --- bedp.BEDARFNR, --- bedp.BEDP_SEQUENZ, --- bedp.BEDP_TITELNR, --- bedp.BEDP_MAN, --- bedp.BEDP_MENGE_BEDARF_VM, --- t_info.MELDENUMMER, --- t_info.MENGE_VORMERKER --- FROM EXT_BEDPBED bedp --- LEFT JOIN EXT_TITEL_INFO t_info --- ON bedp.BEDP_TITELNR = t_info.TI_NUMMER --- AND bedp.BEDP_MAN = t_info.MANDFUEHR; +PROMPT >>>>>>>>> All allowed +SELECT COUNT(*) FROM ( + SELECT + bedp.BEDARFNR, + bedp.BEDP_SEQUENZ, + bedp.BEDP_TITELNR, + bedp.BEDP_MAN, + bedp.BEDP_MENGE_BEDARF_VM, + t_info.MELDENUMMER, + t_info.VERLAGSNR, + t_info.MENGE_VORMERKER, + t_info.MANDFUEHR + FROM EXT_BEDPBED bedp + LEFT JOIN EXT_TITEL_INFO t_info 
+ ON bedp.BEDP_TITELNR = t_info.TI_NUMMER +); + -- -- WHERE bedp.BEDP_MAN IN (1, 90) AND t_info.MELDENUMMER != 26; -- PROMPT ###################################### PROMPT ################################################# @@ -69,48 +73,48 @@ PROMPT ################################################# -- DESC EXT_AUFPAUF; -- ############################## -SELECT AUFTRAGS_ART, COUNT(*) AS anzahl FROM ( - SELECT * FROM ( - SELECT * FROM ( - SELECT /*+ NO_USE_HASH(bedp t_info) */ - view1.BEDP_TITELNR, - t_info.MELDENUMMER, - t_info.VERLAGSNR, - t_info.MANDFUEHR - FROM - (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 - LEFT JOIN EXT_TITEL_INFO t_info - ON view1.BEDP_TITELNR = t_info.TI_NUMMER - ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) - ) titles - JOIN EXT_AUFPAUF auf - ON titles.BEDP_TITELNR = auf.TITELNR - WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-09-05', 'YYYY-MM-DD') -) -GROUP BY AUFTRAGS_ART -ORDER BY anzahl DESC; +-- SELECT AUFTRAGS_ART, COUNT(*) AS anzahl FROM ( +-- SELECT * FROM ( +-- SELECT * FROM ( +-- SELECT /*+ NO_USE_HASH(bedp t_info) */ +-- view1.BEDP_TITELNR, +-- t_info.MELDENUMMER, +-- t_info.VERLAGSNR, +-- t_info.MANDFUEHR +-- FROM +-- (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 +-- LEFT JOIN EXT_TITEL_INFO t_info +-- ON view1.BEDP_TITELNR = t_info.TI_NUMMER +-- ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) +-- ) titles +-- JOIN EXT_AUFPAUF auf +-- ON titles.BEDP_TITELNR = auf.TITELNR +-- WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-09-05', 'YYYY-MM-DD') +-- ) +-- GROUP BY AUFTRAGS_ART +-- ORDER BY anzahl DESC; -PROMPT >> -SELECT EINGANGS_ART, COUNT(*) AS anzahl FROM ( - SELECT * FROM ( - SELECT * FROM ( - SELECT /*+ NO_USE_HASH(bedp t_info) */ - view1.BEDP_TITELNR, - t_info.MELDENUMMER, - t_info.VERLAGSNR, - t_info.MANDFUEHR - FROM - (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 - LEFT JOIN EXT_TITEL_INFO t_info - ON view1.BEDP_TITELNR = 
t_info.TI_NUMMER - ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) - ) titles - JOIN EXT_AUFPAUF auf - ON titles.BEDP_TITELNR = auf.TITELNR - WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-09-05', 'YYYY-MM-DD') -) -GROUP BY EINGANGS_ART -ORDER BY anzahl DESC; +-- PROMPT >> +-- SELECT EINGANGS_ART, COUNT(*) AS anzahl FROM ( +-- SELECT * FROM ( +-- SELECT * FROM ( +-- SELECT /*+ NO_USE_HASH(bedp t_info) */ +-- view1.BEDP_TITELNR, +-- t_info.MELDENUMMER, +-- t_info.VERLAGSNR, +-- t_info.MANDFUEHR +-- FROM +-- (SELECT DISTINCT bedp.BEDP_TITELNR FROM EXT_BEDPBED bedp) view1 +-- LEFT JOIN EXT_TITEL_INFO t_info +-- ON view1.BEDP_TITELNR = t_info.TI_NUMMER +-- ) sub1 WHERE sub1.MANDFUEHR IN (1,90) AND sub1.MELDENUMMER in (17, 18) +-- ) titles +-- JOIN EXT_AUFPAUF auf +-- ON titles.BEDP_TITELNR = auf.TITELNR +-- WHERE auf.AUFTRAGS_DATUM > TO_DATE('2025-09-05', 'YYYY-MM-DD') +-- ) +-- GROUP BY EINGANGS_ART +-- ORDER BY anzahl DESC; diff --git a/data_analysis/query_raw_data.sql b/data_analysis/query_raw_data.sql new file mode 100644 index 0000000..bca139e --- /dev/null +++ b/data_analysis/query_raw_data.sql @@ -0,0 +1,17 @@ +SELECT COUNT(*) FROM ( + SELECT + bedp.BEDARFNR, + bedp.BEDP_SEQUENZ, + bedp.BEDP_TITELNR, + bedp.BEDP_MAN, + bedp.BEDP_MENGE_BEDARF_VM, + t_info.MELDENUMMER, + t_info.VERLAGSNR, + t_info.MENGE_VORMERKER, + t_info.MANDFUEHR + FROM EXT_BEDPBED bedp + LEFT JOIN EXT_TITEL_INFO t_info + ON bedp.BEDP_TITELNR = t_info.TI_NUMMER +); + +SELECT * FROM EXT_TITEL_INFO t_info WHERE t_info.TI_NUMMER = 2787037; \ No newline at end of file diff --git a/pdm.lock b/pdm.lock index 51e4eaa..eb97ec1 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "data", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:26311ae8e74c466dc51234b41c9a1777f0ff5140863e5f290a9d7d68396c5b46" +content_hash = "sha256:60c870b0938a478c52ed769bfe1f33892f9c93b95a6a700d86a9853b1e8f2e8f" 
[[metadata.targets]] requires_python = ">=3.11" @@ -3045,3 +3045,14 @@ files = [ {file = "widgetsnbextension-4.0.15-py3-none-any.whl", hash = "sha256:8156704e4346a571d9ce73b84bee86a29906c9abfd7223b7228a28899ccf3366"}, {file = "widgetsnbextension-4.0.15.tar.gz", hash = "sha256:de8610639996f1567952d763a5a41af8af37f2575a41f9852a38f947eb82a3b9"}, ] + +[[package]] +name = "xlsxwriter" +version = "3.2.9" +requires_python = ">=3.8" +summary = "A Python module for creating Excel XLSX files." +groups = ["data"] +files = [ + {file = "xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3"}, + {file = "xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c"}, +] diff --git a/pyproject.toml b/pyproject.toml index 6b4649a..ab367dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,4 +152,5 @@ data = [ "pandas>=2.3.3", "polars>=1.35.1", "pyarrow>=22.0.0", + "xlsxwriter>=3.2.9", ]