adapted queries in preparation for the regular meeting

This commit is contained in:
2025-12-11 11:30:48 +01:00
parent 6a7ccb8e27
commit 4af438513d
3 changed files with 300 additions and 14 deletions

View File

@@ -92,6 +92,9 @@ join_condition = sql.and_(
db.ext_bedpbed.c.BEDP_TITELNR == db.EXT_AUFPAUF.c.TITELNR,
db.ext_bedpbed.c.BEDP_MAN == db.EXT_AUFPAUF.c.MANDANT,
)
join_condition = sql.and_(
db.ext_bedpbed.c.BEDP_TITELNR == db.EXT_AUFPAUF.c.TITELNR,
)
where_condition = sql.and_(
db.EXT_AUFPAUF.c.AUFTRAGS_DATUM > start_date,
db.EXT_AUFPAUF.c.KUNDE_RECHNUNG.not_in(filter_K_rech),
@@ -214,13 +217,23 @@ df.head()
# %%
# // NO LIVE DATA NEEDED
# SAVING/LOADING
p_save = Path.cwd() / "raw_data_from_sql_query_20251203-3.arrow"
p_save = Path.cwd() / "raw_data_from_sql_query_20251211-1.arrow"
# df.write_ipc(p_save)
df = pl.read_ipc(p_save)
# %%
print(len(df))
df.head()
# %%
# ** CHECK: unique title number?
df.with_columns(titlenumber_count=pl.col("BEDP_TITELNR").count().over("BEDP_TITELNR")).select(
["BEDP_TITELNR", "titlenumber_count"]
).unique().filter(pl.col("titlenumber_count") > 1)
# %%
# ** CHECK: distribution of MELDENUMMER
df.filter(pl.col("MELDENUMMER").is_not_null() & pl.col("MELDENUMMER").is_in((17, 18))).select(
pl.len()
)
# %%
# ** CHECK: differences MANDANT in BEDP and in TINFO
# 4591588: present in the title database with a different MANDANT (are MANDFUEHR and BEDP_MAN suitable for matching?)
df.filter(pl.col("BEDP_MAN") != pl.col("MANDFUEHR")).select(pl.col("BEDP_MAN").unique())
@@ -253,7 +266,8 @@ df.filter(pl.col("BEDP_MAN") == 60).filter(pl.col("MANDFUEHR").is_null())
# ).null_count()
# )
# print("Unique value counts: ", df.select(pl.col("BEDP_MAN").value_counts()))
# %%
df.filter(pl.col("MELDENUMMER").is_null()).filter(pl.col("MANDFUEHR").is_not_null())
# %%
# ** PREFILTER
# always needed; entries filtered out here are to be discarded
@@ -286,7 +300,7 @@ agg_t = (
# .filter(pl.col("count_customer") >= 0) # !! should be 3
) # .filter(pl.col("MELDENUMMER") == 18)
agg_t
# %%
df.filter(pl.col("MELDENUMMER") == 18).select(pl.col("MENGE_VORMERKER").is_null().sum())
# %%
@@ -302,11 +316,15 @@ df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER"))
not_in_title_table = df.filter(pl.col("MELDENUMMER").is_null())
EXPORT_FEAT = "BEDP_TITELNR"
to_save = {EXPORT_FEAT: not_in_title_table.select(EXPORT_FEAT).to_series().to_list()}
p_save_not_in_title_table = Path.cwd() / "not_in_title_table_20251203-2.json"
p_save_not_in_title_table = Path.cwd() / "not_in_title_table_20251211-1.json"
print(to_save)
# with open(p_save_not_in_title_table, "w") as file:
# json.dump(to_save, file, indent=4)
# %%
df.group_by("BEDP_MAN").agg(pl.len())
# %%
df.filter(pl.col("MELDENUMMER").is_null()).group_by("BEDP_MAN").agg(pl.len().alias("count"))
# %%
print(len(df.filter(pl.col("MELDENUMMER") == 18)))
# df.filter(pl.col("MELDENUMMER") == 18).filter((pl.col("BEDP_MENGE_BEDARF_VM").is_not_null()) & (pl.col("BEDP_MENGE_BEDARF_VM") > 0))
# %%
@@ -452,7 +470,7 @@ def workflow_900(
def workflow_910(
pipe_result: types.PipelineResult,
) -> types.PipelineResult:
filter_mandant = pl.col("BEDP_MAN").is_in((1, 90))
filter_mandant = pl.col("MANDFUEHR").is_in((1, 90))
filter_ignore_MNR26 = pl.col("MELDENUMMER") != 26
res = _apply_several_filters(
@@ -483,7 +501,7 @@ def workflow_100_umbreit(
vm_criterion: str,
) -> types.PipelineResult:
filter_meldenummer = pl.col("MELDENUMMER") == 18
filter_mandant = pl.col("BEDP_MAN") == 1
filter_mandant = pl.col("MANDFUEHR") == 1
filter_number_vm = pl.col(vm_criterion) > 0
res = _apply_several_filters(
@@ -515,7 +533,7 @@ def workflow_100_petersen(
# // WDB branch
filter_meldenummer = pl.col("MELDENUMMER") == 18
filter_mandant = pl.col("BEDP_MAN") == 90
filter_mandant = pl.col("MANDFUEHR") == 90
filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070))
filter_number_vm = pl.col(vm_criterion) > 0
@@ -540,7 +558,7 @@ def workflow_100_petersen(
# order quantity 0, no further action in other WFs
filter_meldenummer = pl.col("MELDENUMMER") == 18
filter_mandant = pl.col("BEDP_MAN") == 90
filter_mandant = pl.col("MANDFUEHR") == 90
filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070))
filter_number_vm = pl.col(vm_criterion) == 0
@@ -565,7 +583,7 @@ def workflow_100_petersen(
# // other branch
filter_meldenummer = pl.col("MELDENUMMER") == 18
filter_mandant = pl.col("BEDP_MAN") == 90
filter_mandant = pl.col("MANDFUEHR") == 90
filter_number_vm = pl.col(vm_criterion) > 0
res = _apply_several_filters(
@@ -591,7 +609,7 @@ def workflow_100_petersen(
# %%
# SAVING/LOADING
p_save = Path.cwd() / "raw_data_from_sql_query_20251203-3.arrow"
p_save = Path.cwd() / "raw_data_from_sql_query_20251211-1.arrow"
df = pl.read_ipc(p_save)
print(f"Number of entries: {len(df)}")
@@ -648,7 +666,6 @@ pipe_res.results
# raw_data.filter(pl.col("BEDARFNR") == 166982).filter(pl.col("BEDP_SEQUENZ") == 1)
# %%
pipe_res.open.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER"))
# print(f"Base data and pipe result in line: {}")
# %%
pipe_res = workflow_910(pipe_res)
print(f"Length of base data: {len(raw_data):>18}")