generated from dopt-python/py311
Adapted queries in preparation for the regular meeting
This commit is contained in:
@@ -92,6 +92,9 @@ join_condition = sql.and_(
|
||||
db.ext_bedpbed.c.BEDP_TITELNR == db.EXT_AUFPAUF.c.TITELNR,
|
||||
db.ext_bedpbed.c.BEDP_MAN == db.EXT_AUFPAUF.c.MANDANT,
|
||||
)
|
||||
join_condition = sql.and_(
|
||||
db.ext_bedpbed.c.BEDP_TITELNR == db.EXT_AUFPAUF.c.TITELNR,
|
||||
)
|
||||
where_condition = sql.and_(
|
||||
db.EXT_AUFPAUF.c.AUFTRAGS_DATUM > start_date,
|
||||
db.EXT_AUFPAUF.c.KUNDE_RECHNUNG.not_in(filter_K_rech),
|
||||
@@ -214,13 +217,23 @@ df.head()
|
||||
# %%
|
||||
# // NO LIVE DATA NEEDED
|
||||
# SAVING/LOADING
|
||||
p_save = Path.cwd() / "raw_data_from_sql_query_20251203-3.arrow"
|
||||
p_save = Path.cwd() / "raw_data_from_sql_query_20251211-1.arrow"
|
||||
# df.write_ipc(p_save)
|
||||
df = pl.read_ipc(p_save)
|
||||
# %%
|
||||
print(len(df))
|
||||
df.head()
|
||||
# %%
|
||||
# ** CHECK: unique title number?
|
||||
df.with_columns(titlenumber_count=pl.col("BEDP_TITELNR").count().over("BEDP_TITELNR")).select(
|
||||
["BEDP_TITELNR", "titlenumber_count"]
|
||||
).unique().filter(pl.col("titlenumber_count") > 1)
|
||||
# %%
|
||||
# ** CHECK: distribution of MELDENUMMER
|
||||
df.filter(pl.col("MELDENUMMER").is_not_null() & pl.col("MELDENUMMER").is_in((17, 18))).select(
|
||||
pl.len()
|
||||
)
|
||||
# %%
|
||||
# ** CHECK: differences in MANDANT between BEDP and TINFO
|
||||
# 4591588: present in the title database with a different MANDANT (are MANDFUEHR and BEDP_MAN suitable for matching?)
|
||||
df.filter(pl.col("BEDP_MAN") != pl.col("MANDFUEHR")).select(pl.col("BEDP_MAN").unique())
|
||||
@@ -253,7 +266,8 @@ df.filter(pl.col("BEDP_MAN") == 60).filter(pl.col("MANDFUEHR").is_null())
|
||||
# ).null_count()
|
||||
# )
|
||||
# print("Unique value counts: ", df.select(pl.col("BEDP_MAN").value_counts()))
|
||||
|
||||
# %%
|
||||
df.filter(pl.col("MELDENUMMER").is_null()).filter(pl.col("MANDFUEHR").is_not_null())
|
||||
# %%
|
||||
# ** PREFILTER
|
||||
# always needed; entries filtered out here are to be discarded
|
||||
@@ -286,7 +300,7 @@ agg_t = (
|
||||
# .filter(pl.col("count_customer") >= 0) # !! should be 3
|
||||
) # .filter(pl.col("MELDENUMMER") == 18)
|
||||
agg_t
|
||||
|
||||
# %%
|
||||
df.filter(pl.col("MELDENUMMER") == 18).select(pl.col("MENGE_VORMERKER").is_null().sum())
|
||||
|
||||
# %%
|
||||
@@ -302,11 +316,15 @@ df.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER"))
|
||||
not_in_title_table = df.filter(pl.col("MELDENUMMER").is_null())
|
||||
EXPORT_FEAT = "BEDP_TITELNR"
|
||||
to_save = {EXPORT_FEAT: not_in_title_table.select(EXPORT_FEAT).to_series().to_list()}
|
||||
p_save_not_in_title_table = Path.cwd() / "not_in_title_table_20251203-2.json"
|
||||
p_save_not_in_title_table = Path.cwd() / "not_in_title_table_20251211-1.json"
|
||||
print(to_save)
|
||||
# with open(p_save_not_in_title_table, "w") as file:
|
||||
# json.dump(to_save, file, indent=4)
|
||||
# %%
|
||||
df.group_by("BEDP_MAN").agg(pl.len())
|
||||
# %%
|
||||
df.filter(pl.col("MELDENUMMER").is_null()).group_by("BEDP_MAN").agg(pl.len().alias("count"))
|
||||
# %%
|
||||
print(len(df.filter(pl.col("MELDENUMMER") == 18)))
|
||||
# df.filter(pl.col("MELDENUMMER") == 18).filter((pl.col("BEDP_MENGE_BEDARF_VM").is_not_null()) & (pl.col("BEDP_MENGE_BEDARF_VM") > 0))
|
||||
# %%
|
||||
@@ -452,7 +470,7 @@ def workflow_900(
|
||||
def workflow_910(
|
||||
pipe_result: types.PipelineResult,
|
||||
) -> types.PipelineResult:
|
||||
filter_mandant = pl.col("BEDP_MAN").is_in((1, 90))
|
||||
filter_mandant = pl.col("MANDFUEHR").is_in((1, 90))
|
||||
filter_ignore_MNR26 = pl.col("MELDENUMMER") != 26
|
||||
|
||||
res = _apply_several_filters(
|
||||
@@ -483,7 +501,7 @@ def workflow_100_umbreit(
|
||||
vm_criterion: str,
|
||||
) -> types.PipelineResult:
|
||||
filter_meldenummer = pl.col("MELDENUMMER") == 18
|
||||
filter_mandant = pl.col("BEDP_MAN") == 1
|
||||
filter_mandant = pl.col("MANDFUEHR") == 1
|
||||
filter_number_vm = pl.col(vm_criterion) > 0
|
||||
|
||||
res = _apply_several_filters(
|
||||
@@ -515,7 +533,7 @@ def workflow_100_petersen(
|
||||
|
||||
# // WDB branch
|
||||
filter_meldenummer = pl.col("MELDENUMMER") == 18
|
||||
filter_mandant = pl.col("BEDP_MAN") == 90
|
||||
filter_mandant = pl.col("MANDFUEHR") == 90
|
||||
filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070))
|
||||
filter_number_vm = pl.col(vm_criterion) > 0
|
||||
|
||||
@@ -540,7 +558,7 @@ def workflow_100_petersen(
|
||||
|
||||
# order quantity 0, no further action in other WFs
|
||||
filter_meldenummer = pl.col("MELDENUMMER") == 18
|
||||
filter_mandant = pl.col("BEDP_MAN") == 90
|
||||
filter_mandant = pl.col("MANDFUEHR") == 90
|
||||
filter_WDB = pl.col("VERLAGSNR").is_in((76008, 76070))
|
||||
filter_number_vm = pl.col(vm_criterion) == 0
|
||||
|
||||
@@ -565,7 +583,7 @@ def workflow_100_petersen(
|
||||
|
||||
# // other branch
|
||||
filter_meldenummer = pl.col("MELDENUMMER") == 18
|
||||
filter_mandant = pl.col("BEDP_MAN") == 90
|
||||
filter_mandant = pl.col("MANDFUEHR") == 90
|
||||
filter_number_vm = pl.col(vm_criterion) > 0
|
||||
|
||||
res = _apply_several_filters(
|
||||
@@ -591,7 +609,7 @@ def workflow_100_petersen(
|
||||
|
||||
# %%
|
||||
# SAVING/LOADING
|
||||
p_save = Path.cwd() / "raw_data_from_sql_query_20251203-3.arrow"
|
||||
p_save = Path.cwd() / "raw_data_from_sql_query_20251211-1.arrow"
|
||||
df = pl.read_ipc(p_save)
|
||||
print(f"Number of entries: {len(df)}")
|
||||
|
||||
@@ -648,7 +666,6 @@ pipe_res.results
|
||||
# raw_data.filter(pl.col("BEDARFNR") == 166982).filter(pl.col("BEDP_SEQUENZ") == 1)
|
||||
# %%
|
||||
pipe_res.open.filter(pl.col("BEDP_MENGE_BEDARF_VM") > pl.col("MENGE_VORMERKER"))
|
||||
# print(f"Base data and pipe result in line: {}")
|
||||
# %%
|
||||
pipe_res = workflow_910(pipe_res)
|
||||
print(f"Length of base data: {len(raw_data):>18}")
|
||||
|
||||
Reference in New Issue
Block a user