aggregates for production orders

2026-06-08 09:18:09 +02:00
parent c0cb16a893
commit 0ac2689b68
2 changed files with 114 additions and 0 deletions
--- a/prototypes/02-1_integrate_wokflow.py
+++ b/prototypes/02-1_integrate_wokflow.py
--- a/prototypes/02-2_aggregates.py
+++ b/prototypes/02-2_aggregates.py
@@ -0,0 +1,114 @@
+# %%
+import datetime
+import importlib
+from pathlib import Path
+
+import external_code
+import polars as pl
+import sqlalchemy as sql
+
+from wattanalyse import db
+
+importlib.reload(db)
+importlib.reload(external_code)
+# %%
+PROJECT_BASE = Path(__file__).parents[1]  # parent of the directory that contains this file
+DATA_PTH = PROJECT_BASE / "data"
+assert DATA_PTH.exists()  # fail fast if the data folder is missing
+
+# %%
+# // load data
+target = DATA_PTH / "PSM_20260507.arrow"
+data_raw = pl.scan_ipc(target)  # lazy scan of the Arrow IPC file
+
+# %%
+# 0. read data (from customer's database)
+# 1. cleanup obtained new data
+# ~~2. load data from internal database~~
+# ~~3. integrate with new data (whole snapshot)~~
+# 2. process on order level
+# 3. save results to internal database
+# 4. post-process results
+# 5. write to external database
+
+# // (1) cleanup obtained new data
+# load data from internal database
+# integrate with new data (whole snapshot)
+res = external_code.preprocess_psm(data_raw)  # result exposes .data and .filtered (both used below)
+data = res.data
+
+print(f"Data:\n{data.collect()}\n\n---\n\nFiltered:\n{res.filtered}")  # collect() only for display
+
+# %%
+# // (2) processing order level
+df = external_code.process_order_level(data)  # NOTE(review): return type not visible here -- confirm
+
+
+# ?? What if "Konfektionär" is NULL?
+# If it is NULL, the aggregates per "Konfektionär" do not work as intended. Instead, they are
+# calculated over all NULL entries, which might combine different production orders that
+# belong to different "Konfektionär". Thus, these values will be calculated, but should not be
+# considered.
+
+# %%
+# // (3) save results to internal database
+external_code.dump_order_level_to_internal_database_wipe(df)  # NOTE(review): "_wipe" suggests existing rows are replaced -- confirm
+# %%
+# now load data from database
+df = external_code.load_order_level_from_internal_database()  # rebinds df to the data read back
+df  # bare expression; presumably kept for cell output in an interactive session
+# %%
+tmp = df.clone()  # aggregate on a copy of the loaded order-level frame
+
+# two ways to select early/late orders for the date-deviation aggregates: by the sign of the
+# deviation in days (< 0 / > 0) or by the Boolean flags from the user-specified boundaries
+USE_BOUNDARIES = False  # False: sign of the deviation; True: Boolean flag columns
+filter_date_deviation_early: pl.Expr
+filter_date_deviation_late: pl.Expr
+if USE_BOUNDARIES:
+    filter_date_deviation_early = pl.col("Terminunterschreitung")
+    filter_date_deviation_late = pl.col("Terminüberschreitung")
+else:
+    filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0
+    filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0
+
+
+tmp.select(  # one aggregate per alias; the result is not assigned to a name
+    pl.col("Terminabweichung_Anzahl_Tage")
+    .filter(filter_date_deviation_early)
+    .mean()
+    .abs()  # with the sign filter this mean is negative; report it as a positive day count
+    .round(mode="half_away_from_zero")
+    .cast(pl.Int64)
+    .alias("Mittlere_Tage_Unterschreitung"),
+    pl.col("Terminabweichung_Anzahl_Tage")
+    .filter(filter_date_deviation_late)
+    .mean()
+    .abs()
+    .round(mode="half_away_from_zero")
+    .cast(pl.Int64)
+    .alias("Mittlere_Tage_Ueberschreitung"),
+    pl.col("Terminabweichung_Anzahl_Tage")
+    .std(ddof=1)  # ddof=1: sample standard deviation over all rows (no early/late filter)
+    .alias("Standardabweichung_Lieferterminabweichung"),
+    pl.col("Import-Ist_Anzahl_Aenderungen")
+    .mean()
+    .abs()
+    .round(mode="half_away_from_zero")
+    .cast(pl.Int64)
+    .alias("Mittlere_Anzahl_Anpassungen_Liefertermin"),
+    pl.col("Tage_zu_letzter_PSM_Historie")
+    .list.explode()  # flatten the list column so the mean runs over all list entries
+    .mean()
+    .abs()
+    .round(mode="half_away_from_zero")
+    .cast(pl.Int64)
+    .alias("Mittlere_Abstaende_PSM"),
+    pl.col("Durchlaufzeit_Anzahl_Tage")
+    .mean()  # no abs() here, unlike the other means
+    .round(mode="half_away_from_zero")
+    .cast(pl.Int64)
+    .alias("Mittlere_Durchlaufzeit"),
+)
+
+# %%