generated from dopt-python/py311
aggregates for production orders
This commit is contained in:
114
prototypes/02-2_aggregates.py
Normal file
114
prototypes/02-2_aggregates.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# %%
|
||||
import datetime
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
import external_code
|
||||
import polars as pl
|
||||
import sqlalchemy as sql
|
||||
|
||||
from wattanalyse import db
|
||||
|
||||
importlib.reload(db)
|
||||
importlib.reload(external_code)
|
||||
# %%
|
||||
# Project root: one directory above the folder that contains this script.
PROJECT_BASE = Path(__file__).parents[1]
DATA_PTH = PROJECT_BASE / "data"
# Explicit check instead of ``assert`` so it still runs when Python is started with ``-O``.
if not DATA_PTH.exists():
    raise FileNotFoundError(f"Data directory does not exist: {DATA_PTH}")

# %%
# // load data
target = DATA_PTH / "PSM_20260507.arrow"
# Scanned lazily; the data is only materialised where ``.collect()`` is called later on.
data_raw = pl.scan_ipc(target)
|
||||
|
||||
# %%
|
||||
# 0. read data (from customer's database)
|
||||
# 1. cleanup obtained new data
|
||||
# ~~2. load data from internal database~~
|
||||
# ~~3. integrate with new data (whole snapshot)~~
|
||||
# 2. process on order level
|
||||
# 3. save results to internal database
|
||||
# 4. post-process results
|
||||
# 5. write to external database
|
||||
|
||||
# // (1) cleanup obtained new data
|
||||
# load data from internal database
|
||||
# integrate with new data (whole snapshot)
|
||||
res = external_code.preprocess_psm(data_raw)
|
||||
data = res.data
|
||||
|
||||
print(f"Data:\n{data.collect()}\n\n---\n\nFiltered:\n{res.filtered}")
|
||||
|
||||
# %%
|
||||
# // (2) processing order level
|
||||
df = external_code.process_order_level(data)
|
||||
|
||||
|
||||
# ?? What if "Konfektionär" is NULL?
|
||||
# If this is NULL, then the aggregates for "Konfektionär" will not work. Instead, they are
|
||||
# calculated for all NULL entries which might incorporate different production orders which
|
||||
# belong to different "Konfektionär". Thus, these values will be calculated, but should not be
|
||||
# considered.
|
||||
|
||||
# %%
|
||||
# // (3) save results to internal database
|
||||
external_code.dump_order_level_to_internal_database_wipe(df)
|
||||
# %%
|
||||
# now load data from database
|
||||
df = external_code.load_order_level_from_internal_database()
|
||||
df
|
||||
# %%
|
||||
tmp = df.clone()
|
||||
|
||||
# The early/late aggregates need a row filter. Two variants are available: a plain
# sign test on the deviation in days (< 0 early, > 0 late), or the Boolean flag
# columns that are defined by the user-specified boundaries.
USE_BOUNDARIES = False
filter_date_deviation_early: pl.Expr = (
    pl.col("Terminunterschreitung")
    if USE_BOUNDARIES
    else pl.col("Terminabweichung_Anzahl_Tage") < 0
)
filter_date_deviation_late: pl.Expr = (
    pl.col("Terminüberschreitung")
    if USE_BOUNDARIES
    else pl.col("Terminabweichung_Anzahl_Tage") > 0
)
|
||||
|
||||
|
||||
def _round_to_int(expr: pl.Expr) -> pl.Expr:
    """Round *expr* half away from zero and cast the result to ``Int64``."""
    return expr.round(mode="half_away_from_zero").cast(pl.Int64)


# Shared column expression for all aggregates based on the date deviation in days.
_deviation_days = pl.col("Terminabweichung_Anzahl_Tage")

tmp.select(
    # Magnitude of the mean deviation over the rows selected by the "early" filter.
    _round_to_int(
        _deviation_days.filter(filter_date_deviation_early).mean().abs()
    ).alias("Mittlere_Tage_Unterschreitung"),
    # Magnitude of the mean deviation over the rows selected by the "late" filter.
    _round_to_int(
        _deviation_days.filter(filter_date_deviation_late).mean().abs()
    ).alias("Mittlere_Tage_Ueberschreitung"),
    # Standard deviation with ddof=1 over all rows; left unrounded.
    _deviation_days.std(ddof=1).alias("Standardabweichung_Lieferterminabweichung"),
    _round_to_int(pl.col("Import-Ist_Anzahl_Aenderungen").mean().abs()).alias(
        "Mittlere_Anzahl_Anpassungen_Liefertermin"
    ),
    # The list column is exploded first so the mean runs over the individual values.
    _round_to_int(
        pl.col("Tage_zu_letzter_PSM_Historie").list.explode().mean().abs()
    ).alias("Mittlere_Abstaende_PSM"),
    # No ``abs()`` here, matching the original expression chain.
    _round_to_int(pl.col("Durchlaufzeit_Anzahl_Tage").mean()).alias(
        "Mittlere_Durchlaufzeit"
    ),
)
|
||||
|
||||
# %%
|
||||
Reference in New Issue
Block a user