generated from dopt-python/py311
successful saving/loading of production order aggregate table
This commit is contained in:
@@ -20,7 +20,7 @@ from wattanalyse import db
|
||||
|
||||
@dc.dataclass(slots=True, eq=False)
|
||||
class PreProcessResult:
|
||||
data: pl.DataFrame
|
||||
data: pl.LazyFrame
|
||||
filtered: pl.DataFrame
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4
|
||||
|
||||
# // (1) preprocess
|
||||
def preprocess_psm(
|
||||
data: pl.DataFrame,
|
||||
data: pl.LazyFrame,
|
||||
) -> PreProcessResult:
|
||||
data = data.rename(RENAMING_SCHEME)
|
||||
REGEX_PATTERN = r"^[\s\-#+/$]+$"
|
||||
@@ -69,7 +69,23 @@ def preprocess_psm(
|
||||
.name.keep()
|
||||
)
|
||||
data = data.with_columns(pl.col("Konfektionär").str.strip_chars(" \n\t"))
|
||||
filtered_data = pl.DataFrame(schema=data.schema)
|
||||
filtered_data = pl.LazyFrame(schema=data.collect_schema())
|
||||
|
||||
# drop duplicates
|
||||
# use null count as information measure, least amount of nulls should be contained
|
||||
base_columns = data.columns
|
||||
data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count"))
|
||||
data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False)
|
||||
filtered_data = pl.concat(
|
||||
[
|
||||
filtered_data,
|
||||
data.filter(
|
||||
~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()
|
||||
).select(base_columns),
|
||||
]
|
||||
)
|
||||
data = data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct())
|
||||
data = data.drop("null_count")
|
||||
|
||||
# any NULL values in critical columns
|
||||
NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie")
|
||||
@@ -115,11 +131,13 @@ def preprocess_psm(
|
||||
filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))])
|
||||
data = data.filter(~pl.any_horizontal(cond))
|
||||
|
||||
return PreProcessResult(data=data, filtered=filtered_data)
|
||||
return PreProcessResult(data=data, filtered=filtered_data.collect())
|
||||
|
||||
|
||||
# // (2) process on order level
|
||||
def process_order_level(data: pl.DataFrame) -> pl.DataFrame:
|
||||
def process_order_level(
|
||||
data: pl.LazyFrame,
|
||||
) -> pl.LazyFrame:
|
||||
# ** renaming
|
||||
# data = data.rename(RENAMING_SCHEME) # TODO delete, done in pre-processing
|
||||
data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False)
|
||||
@@ -255,42 +273,67 @@ def process_order_level(data: pl.DataFrame) -> pl.DataFrame:
|
||||
# ** order specific aggregates
|
||||
data = (
|
||||
data.with_columns(
|
||||
pl.when(
|
||||
(pl.col("Liefertermin_Ist").is_not_null())
|
||||
& (pl.col("Liefertermin_Soll").is_not_null())
|
||||
)
|
||||
.then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days())
|
||||
.otherwise(None)
|
||||
(pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll"))
|
||||
.dt.total_days()
|
||||
.alias("Terminabweichung_Anzahl_Tage")
|
||||
)
|
||||
.with_columns(
|
||||
pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION)
|
||||
.then(pl.lit(True))
|
||||
.otherwise(pl.lit(False))
|
||||
.alias("Terminunterschreitung"),
|
||||
pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION)
|
||||
.then(pl.lit(True))
|
||||
.otherwise(pl.lit(False))
|
||||
.alias("Terminüberschreitung"),
|
||||
pl.when(
|
||||
(pl.col("Liefertermin_Ist").is_not_null())
|
||||
& (pl.col("Prod-Start").is_not_null())
|
||||
)
|
||||
.then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days())
|
||||
.otherwise(None)
|
||||
(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION).alias(
|
||||
"Terminunterschreitung"
|
||||
),
|
||||
(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION).alias(
|
||||
"Terminüberschreitung"
|
||||
),
|
||||
(pl.col("Liefertermin_Ist") - pl.col("Prod-Start"))
|
||||
.dt.total_days()
|
||||
.alias("Durchlaufzeit_Anzahl_Tage"),
|
||||
)
|
||||
.with_columns(
|
||||
pl.when(
|
||||
(pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null())
|
||||
& (pl.col("Durchlaufzeit_Anzahl_Tage") < 0)
|
||||
)
|
||||
pl.when(pl.col("Durchlaufzeit_Anzahl_Tage") < 0)
|
||||
.then(None)
|
||||
.otherwise(pl.col("Durchlaufzeit_Anzahl_Tage"))
|
||||
.alias("Durchlaufzeit_Anzahl_Tage")
|
||||
)
|
||||
)
|
||||
|
||||
# data = (
|
||||
# data.with_columns(
|
||||
# pl.when(
|
||||
# (pl.col("Liefertermin_Ist").is_not_null())
|
||||
# & (pl.col("Liefertermin_Soll").is_not_null())
|
||||
# )
|
||||
# .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days())
|
||||
# .otherwise(None)
|
||||
# .alias("Terminabweichung_Anzahl_Tage")
|
||||
# )
|
||||
# .with_columns(
|
||||
# pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION)
|
||||
# .then(pl.lit(True))
|
||||
# .otherwise(pl.lit(False))
|
||||
# .alias("Terminunterschreitung"),
|
||||
# pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION)
|
||||
# .then(pl.lit(True))
|
||||
# .otherwise(pl.lit(False))
|
||||
# .alias("Terminüberschreitung"),
|
||||
# pl.when(
|
||||
# (pl.col("Liefertermin_Ist").is_not_null())
|
||||
# & (pl.col("Prod-Start").is_not_null())
|
||||
# )
|
||||
# .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days())
|
||||
# .otherwise(None)
|
||||
# .alias("Durchlaufzeit_Anzahl_Tage"),
|
||||
# )
|
||||
# .with_columns(
|
||||
# pl.when(
|
||||
# (pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null())
|
||||
# & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0)
|
||||
# )
|
||||
# .then(None)
|
||||
# .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage"))
|
||||
# .alias("Durchlaufzeit_Anzahl_Tage")
|
||||
# )
|
||||
# )
|
||||
|
||||
return data
|
||||
|
||||
|
||||
@@ -313,7 +356,7 @@ def _parse_to_json(
|
||||
|
||||
|
||||
def dump_order_level_to_internal_database_staging(
|
||||
data: pl.DataFrame,
|
||||
data: pl.LazyFrame,
|
||||
) -> None:
|
||||
|
||||
staging_data = data.with_columns(
|
||||
@@ -324,6 +367,7 @@ def dump_order_level_to_internal_database_staging(
|
||||
)
|
||||
.name.keep()
|
||||
)
|
||||
staging_data = staging_data.collect()
|
||||
rows_inserted = staging_data.write_database(
|
||||
"Produktionsauftrag-Einzelsicht_Staging",
|
||||
connection=db.DB_URI,
|
||||
@@ -355,7 +399,7 @@ def dump_order_level_to_internal_database_staging(
|
||||
|
||||
|
||||
def dump_order_level_to_internal_database_wipe(
|
||||
data: pl.DataFrame,
|
||||
data: pl.LazyFrame,
|
||||
) -> None:
|
||||
|
||||
staging_data = data.with_columns(
|
||||
@@ -370,6 +414,7 @@ def dump_order_level_to_internal_database_wipe(
|
||||
with db.ENGINE_INTERNAL.begin() as conn:
|
||||
conn.execute(sql.text('DELETE FROM "Produktionsauftrag-Einzelsicht";'))
|
||||
|
||||
staging_data = staging_data.collect()
|
||||
rows_inserted = staging_data.write_database(
|
||||
"Produktionsauftrag-Einzelsicht",
|
||||
connection=db.DB_URI,
|
||||
|
||||
Reference in New Issue
Block a user