refactoring and enhanced user configuration

This commit is contained in:
2026-06-11 09:46:14 +02:00
parent 2a6777becc
commit 7f864bc76b
5 changed files with 94 additions and 83 deletions

View File

@@ -2,7 +2,6 @@ from __future__ import annotations
import dataclasses as dc
import datetime
import enum
import json
import warnings
from typing import TYPE_CHECKING, Any, Final, TypeAlias, cast
@@ -12,21 +11,13 @@ import sqlalchemy as sql
from dopt_basics.datastructures import flatten
from wattanalyse import db
from wattanalyse.constants import QualityPsm
from wattanalyse.types import SqlInsertStmts, SqlStatement
if TYPE_CHECKING:
from oracledb import Connection as OracleConnection
from polars._typing import SchemaDict
# 1. cleanup obtained new data
# ~~2. load data from internal database~~
# ~~3. integrate with new data (whole snapshot)~~
# 2. process on order level
# 3. save results to internal database
# 4. post-process results
# 5. write to external database
# Alias of ``str`` used to annotate values that hold SQL statement text.
SqlStatement: TypeAlias = str
@dc.dataclass(slots=True, eq=False)
class PreProcessResult:
@@ -36,22 +27,10 @@ class PreProcessResult:
# Spelling variants (lower-case, upper-case, capitalised) of the generic
# index-like column names "id", "index" and "idx".
# NOTE(review): presumably these columns are dropped from loaded data; the
# usage is outside this view -- confirm against the callers.
# ``cast`` only informs the type checker; the ``type: ignore`` silences the
# checker on the ``flatten`` call, whose typing is not visible here.
DROP_COLUMNS: Final[list[str]] = cast(
    list[str],
    list(flatten(((x.lower(), x.upper(), x.capitalize()) for x in ("id", "index", "idx")))),  # type: ignore
)
@dc.dataclass(slots=True, kw_only=True)
class SqlInsertStmts:
delete: str
insert: str
class QualityPsm(enum.StrEnum):
FEHLEND = enum.auto()
UNPLAUSIBEL = enum.auto()
PLAUSIBEL = enum.auto()
PSM_SCORES: dict[QualityPsm, int] = {
QualityPsm.FEHLEND: 1,
QualityPsm.UNPLAUSIBEL: 0,
@@ -81,6 +60,16 @@ NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4
TAB_NAME_PSM: Final[str] = "EXTERN_PSM"
TAB_NAME_MIS: Final[str] = "EXTERN_MIS"
# Switch selecting how early/late date deviations are detected:
#   True  -> use the columns "Terminunterschreitung" / "Terminüberschreitung"
#            directly (presumably boolean flag columns -- TODO confirm)
#   False -> derive the filters from the sign of
#            "Terminabweichung_Anzahl_Tage" (deviation in days):
#            negative = early, positive = late
USE_BOUNDARIES: Final[bool] = False

# Polars expressions selecting rows with an early / late date deviation.
filter_date_deviation_early: pl.Expr
filter_date_deviation_late: pl.Expr
if USE_BOUNDARIES:
    filter_date_deviation_early = pl.col("Terminunterschreitung")
    filter_date_deviation_late = pl.col("Terminüberschreitung")
else:
    filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0
    filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0
# // (0) load data
def load_PSM_data(
@@ -458,18 +447,6 @@ def load_order_level_from_internal_database() -> pl.DataFrame:
# // (4) post-process results
# Switch selecting how early/late date deviations are detected:
#   True  -> use the columns "Terminunterschreitung" / "Terminüberschreitung"
#            directly (presumably boolean flag columns -- TODO confirm)
#   False -> derive the filters from the sign of
#            "Terminabweichung_Anzahl_Tage" (deviation in days):
#            negative = early, positive = late
USE_BOUNDARIES: Final[bool] = False

# Polars expressions selecting rows with an early / late date deviation.
filter_date_deviation_early: pl.Expr
filter_date_deviation_late: pl.Expr
if USE_BOUNDARIES:
    filter_date_deviation_early = pl.col("Terminunterschreitung")
    filter_date_deviation_late = pl.col("Terminüberschreitung")
else:
    filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0
    filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0
def aggregate_production_orders(
data: pl.LazyFrame,
) -> pl.LazyFrame: