basic steps for concept of architecture

2026-06-03 16:07:20 +02:00
parent 8c6e36e43d
commit ae4d684d4f
1 changed files with 71 additions and 14 deletions
--- a/prototypes/01_first-look_20260603.py
+++ b/prototypes/01_first-look_20260603.py
@@ -1,4 +1,5 @@
 # %%
+import enum
 from pathlib import Path

 import polars as pl
@@ -19,8 +20,15 @@ assert data_t1_PSM.exists()
 # // MIS-Aufträge
 pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";")

+
 # %%
 # // PSM
+class QualityPsm(enum.StrEnum):
+    FEHLEND = enum.auto()
+    UNPLAUSIBEL = enum.auto()
+    PLAUSIBEL = enum.auto()
+
+
 schema_PSM: dict[str, type[pl.DataType]] = {
    "VK Auftrag": pl.UInt32,
    "Artikelbez.": pl.String,
@@ -134,8 +142,6 @@ for idx, entry in enumerate(series, start=1):
 # %%
 series[1]
 # %%
-tmp.columns
-# %%
 tmp = psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort(
    "PSM gemeldet am", descending=False
 )
@@ -156,7 +162,7 @@ plausi_features_endpoint_only = [
    "Teile verpackt in Karton",
 ]
 plausi_features = plausi_features_all
-# plausi_features = plausi_features_endpoint_only
+plausi_features = plausi_features_endpoint_only
 # %%
 IDX = None
 if IDX is None:
@@ -169,36 +175,40 @@ print(tmp_1)
 tmp_1 = tmp_1.with_columns(
    pl.all_horizontal(pl.col("*").is_null() | (pl.col("*") == 0)).alias("is_empty")
 )
-# %%
-# tmp_1 = tmp_1.transpose()
-# %%
-# tmp_1.shift(1)

-# %%
 conditions = [
    pl.col(plausi_features[i]) >= pl.col(plausi_features[i + 1])
    for i in range(len(plausi_features) - 1)
 ]

-# 4. Filter anwenden
-# pl.all_horizontal stellt sicher, dass die Bedingung für JEDES Paar in der Zeile stimmt
-df_markiert = tmp_1.with_columns(
+df_marked = tmp_1.with_columns(
    pl.when(pl.all_horizontal(conditions) | pl.col("is_empty"))
    .then(pl.lit(True))
    .otherwise(pl.lit(False))
    .alias("Produktionsstückzahlen_valide")
 )
-print(df_markiert)
+# print(df_marked)
+
+# %%
+df_score = df_marked.with_columns(
+    pl.when(pl.col("is_empty"))
+    .then(pl.lit(QualityPsm.FEHLEND))
+    .when(pl.col("Produktionsstückzahlen_valide"))
+    .then(pl.lit(QualityPsm.PLAUSIBEL))
+    .otherwise(pl.lit(QualityPsm.UNPLAUSIBEL))
+    .alias("Qualität Produktionsfortschritt")
+)
+print(df_score)

 # df_valide = tmp_1.filter(pl.all_horizontal(conditions))
 # df_invalide = tmp_1.filter(
 #     ~pl.all_horizontal(conditions)
 # )  # Das Tilde-Zeichen ~ bedeutet "NOT"

-# print("--- Valide Zeilen ---")
+# print("--- valid rows ---")
 # print(df_valide)

-# print("\n--- Invalide Zeilen ---")
+# print("\n--- invalid rows ---")
 # print(df_invalide)


@@ -226,3 +236,50 @@ print(df_valide)
 print("\n--- Invalide Zeilen ---")
 print(df_invalide)
 # %%
+# // principle of aggregated data in Polars
+# map the database structure to a Polars dataframe and just insert or update the
+# corresponding entries of the defined database table
+# We use an upsert strategy, keep local copies of the data and merge them with new entries.
+# This ensures that we always have a clean and complete history.
+
+# 1. Test data: order 1 is valid, order 2 contains the invalid example
+df = pl.DataFrame(
+    {
+        "auftrag_id": [1, 2],
+        "EP-1": [[0, 100, 100, 100], [0, 0, 100, 100]],
+        "EP-2": [[0, 0, 100, 100], [0, 100, 100, 100]],  # Auftrag 2 kippt hier bei Index 1!
+        "EP-3": [[0, 0, 0, 100], [0, 0, 0, 100]],
+    }
+)
+df.head()
+
+# %%
+ep_spalten = ["EP-1", "EP-2", "EP-3"]
+
+# --- STEP 1: explode the list columns in lockstep ---
+# Polars temporarily turns the lists back into "flat" rows while keeping the auftrag_id on every row
+df_flach = df.select(["auftrag_id"] + ep_spalten).explode(ep_spalten)
+df_flach
+# %%
+
+# --- STEP 2: apply the same pairwise logic as before ---
+bedingungen = [
+    pl.col(ep_spalten[i]) >= pl.col(ep_spalten[i + 1]) for i in range(len(ep_spalten) - 1)
+]
+
+# For each row (each point in time), check that every EP column is >= the next one
+df_flach = df_flach.with_columns(pl.all_horizontal(bedingungen).alias("zeitpunkt_valide"))
+df_flach
+# %%
+# --- STEP 3: aggregate back to order level ---
+# An order is fully valid only if EVERY SINGLE point in time was valid (.all())
+df_status = df_flach.group_by("auftrag_id").agg(
+    pl.col("zeitpunkt_valide").all().alias("ist_valide")
+)
+
+
+# --- STEP 4: attach the result to the original dataframe ---
+df_final = df.join(df_status, on="auftrag_id", how="left")
+
+print(df_final)
+# %%