generated from dopt-python/py311
basic steps toward an architecture concept
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
# %%
|
# %%
|
||||||
|
import enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import polars as pl
|
import polars as pl
|
||||||
@@ -19,8 +20,15 @@ assert data_t1_PSM.exists()
|
|||||||
# // MIS-Aufträge
|
# // MIS-Aufträge
|
||||||
pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";")
|
pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";")
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# // PSM
|
# // PSM
|
||||||
|
class QualityPsm(enum.StrEnum):
|
||||||
|
FEHLEND = enum.auto()
|
||||||
|
UNPLAUSIBEL = enum.auto()
|
||||||
|
PLAUSIBEL = enum.auto()
|
||||||
|
|
||||||
|
|
||||||
schema_PSM: dict[str, type[pl.DataType]] = {
|
schema_PSM: dict[str, type[pl.DataType]] = {
|
||||||
"VK Auftrag": pl.UInt32,
|
"VK Auftrag": pl.UInt32,
|
||||||
"Artikelbez.": pl.String,
|
"Artikelbez.": pl.String,
|
||||||
@@ -134,8 +142,6 @@ for idx, entry in enumerate(series, start=1):
|
|||||||
# %%
|
# %%
|
||||||
series[1]
|
series[1]
|
||||||
# %%
|
# %%
|
||||||
tmp.columns
|
|
||||||
# %%
|
|
||||||
tmp = psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort(
|
tmp = psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort(
|
||||||
"PSM gemeldet am", descending=False
|
"PSM gemeldet am", descending=False
|
||||||
)
|
)
|
||||||
@@ -156,7 +162,7 @@ plausi_features_endpoint_only = [
|
|||||||
"Teile verpackt in Karton",
|
"Teile verpackt in Karton",
|
||||||
]
|
]
|
||||||
plausi_features = plausi_features_all
|
plausi_features = plausi_features_all
|
||||||
# plausi_features = plausi_features_endpoint_only
|
plausi_features = plausi_features_endpoint_only
|
||||||
# %%
|
# %%
|
||||||
IDX = None
|
IDX = None
|
||||||
if IDX is None:
|
if IDX is None:
|
||||||
@@ -169,36 +175,40 @@ print(tmp_1)
|
|||||||
tmp_1 = tmp_1.with_columns(
|
tmp_1 = tmp_1.with_columns(
|
||||||
pl.all_horizontal(pl.col("*").is_null() | (pl.col("*") == 0)).alias("is_empty")
|
pl.all_horizontal(pl.col("*").is_null() | (pl.col("*") == 0)).alias("is_empty")
|
||||||
)
|
)
|
||||||
# %%
|
|
||||||
# tmp_1 = tmp_1.transpose()
|
|
||||||
# %%
|
|
||||||
# tmp_1.shift(1)
|
|
||||||
|
|
||||||
# %%
|
|
||||||
conditions = [
|
conditions = [
|
||||||
pl.col(plausi_features[i]) >= pl.col(plausi_features[i + 1])
|
pl.col(plausi_features[i]) >= pl.col(plausi_features[i + 1])
|
||||||
for i in range(len(plausi_features) - 1)
|
for i in range(len(plausi_features) - 1)
|
||||||
]
|
]
|
||||||
|
|
||||||
# 4. Filter anwenden
|
df_marked = tmp_1.with_columns(
|
||||||
# pl.all_horizontal stellt sicher, dass die Bedingung für JEDES Paar in der Zeile stimmt
|
|
||||||
df_markiert = tmp_1.with_columns(
|
|
||||||
pl.when(pl.all_horizontal(conditions) | pl.col("is_empty"))
|
pl.when(pl.all_horizontal(conditions) | pl.col("is_empty"))
|
||||||
.then(pl.lit(True))
|
.then(pl.lit(True))
|
||||||
.otherwise(pl.lit(False))
|
.otherwise(pl.lit(False))
|
||||||
.alias("Produktionsstückzahlen_valide")
|
.alias("Produktionsstückzahlen_valide")
|
||||||
)
|
)
|
||||||
print(df_markiert)
|
# print(df_marked)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_score = df_marked.with_columns(
|
||||||
|
pl.when(pl.col("is_empty"))
|
||||||
|
.then(pl.lit(QualityPsm.FEHLEND))
|
||||||
|
.when(pl.col("Produktionsstückzahlen_valide"))
|
||||||
|
.then(pl.lit(QualityPsm.PLAUSIBEL))
|
||||||
|
.otherwise(pl.lit(QualityPsm.UNPLAUSIBEL))
|
||||||
|
.alias("Qualität Produktionsfortschritt")
|
||||||
|
)
|
||||||
|
print(df_score)
|
||||||
|
|
||||||
# df_valide = tmp_1.filter(pl.all_horizontal(conditions))
|
# df_valide = tmp_1.filter(pl.all_horizontal(conditions))
|
||||||
# df_invalide = tmp_1.filter(
|
# df_invalide = tmp_1.filter(
|
||||||
# ~pl.all_horizontal(conditions)
|
# ~pl.all_horizontal(conditions)
|
||||||
# ) # Das Tilde-Zeichen ~ bedeutet "NOT"
|
# ) # Das Tilde-Zeichen ~ bedeutet "NOT"
|
||||||
|
|
||||||
# print("--- Valide Zeilen ---")
|
# print("--- valid rows ---")
|
||||||
# print(df_valide)
|
# print(df_valide)
|
||||||
|
|
||||||
# print("\n--- Invalide Zeilen ---")
|
# print("\n--- invalid rows ---")
|
||||||
# print(df_invalide)
|
# print(df_invalide)
|
||||||
|
|
||||||
|
|
||||||
@@ -226,3 +236,50 @@ print(df_valide)
|
|||||||
print("\n--- Invalide Zeilen ---")
|
print("\n--- Invalide Zeilen ---")
|
||||||
print(df_invalide)
|
print(df_invalide)
|
||||||
# %%
|
# %%
|
||||||
|
# // principle of aggregated data in Polars
|
||||||
|
# map the database structure to a Polars dataframe and just insert or update the
|
||||||
|
# corresponding entries of the defined database table
|
||||||
|
# We use an upsert strategy, keep local copies of the data and merge them with new entries.
|
||||||
|
# This ensures that we always have a clean and complete history.
|
||||||
|
|
||||||
|
# 1. Test data: order 1 is valid, order 2 contains the invalid example
|
||||||
|
df = pl.DataFrame(
|
||||||
|
{
|
||||||
|
"auftrag_id": [1, 2],
|
||||||
|
"EP-1": [[0, 100, 100, 100], [0, 0, 100, 100]],
|
||||||
|
"EP-2": [[0, 0, 100, 100], [0, 100, 100, 100]], # Auftrag 2 kippt hier bei Index 1!
|
||||||
|
"EP-3": [[0, 0, 0, 100], [0, 0, 0, 100]],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
df.head()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
ep_spalten = ["EP-1", "EP-2", "EP-3"]
|
||||||
|
|
||||||
|
# --- STEP 1: Explode the list columns in lockstep ---
|
||||||
|
# Polars temporarily turns the lists back into "flat" rows while keeping the auftrag_id
|
||||||
|
df_flach = df.select(["auftrag_id"] + ep_spalten).explode(ep_spalten)
|
||||||
|
df_flach
|
||||||
|
# %%
|
||||||
|
|
||||||
|
# --- STEP 2: Apply our familiar pairwise comparison logic ---
|
||||||
|
bedingungen = [
|
||||||
|
pl.col(ep_spalten[i]) >= pl.col(ep_spalten[i + 1]) for i in range(len(ep_spalten) - 1)
|
||||||
|
]
|
||||||
|
|
||||||
|
# For every row (every point in time), check whether the pattern holds
|
||||||
|
df_flach = df_flach.with_columns(pl.all_horizontal(bedingungen).alias("zeitpunkt_valide"))
|
||||||
|
df_flach
|
||||||
|
# %%
|
||||||
|
# --- STEP 3: Aggregate back to order level ---
|
||||||
|
# An order is fully valid only if EVERY SINGLE point in time was valid (.all())
|
||||||
|
df_status = df_flach.group_by("auftrag_id").agg(
|
||||||
|
pl.col("zeitpunkt_valide").all().alias("ist_valide")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# --- STEP 4: Join the result onto the original dataframe ---
|
||||||
|
df_final = df.join(df_status, on="auftrag_id", how="left")
|
||||||
|
|
||||||
|
print(df_final)
|
||||||
|
# %%
|
||||||
|
|||||||
Reference in New Issue
Block a user