generated from dopt-python/py311
76 lines
1.7 KiB
Python
76 lines
1.7 KiB
Python
# %%
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
import polars as pl
|
|
|
|
# %%
|
|
# Directory holding the CSV export, resolved relative to the current working directory.
f_data = Path.cwd() / "../data/20251105/"

# %%
# Sorted so the listing is reproducible; plain glob order depends on the filesystem.
files = tuple(sorted(f_data.glob("*.csv")))
files

# %%
file_pattern = "*aufpauf*"
# Pick the first match in sorted order so the same file is selected on every run,
# and fail with a descriptive error instead of a bare IndexError when nothing matches.
rel_file = next(iter(sorted(f_data.glob(file_pattern))), None)
if rel_file is None:
    raise FileNotFoundError(f"no file matching {file_pattern!r} found in {f_data}")
rel_file
|
|
# %%
|
|
# Load the semicolon-separated export with pandas; the two date columns are
# parsed as dates with the day given first.
df = pd.read_csv(
    rel_file,
    sep=";",
    parse_dates=["AUFTRAGS_DATUM", "AUFP_DATUM_ANLAGE"],
    dayfirst=True,
)
df.head()

# %%
# Verify what dtype the date parsing actually produced for this column.
df.dtypes["AUFP_DATUM_ANLAGE"]

# %%
# Number of rows.
df.shape[0]

# %%
# Number of distinct TITELNR values (as a 1-tuple shape).
df["TITELNR"].unique().shape

# %%
# Row count per TITELNR.
df["TITELNR"].value_counts()

# %%
# Rows whose TITELNR is 0.
df.loc[df["TITELNR"].eq(0)]
|
|
# %%
|
|
# Independent copy of the full frame for the key analysis. All columns are kept
# (not only AUFTRAGSNUMMER / TITELNR) because the duplicate checks below also read
# AUFTRAGS_ART, EINGANGS_ART and AUFP_VORMERKUNG.
prim_keys = df.copy()

# %%
# Distinct (AUFTRAGSNUMMER, TITELNR) combinations as a set of plain tuples.
unique_prim_key_pairs = set(
    prim_keys[["AUFTRAGSNUMMER", "TITELNR"]].itertuples(index=False, name=None)
)

# %%
len(unique_prim_key_pairs)

# %%
# Rows that repeat an already-seen (AUFTRAGSNUMMER, TITELNR) pair; the first
# occurrence of each pair is not included (pandas default keep="first").
duplicates = prim_keys.loc[prim_keys.duplicated(subset=["AUFTRAGSNUMMER", "TITELNR"])]
pd.unique(duplicates["AUFTRAGS_ART"])

# %%
pd.unique(duplicates["EINGANGS_ART"])

# %%
# Duplicate rows flagged with AUFP_VORMERKUNG == "J".
duplicates.loc[duplicates["AUFP_VORMERKUNG"].eq("J")]
|
|
# %%
|
|
# Check: rows with AUFTRAGS_ART == 99 are expected to always have
# AUFP_VORMERKUNG missing (NaN). If that holds, the printed row count and the
# NaN count displayed below are equal.
filt = df.loc[df["AUFTRAGS_ART"].eq(99)]
print(filt.shape[0])
filt["AUFP_VORMERKUNG"].isna().sum()
|
|
# %%
|
|
# %%
|
|
# All orders of one specific title number that are flagged AUFP_VORMERKUNG == "J"
# and dated at or after the cut-off timestamp, newest first.
dt = pd.Timestamp(2025, 11, 5, 13)

filt = df.loc[
    (df["TITELNR"] == 6315273)
    & (df["AUFP_VORMERKUNG"] == "J")
    & (df["AUFTRAGS_DATUM"] >= dt)
].sort_values("AUFTRAGS_DATUM", ascending=False)

# Sum of AUFP_MENGE_AUFTRAG over the selected rows.
order_size = filt["AUFP_MENGE_AUFTRAG"].sum()
order_size
|
|
# %%
|
|
|
|
# %%
|
|
# Re-read the same file with polars.
# NOTE: this rebinds `df` from the pandas DataFrame above to a polars DataFrame,
# so the pandas cells must be re-run from the load cell before being used again.
df = pl.read_csv(
    source=rel_file,
    separator=";",
    try_parse_dates=True,
    infer_schema=True,
    # Scan up to 100,000 rows when inferring column dtypes.
    infer_schema_length=100_000,
)
df

# %%
# Order number together with the day-of-month of the order date.
res = df.select(
    "AUFTRAGSNUMMER",
    day_num=pl.col("AUFTRAGS_DATUM").dt.day(),
)
res
|
|
# %%
|