generated from dopt-python/py311
add all prototype and data analysis code
This commit is contained in:
75
data_analysis/01_analyse_data.py
Normal file
75
data_analysis/01_analyse_data.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# %%
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import polars as pl
|
||||
|
||||
# %%
|
||||
# Directory that is searched for CSV files, given relative to the current
# working directory (so the result depends on where the script is started).
f_data = Path.cwd() / "../data/20251105/"
# %%
# All CSV files in the data directory, shown for a quick overview.
files = tuple(f_data.glob("*.csv"))
files
# %%
file_pattern = "*aufpauf*"
# glob() yields entries in arbitrary order, so sort to make the selection
# reproducible when more than one file matches the pattern.
matching_files = sorted(f_data.glob(file_pattern))
if not matching_files:
    # Fail with a message naming the pattern and directory instead of the
    # bare IndexError that indexing an empty result would produce.
    raise FileNotFoundError(
        f"no file matching {file_pattern!r} found in {f_data}"
    )
rel_file = matching_files[0]
rel_file
|
||||
# %%
|
||||
# Load the selected file with pandas. The file is semicolon-separated and the
# two listed columns are parsed as dates with the day given first.
date_columns = ["AUFTRAGS_DATUM", "AUFP_DATUM_ANLAGE"]
df = pd.read_csv(
    rel_file,
    sep=";",
    parse_dates=date_columns,
    dayfirst=True,
)
df.head()
|
||||
# %%
|
||||
# Check which dtype the date parsing produced for this column.
df.dtypes["AUFP_DATUM_ANLAGE"]
# %%
# Number of rows in the frame.
df.shape[0]
# %%
# Shape of the array of distinct TITELNR values.
df["TITELNR"].unique().shape
# %%
# Frequency of every TITELNR value.
df["TITELNR"].value_counts()
# %%
# Rows whose TITELNR equals 0.
df[df["TITELNR"] == 0]
|
||||
# %%
|
||||
# Work on a full copy so the loaded frame stays untouched.
prim_keys = df.copy()
# %%
# Distinct (AUFTRAGSNUMMER, TITELNR) combinations present in the data.
key_columns = ["AUFTRAGSNUMMER", "TITELNR"]
unique_prim_key_pairs = set(zip(*(prim_keys[col] for col in key_columns)))
# %%
len(unique_prim_key_pairs)
# %%
# Rows whose key pair already appeared earlier in the frame (pandas marks
# every repeat after the first occurrence by default).
is_repeated_key = prim_keys.duplicated(subset=key_columns)
duplicates = prim_keys[is_repeated_key]
duplicates["AUFTRAGS_ART"].unique()
# %%
duplicates["EINGANGS_ART"].unique()
# %%
# Repeated-key rows that carry the flag value "J" in AUFP_VORMERKUNG.
duplicates[duplicates["AUFP_VORMERKUNG"] == "J"]
|
||||
# %%
|
||||
# Hypothesis to check: rows with AUFTRAGS_ART == 99 always have a missing
# (NaN) AUFP_VORMERKUNG. Compare the printed row count with the NaN count.
is_type_99 = df["AUFTRAGS_ART"] == 99
filt = df[is_type_99]
print(len(filt))
filt["AUFP_VORMERKUNG"].isna().sum()
|
||||
# %%
|
||||
# %%
|
||||
# One specific title number: keep its rows flagged "J" in AUFP_VORMERKUNG
# that were ordered at or after the cutoff timestamp, newest first, and add
# up the ordered quantities.
dt = pd.Timestamp(year=2025, month=11, day=5, hour=13)

filt = (
    df.loc[df["TITELNR"] == 6315273]
    .loc[lambda frame: frame["AUFP_VORMERKUNG"] == "J"]
    .loc[lambda frame: frame["AUFTRAGS_DATUM"] >= dt]
    .sort_values("AUFTRAGS_DATUM", ascending=False)
)
order_size = filt["AUFP_MENGE_AUFTRAG"].sum()
order_size
|
||||
# %%
|
||||
|
||||
# %%
|
||||
# Load the same file with polars. The result is bound to its own name so the
# pandas frame `df` used by the cells above is not overwritten; those cells
# can still be re-run after this one.
df_pl = pl.read_csv(
    rel_file,
    separator=";",
    try_parse_dates=True,
    infer_schema=True,
    # Look at many rows when inferring column types.
    infer_schema_length=100_000,
)
df_pl
# %%
# Order number together with the day-of-month of the order date.
res = df_pl.select(
    pl.col("AUFTRAGSNUMMER"),
    pl.col("AUFTRAGS_DATUM").dt.day().alias("day_num"),
)
res
|
||||
# %%
|
||||
Reference in New Issue
Block a user