# %%
import datetime
import importlib
from pathlib import Path

import external_code
import polars as pl
import sqlalchemy as sql

from wattanalyse import db

# Re-execute the project modules so that edits made to their source files are picked up
# by the already-running interpreter session (useful when working cell by cell).
# NOTE(review): `db` is reloaded before `external_code`, presumably because the latter
# uses the former -- confirm before changing the order.
importlib.reload(db)
importlib.reload(external_code)
# %%
# Project root: two levels up from this file (the parent of the directory containing it).
PROJECT_BASE = Path(__file__).parents[1]
# Directory that holds the input data files.
DATA_PTH = PROJECT_BASE / "data"
# Fail fast with an explicit exception instead of `assert`: assertions are removed when
# Python runs with `-O`, and the error message here names the missing path.
if not DATA_PTH.exists():
    raise FileNotFoundError(f"Data directory not found: {DATA_PTH}")

# %%
# // load data
# Path of the Arrow IPC input file inside the data directory.
# NOTE(review): the file name presumably encodes the snapshot date -- confirm.
target = DATA_PTH / "PSM_20260507.arrow"
# `scan_ipc` builds a lazy frame; the file contents are only read once it is collected.
data_raw = pl.scan_ipc(target)

# %%
# 0. read data (from customer's database)
# 1. cleanup obtained new data
# ~~2. load data from internal database~~
# ~~3. integrate with new data (whole snapshot)~~
# 2. process on order level
# 3. save results to internal database
# 4. post-process results
# 5. write to external database

# // (1) cleanup obtained new data
# load data from internal database
# integrate with new data (whole snapshot)
# Run the project's PSM preprocessing on the raw lazy frame. The result object exposes
# the cleaned data (`.data`) and the entries that were filtered out (`.filtered`).
res = external_code.preprocess_psm(data_raw)
data = res.data

# Show the cleaned data next to the filtered entries for a quick manual check.
# `data` is still lazy at this point, so it is collected just for printing.
print(f"Data:\n{data.collect()}\n\n---\n\nFiltered:\n{res.filtered}")

# %%
# // (2) processing order level
# Derive the order-level results from the cleaned data via the project helper.
# NOTE(review): the return type is not visible here; later cells treat `df` as a
# polars frame (`.clone()`, `.select()`) -- confirm.
df = external_code.process_order_level(data)


# ?? What if "Konfektionär" is NULL?
# If it is NULL, the aggregates per "Konfektionär" do not work as intended. Instead, they
# are calculated over all NULL entries together, which may include production orders that
# belong to different "Konfektionär" values. Thus, these values will still be calculated,
# but they should not be considered.

# %%
# // (3) save results to internal database
# Persist the order-level results to the internal database.
# NOTE(review): the `_wipe` suffix suggests existing contents are removed first -- confirm
# before running against a database that holds data worth keeping.
external_code.dump_order_level_to_internal_database_wipe(df)
# %%
# now load data from database
# `df` is rebound to what was read back, so the following cells work on the stored state
# rather than on the in-memory result of the processing step.
df = external_code.load_order_level_from_internal_database()
# Bare expression: displays the frame when the cell is run interactively.
df
# %%
# Work on a copy so the frame loaded from the database stays untouched.
tmp = df.clone()

# Two ways to select the rows for the early/late date-deviation aggregates:
# either the sign of the deviation in days (< 0 early, > 0 late) or the Boolean flag
# columns derived from the user-specified boundaries.
USE_BOUNDARIES = False
filter_date_deviation_early: pl.Expr
filter_date_deviation_late: pl.Expr
if USE_BOUNDARIES:
    filter_date_deviation_early = pl.col("Terminunterschreitung")
    filter_date_deviation_late = pl.col("Terminüberschreitung")
else:
    filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0
    filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0


def _rounded_mean(expr: pl.Expr, *, absolute: bool = True) -> pl.Expr:
    """Return the mean of *expr* as a whole number of type ``Int64``.

    The mean is rounded half away from zero before the cast, so the cast does not
    truncate a fractional part.

    Args:
        expr: Expression whose values are averaged.
        absolute: If true, take the absolute value of the mean before rounding.

    Returns:
        Expression that yields the rounded mean as ``Int64``.
    """
    mean = expr.mean()
    if absolute:
        mean = mean.abs()
    return mean.round(mode="half_away_from_zero").cast(pl.Int64)


# Bare expression: the one-row result is displayed when the cell is run interactively.
tmp.select(
    # Mean number of days for the rows selected as "early" (magnitude only).
    _rounded_mean(
        pl.col("Terminabweichung_Anzahl_Tage").filter(filter_date_deviation_early)
    ).alias("Mittlere_Tage_Unterschreitung"),
    # Mean number of days for the rows selected as "late".
    _rounded_mean(
        pl.col("Terminabweichung_Anzahl_Tage").filter(filter_date_deviation_late)
    ).alias("Mittlere_Tage_Ueberschreitung"),
    # Sample standard deviation (ddof=1) of the deviation over all rows.
    pl.col("Terminabweichung_Anzahl_Tage")
    .std(ddof=1)
    .alias("Standardabweichung_Lieferterminabweichung"),
    _rounded_mean(pl.col("Import-Ist_Anzahl_Aenderungen")).alias(
        "Mittlere_Anzahl_Anpassungen_Liefertermin"
    ),
    # List column: flatten all list entries before averaging.
    _rounded_mean(pl.col("Tage_zu_letzter_PSM_Historie").list.explode()).alias(
        "Mittlere_Abstaende_PSM"
    ),
    # Unlike the aggregates above, no absolute value is taken for this one.
    _rounded_mean(pl.col("Durchlaufzeit_Anzahl_Tage"), absolute=False).alias(
        "Mittlere_Durchlaufzeit"
    ),
)

# %%