src/delta_barth/analysis/forecast.py aktualisiert

This commit is contained in:
frasu 2025-04-13 14:46:01 +00:00
parent d507d51136
commit 65e3b6ffab

View File

@@ -218,7 +218,8 @@ def _process_sales(
[(year, month) for year in years for month in range(1, 13) if (year < current_year or (year == current_year and month <= current_month))], columns=["jahr", "monat"] [(year, month) for year in years for month in range(1, 13) if (year < current_year or (year == current_year and month <= current_month))], columns=["jahr", "monat"]
) )
monthly_sum = pd.merge(all_month_year_combinations, old_monthly_sum, on=["jahr", "monat"], how='left') monthly_sum = pd.merge(all_month_year_combinations, old_monthly_sum, on=["jahr", "monat"], how="left")
monthly_sum[SALES_FEAT] = monthly_sum[SALES_FEAT].fillna(0)
monthly_sum[DATE_FEAT] = ( monthly_sum[DATE_FEAT] = (
monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str) monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
) )
@@ -228,7 +229,6 @@ def _process_sales(
features = ["jahr", "monat"] features = ["jahr", "monat"]
target = SALES_FEAT target = SALES_FEAT
# ?? --- new: dates and forecast
last_date = pd.to_datetime(datetime.datetime.now().strftime("%m.%Y"), format="%m.%Y") last_date = pd.to_datetime(datetime.datetime.now().strftime("%m.%Y"), format="%m.%Y")
future_dates = pd.date_range( future_dates = pd.date_range(
start=last_date + pd.DateOffset(months=1), periods=6, freq="MS" start=last_date + pd.DateOffset(months=1), periods=6, freq="MS"
@@ -248,7 +248,6 @@ def _process_sales(
"early_stopping_rounds": [20, 50], "early_stopping_rounds": [20, 50],
} }
# ?? --- new: best_estimator (internal usage only)
best_estimator = None best_estimator = None
best_params: BestParametersXGBRegressor | None = None best_params: BestParametersXGBRegressor | None = None
best_score_mae: float | None = float("inf") best_score_mae: float | None = float("inf")
@@ -285,9 +284,8 @@ def _process_sales(
X_train, X_test = train[features], test[features] X_train, X_test = train[features], test[features]
y_train, y_test = train[target], test[target] y_train, y_test = train[target], test[target]
# ?? --- new: adapted condition to fit new for-loop
# test set size fixed at 6 --> first iteration: baseline - 6 entries # test set size fixed at 6 --> first iteration: baseline - 6 entries
# for each new year 10 new data points needed # for each new year 10 new data points (i.e., sales strictly positive) needed
if len(train[train[SALES_FEAT] > 0]) >= 30 + 10 * add_year: if len(train[train[SALES_FEAT] > 0]) >= 30 + 10 * add_year:
too_few_month_points = False too_few_month_points = False