diff --git a/src/delta_barth/analysis/forecast.py b/src/delta_barth/analysis/forecast.py index a9cf701..1cd3fd9 100644 --- a/src/delta_barth/analysis/forecast.py +++ b/src/delta_barth/analysis/forecast.py @@ -218,7 +218,8 @@ def _process_sales( [(year, month) for year in years for month in range(1, 13) if (year < current_year or (year == current_year and month <= current_month))], columns=["jahr", "monat"] ) - monthly_sum = pd.merge(all_month_year_combinations, old_monthly_sum, on=["jahr", "monat"], how='left') + monthly_sum = pd.merge(all_month_year_combinations, old_monthly_sum, on=["jahr", "monat"], how="left") + monthly_sum[SALES_FEAT] = monthly_sum[SALES_FEAT].fillna(0) monthly_sum[DATE_FEAT] = ( monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str) ) @@ -228,7 +229,6 @@ def _process_sales( features = ["jahr", "monat"] target = SALES_FEAT - # ?? --- new: dates and forecast last_date = pd.to_datetime(datetime.datetime.now().strftime("%m.%Y"), format="%m.%Y") future_dates = pd.date_range( start=last_date + pd.DateOffset(months=1), periods=6, freq="MS" @@ -248,7 +248,6 @@ def _process_sales( "early_stopping_rounds": [20, 50], } - # ?? --- new: best_estimator (internal usage only) best_estimator = None best_params: BestParametersXGBRegressor | None = None best_score_mae: float | None = float("inf") @@ -285,9 +284,8 @@ def _process_sales( X_train, X_test = train[features], test[features] y_train, y_test = train[target], test[target] - # ?? --- new: adapted condition to fit new for-loop # test set size fixed at 6 --> first iteration: baseline - 6 entries - # for each new year 10 new data points needed + # for each new year 10 new data points (i.e., sales strictly positive) needed if len(train[train[SALES_FEAT] > 0]) >= 30 + 10 * add_year: too_few_month_points = False