From e8f3a7aea8cdf5456414e2af080f811b5b274eb3 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 11 Apr 2025 12:44:20 +0200 Subject: [PATCH] adapt forecast dataframe to be compatible with pipeline output --- src/delta_barth/analysis/forecast.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/delta_barth/analysis/forecast.py b/src/delta_barth/analysis/forecast.py index 90c8dec..239aa64 100644 --- a/src/delta_barth/analysis/forecast.py +++ b/src/delta_barth/analysis/forecast.py @@ -225,7 +225,7 @@ def _process_sales( future_dates = pd.date_range( start=last_date + pd.DateOffset(months=1), periods=6, freq="MS" ) - forecast = pd.DataFrame({"datum": future_dates.strftime("%m.%Y")}).set_index("datum") + forecast = pd.DataFrame({"datum": future_dates}).set_index("datum") # Randomized Search kfold = KFold(n_splits=5, shuffle=True) @@ -268,6 +268,12 @@ def _process_sales( # (i for i, date in enumerate(dates) if date >= starting_date), len(dates) - 1 # ) # print("start idx: ", start_index, "length dates: ", len(dates)) + # starting_date = datetime.datetime.now() - relativedelta(months=36) + starting_date = dates.max() - relativedelta(months=36) + start_index = next( + (i for i, date in enumerate(dates) if date >= starting_date), len(dates) - 1 + ) + print("start idx: ", start_index, "length dates: ", len(dates)) def get_index_date( dates: pd.DatetimeIndex, @@ -352,6 +358,9 @@ def _process_sales( ) y_future = best_estimator.predict(X_future) # type: ignore forecast["vorhersage"] = y_future + forecast["jahr"] = forecast.index.year # type: ignore + forecast["monat"] = forecast.index.month # type: ignore + forecast = forecast.reset_index(drop=True) best_score_mae = best_score_mae if not math.isinf(best_score_mae) else None