adapt forecast dataframe to be compatible with pipeline output

This commit is contained in:
Florian Förster 2025-04-11 12:44:20 +02:00
parent 8936f798ab
commit e8f3a7aea8

View File

@@ -225,7 +225,7 @@ def _process_sales(
 future_dates = pd.date_range(
 start=last_date + pd.DateOffset(months=1), periods=6, freq="MS"
 )
-forecast = pd.DataFrame({"datum": future_dates.strftime("%m.%Y")}).set_index("datum")
+forecast = pd.DataFrame({"datum": future_dates}).set_index("datum")
 # Randomized Search
 kfold = KFold(n_splits=5, shuffle=True)
@@ -268,6 +268,12 @@ def _process_sales(
 # (i for i, date in enumerate(dates) if date >= starting_date), len(dates) - 1
 # )
 # print("start idx: ", start_index, "length dates: ", len(dates))
+# starting_date = datetime.datetime.now() - relativedelta(months=36)
+starting_date = dates.max() - relativedelta(months=36)
+start_index = next(
+(i for i, date in enumerate(dates) if date >= starting_date), len(dates) - 1
+)
+print("start idx: ", start_index, "length dates: ", len(dates))
 def get_index_date(
 dates: pd.DatetimeIndex,
@@ -352,6 +358,9 @@ def _process_sales(
 )
 y_future = best_estimator.predict(X_future) # type: ignore
 forecast["vorhersage"] = y_future
+forecast["jahr"] = forecast.index.year # type: ignore
+forecast["monat"] = forecast.index.month # type: ignore
+forecast = forecast.reset_index(drop=True)
 best_score_mae = best_score_mae if not math.isinf(best_score_mae) else None