refactoring
This commit is contained in:
@@ -72,14 +72,14 @@ def _parse_df_to_results(
|
||||
@wrap_result()
|
||||
def _parse_api_resp_to_df_wrapped(
|
||||
resp: SalesPrognosisResponse,
|
||||
) -> pd.DataFrame: # pragma: no cover
|
||||
) -> pd.DataFrame:
|
||||
return _parse_api_resp_to_df(resp)
|
||||
|
||||
|
||||
@wrap_result()
|
||||
def _parse_df_to_api_resp_wrapped(
|
||||
data: pd.DataFrame,
|
||||
) -> SalesPrognosisResponse: # pragma: no cover
|
||||
) -> SalesPrognosisResponse:
|
||||
return _parse_df_to_api_resp(data)
|
||||
|
||||
|
||||
@@ -109,7 +109,7 @@ def _parse_df_to_results_wrapped(
|
||||
# TODO set min number of data points as constant, not parameter
|
||||
|
||||
|
||||
def _preprocess_sales_per_customer(
|
||||
def _preprocess_sales(
|
||||
resp: SalesPrognosisResponse,
|
||||
feature_map: Mapping[str, str],
|
||||
target_features: Set[str],
|
||||
@@ -151,9 +151,8 @@ def _preprocess_sales_per_customer(
|
||||
return pipe
|
||||
|
||||
|
||||
def _process_sales_per_customer(
|
||||
def _process_sales(
|
||||
pipe: PipeResult[SalesPrognosisResultsExport],
|
||||
# company_id: int,
|
||||
min_num_data_points: int = 100,
|
||||
) -> PipeResult[SalesPrognosisResultsExport]:
|
||||
"""n = 1
|
||||
@@ -174,11 +173,9 @@ def _process_sales_per_customer(
|
||||
PipeResult
|
||||
_description_
|
||||
"""
|
||||
# cust_data: CustomerDataSalesForecast = CustomerDataSalesForecast()
|
||||
|
||||
cust_data: CustomerDataSalesForecast = CustomerDataSalesForecast()
|
||||
# filter data
|
||||
# TODO change away from nested DataFrames: just use "f_umsatz_fakt"
|
||||
# TODO with strong type checks
|
||||
data = pipe.data
|
||||
assert data is not None, "processing not existing pipe result"
|
||||
data = data.copy()
|
||||
@@ -208,48 +205,46 @@ def _process_sales_per_customer(
|
||||
if len(df_cust) < min_num_data_points:
|
||||
pipe.fail(STATUS_HANDLER.pipe_states.TOO_FEW_POINTS)
|
||||
return pipe
|
||||
else:
|
||||
# Entwicklung der Umsätze: definierte Zeiträume Monat
|
||||
df_cust["jahr"] = df_cust[DATE_FEAT].dt.year
|
||||
df_cust["monat"] = df_cust[DATE_FEAT].dt.month
|
||||
|
||||
monthly_sum = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
|
||||
monthly_sum[DATE_FEAT] = (
|
||||
monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
|
||||
)
|
||||
monthly_sum[DATE_FEAT] = pd.to_datetime(monthly_sum[DATE_FEAT], format="%m.%Y")
|
||||
monthly_sum = monthly_sum.set_index(DATE_FEAT)
|
||||
# Entwicklung der Umsätze: definierte Zeiträume Monat
|
||||
df_cust["jahr"] = df_cust[DATE_FEAT].dt.year
|
||||
df_cust["monat"] = df_cust[DATE_FEAT].dt.month
|
||||
|
||||
train = monthly_sum.iloc[:-5].copy()
|
||||
test = monthly_sum.iloc[-5:].copy()
|
||||
monthly_sum = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
|
||||
monthly_sum[DATE_FEAT] = (
|
||||
monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
|
||||
)
|
||||
monthly_sum[DATE_FEAT] = pd.to_datetime(monthly_sum[DATE_FEAT], format="%m.%Y")
|
||||
monthly_sum = monthly_sum.set_index(DATE_FEAT)
|
||||
|
||||
features = ["jahr", "monat"]
|
||||
target = SALES_FEAT
|
||||
train = monthly_sum.iloc[:-5].copy()
|
||||
test = monthly_sum.iloc[-5:].copy()
|
||||
|
||||
X_train, y_train = train[features], train[target]
|
||||
X_test, y_test = test[features], test[target]
|
||||
features = ["jahr", "monat"]
|
||||
target = SALES_FEAT
|
||||
|
||||
reg = XGBRegressor(
|
||||
base_score=0.5,
|
||||
booster="gbtree",
|
||||
n_estimators=1000,
|
||||
early_stopping_rounds=50,
|
||||
objective="reg:squarederror",
|
||||
max_depth=3,
|
||||
learning_rate=0.01,
|
||||
)
|
||||
reg.fit(
|
||||
X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=100
|
||||
)
|
||||
X_train, y_train = train[features], train[target]
|
||||
X_test, y_test = test[features], test[target]
|
||||
|
||||
test.loc[:, "vorhersage"] = reg.predict(X_test)
|
||||
test = test.reset_index(drop=True)
|
||||
reg = XGBRegressor(
|
||||
base_score=0.5,
|
||||
booster="gbtree",
|
||||
n_estimators=1000,
|
||||
early_stopping_rounds=50,
|
||||
objective="reg:squarederror",
|
||||
max_depth=3,
|
||||
learning_rate=0.01,
|
||||
)
|
||||
reg.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=100)
|
||||
|
||||
pipe.success(test, STATUS_HANDLER.SUCCESS)
|
||||
return pipe
|
||||
test.loc[:, "vorhersage"] = reg.predict(X_test)
|
||||
test = test.reset_index(drop=True)
|
||||
|
||||
pipe.success(test, STATUS_HANDLER.SUCCESS)
|
||||
return pipe
|
||||
|
||||
|
||||
def _postprocess_sales_per_customer(
|
||||
def _postprocess_sales(
|
||||
pipe: PipeResult[SalesPrognosisResultsExport],
|
||||
feature_map: Mapping[str, str],
|
||||
) -> PipeResult[SalesPrognosisResultsExport]:
|
||||
@@ -287,7 +282,7 @@ def _export_on_fail(
|
||||
return SalesPrognosisResultsExport(response=response, status=status)
|
||||
|
||||
|
||||
def pipeline(
|
||||
def pipeline_sales(
|
||||
session: Session,
|
||||
company_id: int | None = None,
|
||||
start_date: Datetime | None = None,
|
||||
@@ -300,7 +295,7 @@ def pipeline(
|
||||
if status != STATUS_HANDLER.SUCCESS:
|
||||
return _export_on_fail(status)
|
||||
|
||||
pipe = _preprocess_sales_per_customer(
|
||||
pipe = _preprocess_sales(
|
||||
response,
|
||||
feature_map=COL_MAP_SALES_PROGNOSIS,
|
||||
target_features=FEATURES_SALES_PROGNOSIS,
|
||||
@@ -308,14 +303,14 @@ def pipeline(
|
||||
if pipe.status != STATUS_HANDLER.SUCCESS:
|
||||
return _export_on_fail(pipe.status)
|
||||
|
||||
pipe = _process_sales_per_customer(
|
||||
pipe = _process_sales(
|
||||
pipe,
|
||||
min_num_data_points=MIN_NUMBER_DATAPOINTS,
|
||||
)
|
||||
if pipe.status != STATUS_HANDLER.SUCCESS:
|
||||
return _export_on_fail(pipe.status)
|
||||
|
||||
pipe = _postprocess_sales_per_customer(
|
||||
pipe = _postprocess_sales(
|
||||
pipe,
|
||||
feature_map=DualDict(),
|
||||
)
|
||||
|
||||
@@ -11,7 +11,7 @@ def pipeline_sales_forecast(
|
||||
company_id: int | None,
|
||||
start_date: Datetime | None,
|
||||
) -> tuple[JsonResponse, JsonStatus]:
|
||||
result = forecast.pipeline(SESSION, company_id=company_id, start_date=start_date)
|
||||
result = forecast.pipeline_sales(SESSION, company_id=company_id, start_date=start_date)
|
||||
response = JsonResponse(result.response.model_dump_json())
|
||||
status = JsonStatus(result.status.model_dump_json())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user