from __future__ import annotations

from collections.abc import Mapping, Set
from datetime import datetime as Datetime
from typing import TYPE_CHECKING, Final

import pandas as pd
from xgboost import XGBRegressor

from delta_barth.analysis import parse
from delta_barth.api.requests import (
    SalesPrognosisResponse,
    SalesPrognosisResults,
    SalesPrognosisResultsExport,
    get_sales_prognosis_data,
)
from delta_barth.constants import (
    COL_MAP_SALES_PROGNOSIS,
    FEATURES_SALES_PROGNOSIS,
    MIN_NUMBER_DATAPOINTS,
)
from delta_barth.errors import STATUS_HANDLER, wrap_result
from delta_barth.types import (
    DualDict,
    PipeResult,
)

if TYPE_CHECKING:
    from delta_barth.api.common import Session
    from delta_barth.types import Status


def _parse_api_resp_to_df(
    resp: SalesPrognosisResponse,
) -> pd.DataFrame:
    """Convert a sales-prognosis API response into a DataFrame.

    Parameters
    ----------
    resp : SalesPrognosisResponse
        Raw API response whose ``daten`` records are extracted.

    Returns
    -------
    pd.DataFrame
        One row per record in ``resp.daten``.
    """
    data = resp.model_dump()["daten"]
    return pd.DataFrame(data)


def _parse_df_to_api_resp(
    data: pd.DataFrame,
) -> SalesPrognosisResponse:
    df_formatted = data.to_dict(orient="records")
    return SalesPrognosisResponse(daten=tuple(df_formatted))  # type: ignore


def _parse_df_to_results(
    data: pd.DataFrame,
) -> SalesPrognosisResults:
    df_formatted = data.to_dict(orient="records")
    return SalesPrognosisResults(daten=tuple(df_formatted))  # type: ignore


@wrap_result()
def _parse_api_resp_to_df_wrapped(
    resp: SalesPrognosisResponse,
) -> pd.DataFrame:
    return _parse_api_resp_to_df(resp)


@wrap_result()
def _parse_df_to_api_resp_wrapped(
    data: pd.DataFrame,
) -> SalesPrognosisResponse:
    return _parse_df_to_api_resp(data)


@wrap_result()
def _parse_df_to_results_wrapped(
    data: pd.DataFrame,
) -> SalesPrognosisResults:
    return _parse_df_to_results(data)


# ------------------------------------------------------------------------------
# Input:
#   DataFrame df with columns f_umsatz_fakt, firmen, art, v_warengrp
#   kunde (must be contained in df['firmen']['firma_refid'])
# Output:
#   Integer umsetzung (forecast possible): 0 yes, 1 no (too little data
#   available), 2 no (data not suitable for forecasting)
#   DataFrame test: year, month, prediction
# ------------------------------------------------------------------------------
# Sales forecast per company
# TODO: check usage of separate exception and handle it in API function
# TODO: set min number of data points as constant, not parameter


def _preprocess_sales(
    resp: SalesPrognosisResponse,
    feature_map: Mapping[str, str],
    target_features: Set[str],
) -> PipeResult[SalesPrognosisResultsExport]:
    """Parse the API response and rename/select the forecast features.

    Parameters
    ----------
    resp : SalesPrognosisResponse
        Raw API response to be converted into a DataFrame.
    feature_map : Mapping[str, str]
        Mapping from API column names to internal feature names.
    target_features : Set[str]
        Features to retain for the forecast.

    Returns
    -------
    PipeResult[SalesPrognosisResultsExport]
        Pipe result carrying the preprocessed DataFrame, or a failure status.
    """
    pipe: PipeResult[SalesPrognosisResultsExport] = PipeResult(None, STATUS_HANDLER.SUCCESS)
    res = _parse_api_resp_to_df_wrapped(resp)
    if res.status != STATUS_HANDLER.SUCCESS:
        pipe.fail(res.status)
        return pipe
    df = res.result
    res = parse.process_features_wrapped(
        df,
        feature_map=feature_map,
        target_features=target_features,
    )
    if res.status != STATUS_HANDLER.SUCCESS:
        pipe.fail(res.status)
        return pipe
    pipe.success(res.unwrap(), res.status)
    return pipe
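
# Hedged sketch: round-tripping a payload through the parse helpers above.
# The record fields mirror the input schema documented in ``_process_sales``;
# the payload itself is an illustrative assumption, not real API data, and
# the models are assumed to coerce plain dicts (pydantic-style).
def _demo_parse_roundtrip() -> None:  # pragma: no cover
    resp = SalesPrognosisResponse(
        daten=(
            {
                "artikel_refid": 1,
                "firma_refid": 7,
                "betrag": 120.0,
                "menge": 3,
                "buchungs_datum": "2024-01-15",
            },
        )
    )  # type: ignore
    df = _parse_api_resp_to_df(resp)
    assert len(df) == 1
    # converting back should preserve the records
    resp_again = _parse_df_to_api_resp(df)
    assert resp_again.model_dump()["daten"] == resp.model_dump()["daten"]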
def _process_sales(
    pipe: PipeResult[SalesPrognosisResultsExport],
    min_num_data_points: int = 100,
) -> PipeResult[SalesPrognosisResultsExport]:
    """Aggregate sales per month and forecast the held-out months via XGBoost.

    Input data fields: ["artikel_refid", "firma_refid", "betrag", "menge",
    "buchungs_datum"] - data already prefiltered if a customer ID was provided
    ("firma_refid" same for all entries).

    Parameters
    ----------
    pipe : PipeResult[SalesPrognosisResultsExport]
        Pipe result carrying the preprocessed sales DataFrame.
    min_num_data_points : int, optional
        Minimum number of transactions required for a forecast, by default 100.

    Returns
    -------
    PipeResult[SalesPrognosisResultsExport]
        Pipe result carrying the forecast DataFrame, or a failure status.
    """
    # filter data
    data = pipe.data
    assert data is not None, "processing non-existent pipe result"
    data = data.copy()

    DATE_FEAT: Final[str] = "buchungs_datum"
    SALES_FEAT: Final[str] = "betrag"

    # keep only positive sales amounts
    df_firma = data[(data[SALES_FEAT] > 0)]
    df_cust = df_firma.copy()
    df_cust = df_cust.sort_values(by=DATE_FEAT).reset_index(drop=True)

    # check data availability
    # TODO: rework criteria
    if len(df_cust) < min_num_data_points:
        pipe.fail(STATUS_HANDLER.pipe_states.TOO_FEW_POINTS)
        return pipe

    # sales development: aggregate amounts over defined monthly periods
    df_cust["jahr"] = df_cust[DATE_FEAT].dt.year
    df_cust["monat"] = df_cust[DATE_FEAT].dt.month
    monthly_sum = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
    monthly_sum[DATE_FEAT] = (
        monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
    )
    monthly_sum[DATE_FEAT] = pd.to_datetime(monthly_sum[DATE_FEAT], format="%m.%Y")
    monthly_sum = monthly_sum.set_index(DATE_FEAT)

    # hold out the last five months as evaluation set
    train = monthly_sum.iloc[:-5].copy()
    test = monthly_sum.iloc[-5:].copy()
    features = ["jahr", "monat"]
    target = SALES_FEAT
    X_train, y_train = train[features], train[target]
    X_test, y_test = test[features], test[target]

    reg = XGBRegressor(
        base_score=0.5,
        booster="gbtree",
        n_estimators=1000,
        early_stopping_rounds=50,
        objective="reg:squarederror",
        max_depth=3,
        learning_rate=0.01,
    )
    reg.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=100)

    test.loc[:, "vorhersage"] = reg.predict(X_test)
    test = test.drop(SALES_FEAT, axis=1)
    test = test.reset_index(drop=True)
    pipe.success(test, STATUS_HANDLER.SUCCESS)
    return pipe


def _postprocess_sales(
    pipe: PipeResult[SalesPrognosisResultsExport],
    feature_map: Mapping[str, str],
) -> PipeResult[SalesPrognosisResultsExport]:
    data = pipe.data
    assert data is not None, "processing non-existent pipe result"
    # convert features back to original naming
    res = parse.process_features_wrapped(
        data,
        feature_map=feature_map,
        target_features=set(),
    )
    if res.status != STATUS_HANDLER.SUCCESS:
        pipe.fail(res.status)
        return pipe
    res = _parse_df_to_results_wrapped(res.unwrap())
    if res.status != STATUS_HANDLER.SUCCESS:
        pipe.fail(res.status)
        return pipe
    export_response = SalesPrognosisResultsExport(
        response=res.unwrap(),
        status=res.status,
    )
    pipe.export(export_response)
    return pipe


def _export_on_fail(
    status: Status,
) -> SalesPrognosisResultsExport:
    response = SalesPrognosisResults(daten=tuple())
    return SalesPrognosisResultsExport(response=response, status=status)
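
# Hedged sketch: the month-level aggregation and hold-out split performed in
# ``_process_sales``, shown standalone on synthetic data so the technique is
# easy to follow. The synthetic frame and the reduced hyperparameters are
# assumptions for illustration only.
def _demo_monthly_forecast() -> pd.DataFrame:  # pragma: no cover
    # two years of daily bookings with a constant amount
    rng = pd.date_range("2021-01-01", periods=730, freq="D")
    df = pd.DataFrame({"buchungs_datum": rng, "betrag": 100.0})
    df["jahr"] = df["buchungs_datum"].dt.year
    df["monat"] = df["buchungs_datum"].dt.month
    monthly = df.groupby(["jahr", "monat"])["betrag"].sum().reset_index()
    # hold out the last five months, mirroring _process_sales
    train, test = monthly.iloc[:-5], monthly.iloc[-5:].copy()
    reg = XGBRegressor(n_estimators=50, max_depth=3, learning_rate=0.1)
    reg.fit(train[["jahr", "monat"]], train["betrag"])
    test.loc[:, "vorhersage"] = reg.predict(test[["jahr", "monat"]])
    return test.drop("betrag", axis=1).reset_index(drop=True)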
def pipeline_sales(
    session: Session,
    company_id: int | None = None,
    start_date: Datetime | None = None,
) -> SalesPrognosisResultsExport:
    response, status = get_sales_prognosis_data(
        session,
        company_id=company_id,
        start_date=start_date,
    )
    if status != STATUS_HANDLER.SUCCESS:
        return _export_on_fail(status)
    pipe = _preprocess_sales(
        response,
        feature_map=COL_MAP_SALES_PROGNOSIS,
        target_features=FEATURES_SALES_PROGNOSIS,
    )
    if pipe.status != STATUS_HANDLER.SUCCESS:
        return _export_on_fail(pipe.status)
    pipe = _process_sales(
        pipe,
        min_num_data_points=MIN_NUMBER_DATAPOINTS,
    )
    if pipe.status != STATUS_HANDLER.SUCCESS:
        return _export_on_fail(pipe.status)
    pipe = _postprocess_sales(
        pipe,
        feature_map=DualDict(),
    )
    if pipe.status != STATUS_HANDLER.SUCCESS:
        return _export_on_fail(pipe.status)
    assert pipe.results is not None, "needed export response not set in pipeline"
    return pipe.results


def pipeline_sales_dummy(
    session: Session,
    company_id: int | None = None,
    start_date: Datetime | None = None,
) -> SalesPrognosisResultsExport:  # pragma: no cover
    """Prototype dummy function for tests by DelBar."""
    ...
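
# Hedged usage sketch: driving the pipeline end to end. How a ``Session`` is
# constructed is not shown in this module, so the no-argument constructor and
# the company ID below are hypothetical placeholders, not the real call.
if __name__ == "__main__":  # pragma: no cover
    from delta_barth.api.common import Session

    session = Session()  # hypothetical: real construction may need credentials
    export = pipeline_sales(session, company_id=7)
    print(export.status)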