src/delta_barth/analysis/forecast.py aktualisiert

re-arrange code segments
idea of timedelta based algorithm
2025-04-13 14:45:55 +00:00 · 2025-04-11 13:10:39 +02:00 · 2025-04-11 13:06:12 +02:00 · 2025-04-11 13:04:55 +02:00 · 2025-04-11 12:30:31 +02:00 · 2025-04-11 12:23:05 +02:00
16 changed files with 354 additions and 67 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ prototypes/
 data/
 reports/
 *.code-workspace
+docs/

 # credentials
 CREDENTIALS*
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "delta-barth"
-version = "0.5.1"
+version = "0.5.7dev1"
 description = "workflows and pipelines for the Python-based Plugin of Delta Barth's ERP system"
 authors = [
    {name = "Florian Förster", email = "f.foerster@d-opt.com"},
@@ -73,7 +73,7 @@ directory = "reports/coverage"


 [tool.bumpversion]
-current_version = "0.5.1"
+current_version = "0.5.7dev1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/scripts/bump_patch.ps1
+++ b/scripts/bump_patch.ps1
@@ -0,0 +1,2 @@
+pdm run bump-my-version bump patch
+pdm run bump-my-version show current_version
--- a/src/delta_barth/_csharp/json_types.py
+++ b/src/delta_barth/_csharp/json_types.py
@@ -42,7 +42,11 @@ def delta_barth_api_error() -> str:


 def status_err() -> str:
+    """Return a sample error ``Status`` (code 102) serialised as a JSON string.
+
+    The description contains a non-ASCII character ("Ü"); the message marks
+    the payload as test data.
+    """
-    status = Status(code=102, description="internal error occurred", message="caused by test")
+    status = Status(
+        code=102,
+        description="internal error occurred: 'Limit-Überschreitung'",
+        message="caused by test",
+    )
    return status.model_dump_json()


--- a/src/delta_barth/analysis/forecast.py
+++ b/src/delta_barth/analysis/forecast.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import copy
 import datetime
 import math
 from collections.abc import Mapping, Set
@@ -7,10 +8,15 @@ from dataclasses import asdict
 from datetime import datetime as Datetime
 from typing import TYPE_CHECKING, Final, TypeAlias, cast

+import dopt_basics.datetime
 import numpy as np
 import pandas as pd
 import scipy.stats
 import sqlalchemy as sql
+
+# --- new: for calculating timedelta
+from dateutil.relativedelta import relativedelta
+from dopt_basics.datetime import TimeUnitsTimedelta
 from sklearn.metrics import mean_absolute_error, r2_score
 from sklearn.model_selection import KFold, RandomizedSearchCV
 from xgboost import XGBRegressor
@@ -26,6 +32,7 @@ from delta_barth.api.requests import (
 )
 from delta_barth.constants import (
    COL_MAP_SALES_PROGNOSIS,
+    DEFAULT_DB_ERR_CODE,
    DUMMY_DATA_PATH,
    FEATURES_SALES_PROGNOSIS,
    SALES_BASE_NUM_DATAPOINTS_MONTHS,
@@ -110,7 +117,7 @@ def _parse_df_to_results_wrapped(
    return _parse_df_to_results(data)


-@wrap_result()
+@wrap_result(code_on_error=DEFAULT_DB_ERR_CODE)
 def _write_sales_forecast_stats_wrapped(
    stats: SalesForecastStatistics,
 ) -> None:
@@ -182,16 +189,14 @@ def _process_sales(
    PipeResult
        _description_
    """
-    # cust_data: CustomerDataSalesForecast = CustomerDataSalesForecast()
-
    # filter data
    data = pipe.data
    assert data is not None, "processing not existing pipe result"

    DATE_FEAT: Final[str] = "buchungs_datum"
    SALES_FEAT: Final[str] = "betrag"
-    df_firma = data[(data["betrag"] > 0)]
-    df_cust = df_firma.copy()
+    df_filter = data[(data["betrag"] > 0)]
+    df_cust = df_filter.copy()
    df_cust = df_cust.sort_values(by=DATE_FEAT).reset_index()
    len_ds = len(df_cust)

@@ -205,7 +210,18 @@ def _process_sales(
    df_cust["jahr"] = df_cust[DATE_FEAT].dt.year
    df_cust["monat"] = df_cust[DATE_FEAT].dt.month

-    monthly_sum = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
+    current_year = datetime.now().year
+    current_month = datetime.now().month
+    years = range(df_cust["jahr"].min(), current_year + 1)
+
+    old_monthly_sum = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
+
+    all_month_year_combinations = pd.DataFrame(
+        [(year, month) for year in years for month in range(1, 13) if (year < current_year or (year == current_year and month <= current_month))], columns=["jahr", "monat"]
+    )
+
+    monthly_sum = pd.merge(all_month_year_combinations, old_monthly_sum, on=["jahr", "monat"], how="left")
+    monthly_sum[SALES_FEAT] = monthly_sum[SALES_FEAT].fillna(0)
    monthly_sum[DATE_FEAT] = (
        monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
    )
@@ -214,13 +230,17 @@ def _process_sales(

    features = ["jahr", "monat"]
    target = SALES_FEAT
-    current_year = datetime.datetime.now().year
-    first_year = cast(int, df_cust["jahr"].min())
+
+    last_date = pd.to_datetime(datetime.datetime.now().strftime("%m.%Y"), format="%m.%Y")
+    future_dates = pd.date_range(
+        start=last_date + pd.DateOffset(months=1), periods=6, freq="MS"
+    )
+    forecast = pd.DataFrame({"datum": future_dates}).set_index("datum")

    # Randomized Search
    kfold = KFold(n_splits=5, shuffle=True)
    params: ParamSearchXGBRegressor = {
-        "n_estimators": scipy.stats.poisson(mu=1000),
+        "n_estimators": scipy.stats.poisson(mu=100),
        "learning_rate": [0.03, 0.04, 0.05],
        "max_depth": range(2, 9),
        "min_child_weight": range(1, 5),
@@ -230,26 +250,68 @@ def _process_sales(
        "early_stopping_rounds": [20, 50],
    }

+    best_estimator = None
    best_params: BestParametersXGBRegressor | None = None
    best_score_mae: float | None = float("inf")
    best_score_r2: float | None = None
    best_start_year: int | None = None
    too_few_month_points: bool = True
-    forecast: pd.DataFrame | None = None
+    
+    stride = dopt_basics.datetime.timedelta_from_val(365, TimeUnitsTimedelta.DAYS)
+    dates = cast(pd.DatetimeIndex, monthly_sum.index)
+    min_date = dates.min()
+
+    # baseline: 3 years - 36 months
+    starting_date = datetime.datetime.now() - relativedelta(months=36)
+
+    def get_index_date(
+        dates: pd.DatetimeIndex,
+        starting_date: datetime.datetime | pd.Timestamp,
+    ) -> tuple[pd.Timestamp, bool]:
+        """Find the first entry of ``dates`` that is not before ``starting_date``.
+
+        Returns ``(date, True)`` for the first entry, in iteration order, with
+        ``date >= starting_date``. If no entry qualifies, the last entry of
+        ``dates`` is returned together with ``False``.
+
+        NOTE(review): "first" only means "earliest" if ``dates`` is sorted
+        ascending -- confirm against how the index is built.
+        """
+        # the second argument of ``next`` is the fallback when the generator is empty
+        target, succ = next(
+            ((date, True) for date in dates if date >= starting_date), (dates[-1], False)
+        )
+        return target, succ
+
+    first_date, succ = get_index_date(dates, starting_date)
+    if not succ:
+        # !! return early
+        ...
+
+    date_span = first_date - min_date
+    steps = date_span.days // stride.days
+
+    for step in range(steps + 1):
+        print("step: ", step)
+        target_date = first_date - step * stride
+        print("target date: ", target_date)
+        split_date = dates[-6]
+
+        index_date, succ = get_index_date(dates, target_date)
+
+        if not succ:
+            break
+
+        if index_date >= split_date:
+            print("Skip because of date difference")
+            continue

-    for start_year in range(current_year - 4, first_year - 1, -1):
        train = cast(
            pd.DataFrame,
-            monthly_sum[monthly_sum.index.year >= start_year].iloc[:-5].copy(),  # type: ignore
+            monthly_sum.loc[index_date:split_date].copy(),  # type: ignore
        )
+        print(train)
+        print("Length train: ", len(train))
        test = cast(
            pd.DataFrame,
-            monthly_sum[monthly_sum.index.year >= start_year].iloc[-5:].copy(),  # type: ignore
+            monthly_sum.loc[split_date:].copy(),  # type: ignore
        )
        X_train, X_test = train[features], test[features]
        y_train, y_test = train[target], test[target]

-        if len(train) >= (base_num_data_points_months + 10 * (current_year - 4 - start_year)):
+        # test set size fixed at 6 --> first iteration: baseline - 6 entries
+        # for each new year 10 new data points (i.e., sales strictly positive) needed
+        if len(train[train[SALES_FEAT] > 0]) >= 30 + 10 * step:
            too_few_month_points = False

            rand = RandomizedSearchCV(
@@ -272,13 +334,22 @@ def _process_sales(
                    best_params = cast(BestParametersXGBRegressor, rand.best_params_)
                    best_score_mae = error
                    best_score_r2 = cast(float, r2_score(y_test, y_pred))
-                    best_start_year = start_year
-                    print("executed")
-                    forecast = test.copy()
-                    forecast.loc[:, "vorhersage"] = y_pred
+                    # --- new: use target_date for best_start_year
+                    best_start_year = target_date.year
+                    # --- new: store best_estimator
+                    best_estimator = copy.copy(rand.best_estimator_)
+
+    # ?? --- new: use best_estimator to calculate future values and store them in forecast
+    if best_estimator is not None:
+        X_future = pd.DataFrame(
+            {"jahr": future_dates.year, "monat": future_dates.month}, index=future_dates
+        )
+        y_future = best_estimator.predict(X_future)  # type: ignore
+        forecast["vorhersage"] = y_future
+        forecast["jahr"] = forecast.index.year  # type: ignore
+        forecast["monat"] = forecast.index.month  # type: ignore
+        forecast = forecast.reset_index(drop=True)

-    if forecast is not None:
-        forecast = forecast.drop(SALES_FEAT, axis=1).reset_index(drop=True)
    best_score_mae = best_score_mae if not math.isinf(best_score_mae) else None

    if too_few_month_points:
@@ -294,7 +365,9 @@ def _process_sales(
        pipe.stats(stats)
        return pipe

-    assert forecast is not None, "forecast is None, but was attempted to be returned"
+    assert "vorhersage" in forecast.columns, (
+        "forecast does not contain prognosis values, but was attempted to be returned"
+    )
    status = STATUS_HANDLER.SUCCESS
    pipe.success(forecast, status)
    stats = SalesForecastStatistics(
--- a/src/delta_barth/api/requests.py
+++ b/src/delta_barth/api/requests.py
@@ -7,6 +7,7 @@ import requests
 from dopt_basics.io import combine_route
 from pydantic import BaseModel, PositiveInt, SkipValidation

+from delta_barth.constants import API_CON_TIMEOUT
 from delta_barth.errors import STATUS_HANDLER
 from delta_barth.types import DelBarApiError, ExportResponse, ResponseType, Status

@@ -55,7 +56,7 @@ def get_sales_prognosis_data(
    company_id: int | None = None,
    start_date: Datetime | None = None,
 ) -> tuple[SalesPrognosisResponse, Status]:
-    resp, status = session.assert_login()
+    _, status = session.assert_login()
    if status != STATUS_HANDLER.SUCCESS:
        response = SalesPrognosisResponse(daten=tuple())
        return response, status
@@ -67,11 +68,18 @@ def get_sales_prognosis_data(
        FirmaId=company_id,
        BuchungsDatum=start_date,
    )
-    resp = requests.get(
-        URL,
-        params=sales_prog_req.model_dump(mode="json", exclude_none=True),
-        headers=session.headers,  # type: ignore[argumentType]
-    )
+    empty_response = SalesPrognosisResponse(daten=tuple())
+    try:
+        resp = requests.get(
+            URL,
+            params=sales_prog_req.model_dump(mode="json", exclude_none=True),
+            headers=session.headers,  # type: ignore[argumentType]
+            timeout=API_CON_TIMEOUT,
+        )
+    except requests.exceptions.Timeout:
+        return empty_response, STATUS_HANDLER.pipe_states.CONNECTION_TIMEOUT
+    except requests.exceptions.RequestException:
+        return empty_response, STATUS_HANDLER.pipe_states.CONNECTION_ERROR

    response: SalesPrognosisResponse
    status: Status
@@ -79,7 +87,7 @@ def get_sales_prognosis_data(
        response = SalesPrognosisResponse(**resp.json())
        status = STATUS_HANDLER.SUCCESS
    else:
-        response = SalesPrognosisResponse(daten=tuple())
+        response = empty_response
        err = DelBarApiError(status_code=resp.status_code, **resp.json())
        status = STATUS_HANDLER.api_error(err)

--- a/src/delta_barth/constants.py
+++ b/src/delta_barth/constants.py
@@ -15,9 +15,9 @@ assert dummy_data_pth.exists(), f"dummy data path not found: {dummy_data_pth}"
 DUMMY_DATA_PATH: Final[Path] = dummy_data_pth

 # ** logging
-ENABLE_LOGGING: Final[bool] = False
+ENABLE_LOGGING: Final[bool] = True
 LOGGING_TO_FILE: Final[bool] = True
-LOGGING_TO_STDERR: Final[bool] = True
+LOGGING_TO_STDERR: Final[bool] = False
 LOG_FILENAME: Final[str] = "dopt-delbar.log"

 # ** databases
@@ -25,6 +25,7 @@ DB_ECHO: Final[bool] = True

 # ** error handling
 DEFAULT_INTERNAL_ERR_CODE: Final[int] = 100
+DEFAULT_DB_ERR_CODE: Final[int] = 150
 DEFAULT_API_ERR_CODE: Final[int] = 400


@@ -38,6 +39,8 @@ class KnownDelBarApiErrorCodes(enum.Enum):
    COMMON = frozenset((400, 401, 409, 500))


+# ** API
+API_CON_TIMEOUT: Final[float] = 10.0  # secs to response
 # ** API response parsing
 # ** column mapping [API-Response --> Target-Features]
 COL_MAP_SALES_PROGNOSIS: Final[DualDict[str, str]] = DualDict(
--- a/src/delta_barth/databases.py
+++ b/src/delta_barth/databases.py
@@ -22,8 +22,8 @@ perf_meas = sql.Table(
    "performance_measurement",
    metadata,
    sql.Column("id", sql.Integer, primary_key=True),
-    sql.Column("execution_duration", sql.Float),
    sql.Column("pipeline_name", sql.String(length=30)),
+    sql.Column("execution_duration", sql.Float),
 )
 # ** ---- forecasts
 sf_stats = sql.Table(
--- a/src/delta_barth/errors.py
+++ b/src/delta_barth/errors.py
@@ -53,9 +53,19 @@ class UApiError(Exception):
 ## ** internal error handling
 DATA_PIPELINE_STATUS_DESCR: Final[tuple[StatusDescription, ...]] = (
    ("SUCCESS", 0, "Erfolg"),
-    ("TOO_FEW_POINTS", 1, "Datensatz besitzt nicht genügend Datenpunkte"),
-    ("TOO_FEW_MONTH_POINTS", 2, "nach Aggregation pro Monat nicht genügend Datenpunkte"),
-    ("NO_RELIABLE_FORECAST", 3, "Prognosequalität des Modells unzureichend"),
+    (
+        "CONNECTION_TIMEOUT",
+        1,
+        "Der Verbindungsaufbau zum API-Server dauerte zu lange. Ist der Server erreichbar?",
+    ),
+    (
+        "CONNECTION_ERROR",
+        2,
+        "Es ist keine Verbindung zum API-Server möglich. Ist der Server erreichbar?",
+    ),
+    ("TOO_FEW_POINTS", 3, "Datensatz besitzt nicht genügend Datenpunkte"),
+    ("TOO_FEW_MONTH_POINTS", 4, "nach Aggregation pro Monat nicht genügend Datenpunkte"),
+    ("NO_RELIABLE_FORECAST", 5, "Prognosequalität des Modells unzureichend"),
 )


--- a/src/delta_barth/management.py
+++ b/src/delta_barth/management.py
@@ -14,9 +14,11 @@ SESSION: Final[Session] = Session(HTTP_BASE_CONTENT_HEADERS)

 def setup(
    data_path: str,
+    base_url: str,
 ) -> None:  # pragma: no cover
+    """Configure the module-level ``SESSION`` and run its setup routine.
+
+    Sets the data path and the base URL on ``SESSION`` before calling
+    ``SESSION.setup()``, then logs the successful setup.
+    """
    # at this point: no logging configured
    SESSION.set_data_path(data_path)
+    SESSION.set_base_url(base_url=base_url)
    SESSION.setup()
    logger.info("[EXT-CALL MANAGEMENT] Successfully set up current session")

@@ -37,6 +39,7 @@ def set_credentials(
    logger.info("[EXT-CALL MANAGEMENT] Successfully set credentials for current session")


+# ** not part of external API, only internal
 def get_credentials() -> str:  # pragma: no cover
    logger.info("[EXT-CALL MANAGEMENT] Getting credentials for current session...")
    creds = SESSION.creds
@@ -44,12 +47,15 @@ def get_credentials() -> str:  # pragma: no cover
    return creds.model_dump_json()


-# ** legacy: not part of external API
 def set_base_url(
    base_url: str,
 ) -> None:  # pragma: no cover
+    """Forward ``base_url`` to ``SESSION.set_base_url``."""
    SESSION.set_base_url(base_url=base_url)


+def get_data_path() -> str:  # pragma: no cover
+    """Return ``SESSION.data_path`` converted to a string."""
+    return str(SESSION.data_path)
+
+
 def get_base_url() -> str:  # pragma: no cover
+    """Return the base URL currently stored on ``SESSION``."""
    return SESSION.base_url
--- a/src/delta_barth/pipelines.py
+++ b/src/delta_barth/pipelines.py
@@ -1,24 +1,83 @@
 """collection of configured data pipelines, intended to be invoked from C#"""

+import time
 from datetime import datetime as Datetime
+from typing import Final

+import sqlalchemy as sql
+
+from delta_barth import databases as db
 from delta_barth.analysis import forecast
+from delta_barth.constants import DEFAULT_DB_ERR_CODE
+from delta_barth.errors import STATUS_HANDLER, wrap_result
 from delta_barth.logging import logger_pipelines as logger
 from delta_barth.management import SESSION
-from delta_barth.types import JsonExportResponse
+from delta_barth.types import JsonExportResponse, PipelineMetrics
+
+
+def _write_performance_metrics(
+    pipeline_name: str,
+    time_start: int,
+    time_end: int,
+) -> PipelineMetrics:
+    """Compute a pipeline's execution duration and store it in the database.
+
+    Parameters
+    ----------
+    pipeline_name : str
+        name under which the measurement is stored
+    time_start : int
+        start timestamp; the callers in this module pass
+        ``time.perf_counter_ns()`` values, i.e. nanoseconds
+    time_end : int
+        end timestamp in the same unit as ``time_start``
+
+    Returns
+    -------
+    PipelineMetrics
+        mapping with ``pipeline_name`` and ``execution_duration``
+        (timestamp difference divided by 1e9)
+
+    Raises
+    ------
+    ValueError
+        if ``time_end`` is smaller than ``time_start``
+    """
+    if time_end < time_start:
+        raise ValueError("Ending time smaller than starting time")
+    # divide by 1e9: nanoseconds --> seconds for ``perf_counter_ns`` inputs
+    execution_duration = (time_end - time_start) / 1e9
+    metrics = PipelineMetrics(
+        pipeline_name=pipeline_name,
+        execution_duration=execution_duration,
+    )
+
+    # insert one row into the ``perf_meas`` table inside an ``engine.begin()`` block
+    with SESSION.db_engine.begin() as con:
+        con.execute(sql.insert(db.perf_meas).values(**metrics))
+
+    return metrics
+
+
+# NOTE(review): ``wrap_result`` is defined in ``delta_barth.errors`` and is not
+# visible here. The callers in this module read ``.status`` and call
+# ``.unwrap()`` on the returned object, so the decorator presumably returns a
+# result wrapper instead of the annotated ``PipelineMetrics`` -- confirm.
+@wrap_result(code_on_error=DEFAULT_DB_ERR_CODE)
+def _write_performance_metrics_wrapped(
+    pipeline_name: str,
+    time_start: int,
+    time_end: int,
+) -> PipelineMetrics:
+    """Call ``_write_performance_metrics`` under ``wrap_result``.
+
+    The decorator is configured with ``DEFAULT_DB_ERR_CODE`` as the code to
+    use on error; all arguments are passed through unchanged.
+    """
+    return _write_performance_metrics(pipeline_name, time_start, time_end)


 def pipeline_sales_forecast(
    company_id: int | None,
    start_date: Datetime | None,
 ) -> JsonExportResponse:
+    """Run the main sales forecast pipeline and return its result as JSON.
+
+    The run is timed with ``time.perf_counter_ns()``. The timed span covers the
+    forecast call and the JSON serialisation of its result. Afterwards the
+    measurement is written to the database; a failed write is only logged and
+    does not change the returned export.
+    """
+    PIPELINE_NAME: Final[str] = "sales_forecast"
    logger.info("[EXT-CALL PIPELINES] Starting main sales forecast pipeline...")
+    t_start = time.perf_counter_ns()
    result = forecast.pipeline_sales_forecast(
        SESSION, company_id=company_id, start_date=start_date
    )
    export = JsonExportResponse(result.model_dump_json())
-
+    t_end = time.perf_counter_ns()
    logger.info("[EXT-CALL PIPELINES] Main sales forecast pipeline successful")
+    logger.info("[EXT-CALL PIPELINES] Writing performance metrics...")
+    res = _write_performance_metrics_wrapped(
+        pipeline_name=PIPELINE_NAME,
+        time_start=t_start,
+        time_end=t_end,
+    )
+    # metrics are auxiliary: report a failed write, but still return the export
+    if res.status != STATUS_HANDLER.SUCCESS:
+        logger.error(
+            (
+                "[DB-WRITE][METRICS] Pipeline: >%s< - Error on writing "
+                "pipeline metrics to database: %s"
+            ),
+            PIPELINE_NAME,
+            res.status,
+        )
+    else:
+        metrics = res.unwrap()
+        logger.info(
+            "[METRICS] Pipeline: >%s< - Execution time: %.6f",
+            PIPELINE_NAME,
+            metrics["execution_duration"],
+        )

    return export

@@ -27,14 +86,38 @@ def pipeline_sales_forecast_dummy(
    company_id: int | None,
    start_date: Datetime | None,
 ) -> JsonExportResponse:
+    PIPELINE_NAME: Final[str] = "sales_forecast_dummy"
    logger.info("[EXT-CALL PIPELINES] Starting dummy sales forecast pipeline...")
+    t_start = time.perf_counter_ns()
    result = forecast.pipeline_sales_dummy(
        SESSION,
        company_id=company_id,
        start_date=start_date,
    )
    export = JsonExportResponse(result.model_dump_json())
-
+    t_end = time.perf_counter_ns()
    logger.info("[EXT-CALL PIPELINES] Dummy sales forecast pipeline successful")
+    logger.info("[EXT-CALL PIPELINES] Writing performance metrics...")
+    res = _write_performance_metrics_wrapped(
+        pipeline_name=PIPELINE_NAME,
+        time_start=t_start,
+        time_end=t_end,
+    )
+    if res.status != STATUS_HANDLER.SUCCESS:
+        logger.error(
+            (
+                "[DB-WRITE][METRICS] Pipeline: >%s< - Error on writing "
+                "pipeline metrics to database: %s"
+            ),
+            PIPELINE_NAME,
+            res.status,
+        )
+    else:
+        metrics = res.unwrap()
+        logger.info(
+            "[METRICS] Pipeline: >%s< - Execution time: %.6f",
+            PIPELINE_NAME,
+            metrics["execution_duration"],
+        )

    return export
--- a/src/delta_barth/session.py
+++ b/src/delta_barth/session.py
@@ -14,7 +14,7 @@ from delta_barth.api.common import (
    LoginResponse,
    validate_credentials,
 )
-from delta_barth.constants import DB_ECHO
+from delta_barth.constants import API_CON_TIMEOUT, DB_ECHO
 from delta_barth.errors import STATUS_HANDLER
 from delta_barth.logging import logger_session as logger
 from delta_barth.types import DelBarApiError, Status
@@ -191,11 +191,18 @@ class Session:
            databaseName=self.creds.database,
            mandantName=self.creds.mandant,
        )
-        resp = requests.put(
-            URL,
-            login_req.model_dump_json(),
-            headers=self.headers,  # type: ignore
-        )
+        empty_response = LoginResponse(token="")
+        try:
+            resp = requests.put(
+                URL,
+                login_req.model_dump_json(),
+                headers=self.headers,  # type: ignore
+                timeout=API_CON_TIMEOUT,
+            )
+        except requests.exceptions.Timeout:  # pragma: no cover
+            return empty_response, STATUS_HANDLER.pipe_states.CONNECTION_TIMEOUT
+        except requests.exceptions.RequestException:  # pragma: no cover
+            return empty_response, STATUS_HANDLER.pipe_states.CONNECTION_ERROR

        response: LoginResponse
        status: Status
@@ -204,7 +211,7 @@ class Session:
            status = STATUS_HANDLER.pipe_states.SUCCESS
            self._add_session_token(response.token)
        else:
-            response = LoginResponse(token="")
+            response = empty_response
            err = DelBarApiError(status_code=resp.status_code, **resp.json())
            status = STATUS_HANDLER.api_error(err)

@@ -216,12 +223,17 @@ class Session:
        ROUTE: Final[str] = "user/logout"
        URL: Final = combine_route(self.base_url, ROUTE)

-        resp = requests.put(
-            URL,
-            headers=self.headers,  # type: ignore
-        )
+        try:
+            resp = requests.put(
+                URL,
+                headers=self.headers,  # type: ignore
+                timeout=API_CON_TIMEOUT,
+            )
+        except requests.exceptions.Timeout:  # pragma: no cover
+            return None, STATUS_HANDLER.pipe_states.CONNECTION_TIMEOUT
+        except requests.exceptions.RequestException:  # pragma: no cover
+            return None, STATUS_HANDLER.pipe_states.CONNECTION_ERROR

-        response = None
        status: Status
        if resp.status_code == 200:
            status = STATUS_HANDLER.SUCCESS
@@ -230,7 +242,7 @@ class Session:
            err = DelBarApiError(status_code=resp.status_code, **resp.json())
            status = STATUS_HANDLER.api_error(err)

-        return response, status
+        return None, status

    def assert_login(
        self,
@@ -246,11 +258,18 @@ class Session:
        ROUTE: Final[str] = "verkauf/umsatzprognosedaten"
        URL: Final = combine_route(self.base_url, ROUTE)
        params: dict[str, int] = {"FirmaId": 999999}
-        resp = requests.get(
-            URL,
-            params=params,
-            headers=self.headers,  # type: ignore
-        )
+        empty_response = LoginResponse(token="")
+        try:
+            resp = requests.get(
+                URL,
+                params=params,
+                headers=self.headers,  # type: ignore
+                timeout=API_CON_TIMEOUT,
+            )
+        except requests.exceptions.Timeout:  # pragma: no cover
+            return empty_response, STATUS_HANDLER.pipe_states.CONNECTION_TIMEOUT
+        except requests.exceptions.RequestException:  # pragma: no cover
+            return empty_response, STATUS_HANDLER.pipe_states.CONNECTION_ERROR

        response: LoginResponse
        status: Status
@@ -261,7 +280,7 @@ class Session:
            self._remove_session_token()
            response, status = self.login()
        else:
-            response = LoginResponse(token="")
+            response = empty_response
            err = DelBarApiError(status_code=resp.status_code, **resp.json())
            status = STATUS_HANDLER.api_error(err)

--- a/src/delta_barth/types.py
+++ b/src/delta_barth/types.py
@@ -47,6 +47,8 @@ class ExportResponse(BaseModel):
@dataclass(slots=True)
 class DataPipeStates:
    SUCCESS: Status
+    CONNECTION_TIMEOUT: Status
+    CONNECTION_ERROR: Status
    TOO_FEW_POINTS: Status
    TOO_FEW_MONTH_POINTS: Status
    NO_RELIABLE_FORECAST: Status
@@ -139,7 +141,13 @@ class Statistics:
    pass


-# ** forecasts
+# ** ---- performance
+class PipelineMetrics(t.TypedDict):
+    """Performance measurement of a single pipeline run."""
+
+    # name of the measured pipeline
+    pipeline_name: str
+    # run time; ``pipelines.py`` computes it as a ``perf_counter_ns`` delta / 1e9
+    execution_duration: float
+
+
+# ** ---- forecasts
@dataclass(slots=True)
 class CustomerDataSalesForecast:
    order: list[int] = field(default_factory=list)
--- a/tests/api/test_requests.py
+++ b/tests/api/test_requests.py
@@ -1,8 +1,10 @@
 from datetime import datetime as Datetime

 import pytest
+import requests

 from delta_barth.api import requests as requests_
+from delta_barth.api.common import LoginResponse


@pytest.mark.api_con_required
@@ -94,3 +96,31 @@ def test_get_sales_prognosis_data_FailApiServer(session, mock_get):
    assert status.api_server_error.message == json["message"]
    assert status.api_server_error.code == json["code"]
    assert status.api_server_error.hints == json["hints"]
+
+
+def test_get_sales_prognosis_data_FailGetTimeout(session, mock_get):
+    """A ``Timeout`` from ``requests.get`` yields empty data and status code 1.
+
+    Code 1 is the ``CONNECTION_TIMEOUT`` entry of the pipeline status table.
+    """
+    mock_get.side_effect = requests.exceptions.Timeout("Test timeout")
+
+    # stub the login check so the data request itself is reached
+    def assert_login():
+        return LoginResponse(token=""), requests_.STATUS_HANDLER.SUCCESS
+
+    session.assert_login = assert_login
+
+    resp, status = requests_.get_sales_prognosis_data(session, None, None)
+    assert resp is not None
+    assert len(resp.daten) == 0
+    assert status.code == 1
+
+
+def test_get_sales_prognosis_data_FailGetRequestException(session, mock_get):
+    """A non-timeout ``RequestException`` yields empty data and status code 2.
+
+    Code 2 is the ``CONNECTION_ERROR`` entry of the pipeline status table.
+    """
+    mock_get.side_effect = requests.exceptions.RequestException("Test not timeout")
+
+    # stub the login check so the data request itself is reached
+    def assert_login():
+        return LoginResponse(token=""), requests_.STATUS_HANDLER.SUCCESS
+
+    session.assert_login = assert_login
+
+    resp, status = requests_.get_sales_prognosis_data(session, None, None)
+    assert resp is not None
+    assert len(resp.daten) == 0
+    assert status.code == 2
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -95,7 +95,7 @@ def mock_put():
        yield mock


-@pytest.fixture
+@pytest.fixture(scope="function")
 def mock_get():
+    """Patch ``requests.get`` and yield the mock while the fixture is active."""
    with patch("requests.get") as mock:
        yield mock
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -3,20 +3,44 @@ import json
 from unittest.mock import patch

 import pytest
+import sqlalchemy as sql

 import delta_barth.pipelines
+from delta_barth import databases as db
 from delta_barth import pipelines as pl
 from delta_barth.errors import STATUS_HANDLER


+def test_write_performance_metrics(session):
+    """Metrics are returned and stored as the newest ``perf_meas`` row.
+
+    The timestamps are 10_000_000_000 apart, so the expected duration after
+    the division by 1e9 is 10.
+    """
+    pipe_name = "test_pipe"
+    t_start = 20_000_000_000
+    t_end = 30_000_000_000
+
+    # replace the module-level session so the write goes to the test database
+    with patch("delta_barth.pipelines.SESSION", session):
+        metrics = pl._write_performance_metrics(
+            pipeline_name=pipe_name,
+            time_start=t_start,
+            time_end=t_end,
+        )
+    assert metrics["pipeline_name"] == pipe_name
+    assert metrics["execution_duration"] == 10
+
+    with session.db_engine.begin() as con:
+        ret = con.execute(sql.select(db.perf_meas))
+
+    # the last selected row is expected to be the entry written above
+    metrics = ret.all()[-1]
+    assert metrics.pipeline_name == pipe_name
+    assert metrics.execution_duration == 10
+
+
@patch("delta_barth.analysis.forecast.SALES_BASE_NUM_DATAPOINTS_MONTHS", 1)
-def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp):
+def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp, session):
    with patch(
        "delta_barth.analysis.forecast.get_sales_prognosis_data",
    ) as mock:
        mock.return_value = (exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS)
-        importlib.reload(delta_barth.pipelines)
-        json_export = pl.pipeline_sales_forecast(None, None)
+        with patch("delta_barth.pipelines.SESSION", session):
+            json_export = pl.pipeline_sales_forecast(None, None)

    assert isinstance(json_export, str)
    parsed_resp = json.loads(json_export)
@@ -27,9 +51,18 @@ def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp):
    assert "code" in parsed_resp["status"]
    assert parsed_resp["status"]["code"] == 0

+    with session.db_engine.begin() as con:
+        ret = con.execute(sql.select(db.perf_meas))

-def test_sales_prognosis_pipeline_dummy():
-    json_export = pl.pipeline_sales_forecast_dummy(None, None)
+    metrics = ret.all()[-1]
+    assert metrics.pipeline_name == "sales_forecast"
+    assert metrics.execution_duration > 0
+
+
+@pytest.mark.new
+def test_sales_prognosis_pipeline_dummy(session):
+    with patch("delta_barth.pipelines.SESSION", session):
+        json_export = pl.pipeline_sales_forecast_dummy(None, None)

    assert isinstance(json_export, str)
    parsed_resp = json.loads(json_export)
@@ -43,3 +76,10 @@ def test_sales_prognosis_pipeline_dummy():
    assert entry["vorhersage"] == pytest.approx(47261.058594)
    assert "code" in parsed_resp["status"]
    assert parsed_resp["status"]["code"] == 0
+
+    with session.db_engine.begin() as con:
+        ret = con.execute(sql.select(db.perf_meas))
+
+    metrics = ret.all()[-1]
+    assert metrics.pipeline_name == "sales_forecast_dummy"
+    assert metrics.execution_duration > 0
Author	SHA1	Message	Date
frasu	0eb39deec5	src/delta_barth/analysis/forecast.py aktualisiert	2025-04-13 14:45:55 +00:00
foefl	8501f551b2	re-arrange code segments	2025-04-11 13:10:39 +02:00
foefl	da594fb5ba	idea of timedelta based algorithm	2025-04-11 13:06:12 +02:00
foefl	e8f3a7aea8	adapt forecast dataframe to be compatible with pipeline output	2025-04-11 13:04:55 +02:00
foefl	8936f798ab	force enough data points	2025-04-11 12:30:31 +02:00
foefl	e1b375396a	idea of timedelta based algorithm	2025-04-11 12:23:05 +02:00
foefl	5d1f5199d3	prototype ideas	2025-04-11 10:37:49 +02:00
frasu	f49744ca45	src/delta_barth/analysis/forecast.py aktualisiert	2025-04-10 17:33:00 +00:00
frasu	2934326258	src/delta_barth/analysis/forecast.py aktualisiert	2025-04-10 17:10:56 +00:00
frasu	4ef8fc5e9d	src/delta_barth/analysis/forecast.py aktualisiert	2025-04-10 14:58:01 +00:00
foefl	14c4efedf7	add hints for changes	2025-04-10 14:39:41 +02:00
foefl	2055ee5c8b	remove unneeded print statement	2025-04-10 14:07:31 +02:00
foefl	6caa087efd	re-enable logging	2025-04-10 11:12:57 +02:00
foefl	2d48be0009	update gitignore to exclude doc folders	2025-04-10 07:37:23 +02:00
foefl	fdb9812ecf	add script to bump patch version	2025-04-10 07:13:35 +02:00
foefl	9f90aec324	bump version	2025-04-09 09:28:27 +02:00
foefl	dc848fd840	increase timeout timespan	2025-04-09 09:27:23 +02:00
foefl	a0d189ac9f	add logging of pipeline metrics in database	2025-04-04 13:37:05 +02:00
foefl	6a418118d2	prepare metrics writing process	2025-04-03 16:05:46 +02:00
foefl	5d78fc9e02	added handling for API connectivity errors	2025-04-03 12:51:14 +02:00
foefl	b93b070682	adapt C# JSON type	2025-04-03 11:22:00 +02:00
foefl	30641103ec	rework session management: interface to C#	2025-04-03 09:26:56 +02:00