Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 58fd5bd921 | |||
| c2757cca26 | |||
| c46c90f548 | |||
| fc4d54dc4b | |||
| 5d53551923 | |||
| 6a7f59116f | |||
| 063531a08e |
19
pdm.lock
generated
19
pdm.lock
generated
@@ -5,7 +5,7 @@
|
|||||||
groups = ["default", "dev", "lint", "nb", "tests"]
|
groups = ["default", "dev", "lint", "nb", "tests"]
|
||||||
strategy = ["inherit_metadata"]
|
strategy = ["inherit_metadata"]
|
||||||
lock_version = "4.5.0"
|
lock_version = "4.5.0"
|
||||||
content_hash = "sha256:4931e32f8c146a72ad5b0a13c02485ea5ddc727de32fbe7c5e9314bbab05966c"
|
content_hash = "sha256:545c39ef89d18d28a7bca4b08c93e6fb900c42612089300b867a4e0955acd6ab"
|
||||||
|
|
||||||
[[metadata.targets]]
|
[[metadata.targets]]
|
||||||
requires_python = ">=3.11"
|
requires_python = ">=3.11"
|
||||||
@@ -579,7 +579,7 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dopt-basics"
|
name = "dopt-basics"
|
||||||
version = "0.1.2"
|
version = "0.1.3"
|
||||||
requires_python = ">=3.11"
|
requires_python = ">=3.11"
|
||||||
summary = "basic cross-project tools for Python-based d-opt projects"
|
summary = "basic cross-project tools for Python-based d-opt projects"
|
||||||
groups = ["default"]
|
groups = ["default"]
|
||||||
@@ -587,8 +587,8 @@ dependencies = [
|
|||||||
"tzdata>=2025.1",
|
"tzdata>=2025.1",
|
||||||
]
|
]
|
||||||
files = [
|
files = [
|
||||||
{file = "dopt_basics-0.1.2-py3-none-any.whl", hash = "sha256:dae8b7e31197fb173d98c74ed6f227c3dceaadf980139f0852a7f031d2e78b84"},
|
{file = "dopt_basics-0.1.3-py3-none-any.whl", hash = "sha256:974c2b442e47f0f05e66ff821ae48a9b12f7b77a8a3bc06fe8ac232e2bc27608"},
|
||||||
{file = "dopt_basics-0.1.2.tar.gz", hash = "sha256:dc54942db95b0608fa44f7b612ee3247dad50d2538ad88a1697b3357a8b05634"},
|
{file = "dopt_basics-0.1.3.tar.gz", hash = "sha256:22ba30cbd385cb8929cb6a13fe01e253cd7d9617ef637e41609f2468691450e8"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2414,6 +2414,17 @@ files = [
|
|||||||
{file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"},
|
{file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tomli-w"
|
||||||
|
version = "1.2.0"
|
||||||
|
requires_python = ">=3.9"
|
||||||
|
summary = "A lil' TOML writer"
|
||||||
|
groups = ["dev"]
|
||||||
|
files = [
|
||||||
|
{file = "tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90"},
|
||||||
|
{file = "tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tomlkit"
|
name = "tomlkit"
|
||||||
version = "0.13.2"
|
version = "0.13.2"
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "delta-barth"
|
name = "delta-barth"
|
||||||
version = "0.5.7dev1"
|
version = "0.5.7"
|
||||||
description = "workflows and pipelines for the Python-based Plugin of Delta Barth's ERP system"
|
description = "workflows and pipelines for the Python-based Plugin of Delta Barth's ERP system"
|
||||||
authors = [
|
authors = [
|
||||||
{name = "Florian Förster", email = "f.foerster@d-opt.com"},
|
{name = "Florian Förster", email = "f.foerster@d-opt.com"},
|
||||||
]
|
]
|
||||||
dependencies = ["scikit-learn>=1.6.1", "pandas>=2.2.3", "xgboost>=2.1.4", "joblib>=1.4.2", "typing-extensions>=4.12.2", "requests>=2.32.3", "pydantic>=2.10.6", "dopt-basics>=0.1.2", "SQLAlchemy>=2.0.39"]
|
dependencies = ["scikit-learn>=1.6.1", "pandas>=2.2.3", "xgboost>=2.1.4", "joblib>=1.4.2", "typing-extensions>=4.12.2", "requests>=2.32.3", "pydantic>=2.10.6", "dopt-basics>=0.1.3", "SQLAlchemy>=2.0.39"]
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = {text = "LicenseRef-Proprietary"}
|
license = {text = "LicenseRef-Proprietary"}
|
||||||
@@ -44,7 +44,8 @@ filterwarnings = [
|
|||||||
]
|
]
|
||||||
markers = [
|
markers = [
|
||||||
"api_con_required: tests require an API connection (deselect with '-m \"not api_con_required\"')",
|
"api_con_required: tests require an API connection (deselect with '-m \"not api_con_required\"')",
|
||||||
"new: to test only new tests, usually removed afterwards (deselect with '-m \"not quick\"')",
|
"new: to test only new tests, usually removed afterwards (deselect with '-m \"not new\"')",
|
||||||
|
"forecast: main components of forecast pipeline (deselect with '-m \"not forecast\"')"
|
||||||
]
|
]
|
||||||
log_cli = true
|
log_cli = true
|
||||||
|
|
||||||
@@ -73,7 +74,7 @@ directory = "reports/coverage"
|
|||||||
|
|
||||||
|
|
||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.5.7dev1"
|
current_version = "0.5.7"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
@@ -145,6 +146,7 @@ dev = [
|
|||||||
"pdoc3>=0.11.5",
|
"pdoc3>=0.11.5",
|
||||||
"bump-my-version>=1.1.1",
|
"bump-my-version>=1.1.1",
|
||||||
"nox>=2025.2.9",
|
"nox>=2025.2.9",
|
||||||
|
"tomli-w>=1.2.0",
|
||||||
]
|
]
|
||||||
nb = [
|
nb = [
|
||||||
"jupyterlab>=4.3.5",
|
"jupyterlab>=4.3.5",
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from dataclasses import asdict
|
|||||||
from datetime import datetime as Datetime
|
from datetime import datetime as Datetime
|
||||||
from typing import TYPE_CHECKING, Final, TypeAlias, cast
|
from typing import TYPE_CHECKING, Final, TypeAlias, cast
|
||||||
|
|
||||||
import dopt_basics.datetime
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import scipy.stats
|
import scipy.stats
|
||||||
@@ -16,7 +15,6 @@ import sqlalchemy as sql
|
|||||||
|
|
||||||
# --- new: for calculating timedelta
|
# --- new: for calculating timedelta
|
||||||
from dateutil.relativedelta import relativedelta
|
from dateutil.relativedelta import relativedelta
|
||||||
from dopt_basics.datetime import TimeUnitsTimedelta
|
|
||||||
from sklearn.metrics import mean_absolute_error, r2_score
|
from sklearn.metrics import mean_absolute_error, r2_score
|
||||||
from sklearn.model_selection import KFold, RandomizedSearchCV
|
from sklearn.model_selection import KFold, RandomizedSearchCV
|
||||||
from xgboost import XGBRegressor
|
from xgboost import XGBRegressor
|
||||||
@@ -35,7 +33,6 @@ from delta_barth.constants import (
|
|||||||
DEFAULT_DB_ERR_CODE,
|
DEFAULT_DB_ERR_CODE,
|
||||||
DUMMY_DATA_PATH,
|
DUMMY_DATA_PATH,
|
||||||
FEATURES_SALES_PROGNOSIS,
|
FEATURES_SALES_PROGNOSIS,
|
||||||
SALES_BASE_NUM_DATAPOINTS_MONTHS,
|
|
||||||
SALES_MIN_NUM_DATAPOINTS,
|
SALES_MIN_NUM_DATAPOINTS,
|
||||||
)
|
)
|
||||||
from delta_barth.errors import STATUS_HANDLER, wrap_result
|
from delta_barth.errors import STATUS_HANDLER, wrap_result
|
||||||
@@ -210,17 +207,25 @@ def _process_sales(
|
|||||||
df_cust["jahr"] = df_cust[DATE_FEAT].dt.year
|
df_cust["jahr"] = df_cust[DATE_FEAT].dt.year
|
||||||
df_cust["monat"] = df_cust[DATE_FEAT].dt.month
|
df_cust["monat"] = df_cust[DATE_FEAT].dt.month
|
||||||
|
|
||||||
current_year = datetime.now().year
|
monthly_sum_data_only = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
|
||||||
current_month = datetime.now().month
|
|
||||||
|
current_year = datetime.datetime.now().year
|
||||||
|
current_month = datetime.datetime.now().month
|
||||||
years = range(df_cust["jahr"].min(), current_year + 1)
|
years = range(df_cust["jahr"].min(), current_year + 1)
|
||||||
|
|
||||||
old_monthly_sum = df_cust.groupby(["jahr", "monat"])[SALES_FEAT].sum().reset_index()
|
|
||||||
|
|
||||||
all_month_year_combinations = pd.DataFrame(
|
all_month_year_combinations = pd.DataFrame(
|
||||||
[(year, month) for year in years for month in range(1, 13) if (year < current_year or (year == current_year and month <= current_month))], columns=["jahr", "monat"]
|
[
|
||||||
|
(year, month)
|
||||||
|
for year in years
|
||||||
|
for month in range(1, 13)
|
||||||
|
if (year < current_year or (year == current_year and month <= current_month))
|
||||||
|
],
|
||||||
|
columns=["jahr", "monat"],
|
||||||
)
|
)
|
||||||
|
|
||||||
monthly_sum = pd.merge(all_month_year_combinations, old_monthly_sum, on=["jahr", "monat"], how="left")
|
monthly_sum = pd.merge(
|
||||||
|
all_month_year_combinations, monthly_sum_data_only, on=["jahr", "monat"], how="left"
|
||||||
|
)
|
||||||
monthly_sum[SALES_FEAT] = monthly_sum[SALES_FEAT].fillna(0)
|
monthly_sum[SALES_FEAT] = monthly_sum[SALES_FEAT].fillna(0)
|
||||||
monthly_sum[DATE_FEAT] = (
|
monthly_sum[DATE_FEAT] = (
|
||||||
monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
|
monthly_sum["monat"].astype(str) + "." + monthly_sum["jahr"].astype(str)
|
||||||
@@ -256,52 +261,24 @@ def _process_sales(
|
|||||||
best_score_r2: float | None = None
|
best_score_r2: float | None = None
|
||||||
best_start_year: int | None = None
|
best_start_year: int | None = None
|
||||||
too_few_month_points: bool = True
|
too_few_month_points: bool = True
|
||||||
|
|
||||||
stride = dopt_basics.datetime.timedelta_from_val(365, TimeUnitsTimedelta.DAYS)
|
|
||||||
dates = cast(pd.DatetimeIndex, monthly_sum.index)
|
|
||||||
min_date = dates.min()
|
|
||||||
|
|
||||||
|
dates = cast(pd.DatetimeIndex, monthly_sum.index)
|
||||||
# baseline: 3 years - 36 months
|
# baseline: 3 years - 36 months
|
||||||
starting_date = datetime.datetime.now() - relativedelta(months=36)
|
starting_date = datetime.datetime.now() - relativedelta(months=36)
|
||||||
|
|
||||||
def get_index_date(
|
target_index, _ = next(
|
||||||
dates: pd.DatetimeIndex,
|
((i, True) for i, date in enumerate(dates) if date >= starting_date),
|
||||||
starting_date: datetime.datetime | pd.Timestamp,
|
(len(dates) - 1, False),
|
||||||
) -> tuple[pd.Timestamp, bool]:
|
)
|
||||||
target, succ = next(
|
|
||||||
((date, True) for date in dates if date >= starting_date), (dates[-1], False)
|
|
||||||
)
|
|
||||||
return target, succ
|
|
||||||
|
|
||||||
first_date, succ = get_index_date(dates, starting_date)
|
for add_year, date_idx in enumerate(range(target_index, -1, -12)):
|
||||||
if not succ:
|
first_date = dates[date_idx]
|
||||||
# !! return early
|
|
||||||
...
|
|
||||||
|
|
||||||
date_span = first_date - min_date
|
|
||||||
steps = date_span.days // stride.days
|
|
||||||
|
|
||||||
for step in range(steps + 1):
|
|
||||||
print("step: ", step)
|
|
||||||
target_date = first_date - step * stride
|
|
||||||
print("target date: ", target_date)
|
|
||||||
split_date = dates[-6]
|
split_date = dates[-6]
|
||||||
|
|
||||||
index_date, succ = get_index_date(dates, target_date)
|
|
||||||
|
|
||||||
if not succ:
|
|
||||||
break
|
|
||||||
|
|
||||||
if index_date >= split_date:
|
|
||||||
print("Skip because of date difference")
|
|
||||||
continue
|
|
||||||
|
|
||||||
train = cast(
|
train = cast(
|
||||||
pd.DataFrame,
|
pd.DataFrame,
|
||||||
monthly_sum.loc[index_date:split_date].copy(), # type: ignore
|
monthly_sum.loc[first_date:split_date].copy(), # type: ignore
|
||||||
)
|
)
|
||||||
print(train)
|
|
||||||
print("Length train: ", len(train))
|
|
||||||
test = cast(
|
test = cast(
|
||||||
pd.DataFrame,
|
pd.DataFrame,
|
||||||
monthly_sum.loc[split_date:].copy(), # type: ignore
|
monthly_sum.loc[split_date:].copy(), # type: ignore
|
||||||
@@ -311,7 +288,7 @@ def _process_sales(
|
|||||||
|
|
||||||
# test set size fixed at 6 --> first iteration: baseline - 6 entries
|
# test set size fixed at 6 --> first iteration: baseline - 6 entries
|
||||||
# for each new year 10 new data points (i.e., sales strictly positive) needed
|
# for each new year 10 new data points (i.e., sales strictly positive) needed
|
||||||
if len(train[train[SALES_FEAT] > 0]) >= 30 + 10 * step:
|
if len(train[train[SALES_FEAT] > 0]) >= (base_num_data_points_months + 10 * add_year):
|
||||||
too_few_month_points = False
|
too_few_month_points = False
|
||||||
|
|
||||||
rand = RandomizedSearchCV(
|
rand = RandomizedSearchCV(
|
||||||
@@ -334,12 +311,11 @@ def _process_sales(
|
|||||||
best_params = cast(BestParametersXGBRegressor, rand.best_params_)
|
best_params = cast(BestParametersXGBRegressor, rand.best_params_)
|
||||||
best_score_mae = error
|
best_score_mae = error
|
||||||
best_score_r2 = cast(float, r2_score(y_test, y_pred))
|
best_score_r2 = cast(float, r2_score(y_test, y_pred))
|
||||||
# --- new: use target_date for best_start_year
|
# --- new: use first_date for best_start_year
|
||||||
best_start_year = target_date.year
|
best_start_year = first_date.year
|
||||||
# --- new: store best_estimator
|
# --- new: store best_estimator
|
||||||
best_estimator = copy.copy(rand.best_estimator_)
|
best_estimator = copy.copy(rand.best_estimator_)
|
||||||
|
|
||||||
# ?? --- new: use best_estimator to calculate future values and store them in forecast
|
|
||||||
if best_estimator is not None:
|
if best_estimator is not None:
|
||||||
X_future = pd.DataFrame(
|
X_future = pd.DataFrame(
|
||||||
{"jahr": future_dates.year, "monat": future_dates.month}, index=future_dates
|
{"jahr": future_dates.year, "monat": future_dates.month}, index=future_dates
|
||||||
@@ -456,7 +432,7 @@ def pipeline_sales_forecast(
|
|||||||
pipe = _process_sales(
|
pipe = _process_sales(
|
||||||
pipe,
|
pipe,
|
||||||
min_num_data_points=SALES_MIN_NUM_DATAPOINTS,
|
min_num_data_points=SALES_MIN_NUM_DATAPOINTS,
|
||||||
base_num_data_points_months=SALES_BASE_NUM_DATAPOINTS_MONTHS,
|
base_num_data_points_months=SESSION.cfg.forecast.threshold_month_data_points,
|
||||||
)
|
)
|
||||||
if pipe.statistics is not None:
|
if pipe.statistics is not None:
|
||||||
res = _write_sales_forecast_stats_wrapped(pipe.statistics)
|
res = _write_sales_forecast_stats_wrapped(pipe.statistics)
|
||||||
|
|||||||
43
src/delta_barth/config.py
Normal file
43
src/delta_barth/config.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import dopt_basics.configs
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
# Root model of the application configuration file.
class Config(BaseModel):
    # Settings of the ``[forecast]`` table.
    # ``CfgForecast`` is declared further down in this module, so this
    # annotation is a forward reference.
    forecast: CfgForecast
|
||||||
|
|
||||||
|
|
||||||
|
# Forecast-related settings; nested under ``Config.forecast``.
class CfgForecast(BaseModel):
    # Integer threshold for the number of monthly data points
    # (presumably the baseline of months with sales required before a
    # forecast model is trained -- confirm against the forecast pipeline).
    threshold_month_data_points: int
|
||||||
|
|
||||||
|
|
||||||
|
class LazyCfgLoader:
    """Read a TOML config file on first access and cache the parsed result.

    The path is resolved and checked when the loader is created; the file
    content itself is not read until ``get`` (or ``reload``) is called.
    """

    def __init__(
        self,
        cfg_path: Path,
    ) -> None:
        """Store the resolved config path; the file must already exist.

        Parameters
        ----------
        cfg_path : Path
            location of the config file

        Raises
        ------
        AssertionError
            if the resolved path does not exist or is not a file
        """
        resolved = cfg_path.resolve()
        assert resolved.exists(), f"config path {resolved} seems not to exist"
        assert resolved.is_file(), f"config path {resolved} seems not to be a file"
        # nothing is parsed yet: ``None`` marks the "not loaded" state
        self._cfg: Config | None = None
        self._path = resolved

    @property
    def path(self) -> Path:
        """Resolved path of the config file."""
        return self._path

    def _load(self) -> Config:
        """Read the file at ``path`` and build a ``Config`` from its content."""
        raw = dopt_basics.configs.load_toml(self.path)

        return Config(**raw)

    def reload(self) -> None:
        """Re-read the file unconditionally and replace the cached config."""
        self._cfg = self._load()

    def get(self) -> Config:
        """Return the cached config, loading it first if necessary."""
        cached = self._cfg
        if cached is None:
            cached = self._cfg = self._load()
        return cached
|
||||||
@@ -5,6 +5,7 @@ from typing import Final
|
|||||||
from delta_barth.types import DualDict, HttpContentHeaders
|
from delta_barth.types import DualDict, HttpContentHeaders
|
||||||
|
|
||||||
# ** config
|
# ** config
|
||||||
|
CFG_FILENAME: Final[str] = "dopt-cfg.toml"
|
||||||
|
|
||||||
# ** lib path
|
# ** lib path
|
||||||
lib_path = Path(__file__).parent
|
lib_path = Path(__file__).parent
|
||||||
@@ -63,4 +64,6 @@ FEATURES_SALES_PROGNOSIS: Final[frozenset[str]] = frozenset(
|
|||||||
# ** Pipelines
|
# ** Pipelines
|
||||||
# ** Forecast
|
# ** Forecast
|
||||||
SALES_MIN_NUM_DATAPOINTS: Final[int] = 36
|
SALES_MIN_NUM_DATAPOINTS: Final[int] = 36
|
||||||
SALES_BASE_NUM_DATAPOINTS_MONTHS: Final[int] = 36
|
# !! now in config
|
||||||
|
# TODO: remove once the config-based value has proven stable
|
||||||
|
# SALES_BASE_NUM_DATAPOINTS_MONTHS: Final[int] = 36
|
||||||
|
|||||||
2
src/delta_barth/dopt-cfg.toml
Normal file
2
src/delta_barth/dopt-cfg.toml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
[forecast]
|
||||||
|
threshold_month_data_points = 28
|
||||||
@@ -31,6 +31,8 @@ logger_status = logging.getLogger("delta_barth.status")
|
|||||||
logger_status.setLevel(logging.DEBUG)
|
logger_status.setLevel(logging.DEBUG)
|
||||||
logger_session = logging.getLogger("delta_barth.session")
|
logger_session = logging.getLogger("delta_barth.session")
|
||||||
logger_session.setLevel(logging.DEBUG)
|
logger_session.setLevel(logging.DEBUG)
|
||||||
|
logger_config = logging.getLogger("delta_barth.config")
|
||||||
|
logger_config.setLevel(logging.DEBUG)
|
||||||
logger_management = logging.getLogger("delta_barth.management")
|
logger_management = logging.getLogger("delta_barth.management")
|
||||||
logger_management.setLevel(logging.DEBUG)
|
logger_management.setLevel(logging.DEBUG)
|
||||||
logger_wrapped_results = logging.getLogger("delta_barth.wrapped_results")
|
logger_wrapped_results = logging.getLogger("delta_barth.wrapped_results")
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Final
|
from typing import TYPE_CHECKING, Final
|
||||||
|
|
||||||
@@ -14,12 +15,19 @@ from delta_barth.api.common import (
|
|||||||
LoginResponse,
|
LoginResponse,
|
||||||
validate_credentials,
|
validate_credentials,
|
||||||
)
|
)
|
||||||
from delta_barth.constants import API_CON_TIMEOUT, DB_ECHO
|
from delta_barth.config import LazyCfgLoader
|
||||||
|
from delta_barth.constants import (
|
||||||
|
API_CON_TIMEOUT,
|
||||||
|
CFG_FILENAME,
|
||||||
|
DB_ECHO,
|
||||||
|
LIB_PATH,
|
||||||
|
)
|
||||||
from delta_barth.errors import STATUS_HANDLER
|
from delta_barth.errors import STATUS_HANDLER
|
||||||
from delta_barth.logging import logger_session as logger
|
from delta_barth.logging import logger_session as logger
|
||||||
from delta_barth.types import DelBarApiError, Status
|
from delta_barth.types import DelBarApiError, Status
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from delta_barth.config import Config
|
||||||
from delta_barth.types import ApiCredentials, HttpContentHeaders
|
from delta_barth.types import ApiCredentials, HttpContentHeaders
|
||||||
|
|
||||||
|
|
||||||
@@ -41,6 +49,7 @@ class Session:
|
|||||||
base_headers: HttpContentHeaders,
|
base_headers: HttpContentHeaders,
|
||||||
db_folder: str = "data",
|
db_folder: str = "data",
|
||||||
logging_folder: str = "logs",
|
logging_folder: str = "logs",
|
||||||
|
cfg_folder: str = "config",
|
||||||
) -> None:
|
) -> None:
|
||||||
self._setup: bool = False
|
self._setup: bool = False
|
||||||
self._data_path: Path | None = None
|
self._data_path: Path | None = None
|
||||||
@@ -49,6 +58,10 @@ class Session:
|
|||||||
self._db_engine: sql.Engine | None = None
|
self._db_engine: sql.Engine | None = None
|
||||||
self._logging_dir: Path | None = None
|
self._logging_dir: Path | None = None
|
||||||
self._logging_folder = logging_folder
|
self._logging_folder = logging_folder
|
||||||
|
self._cfg_path: Path | None = None
|
||||||
|
self._cfg_folder = cfg_folder
|
||||||
|
self._cfg_loader: LazyCfgLoader | None = None
|
||||||
|
self._cfg: Config | None = None
|
||||||
self._creds: ApiCredentials | None = None
|
self._creds: ApiCredentials | None = None
|
||||||
self._base_url: str | None = None
|
self._base_url: str | None = None
|
||||||
self._headers = base_headers
|
self._headers = base_headers
|
||||||
@@ -59,6 +72,7 @@ class Session:
|
|||||||
# at this point: no logging configured
|
# at this point: no logging configured
|
||||||
assert not self._setup, "tried to setup session twice"
|
assert not self._setup, "tried to setup session twice"
|
||||||
self._setup_logging()
|
self._setup_logging()
|
||||||
|
self._setup_config()
|
||||||
self._setup_db_management()
|
self._setup_db_management()
|
||||||
self._setup = True
|
self._setup = True
|
||||||
logger.info("[SESSION] Setup procedure successful")
|
logger.info("[SESSION] Setup procedure successful")
|
||||||
@@ -68,6 +82,32 @@ class Session:
|
|||||||
assert self._data_path is not None, "accessed data path not set"
|
assert self._data_path is not None, "accessed data path not set"
|
||||||
return self._data_path
|
return self._data_path
|
||||||
|
|
||||||
|
@property
def cfg_path(self) -> Path:
    """Path of the session's config file inside the data directory.

    Once the session is set up the stored path is returned directly.
    Before that, the path is derived from ``data_path`` and the config
    folder name, and the config folder is created if it is missing
    (its parent must already exist).
    """
    if self._setup and self._cfg_path is not None:
        return self._cfg_path

    cfg_dir = (self.data_path / self._cfg_folder).resolve()
    if not cfg_dir.exists():
        cfg_dir.mkdir(parents=False)
    self._cfg_path = cfg_dir / CFG_FILENAME
    return self._cfg_path
|
||||||
|
|
||||||
|
@property
def cfg(self) -> Config:
    """Parsed session configuration.

    Raises
    ------
    AssertionError
        if the configuration has not been set on the session yet
    """
    loaded = self._cfg
    assert loaded is not None, "tried to access not set config from session"
    return loaded
|
||||||
|
|
||||||
|
def _setup_config(self) -> None:
    """Make sure a config file exists for this session, then load it.

    If no file is present at ``cfg_path``, the config file located next
    to the library (``LIB_PATH / CFG_FILENAME``) is copied there first.
    The loader and the parsed config are then stored on the session.
    """
    target = self.cfg_path
    if not target.exists():
        # first run: seed the session's config folder with the shipped file
        shutil.copyfile(LIB_PATH / CFG_FILENAME, target)

    loader = LazyCfgLoader(target)
    self._cfg_loader = loader
    self._cfg = loader.get()
    logger.info("[SESSION] Successfully read and setup config")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def db_engine(self) -> sql.Engine:
|
def db_engine(self) -> sql.Engine:
|
||||||
assert self._db_engine is not None, "accessed database engine not set"
|
assert self._db_engine is not None, "accessed database engine not set"
|
||||||
@@ -78,10 +118,10 @@ class Session:
|
|||||||
if self._db_path is not None and self._setup:
|
if self._db_path is not None and self._setup:
|
||||||
return self._db_path
|
return self._db_path
|
||||||
|
|
||||||
db_root = (self.data_path / self._db_folder).resolve()
|
root = (self.data_path / self._db_folder).resolve()
|
||||||
db_path = db_root / "dopt-data.db"
|
db_path = root / "dopt-data.db"
|
||||||
if not db_root.exists():
|
if not root.exists():
|
||||||
db_root.mkdir(parents=False)
|
root.mkdir(parents=False)
|
||||||
self._db_path = db_path
|
self._db_path = db_path
|
||||||
return self._db_path
|
return self._db_path
|
||||||
|
|
||||||
|
|||||||
2
tests/_test_data/dopt-cfg.toml
Normal file
2
tests/_test_data/dopt-cfg.toml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
[forecast]
|
||||||
|
threshold_month_data_points = 28
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import datetime
|
||||||
from datetime import datetime as Datetime
|
from datetime import datetime as Datetime
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
@@ -255,6 +256,7 @@ def test_preprocess_sales_FailOnTargetFeature(
|
|||||||
assert pipe.results is None
|
assert pipe.results is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.forecast
|
||||||
def test_process_sales_Success(sales_data_real_preproc):
|
def test_process_sales_Success(sales_data_real_preproc):
|
||||||
data = sales_data_real_preproc.copy()
|
data = sales_data_real_preproc.copy()
|
||||||
pipe = PipeResult(data, STATUS_HANDLER.SUCCESS)
|
pipe = PipeResult(data, STATUS_HANDLER.SUCCESS)
|
||||||
@@ -277,6 +279,7 @@ def test_process_sales_Success(sales_data_real_preproc):
|
|||||||
assert pipe.statistics.xgb_params is not None
|
assert pipe.statistics.xgb_params is not None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.forecast
|
||||||
def test_process_sales_FailTooFewPoints(sales_data_real_preproc):
|
def test_process_sales_FailTooFewPoints(sales_data_real_preproc):
|
||||||
data = sales_data_real_preproc.copy()
|
data = sales_data_real_preproc.copy()
|
||||||
data = data.iloc[:20, :]
|
data = data.iloc[:20, :]
|
||||||
@@ -303,6 +306,7 @@ def test_process_sales_FailTooFewPoints(sales_data_real_preproc):
|
|||||||
assert pipe.statistics.xgb_params is None
|
assert pipe.statistics.xgb_params is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.forecast
|
||||||
def test_process_sales_FailTooFewMonthPoints(sales_data_real_preproc):
|
def test_process_sales_FailTooFewMonthPoints(sales_data_real_preproc):
|
||||||
data = sales_data_real_preproc.copy()
|
data = sales_data_real_preproc.copy()
|
||||||
pipe = PipeResult(data, STATUS_HANDLER.SUCCESS)
|
pipe = PipeResult(data, STATUS_HANDLER.SUCCESS)
|
||||||
@@ -329,8 +333,19 @@ def test_process_sales_FailTooFewMonthPoints(sales_data_real_preproc):
|
|||||||
assert pipe.statistics.xgb_params is None
|
assert pipe.statistics.xgb_params is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.forecast
|
||||||
def test_process_sales_FailNoReliableForecast(sales_data_real_preproc):
|
def test_process_sales_FailNoReliableForecast(sales_data_real_preproc):
|
||||||
data = sales_data_real_preproc.copy()
|
# prepare fake data
|
||||||
|
df = sales_data_real_preproc.copy()
|
||||||
|
f_dates = "buchungs_datum"
|
||||||
|
end = datetime.datetime.now()
|
||||||
|
start = df[f_dates].max()
|
||||||
|
fake_dates = pd.date_range(start, end, freq="MS")
|
||||||
|
fake_data = [(1234, 1014, 1024, 1000, 10, date) for date in fake_dates]
|
||||||
|
fake_df = pd.DataFrame(fake_data, columns=df.columns)
|
||||||
|
enhanced_df = pd.concat((df, fake_df), ignore_index=True)
|
||||||
|
|
||||||
|
data = enhanced_df.copy()
|
||||||
data["betrag"] = 10000
|
data["betrag"] = 10000
|
||||||
print(data["betrag"])
|
print(data["betrag"])
|
||||||
data = data.iloc[:20000, :]
|
data = data.iloc[:20000, :]
|
||||||
@@ -340,7 +355,7 @@ def test_process_sales_FailNoReliableForecast(sales_data_real_preproc):
|
|||||||
def __init__(self, *args, **kwargs) -> None:
|
def __init__(self, *args, **kwargs) -> None:
|
||||||
class Predictor:
|
class Predictor:
|
||||||
def predict(self, *args, **kwargs):
|
def predict(self, *args, **kwargs):
|
||||||
return np.array([1, 1, 1, 1])
|
return np.array([1, 1, 1, 1], dtype=np.float64)
|
||||||
|
|
||||||
self.best_estimator_ = Predictor()
|
self.best_estimator_ = Predictor()
|
||||||
|
|
||||||
@@ -354,7 +369,7 @@ def test_process_sales_FailNoReliableForecast(sales_data_real_preproc):
|
|||||||
pipe = fc._process_sales(
|
pipe = fc._process_sales(
|
||||||
pipe,
|
pipe,
|
||||||
min_num_data_points=1,
|
min_num_data_points=1,
|
||||||
base_num_data_points_months=-100,
|
base_num_data_points_months=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert pipe.status != STATUS_HANDLER.SUCCESS
|
assert pipe.status != STATUS_HANDLER.SUCCESS
|
||||||
@@ -415,27 +430,16 @@ def test_export_on_fail():
|
|||||||
assert res.status.description == status.description
|
assert res.status.description == status.description
|
||||||
|
|
||||||
|
|
||||||
@patch("delta_barth.analysis.forecast.SALES_BASE_NUM_DATAPOINTS_MONTHS", 1)
|
|
||||||
def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp, session):
|
def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp, session):
|
||||||
with patch(
|
with (
|
||||||
"delta_barth.analysis.forecast.get_sales_prognosis_data",
|
patch(
|
||||||
) as mock:
|
"delta_barth.analysis.forecast.get_sales_prognosis_data",
|
||||||
mock.return_value = exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS
|
) as get_mock,
|
||||||
with patch("delta_barth.analysis.forecast.SESSION", session):
|
patch("delta_barth.analysis.forecast.SESSION", session) as sess_mock,
|
||||||
result = fc.pipeline_sales_forecast(None) # type: ignore
|
):
|
||||||
print(result)
|
get_mock.return_value = exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS
|
||||||
assert result.status == STATUS_HANDLER.SUCCESS
|
sess_mock.cfg.forecast.threshold_month_data_points = 1
|
||||||
assert len(result.response.daten) > 0
|
|
||||||
|
|
||||||
|
|
||||||
@patch("delta_barth.analysis.forecast.SALES_BASE_NUM_DATAPOINTS_MONTHS", 1)
|
|
||||||
def test_pipeline_sales_forecast_FailDbWrite(exmpl_api_sales_prognosis_resp):
|
|
||||||
with patch(
|
|
||||||
"delta_barth.analysis.forecast.get_sales_prognosis_data",
|
|
||||||
) as mock:
|
|
||||||
mock.return_value = exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS
|
|
||||||
result = fc.pipeline_sales_forecast(None) # type: ignore
|
result = fc.pipeline_sales_forecast(None) # type: ignore
|
||||||
print(result)
|
|
||||||
assert result.status == STATUS_HANDLER.SUCCESS
|
assert result.status == STATUS_HANDLER.SUCCESS
|
||||||
assert len(result.response.daten) > 0
|
assert len(result.response.daten) > 0
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from unittest.mock import patch
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
|
import tomli_w
|
||||||
|
|
||||||
import delta_barth.session
|
import delta_barth.session
|
||||||
from delta_barth.api.requests import SalesPrognosisResponse
|
from delta_barth.api.requests import SalesPrognosisResponse
|
||||||
@@ -33,6 +34,28 @@ def api_base_url(credentials) -> str:
|
|||||||
return credentials["base_url"]
|
return credentials["base_url"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def pth_dummy_cfg() -> Path:
    """Path to the dummy config file stored with the test data.

    The location is built relative to the current working directory, so
    the test run has to be started from the expected directory.
    """
    cwd = Path.cwd()
    parent_name = cwd.parent.name.lower()
    assert "barth" in parent_name, "not in project root directory"
    dummy_cfg = cwd / "./tests/_test_data/dopt-cfg.toml"
    assert dummy_cfg.exists(), "file to dummy CFG not found"
    return dummy_cfg
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="function")
def pth_cfg(pth_dummy_cfg, tmp_path) -> Path:
    """Per-test copy of the dummy config inside pytest's ``tmp_path``.

    The dummy config is parsed and written out again as TOML, so each test
    gets its own file that it can modify without touching the original.
    """
    with pth_dummy_cfg.open("rb") as src:
        parsed = tomllib.load(src)

    copy_path = tmp_path / "dummy_cfg.toml"
    copy_path.touch()
    with copy_path.open("wb") as dst:
        tomli_w.dump(parsed, dst)

    return copy_path
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def sales_data_real() -> pd.DataFrame:
|
def sales_data_real() -> pd.DataFrame:
|
||||||
pwd = Path.cwd()
|
pwd = Path.cwd()
|
||||||
|
|||||||
40
tests/test_config.py
Normal file
40
tests/test_config.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import tomllib
|
||||||
|
|
||||||
|
import tomli_w
|
||||||
|
|
||||||
|
from delta_barth import config
|
||||||
|
|
||||||
|
|
||||||
|
def test_CfgLoader_Init(pth_cfg):
    """A fresh loader keeps the given path and has not parsed anything yet."""
    cfg_loader = config.LazyCfgLoader(pth_cfg)

    assert cfg_loader._cfg is None
    assert cfg_loader.path == pth_cfg
|
||||||
|
|
||||||
|
|
||||||
|
def test_CfgLoader_Get(pth_cfg):
    """``get`` parses the file and yields a ``Config`` with the dummy value."""
    cfg = config.LazyCfgLoader(pth_cfg).get()

    assert isinstance(cfg, config.Config)
    assert cfg.forecast.threshold_month_data_points == 28
|
||||||
|
|
||||||
|
|
||||||
|
def test_CfgLoader_Reload(pth_cfg):
    """Changes on disk only become visible after an explicit ``reload``."""
    cfg_loader = config.LazyCfgLoader(pth_cfg)

    first_cfg = cfg_loader.get()
    assert isinstance(first_cfg, config.Config)
    assert first_cfg.forecast.threshold_month_data_points == 28

    # rewrite the config file with a different threshold
    with pth_cfg.open("rb") as src:
        on_disk = tomllib.load(src)
    on_disk["forecast"]["threshold_month_data_points"] = 30
    with pth_cfg.open("wb") as dst:
        tomli_w.dump(on_disk, dst)

    # the object obtained before the rewrite still holds the old value
    assert first_cfg.forecast.threshold_month_data_points == 28

    cfg_loader.reload()
    second_cfg = cfg_loader.get()
    assert isinstance(second_cfg, config.Config)
    assert second_cfg.forecast.threshold_month_data_points == 30
|
||||||
@@ -1,17 +1,15 @@
|
|||||||
import importlib
|
|
||||||
import json
|
import json
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import sqlalchemy as sql
|
import sqlalchemy as sql
|
||||||
|
|
||||||
import delta_barth.pipelines
|
|
||||||
from delta_barth import databases as db
|
from delta_barth import databases as db
|
||||||
from delta_barth import pipelines as pl
|
from delta_barth import pipelines as pl
|
||||||
from delta_barth.errors import STATUS_HANDLER
|
from delta_barth.errors import STATUS_HANDLER
|
||||||
|
|
||||||
|
|
||||||
def test_write_performance_metrics(session):
|
def test_write_performance_metrics_Success(session):
|
||||||
pipe_name = "test_pipe"
|
pipe_name = "test_pipe"
|
||||||
t_start = 20_000_000_000
|
t_start = 20_000_000_000
|
||||||
t_end = 30_000_000_000
|
t_end = 30_000_000_000
|
||||||
@@ -33,14 +31,31 @@ def test_write_performance_metrics(session):
|
|||||||
assert metrics.execution_duration == 10
|
assert metrics.execution_duration == 10
|
||||||
|
|
||||||
|
|
||||||
@patch("delta_barth.analysis.forecast.SALES_BASE_NUM_DATAPOINTS_MONTHS", 1)
|
def test_write_performance_metrics_FailStartingTime(session):
|
||||||
def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp, session):
|
pipe_name = "test_pipe"
|
||||||
with patch(
|
t_start = 30_000_000_000
|
||||||
"delta_barth.analysis.forecast.get_sales_prognosis_data",
|
t_end = 20_000_000_000
|
||||||
) as mock:
|
|
||||||
mock.return_value = (exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS)
|
with patch("delta_barth.pipelines.SESSION", session):
|
||||||
with patch("delta_barth.pipelines.SESSION", session):
|
with pytest.raises(ValueError):
|
||||||
json_export = pl.pipeline_sales_forecast(None, None)
|
_ = pl._write_performance_metrics(
|
||||||
|
pipeline_name=pipe_name,
|
||||||
|
time_start=t_start,
|
||||||
|
time_end=t_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp, session, monkeypatch):
|
||||||
|
with (
|
||||||
|
patch(
|
||||||
|
"delta_barth.analysis.forecast.get_sales_prognosis_data",
|
||||||
|
) as get_mock,
|
||||||
|
patch("delta_barth.pipelines.SESSION", session),
|
||||||
|
patch("delta_barth.analysis.forecast.SESSION", session) as sess_mock,
|
||||||
|
):
|
||||||
|
get_mock.return_value = (exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS)
|
||||||
|
sess_mock.cfg.forecast.threshold_month_data_points = 1
|
||||||
|
json_export = pl.pipeline_sales_forecast(None, None)
|
||||||
|
|
||||||
assert isinstance(json_export, str)
|
assert isinstance(json_export, str)
|
||||||
parsed_resp = json.loads(json_export)
|
parsed_resp = json.loads(json_export)
|
||||||
@@ -59,7 +74,6 @@ def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp, session):
|
|||||||
assert metrics.execution_duration > 0
|
assert metrics.execution_duration > 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.new
|
|
||||||
def test_sales_prognosis_pipeline_dummy(session):
|
def test_sales_prognosis_pipeline_dummy(session):
|
||||||
with patch("delta_barth.pipelines.SESSION", session):
|
with patch("delta_barth.pipelines.SESSION", session):
|
||||||
json_export = pl.pipeline_sales_forecast_dummy(None, None)
|
json_export = pl.pipeline_sales_forecast_dummy(None, None)
|
||||||
|
|||||||
@@ -62,8 +62,27 @@ def test_session_setup_db_management(tmp_path):
|
|||||||
assert db_path.exists()
|
assert db_path.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_session_setup_config(tmp_path, pth_cfg):
    """Check config-path handling of `Session` before and after `setup()`.

    Before `setup()`: the config file's parent directory exists and equals
    `tmp_path / "cfg_test"`, but the config file itself does not exist.
    After `setup()`: the path is unchanged, the file exists, `_cfg` is set,
    and the loaded config reports a threshold of 28.
    """
    cfg_dirname: str = "cfg_test"
    expected_cfg_dir = tmp_path / cfg_dirname
    data_path = str(tmp_path)

    sess = delta_barth.session.Session(HTTP_BASE_CONTENT_HEADERS, cfg_folder=cfg_dirname)
    sess.set_data_path(data_path)

    # state before setup: directory is there, config file is not
    path_before = sess.cfg_path
    assert path_before.parent.exists()
    assert path_before.parent == expected_cfg_dir
    assert not path_before.exists()

    sess.setup()

    # state after setup: same path, file present, config available
    path_after = sess.cfg_path
    assert path_after == path_before
    assert sess._cfg is not None
    assert path_before.exists()
    assert sess.cfg.forecast.threshold_month_data_points == 28
|
||||||
|
|
||||||
|
|
||||||
@patch("delta_barth.logging.ENABLE_LOGGING", True)
|
@patch("delta_barth.logging.ENABLE_LOGGING", True)
|
||||||
@patch("delta_barth.logging.LOGGING_TO_FILE", True)
|
@patch("delta_barth.logging.LOGGING_TO_FILE", True)
|
||||||
|
@patch("delta_barth.logging.LOGGING_TO_STDERR", True)
|
||||||
def test_session_setup_logging(tmp_path):
|
def test_session_setup_logging(tmp_path):
|
||||||
str_path = str(tmp_path)
|
str_path = str(tmp_path)
|
||||||
foldername: str = "logging_test"
|
foldername: str = "logging_test"
|
||||||
|
|||||||
Reference in New Issue
Block a user