major overhaul for integrated pipeline format

This commit is contained in:
2025-03-12 16:02:00 +01:00
parent 09b64d7c4a
commit b4d853e3ba
4 changed files with 478 additions and 81 deletions

Binary file not shown.

View File

@@ -1,25 +1,107 @@
import importlib
from datetime import datetime as Datetime
from pathlib import Path
from unittest.mock import patch
import pandas as pd
import pytest
from pydantic import ValidationError
from delta_barth.analysis import forecast as fc
from delta_barth.errors import STATUS_HANDLER
from delta_barth.types import DualDict, PipeResult
def test_sales_per_customer_success(sales_data):
    """Forecasting for customer 1133 must report status code 0 and return data."""
    result = fc.sales_per_customer(sales_data, 1133)

    assert result.status.code == 0
    assert result.data is not None
@pytest.fixture(scope="function")
def feature_map() -> DualDict[str, str]:
    """Provide a fresh ``DualDict`` per test that pairs the camelCase field
    names (keys) with their snake_case feature names (values).
    """
    name_pairs = {
        "artikelId": "artikel_refid",
        "firmaId": "firma_refid",
        "betrag": "betrag",
        "menge": "menge",
        "buchungsDatum": "buchungs_datum",
    }
    # Unpacking passes the pairs as keyword arguments, in the same order.
    return DualDict(**name_pairs)
def test_sales_per_customer_too_few_data_points(sales_data):
customer_id = 1000
res = fc.sales_per_customer(sales_data, customer_id)
@pytest.fixture(scope="module")
def target_features() -> frozenset[str]:
    """Provide the immutable set of feature names handed to the preprocessing
    step as ``target_features``; created once per test module.
    """
    return frozenset({"firma_refid", "betrag", "buchungs_datum"})
assert res.status.code == 1
assert res.data is None
@pytest.fixture(scope="function")
def valid_df() -> pd.DataFrame:
    """Build a three-row frame whose ``buchungsDatum`` column holds real
    datetime objects; used as the well-formed input for the DataFrame-to-API
    response conversion test.
    """
    booking_dates = [
        Datetime(2024, 1, 1),
        Datetime(2024, 6, 1),
        Datetime(2024, 10, 26),
    ]
    return pd.DataFrame(
        {
            "artikelId": [1, 2, 3],
            "warengruppeId": [1, 2, 3],
            "firmaId": [100, 200, 300],
            "betrag": [1200.25, 1500.50, 1750.75],
            "menge": [100, 200, 300],
            "buchungsDatum": booking_dates,
        }
    )
@pytest.fixture(scope="function")
def invalid_df() -> pd.DataFrame:
    """Build a three-row frame that matches ``valid_df`` except that
    ``buchungsDatum`` holds plain strings instead of datetimes; the conversion
    test expects a ``ValidationError`` for it.
    """
    not_a_date = ["test", "test2", "test3"]
    return pd.DataFrame(
        {
            "artikelId": [1, 2, 3],
            "warengruppeId": [1, 2, 3],
            "firmaId": [100, 200, 300],
            "betrag": [1200.25, 1500.50, 1750.75],
            "menge": [100, 200, 300],
            "buchungsDatum": not_a_date,
        }
    )
@pytest.fixture(scope="function")
def valid_results() -> pd.DataFrame:
    """Build a three-row result frame (``jahr``, ``monat``, ``betrag``,
    ``vorhersage``) with numeric values in every column.
    """
    return pd.DataFrame(
        {
            "jahr": [2023, 2023, 2024],
            "monat": [1, 2, 3],
            "betrag": [100, 200, 300],
            "vorhersage": [1200.25, 1500.50, 1750.75],
        }
    )
@pytest.fixture(scope="function")
def invalid_results() -> pd.DataFrame:
    """Build a three-row result frame that matches ``valid_results`` except
    that ``vorhersage`` holds strings; the results-conversion test expects a
    ``ValidationError`` for it.
    """
    return pd.DataFrame(
        {
            "jahr": [2023, 2023, 2024],
            "monat": [1, 2, 3],
            "betrag": [100, 200, 300],
            "vorhersage": ["test", "test2", "test3"],
        }
    )
@pytest.fixture(scope="function")
def sales_data_real_preproc(sales_data_real, feature_map) -> pd.DataFrame:
    """Return a copy of ``sales_data_real`` with its columns renamed through
    ``feature_map``.

    A column that has an entry in ``feature_map`` receives the mapped name;
    every other column keeps its name. Working on a copy leaves the frame
    supplied by the ``sales_data_real`` fixture unchanged.
    """
    renamed = sales_data_real.copy()
    renamed.columns = [
        feature_map[column] if column in feature_map else column
        for column in renamed.columns
    ]
    return renamed
def test_parse_api_resp_to_df(exmpl_api_sales_prognosis_resp):
resp = exmpl_api_sales_prognosis_resp
df = fc.parse_api_resp_to_df(resp)
df = fc._parse_api_resp_to_df(resp)
features = set(
(
"artikelId",
@@ -33,26 +115,131 @@ def test_parse_api_resp_to_df(exmpl_api_sales_prognosis_resp):
assert all(col in features for col in df.columns)
def test_preprocess_sales_per_customer(exmpl_api_sales_prognosis_resp):
def test_parse_df_to_api_resp_ValidData(valid_df):
    """A well-formed frame must convert into a response with a non-empty ``daten``."""
    response = fc._parse_df_to_api_resp(valid_df)

    assert len(response.daten) > 0
def test_parse_df_to_api_resp_InvalidData(invalid_df):
    """Converting a frame with string dates must raise ``ValidationError``."""
    with pytest.raises(ValidationError):
        # Only the raised exception matters; the return value is discarded.
        fc._parse_df_to_api_resp(invalid_df)
def test_parse_df_to_results_ValidData(valid_results):
    """A numeric result frame must convert into results with a non-empty ``daten``."""
    parsed = fc._parse_df_to_results(valid_results)

    assert len(parsed.daten) > 0
def test_parse_df_to_results_InvalidData(invalid_results):
    """Converting a result frame with string forecasts must raise ``ValidationError``."""
    with pytest.raises(ValidationError):
        # Only the raised exception matters; the return value is discarded.
        fc._parse_df_to_results(invalid_results)
def test_preprocess_sales_per_customer_Success(
exmpl_api_sales_prognosis_resp,
feature_map,
target_features,
):
resp = exmpl_api_sales_prognosis_resp
feat_mapping: dict[str, str] = {
"artikelId": "artikel_refid",
"firmaId": "firma_refid",
"betrag": "betrag",
"menge": "menge",
"buchungsDatum": "buchungs_datum",
}
target_features: frozenset[str] = frozenset(
(
"firma_refid",
"betrag",
"buchungs_datum",
)
)
df = fc.preprocess_sales_per_customer(
pipe = fc._preprocess_sales_per_customer(
resp,
feature_map=feat_mapping,
feature_map=feature_map,
target_features=target_features,
)
assert pipe.status == STATUS_HANDLER.SUCCESS
assert pipe.data is not None
df = pipe.data
assert len(df.columns) == 5
assert any(feat not in df.columns for feat in feat_mapping.keys())
assert any(feat not in df.columns for feat in feature_map.keys())
def test_preprocess_sales_per_customer_FailOnTargetFeature(
    exmpl_api_sales_prognosis_resp,
    feature_map,
    target_features,
):
    """Preprocessing with unknown target features must end in a non-zero
    status and carry neither data nor results.
    """
    # The fixture value is overridden with names that appear nowhere in
    # ``feature_map``.
    target_features = {"not_known_feature", "test2"}

    outcome = fc._preprocess_sales_per_customer(
        exmpl_api_sales_prognosis_resp,
        feature_map=feature_map,
        target_features=target_features,
    )

    assert outcome.status.code != 0
    assert outcome.data is None
    assert outcome.results is None
def test_sales_per_customer_Success(sales_data_real_preproc):
    """Processing the full preprocessed frame must succeed, keep ``data`` set
    and leave ``results`` empty.
    """
    frame = sales_data_real_preproc.copy()
    # The preprocessing stage is bypassed: the frame is wrapped directly in a
    # successful ``PipeResult`` and handed to the processing stage.
    outcome = fc._process_sales_per_customer(
        PipeResult(frame, STATUS_HANDLER.SUCCESS)
    )

    assert outcome.status == STATUS_HANDLER.SUCCESS
    assert outcome.data is not None
    assert outcome.results is None
def test_sales_per_customer_FailTooFewPoints(sales_data_real_preproc):
    """Processing only the first 20 rows must end in the ``TOO_FEW_POINTS``
    pipe state with neither data nor results.
    """
    truncated = sales_data_real_preproc.copy().iloc[:20, :]
    # The preprocessing stage is bypassed: the truncated frame is wrapped
    # directly in a successful ``PipeResult``.
    outcome = fc._process_sales_per_customer(
        PipeResult(truncated, STATUS_HANDLER.SUCCESS)
    )

    assert outcome.status != STATUS_HANDLER.SUCCESS
    assert outcome.status == STATUS_HANDLER.pipe_states.TOO_FEW_POINTS
    assert outcome.data is None
    assert outcome.results is None
def test_postprocess_sales_per_customer_Success(
    valid_results,
):
    """Post-processing a numeric result frame must succeed, clear ``data``
    and populate ``results``.
    """
    incoming = PipeResult(valid_results, STATUS_HANDLER.SUCCESS)

    outcome = fc._postprocess_sales_per_customer(
        incoming,
        feature_map=DualDict(),
    )

    assert outcome.status == STATUS_HANDLER.SUCCESS
    assert outcome.data is None
    assert outcome.results is not None
def test_postprocess_sales_per_customer_FailValidation(
    invalid_results,
):
    """Post-processing a result frame with string forecasts must fail with a
    status whose description mentions ``ValidationError``, and carry neither
    data nor results.
    """
    incoming = PipeResult(invalid_results, STATUS_HANDLER.SUCCESS)

    outcome = fc._postprocess_sales_per_customer(
        incoming,
        feature_map=DualDict(),
    )

    assert outcome.status != STATUS_HANDLER.SUCCESS
    assert outcome.data is None
    assert outcome.results is None
    assert "ValidationError" in outcome.status.description
@pytest.mark.new
def test_sales_prognosis_pipeline(monkeypatch, exmpl_api_sales_prognosis_resp):
    """Run ``fc.pipeline`` with the sales-prognosis request replaced by a stub.

    The stub returns the example response together with the SUCCESS status.
    The pipeline is expected to finish with SUCCESS and a non-empty
    ``response.daten``.
    """

    def _stub_request(*args, **kwargs):
        # Accepts and ignores any arguments; yields (response, status).
        return exmpl_api_sales_prognosis_resp, STATUS_HANDLER.SUCCESS

    import delta_barth.api.requests

    monkeypatch.setattr(
        delta_barth.api.requests,
        "get_sales_prognosis_data",
        _stub_request,
    )
    # NOTE(review): reloading the module right after patching it presumably
    # rebinds the patched attribute again — confirm whether the monkeypatch
    # step above is still needed alongside the ``patch`` context below.
    importlib.reload(delta_barth.api.requests)

    with patch(
        "delta_barth.api.requests.get_sales_prognosis_data",
        new=_stub_request,
    ):
        # Reload while the patch is active, then run the pipeline.
        importlib.reload(delta_barth.analysis.forecast)  # type: ignore
        outcome = fc.pipeline(None)  # type: ignore

    assert outcome.status == STATUS_HANDLER.SUCCESS
    assert len(outcome.response.daten) > 0

View File

@@ -45,7 +45,7 @@ def _cvt_str_ts(value: str) -> Any:
@pytest.fixture(scope="session")
def sales_data() -> pd.DataFrame:
def sales_data_db_export() -> pd.DataFrame:
pwd = Path.cwd()
assert "barth" in pwd.parent.name.lower(), "not in project root directory"
data_pth = pwd / "./tests/_test_data/swm_f_umsatz_fakt.csv"
@@ -63,6 +63,22 @@ def sales_data() -> pd.DataFrame:
return data
@pytest.fixture(scope="session")
def sales_data_real() -> pd.DataFrame:
    """Load the example sales-prognosis API response as a DataFrame.

    Reads ``tests/_test_data/exmp_sales_prognosis_resp.json`` relative to the
    current working directory, validates the content by constructing a
    ``SalesPrognosisResponse`` from it, and returns the dumped ``daten``
    entries as a frame. Created once per test session.

    Raises:
        AssertionError: if the parent of the working directory does not
            contain "barth" in its name, or the JSON file does not exist.
    """
    pwd = Path.cwd()
    assert "barth" in pwd.parent.name.lower(), "not in project root directory"
    data_pth = pwd / "./tests/_test_data/exmp_sales_prognosis_resp.json"
    assert data_pth.exists(), "file to API sales data not found"

    # JSON is UTF-8 by specification. Without an explicit encoding, ``open``
    # falls back to the platform locale (e.g. cp1252 on Windows), which can
    # mis-decode non-ASCII characters in the payload.
    with open(data_pth, "r", encoding="utf-8") as file:
        raw = json.load(file)

    parsed = SalesPrognosisResponse(**raw)
    records = parsed.model_dump()["daten"]

    return pd.DataFrame(records)
@pytest.fixture(scope="session")
def exmpl_api_sales_prognosis_resp() -> SalesPrognosisResponse:
pwd = Path.cwd()
@@ -74,3 +90,13 @@ def exmpl_api_sales_prognosis_resp() -> SalesPrognosisResponse:
data = json.load(file)
return SalesPrognosisResponse(**data)
@pytest.fixture(scope="session")
def exmpl_api_sales_prognosis_output() -> pd.DataFrame:
    """Load the pickled example frame from
    ``tests/_test_data/exmp_sales_prognosis_ouput.pkl`` relative to the current
    working directory. Created once per test session.

    Raises:
        AssertionError: if the parent of the working directory does not
            contain "barth" in its name, or the pickle file does not exist.
    """
    cwd = Path.cwd()
    assert "barth" in cwd.parent.name.lower(), "not in project root directory"

    pickle_file = cwd / "./tests/_test_data/exmp_sales_prognosis_ouput.pkl"
    assert pickle_file.exists(), "file to API sales data not found"

    return pd.read_pickle(pickle_file)