add preprocessing steps

This commit is contained in:
2025-02-27 13:13:29 +01:00
parent 83d0691d67
commit 5e5486fe53
6 changed files with 136 additions and 8 deletions

Binary file not shown.

View File

@@ -1,3 +1,5 @@
import pytest
from delta_barth.analysis import forecast as fc
@@ -15,3 +17,44 @@ def test_sales_per_customer_too_few_data_points(sales_data):
assert err == 1
assert res is None
def test_parse_api_resp_to_df(exmpl_api_sales_prognosis_resp):
    """Check that parsing the example API response yields only known columns.

    The test passes when every column of the object returned by
    ``fc.parse_api_resp_to_df`` is one of the expected API field names.
    """
    expected_columns = {
        "artikelId",
        "warengruppeId",
        "firmaId",
        "betrag",
        "menge",
        "buchungsDatum",
    }

    parsed = fc.parse_api_resp_to_df(exmpl_api_sales_prognosis_resp)

    # No column outside the expected set may appear in the parsed result.
    assert all(column in expected_columns for column in parsed.columns)
def test_preprocess_sales_per_customer(exmpl_api_sales_prognosis_resp):
    """Check the column layout produced by ``fc.preprocess_sales_per_customer``.

    The example API response is preprocessed with a source-to-target feature
    mapping; the result must expose exactly five columns and must no longer
    contain at least one of the original (source) feature names.
    """
    feat_mapping: dict[str, str] = {
        "artikelId": "artikel_refid",
        "firmaId": "firma_refid",
        "betrag": "betrag",
        "menge": "menge",
        "buchungsDatum": "buchungs_datum",
    }
    target_features: frozenset[str] = frozenset(
        {"firma_refid", "betrag", "buchungs_datum"}
    )

    result = fc.preprocess_sales_per_customer(
        exmpl_api_sales_prognosis_resp,
        feature_map=feat_mapping,
        target_features=target_features,
    )

    column_count = len(result.columns)
    assert column_count == 5
    # "betrag" and "menge" map onto themselves, so only *some* of the source
    # names can be expected to be gone after renaming -- hence "not all".
    assert not all(source_name in result.columns for source_name in feat_mapping)

View File

@@ -1,7 +1,7 @@
import pandas as pd
import pytest
from delta_barth.analysis import parse
from delta_barth.analysis import forecast, parse
from delta_barth.errors import FeaturesMissingError
@@ -10,12 +10,12 @@ def test_check_needed_features():
data = pd.DataFrame(
data=[[1, 2, 3, 4, 5]], columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
)
parse.check_needed_features(data, target_features)
parse._check_needed_features(data, target_features)
data = pd.DataFrame(
data=[[1, 2, 3, 4, 5]], columns=["featX", "feat2", "feat3", "feat4", "feat5"]
)
with pytest.raises(FeaturesMissingError):
parse.check_needed_features(data, target_features)
parse._check_needed_features(data, target_features)
def test_map_features_to_targets():
@@ -23,7 +23,7 @@ def test_map_features_to_targets():
data = pd.DataFrame(
data=[[1, 2, 3, 4, 5]], columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
)
data = parse.map_features_to_targets(data, feature_map)
data = parse._map_features_to_targets(data, feature_map)
assert "feat10" in data.columns
assert "feat20" in data.columns
assert "feat50" in data.columns
@@ -32,3 +32,28 @@ def test_map_features_to_targets():
assert "feat1" not in data.columns
assert "feat2" not in data.columns
assert "feat5" not in data.columns
def test_preprocess_features(exmpl_api_sales_prognosis_resp):
    """Check that ``parse.preprocess_features`` renames columns without dropping any.

    The example API response is first parsed via
    ``forecast.parse_api_resp_to_df``. After preprocessing, the number of
    columns must be unchanged, at least one column label must differ from the
    input, and at least one source feature name must no longer be present.
    """
    feat_mapping: dict[str, str] = {
        "artikelId": "artikel_refid",
        "firmaId": "firma_refid",
        "betrag": "betrag",
        "menge": "menge",
        "buchungsDatum": "buchungs_datum",
    }
    target_features: frozenset[str] = frozenset(
        {"firma_refid", "betrag", "buchungs_datum"}
    )

    raw = forecast.parse_api_resp_to_df(exmpl_api_sales_prognosis_resp)

    # Precondition: every source feature is available before preprocessing.
    assert not any(source_name not in raw.columns for source_name in feat_mapping)

    processed = parse.preprocess_features(raw, feat_mapping, target_features)

    assert len(processed.columns) == len(raw.columns)
    # Element-wise label comparison: at least one position must have changed.
    assert (processed.columns != raw.columns).any()
    # Some source names map onto themselves, so only require that not all of
    # them survive the renaming.
    assert not all(source_name in processed.columns for source_name in feat_mapping)

View File

@@ -1,10 +1,16 @@
from __future__ import annotations
import pickle
import tomllib
from pathlib import Path
from typing import Any, cast
from typing import TYPE_CHECKING, Any, cast
import pandas as pd
import pytest
if TYPE_CHECKING:
from delta_barth.api.common import SalesPrognosisResponse
@pytest.fixture(scope="session")
def credentials() -> dict[str, str]:
@@ -56,3 +62,16 @@ def sales_data() -> pd.DataFrame:
data["buchungs_datum"] = pd.to_datetime(data["buchungs_datum"])
return data
@pytest.fixture(scope="session")
def exmpl_api_sales_prognosis_resp() -> SalesPrognosisResponse:
    """Load the pickled example sales-prognosis API response once per session.

    The pickle file is resolved relative to the current working directory,
    so the test run has to be started from the expected location; both the
    location and the file's existence are asserted before loading.

    Returns:
        The unpickled object, cast to ``SalesPrognosisResponse`` for type
        checkers (no runtime validation is performed).
    """
    working_dir = Path.cwd()
    # Sanity check on the *parent* directory name of the working directory.
    parent_name = working_dir.parent.name.lower()
    assert "barth" in parent_name, "not in project root directory"

    pickle_path = working_dir / "./tests/_test_data/exmp_sales_prognosis_resp.pkl"
    assert pickle_path.exists(), "file to API sales data not found"

    # NOTE: pickle is only acceptable here because the file is a trusted,
    # repository-local test artefact.
    with pickle_path.open("rb") as handle:
        payload = pickle.load(handle)
    return cast("SalesPrognosisResponse", payload)