added feature mapping

This commit is contained in:
Florian Förster 2025-02-26 16:24:34 +01:00
parent 96186110a6
commit 0509ba79c8
4 changed files with 100 additions and 0 deletions

View File

@ -0,0 +1,41 @@
from __future__ import annotations
from collections.abc import Mapping, Set
from typing import TYPE_CHECKING
from delta_barth.errors import FeaturesMissingError
if TYPE_CHECKING:
import pandas as pd
def check_needed_features(
    data: pd.DataFrame,
    features: Set,
) -> None:
    """Verify that ``data`` provides every required feature column.

    Args:
        data: Dataset whose column labels are checked.
        features: Column labels that must be present in ``data``. Any
            iterable of hashable labels is accepted, not only sets.

    Raises:
        FeaturesMissingError: If at least one label in ``features`` is not
            a column of ``data``.
    """
    # ``set(...).difference`` also works when a caller passes a list or tuple,
    # where the bare ``-`` operator would raise ``TypeError``.
    missing_features = set(features).difference(data.columns)
    if missing_features:
        # Sort by string form so the message is stable across runs (set
        # iteration order is not) and mixed label types cannot break sorting.
        missing_sorted = sorted(missing_features, key=str)
        raise FeaturesMissingError(
            "The dataset does not contain all needed features. "
            f"Missing features are: {missing_sorted}"
        )
def map_features_to_targets(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
) -> pd.DataFrame:
    """Relabel the columns of ``data`` according to ``feature_map``.

    A column whose label is a key of ``feature_map`` receives the mapped
    label; every other column keeps its current label. Column order is
    unchanged.

    Args:
        data: Dataset whose columns are relabelled.
        feature_map: Mapping from current column label to new column label.

    Returns:
        The very same ``data`` object that was passed in; its columns are
        reassigned in place, no copy is made.
    """
    # Assigning to ``data.columns`` relabels the caller's frame itself.
    data.columns = [
        feature_map[label] if label in feature_map else label
        for label in data.columns
    ]
    return data

View File

@ -3,6 +3,7 @@ from typing import Final
from delta_barth.types import CurrentConnection, HttpContentHeaders from delta_barth.types import CurrentConnection, HttpContentHeaders
# ** API connection management
HTTP_BASE_CONTENT_HEADERS: Final[HttpContentHeaders] = { HTTP_BASE_CONTENT_HEADERS: Final[HttpContentHeaders] = {
"Content-type": "application/json", "Content-type": "application/json",
"Accept": "application/json", "Accept": "application/json",
@ -15,3 +16,23 @@ HTTP_CURRENT_CONNECTION: Final[CurrentConnection] = CurrentConnection(
class KnownApiErrorCodes(enum.Enum): class KnownApiErrorCodes(enum.Enum):
COMMON = frozenset((400, 401, 409, 500)) COMMON = frozenset((400, 401, 409, 500))
# ** API response parsing
# ** column mapping [API-Response --> Target-Features]
# Keys are the field names as they appear in the API response, values are the
# target feature names they are mapped to.
COL_MAP_SALES_PROGNOSIS: Final[dict[str, str]] = {
    "artikelId": "artikel_refid",
    "firmaId": "firma_refid",
    "betrag": "betrag",
    "menge": "menge",
    "buchungsDatum": "buchungs_datum",
}
# Target feature names for the sales prognosis (presumably the set a dataset
# is checked against before further processing -- confirm against callers).
# NOTE(review): "beleg_typ" and "vorgang_refid" are not among the values of
# COL_MAP_SALES_PROGNOSIS -- verify that they reach the dataset some other way.
FEATURES_SALES_PROGNOSIS: Final[frozenset[str]] = frozenset(
    {
        "firma_refid",
        "beleg_typ",
        "betrag",
        "vorgang_refid",
        "buchungs_datum",
    }
)

View File

@ -8,3 +8,7 @@ class UnknownApiErrorCode(Exception):
class ApiConnectionError(Exception): class ApiConnectionError(Exception):
"""exception raised if an established connection is needed, but the current session is not connected""" """exception raised if an established connection is needed, but the current session is not connected"""
class FeaturesMissingError(Exception):
    """Exception raised if needed features are missing.

    Signals that a dataset does not provide all features (column labels)
    that the caller requires.
    """

View File

@ -0,0 +1,34 @@
import pandas as pd
import pytest
from delta_barth.analysis import parse
from delta_barth.errors import FeaturesMissingError
def test_check_needed_features():
    """``check_needed_features`` accepts complete data and rejects incomplete data."""
    required = {"feat1", "feat2", "feat3"}
    row = [[1, 2, 3, 4, 5]]

    # Every required column is present: the check must pass silently.
    complete = pd.DataFrame(
        data=row, columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
    )
    parse.check_needed_features(complete, required)

    # "feat1" is replaced by "featX", so one required column is absent.
    incomplete = pd.DataFrame(
        data=row, columns=["featX", "feat2", "feat3", "feat4", "feat5"]
    )
    with pytest.raises(FeaturesMissingError):
        parse.check_needed_features(incomplete, required)
def test_map_features_to_targets():
    """Mapped columns are renamed, unmapped columns keep their label."""
    feature_map = {"feat1": "feat10", "feat2": "feat20", "feat5": "feat50"}
    frame = pd.DataFrame(
        data=[[1, 2, 3, 4, 5]],
        columns=["feat1", "feat2", "feat3", "feat4", "feat5"],
    )

    result = parse.map_features_to_targets(frame, feature_map)
    labels = set(result.columns)

    # Renamed targets and untouched columns must all be present ...
    assert {"feat10", "feat20", "feat50", "feat3", "feat4"} <= labels
    # ... while the original labels of the mapped columns must be gone.
    assert labels.isdisjoint({"feat1", "feat2", "feat5"})