added feature mapping
This commit is contained in:
parent
96186110a6
commit
0509ba79c8
41
src/delta_barth/analysis/parse.py
Normal file
41
src/delta_barth/analysis/parse.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Mapping, Set
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from delta_barth.errors import FeaturesMissingError
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def check_needed_features(
    data: pd.DataFrame,
    features: Set,
) -> None:
    """Verify that ``data`` contains every column named in ``features``.

    Args:
        data: DataFrame whose column labels are checked.
        features: Column labels that must be present in ``data``.

    Raises:
        FeaturesMissingError: If at least one label in ``features`` is not
            a column of ``data``.
    """
    # building a real ``set`` first makes the difference work for any
    # iterable of labels, not only for built-in set types
    missing_features = set(features).difference(data.columns)

    if missing_features:
        # sorted (by repr, so mixed label types cannot break the ordering)
        # to keep the error message reproducible between runs
        missing_sorted = sorted(missing_features, key=repr)
        raise FeaturesMissingError(
            "The dataset does not contain all needed features: "
            f"Missing features are: {missing_sorted}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
def map_features_to_targets(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
) -> pd.DataFrame:
    """Rename the columns of ``data`` according to ``feature_map``.

    Columns without an entry in ``feature_map`` keep their current name.
    The column labels are replaced on ``data`` itself (no copy is made)
    and that same object is returned.

    Args:
        data: DataFrame whose column labels are replaced.
        feature_map: Mapping from current column name to target name.

    Returns:
        The DataFrame that was passed in, carrying the mapped column names.
    """
    # ``get`` with the label itself as default leaves unmapped columns as is
    data.columns = [feature_map.get(feat, feat) for feat in data.columns]

    return data
|
||||||
@ -3,6 +3,7 @@ from typing import Final
|
|||||||
|
|
||||||
from delta_barth.types import CurrentConnection, HttpContentHeaders
|
from delta_barth.types import CurrentConnection, HttpContentHeaders
|
||||||
|
|
||||||
|
# ** API connection management
|
||||||
HTTP_BASE_CONTENT_HEADERS: Final[HttpContentHeaders] = {
|
HTTP_BASE_CONTENT_HEADERS: Final[HttpContentHeaders] = {
|
||||||
"Content-type": "application/json",
|
"Content-type": "application/json",
|
||||||
"Accept": "application/json",
|
"Accept": "application/json",
|
||||||
@ -15,3 +16,23 @@ HTTP_CURRENT_CONNECTION: Final[CurrentConnection] = CurrentConnection(
|
|||||||
|
|
||||||
class KnownApiErrorCodes(enum.Enum):
    """Groups of known API error codes; each member's value is a frozenset of ints."""

    # presumably HTTP status codes returned by the API -- confirm with the API spec
    COMMON = frozenset((400, 401, 409, 500))
|
||||||
|
|
||||||
|
|
||||||
|
# ** API response parsing
|
||||||
|
# ** column mapping [API-Response --> Target-Features]
|
||||||
|
# keys: column names as delivered in the API response
# values: target feature names the columns are renamed to
COL_MAP_SALES_PROGNOSIS: Final[dict[str, str]] = {
    "artikelId": "artikel_refid",
    "firmaId": "firma_refid",
    "betrag": "betrag",
    "menge": "menge",
    "buchungsDatum": "buchungs_datum",
}
|
||||||
|
# presumably the feature (column) names required for the sales prognosis
# NOTE(review): "beleg_typ" and "vorgang_refid" are not among the target names
# in COL_MAP_SALES_PROGNOSIS -- confirm where these columns come from
FEATURES_SALES_PROGNOSIS: Final[frozenset[str]] = frozenset(
    (
        "firma_refid",
        "beleg_typ",
        "betrag",
        "vorgang_refid",
        "buchungs_datum",
    )
)
|
||||||
|
|||||||
@ -8,3 +8,7 @@ class UnknownApiErrorCode(Exception):
|
|||||||
|
|
||||||
class ApiConnectionError(Exception):
    """exception raised if an established connection is needed, but the
    current session is not connected"""
|
||||||
|
|
||||||
|
|
||||||
|
class FeaturesMissingError(Exception):
    """exception raised if needed features are missing"""
|
||||||
|
|||||||
34
tests/analysis/test_parse.py
Normal file
34
tests/analysis/test_parse.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from delta_barth.analysis import parse
|
||||||
|
from delta_barth.errors import FeaturesMissingError
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_needed_features():
    """check_needed_features passes on complete data and raises on a missing column."""
    target_features = {"feat1", "feat2", "feat3"}

    # every target feature is present: the check must pass silently
    data = pd.DataFrame(
        data=[[1, 2, 3, 4, 5]], columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
    )
    parse.check_needed_features(data, target_features)

    # "feat1" is replaced by "featX", so one target feature is missing
    data = pd.DataFrame(
        data=[[1, 2, 3, 4, 5]], columns=["featX", "feat2", "feat3", "feat4", "feat5"]
    )
    with pytest.raises(FeaturesMissingError):
        parse.check_needed_features(data, target_features)
|
||||||
|
|
||||||
|
|
||||||
|
def test_map_features_to_targets():
    """Mapped columns get their target name, unmapped columns keep theirs."""
    feature_map = dict(feat1="feat10", feat2="feat20", feat5="feat50")
    data = pd.DataFrame(
        data=[[1, 2, 3, 4, 5]], columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
    )
    data = parse.map_features_to_targets(data, feature_map)

    # mapped names are present
    assert "feat10" in data.columns
    assert "feat20" in data.columns
    assert "feat50" in data.columns
    # unmapped names are untouched
    assert "feat3" in data.columns
    assert "feat4" in data.columns
    # original names of mapped columns are gone
    assert "feat1" not in data.columns
    assert "feat2" not in data.columns
    assert "feat5" not in data.columns
    # renaming must neither reorder the columns nor touch the values
    assert list(data.columns) == ["feat10", "feat20", "feat3", "feat4", "feat50"]
    assert data.iloc[0].tolist() == [1, 2, 3, 4, 5]
|
||||||
Loading…
x
Reference in New Issue
Block a user