diff --git a/src/delta_barth/analysis/parse.py b/src/delta_barth/analysis/parse.py
new file mode 100644
index 0000000..7693d92
--- /dev/null
+++ b/src/delta_barth/analysis/parse.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from collections.abc import Mapping, Set
+from typing import TYPE_CHECKING
+
+from delta_barth.errors import FeaturesMissingError
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+
+def check_needed_features(
+    data: pd.DataFrame,
+    features: Set[str],
+) -> None:
+    """check that all needed features are present as columns in the dataset
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        dataset whose column labels are checked
+    features : Set[str]
+        feature names which must be present as columns
+
+    Raises
+    ------
+    FeaturesMissingError
+        if at least one of the needed features is not a column of ``data``
+    """
+    missing_features = features - set(data.columns)
+
+    if missing_features:
+        # sorted for a deterministic, readable error message
+        raise FeaturesMissingError(
+            "The dataset does not contain all needed features. "
+            f"Missing features are: {sorted(missing_features, key=str)}"
+        )
+
+
+def map_features_to_targets(
+    data: pd.DataFrame,
+    feature_map: Mapping[str, str],
+) -> pd.DataFrame:
+    """rename columns of the dataset according to the given feature map
+
+    Columns without an entry in ``feature_map`` keep their current name.
+    The column labels are replaced in place on the passed DataFrame.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        dataset whose columns should be renamed, modified in place
+    feature_map : Mapping[str, str]
+        mapping of current column names to target feature names
+
+    Returns
+    -------
+    pd.DataFrame
+        the same DataFrame object with renamed columns
+    """
+    data.columns = [feature_map.get(feat, feat) for feat in data.columns]
+
+    return data
diff --git a/src/delta_barth/constants.py b/src/delta_barth/constants.py
index 81c1c4b..543ffa5 100644
--- a/src/delta_barth/constants.py
+++ b/src/delta_barth/constants.py
@@ -3,6 +3,7 @@ from typing import Final
 
 from delta_barth.types import CurrentConnection, HttpContentHeaders
 
+# ** API connection management
 HTTP_BASE_CONTENT_HEADERS: Final[HttpContentHeaders] = {
     "Content-type": "application/json",
     "Accept": "application/json",
@@ -15,3 +16,23 @@ HTTP_CURRENT_CONNECTION: Final[CurrentConnection] = CurrentConnection(
 
 class KnownApiErrorCodes(enum.Enum):
     COMMON = frozenset((400, 401, 409, 500))
+
+
+# ** API response parsing
+# ** column mapping [API-Response --> Target-Features]
+COL_MAP_SALES_PROGNOSIS: Final[dict[str, str]] = {
+    "artikelId": "artikel_refid",
+    "firmaId": "firma_refid",
+    "betrag": "betrag",
+    "menge": "menge",
+    "buchungsDatum": "buchungs_datum",
+}
+FEATURES_SALES_PROGNOSIS: Final[frozenset[str]] = frozenset(
+    (
+        "firma_refid",
+        "beleg_typ",
+        "betrag",
+        "vorgang_refid",
+        "buchungs_datum",
+    )
+)
diff --git a/src/delta_barth/errors.py b/src/delta_barth/errors.py
index f46b561..430a192 100644
--- a/src/delta_barth/errors.py
+++ b/src/delta_barth/errors.py
@@ -8,3 +8,7 @@ class UnknownApiErrorCode(Exception):
 class ApiConnectionError(Exception):
     """exception raised if an established connection is needed, but the current
     session is not connected"""
+
+
+class FeaturesMissingError(Exception):
+    """exception raised if needed features are missing"""
diff --git a/tests/analysis/test_parse.py b/tests/analysis/test_parse.py
new file mode 100644
index 0000000..ad92e19
--- /dev/null
+++ b/tests/analysis/test_parse.py
@@ -0,0 +1,34 @@
+import pandas as pd
+import pytest
+
+from delta_barth.analysis import parse
+from delta_barth.errors import FeaturesMissingError
+
+
+def test_check_needed_features():
+    target_features = set(("feat1", "feat2", "feat3"))
+    data = pd.DataFrame(
+        data=[[1, 2, 3, 4, 5]], columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
+    )
+    parse.check_needed_features(data, target_features)
+    data = pd.DataFrame(
+        data=[[1, 2, 3, 4, 5]], columns=["featX", "feat2", "feat3", "feat4", "feat5"]
+    )
+    with pytest.raises(FeaturesMissingError):
+        parse.check_needed_features(data, target_features)
+
+
+def test_map_features_to_targets():
+    feature_map = dict(feat1="feat10", feat2="feat20", feat5="feat50")
+    data = pd.DataFrame(
+        data=[[1, 2, 3, 4, 5]], columns=["feat1", "feat2", "feat3", "feat4", "feat5"]
+    )
+    data = parse.map_features_to_targets(data, feature_map)
+    assert "feat10" in data.columns
+    assert "feat20" in data.columns
+    assert "feat50" in data.columns
+    assert "feat3" in data.columns
+    assert "feat4" in data.columns
+    assert "feat1" not in data.columns
+    assert "feat2" not in data.columns
+    assert "feat5" not in data.columns