54 lines
1.2 KiB
Python
54 lines
1.2 KiB
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Mapping, Set
|
|
from typing import TYPE_CHECKING
|
|
|
|
from delta_barth.errors import FeaturesMissingError
|
|
|
|
if TYPE_CHECKING:
|
|
import pandas as pd
|
|
|
|
|
|
def _check_needed_features(
    data: pd.DataFrame,
    features: Set[str],
) -> None:
    """Verify that every required feature is present as a column of ``data``.

    Args:
        data: Dataset whose column labels are inspected.
        features: Names of the columns that must be present.

    Raises:
        FeaturesMissingError: If at least one entry of ``features`` is not a
            column label of ``data``. The message lists the missing names in
            sorted order.
    """
    missing_features = set(features).difference(data.columns)

    if missing_features:
        # Sets have no stable iteration order; sorting keeps the message
        # reproducible across runs. ``key=str`` avoids a TypeError should a
        # caller pass labels of mixed types.
        missing_sorted = sorted(missing_features, key=str)
        raise FeaturesMissingError(
            "The dataset does not contain all needed features. "
            f"Missing features are: {missing_sorted}"
        )
|
|
|
|
|
|
def _map_features_to_targets(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
) -> pd.DataFrame:
    """Return a copy of ``data`` with its columns renamed via ``feature_map``.

    Args:
        data: Source dataset. It is copied first, so the caller's frame keeps
            its original column labels.
        feature_map: Maps current column names to their new names.

    Returns:
        The copied dataset. Columns found in ``feature_map`` carry the mapped
        name, all other columns keep their name, and the column order is
        unchanged.
    """
    renamed = data.copy()
    # Labels without an entry in the mapping fall back to themselves.
    renamed.columns = [feature_map.get(label, label) for label in renamed.columns]

    return renamed
|
|
|
|
|
|
def preprocess_features(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
    target_features: Set[str],
) -> pd.DataFrame:
    """Rename the columns of ``data`` and validate the required features.

    The columns are first renamed through ``feature_map``; the renamed
    dataset is then checked to contain every name in ``target_features``.

    Args:
        data: Source dataset. The renaming step works on a copy, so this
            frame is not modified.
        feature_map: Maps current column names to their target names.
        target_features: Column names that must exist after renaming.

    Returns:
        The renamed copy of ``data``.

    Raises:
        FeaturesMissingError: If the renamed dataset lacks at least one of
            the ``target_features``.
    """
    mapped = _map_features_to_targets(data, feature_map)
    # Validation happens after renaming, i.e. against the target names.
    _check_needed_features(mapped, target_features)

    return mapped
|