"""Helpers for renaming DataFrame columns and validating required features."""

from __future__ import annotations

from collections.abc import Mapping, Set
from typing import TYPE_CHECKING

from delta_barth.errors import FeaturesMissingError

if TYPE_CHECKING:
    import pandas as pd


def _check_needed_features(
    data: pd.DataFrame,
    features: Set[str],
) -> None:
    """Verify that every required feature is present as a column of ``data``.

    Args:
        data: The DataFrame whose column labels are inspected.
        features: The column names that must be present.

    Raises:
        FeaturesMissingError: If at least one entry of ``features`` is not
            among the columns of ``data``.
    """
    missing_features = features - set(data.columns)
    if missing_features:
        # Sort so the message is deterministic; set iteration order is not.
        raise FeaturesMissingError(
            "The dataset does not contain all needed features. "
            f"Missing features are: {sorted(missing_features)}"
        )


def _map_features_to_targets(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
) -> pd.DataFrame:
    """Return a copy of ``data`` with its columns renamed via ``feature_map``.

    Columns without an entry in ``feature_map`` keep their current name. The
    DataFrame passed in is not modified.

    Args:
        data: The DataFrame whose columns are renamed.
        feature_map: Mapping from current column name to new column name.

    Returns:
        A copy of ``data`` carrying the renamed column labels.
    """
    data = data.copy()
    # Fall back to the column's own name when no mapping entry exists.
    data.columns = [feature_map.get(feat, feat) for feat in data.columns]
    return data


def preprocess_features(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
    target_features: Set[str],
) -> pd.DataFrame:
    """Rename the columns of ``data`` and ensure all target features exist.

    Args:
        data: The input DataFrame; it is left unmodified.
        feature_map: Mapping from current column name to new column name.
        target_features: Column names that must be present after renaming.

    Returns:
        A renamed copy of ``data``.

    Raises:
        FeaturesMissingError: If, after renaming, any entry of
            ``target_features`` is not a column of the result.
    """
    data = _map_features_to_targets(data, feature_map)
    _check_needed_features(data, target_features)
    return data