54 lines
1.2 KiB
Python

from __future__ import annotations
from collections.abc import Mapping, Set
from typing import TYPE_CHECKING
from delta_barth.errors import FeaturesMissingError
if TYPE_CHECKING:
import pandas as pd
def _check_needed_features(
    data: pd.DataFrame,
    features: Set[str],
) -> None:
    """Verify that ``data`` has a column for every name in ``features``.

    Args:
        data: Frame whose column labels are inspected.
        features: Column names that must all be present in ``data``.

    Raises:
        FeaturesMissingError: If at least one name in ``features`` is not
            a column of ``data``.
    """
    missing_features = set(features).difference(data.columns)
    if missing_features:
        # Sorted so the message is reproducible and independent of the
        # concrete set type passed in (set vs. frozenset).
        raise FeaturesMissingError(
            "The dataset does not contain all needed features: "
            f"Missing features are: {sorted(missing_features)}"
        )
def _map_features_to_targets(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
) -> pd.DataFrame:
    """Return a copy of ``data`` with its columns renamed via ``feature_map``.

    Args:
        data: Input frame; it is copied and never modified.
        feature_map: Maps existing column names to their replacement names.

    Returns:
        A new frame with the same content and column order. Columns that
        are keys of ``feature_map`` carry the mapped name; all other
        columns keep their original name.
    """
    data = data.copy()
    # ``get(feat, feat)`` falls back to the original label for unmapped columns.
    data.columns = [feature_map.get(feat, feat) for feat in data.columns]
    return data
def preprocess_features(
    data: pd.DataFrame,
    feature_map: Mapping[str, str],
    target_features: Set[str],
) -> pd.DataFrame:
    """Rename the columns of ``data`` and validate the resulting feature set.

    The frame is first passed through ``_map_features_to_targets`` together
    with ``feature_map``; the result is then handed to
    ``_check_needed_features`` along with ``target_features``.

    Args:
        data: Input frame.
        feature_map: Mapping forwarded to ``_map_features_to_targets``.
        target_features: Column names forwarded to
            ``_check_needed_features``; they are checked against the
            frame after renaming.

    Returns:
        The frame returned by ``_map_features_to_targets``.

    Raises:
        FeaturesMissingError: Propagated from ``_check_needed_features``
            when the renamed frame lacks one of ``target_features``.
    """
    renamed = _map_features_to_targets(data, feature_map)
    # Validation happens after renaming, so ``target_features`` must use
    # the mapped names.
    _check_needed_features(renamed, target_features)
    return renamed