74 lines
3.1 KiB
Python
74 lines
3.1 KiB
Python
"""testing each function in a consecutive way like each one is
|
|
executed in in a pipeline
|
|
"""
|
|
|
|
from lang_main.analysis import preprocessing as ppc
|
|
from lang_main.analysis import shared
|
|
|
|
|
|
def test_load_data(raw_data_path, raw_data_date_cols):
    """Load the raw data and check the length of the loaded object.

    Both arguments are supplied by the test setup and are passed straight
    through to ``ppc.load_raw_data``.
    """
    expected_length = 1000

    # The loader's result is unpacked as a one-element sequence; any other
    # number of elements makes the unpacking itself fail.
    [frame] = ppc.load_raw_data(raw_data_path, raw_data_date_cols)

    assert len(frame) == expected_length
|
|
|
|
|
|
def test_remove_simple_duplicates(raw_data_path, raw_data_date_cols):
    """Run loading followed by ``ppc.remove_duplicates`` and check the length.

    Each step returns a one-element sequence whose single item is fed into
    the next step.
    """
    expected_length = 999

    # Pipeline: load -> remove duplicates (same name rebound at every step).
    [frame] = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    [frame] = ppc.remove_duplicates(frame)

    assert len(frame) == expected_length
|
|
|
|
|
|
def test_remove_na(raw_data_path, raw_data_date_cols):
    """Run load -> ``remove_duplicates`` -> ``remove_NA`` and check the length.

    ``remove_NA`` is called with the single target feature
    ``'VorgangsBeschreibung'``.
    """
    target_features: tuple[str] = ('VorgangsBeschreibung',)
    expected_length = 998

    # Pipeline steps; each result is unpacked as a one-element sequence.
    [frame] = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    [frame] = ppc.remove_duplicates(frame)
    [frame] = ppc.remove_NA(frame, target_features)

    assert len(frame) == expected_length
|
|
|
|
|
|
# def test_string_cleansing():
|
|
# string = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
|
|
# cleaned_string = shared.clean_string_slim(string)
|
|
# target_string = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
|
|
# assert cleaned_string == target_string
|
|
|
|
|
|
def test_entry_wise_cleansing(raw_data_path, raw_data_date_cols):
    """Check string cleansing on a literal and on the pipeline data.

    Steps:
    1. Run load -> ``remove_duplicates`` -> ``remove_NA``.
    2. Check ``shared.clean_string_slim`` directly on a literal string.
    3. Check the value stored at row label 0 before and after
       ``shared.entry_wise_cleansing`` is applied with that same function.
    """
    column = 'VorgangsBeschreibung'
    target_features: tuple[str] = (column,)
    # Literal input for the direct call of the cleansing function.
    literal_input = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
    # Value expected at row label 0 before entry-wise cleansing.
    stored_input = 'Ölleckage durch\nundichten Ölsumpf,, aber Dichtung intakt??!!!'
    # Expected cleansing result for both inputs above.
    expected_clean = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'

    # Pipeline steps; each result is unpacked as a one-element sequence.
    [frame] = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    [frame] = ppc.remove_duplicates(frame)
    [frame] = ppc.remove_NA(frame, target_features)

    assert shared.clean_string_slim(literal_input) == expected_clean
    assert frame.at[0, column] == stored_input

    [frame] = shared.entry_wise_cleansing(
        frame,
        target_features=target_features,
        cleansing_func=shared.clean_string_slim,
    )

    assert frame.at[0, column] == expected_clean
|
|
|
|
|
|
def test_analyse_feature(raw_data_path, raw_data_date_cols):
    """Run the pipeline up to ``ppc.analyse_feature`` and check the length.

    Steps:
    1. Run load -> ``remove_duplicates`` -> ``remove_NA``.
    2. Check ``shared.clean_string_slim`` directly on a literal string.
    3. Check the value stored at row label 0 before and after
       ``shared.entry_wise_cleansing``.
    4. Call ``ppc.analyse_feature`` on the target feature and assert the
       length of its result.
    """
    column = 'VorgangsBeschreibung'
    target_features: tuple[str] = (column,)
    # Literal input for the direct call of the cleansing function.
    literal_input = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
    # Value expected at row label 0 before entry-wise cleansing.
    stored_input = 'Ölleckage durch\nundichten Ölsumpf,, aber Dichtung intakt??!!!'
    # Expected cleansing result for both inputs above.
    expected_clean = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
    expected_length = 139

    # Pipeline steps; each result is unpacked as a one-element sequence.
    [frame] = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    [frame] = ppc.remove_duplicates(frame)
    [frame] = ppc.remove_NA(frame, target_features)

    assert shared.clean_string_slim(literal_input) == expected_clean
    assert frame.at[0, column] == stored_input

    [frame] = shared.entry_wise_cleansing(
        frame,
        target_features=target_features,
        cleansing_func=shared.clean_string_slim,
    )
    assert frame.at[0, column] == expected_clean

    # `column` is the same value as `target_features[0]`.
    [frame] = ppc.analyse_feature(frame, target_feature=column)
    assert len(frame) == expected_length
|