added test cases
parent 5a789b7605
commit 9291b53f93
@@ -1,4 +1,6 @@
 # lang_main: Config file
+[info]
+pkg = 'lang_main'
 
 [paths]
 inputs = './inputs/'
@@ -40,6 +42,7 @@ threshold_edge_number = 330
 threshold_unique_texts = 4
 criterion_feature = 'HObjektText'
 feature_name_obj_id = 'ObjektID'
+feature_name_obj_text = 'HObjektText'
 
 [time_analysis.preparation]
 name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
@@ -112,8 +112,6 @@ def candidates_by_index(
     )
     # cosine similarity
     cos_sim = cast(npt.NDArray, model.similarity(embds, embds).numpy())
-    # TODO check removal
-    # cos_sim = cast(npt.NDArray, sentence_transformers.util.cos_sim(embds, embds).numpy())
     np.fill_diagonal(cos_sim, 0.0)
     cos_sim = np.triu(cos_sim)
     cos_sim_idx = np.argwhere(cos_sim >= cos_sim_threshold)
@@ -142,7 +142,7 @@ def filter_activities_per_obj_id(
     threshold_num_activities: int = 1,
 ) -> tuple[DataFrame, Series]:
     data = data.copy()
-    # filter only relevant activities count occurrences for each ObjectID
+    # filter only relevant activities, count occurrences for each ObjectID
     logger.info('Filtering activities per ObjectID...')
     filt_rel_activities = data[activity_feature].isin(relevant_activity_types)
     data_filter_activities = data.loc[filt_rel_activities].copy()
@@ -249,6 +249,7 @@ def _transform_timeline_candidates(
 def _map_obj_id_to_texts(
     data: DataFrame,
     feature_obj_id: str = 'ObjektID',
+    feature_obj_text: str = 'HObjektText',
 ) -> dict[ObjectID, str]:
     data = data.copy()
     obj_ids = cast(Iterable[ObjectID], data[feature_obj_id].unique())
@@ -256,9 +257,9 @@ def _map_obj_id_to_texts(
     obj_id_to_text: dict[ObjectID, str] = {}
 
     for obj_id in tqdm(obj_ids):
-        data_per_obj = cast(DataFrame, data.loc[data['ObjektID'] == obj_id])
+        data_per_obj = cast(DataFrame, data.loc[data[feature_obj_id] == obj_id])
         # just take first entry
-        obj_text = cast(str, data_per_obj['HObjektText'].dropna().iat[0])
+        obj_text = cast(str, data_per_obj[feature_obj_text].dropna().iat[0])
         obj_text = obj_text.strip(r' ,.:')
         obj_id_to_text[obj_id] = obj_text
 
@@ -272,6 +273,7 @@ def get_timeline_candidates(
     model: SentenceTransformer,
     cos_sim_threshold: float,
     feature_obj_id: str = 'ObjektID',
+    feature_obj_text: str = 'HObjektText',
     model_input_feature: str = 'nlp_model_input',
 ) -> tuple[TimelineCandidates, dict[ObjectID, str]]:
     logger.info('Obtaining timeline candidates...')
@@ -290,6 +292,7 @@ def get_timeline_candidates(
     map_obj_text = _map_obj_id_to_texts(
         data=data,
         feature_obj_id=feature_obj_id,
+        feature_obj_text=feature_obj_text,
     )
     logger.info('ObjectIDs successfully mapped to text descriptors.')
 
@@ -145,6 +145,9 @@ UNIQUE_CRITERION_FEATURE: Final[str] = CONFIG['time_analysis']['uniqueness'][
     'criterion_feature'
 ]
 FEATURE_NAME_OBJ_ID: Final[str] = CONFIG['time_analysis']['uniqueness']['feature_name_obj_id']
+FEATURE_NAME_OBJ_TEXT: Final[str] = CONFIG['time_analysis']['uniqueness'][
+    'feature_name_obj_text'
+]
 # ** time_analysis.preparation
 # NAME_DELTA_FEAT_TO_REPAIR: Final[str] = 'delta_to_repair'
     CONFIG['time_analysis']['preparation']['name_delta_feat_to_repair']
@@ -42,6 +42,7 @@ threshold_edge_number = 330
 threshold_unique_texts = 4
 criterion_feature = 'HObjektText'
 feature_name_obj_id = 'ObjektID'
+feature_name_obj_text = 'HObjektText'
 
 [time_analysis.preparation]
 name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
@@ -29,6 +29,7 @@ from lang_main.constants import (
     CYTO_BASE_NETWORK_NAME,
     DATE_COLS,
     FEATURE_NAME_OBJ_ID,
+    FEATURE_NAME_OBJ_TEXT,
     MODEL_INPUT_FEATURES,
     NAME_DELTA_FEAT_TO_REPAIR,
     SAVE_PATH_FOLDER,
@@ -287,6 +288,7 @@ def build_timeline_pipe() -> Pipeline:
             'model': STFR_MODEL,
             'cos_sim_threshold': THRESHOLD_TIMELINE_SIMILARITY,
             'feature_obj_id': FEATURE_NAME_OBJ_ID,
+            'feature_obj_text': FEATURE_NAME_OBJ_TEXT,
             'model_input_feature': 'nlp_model_input',
         },
         save_result=True,
BIN tests/_comparison_results/preprocess_pre_cleaned.pkl (new file)
Binary file not shown.
BIN tests/_comparison_results/preprocess_pre_cleaned.xlsx (new file)
Binary file not shown.
BIN tests/_comparison_results/timeline_01_act_per_objid.pkl (new file)
Binary file not shown.
BIN tests/_comparison_results/timeline_01_act_per_objid.xlsx (new file)
Binary file not shown.
BIN tests/_comparison_results/timeline_01_df_filtered.pkl (new file)
Binary file not shown.
BIN tests/_comparison_results/timeline_01_df_filtered.xlsx (new file)
Binary file not shown.
BIN tests/_comparison_results/timeline_01_timeline_cands_dict.pkl (new file)
Binary file not shown.
@@ -3,6 +3,7 @@ executed in in a pipeline
 """
 
 from pathlib import Path
+
 from lang_main import model_loader
 from lang_main.analysis import preprocessing as ppc
 from lang_main.analysis import shared
316 tests/analysis/test_timeline.py (new file)
@@ -0,0 +1,316 @@
+from pathlib import Path
+from typing import cast
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from lang_main import io, model_loader
+from lang_main.analysis import timeline as tl
+from lang_main.types import STFRModelTypes, TimelineCandidates
+
+
+@pytest.fixture(scope='module')
+def data_timeline_filter_activities() -> pd.DataFrame:
+    pth_data = Path('./tests/_comparison_results/timeline_01_df_filtered.pkl')
+    return pd.read_pickle(pth_data)
+
+
+@pytest.fixture(scope='module')
+def data_timeline_number_activities() -> pd.Series:
+    pth_data = Path('./tests/_comparison_results/timeline_01_act_per_objid.pkl')
+    return pd.read_pickle(pth_data)
+
+
+@pytest.fixture(scope='module')
+def STFR_model():
+    model = model_loader.load_sentence_transformer(
+        model_name=STFRModelTypes.ALL_MINI_LM_L6_V2,
+    )
+    return model
+
+
+@pytest.fixture(scope='module')
+def data_timeline_cands_dict() -> TimelineCandidates:
+    pth_data = './tests/_comparison_results/timeline_01_timeline_cands_dict.pkl'
+    tl_cands = cast(TimelineCandidates, io.load_pickle(pth_data))
+    return tl_cands
+
+
+def test_cleanup_descriptions(data_pre_cleaned):
+    data = data_pre_cleaned.copy()
+    data.at[0, 'VorgangsBeschreibung'] = np.nan
+    data.at[3, 'VorgangsBeschreibung'] = np.nan
+    data.at[5, 'ErledigungsBeschreibung'] = np.nan
+    properties = ('VorgangsBeschreibung', 'ErledigungsBeschreibung')
+    (data_proc,) = tl.cleanup_descriptions(data, properties=properties)
+    assert data_proc.at[0, 'VorgangsBeschreibung'] == 'N.V.'
+    assert data_proc.at[3, 'VorgangsBeschreibung'] == 'N.V.'
+    assert data_proc.at[5, 'ErledigungsBeschreibung'] == 'N.V.'
+
+
+@pytest.mark.parametrize('convert_to_days', [True, False])
+def test_calc_delta_to_repair(data_pre_cleaned, convert_to_days):
+    feat_start = 'ErstellungsDatum'
+    feat_end = 'ErledigungsDatum'
+    name_delta_feature = 'Test'
+    (data,) = tl.calc_delta_to_repair(
+        data_pre_cleaned,
+        date_feature_start=feat_start,
+        date_feature_end=feat_end,
+        name_delta_feature=name_delta_feature,
+        convert_to_days=convert_to_days,
+    )
+    assert name_delta_feature in data.columns
+    test_date = data_pre_cleaned.at[0, feat_end] - data_pre_cleaned.at[0, feat_start]
+    if convert_to_days:
+        assert test_date.days == data.at[0, name_delta_feature]
+    else:
+        assert test_date == data.at[0, name_delta_feature]
+
+
+def test_non_relevant_obj_ids(data_pre_cleaned):
+    feature_uniqueness = 'HObjektText'
+    feature_obj_id = 'ObjektID'
+    threshold = 1
+    data = data_pre_cleaned.copy()
+    data.at[0, feature_obj_id] = 1
+    ids_to_ignore = tl._non_relevant_obj_ids(
+        data,
+        thresh_unique_feat_per_id=threshold,
+        feature_uniqueness=feature_uniqueness,
+        feature_obj_id=feature_obj_id,
+    )
+    assert len(ids_to_ignore) == 1
+    assert ids_to_ignore == (1,)
+
+
+def test_remove_non_relevant_obj_ids(data_pre_cleaned):
+    feature_uniqueness = 'HObjektText'
+    feature_obj_id = 'ObjektID'
+    threshold = 1
+    data = data_pre_cleaned.copy()
+    data.at[0, feature_obj_id] = 1
+
+    (data_proc,) = tl.remove_non_relevant_obj_ids(
+        data,
+        thresh_unique_feat_per_id=threshold,
+        feature_uniqueness=feature_uniqueness,
+        feature_obj_id=feature_obj_id,
+    )
+    unique_obj_ids = data_proc[feature_obj_id].unique()
+    assert 1 not in unique_obj_ids
+    assert len(unique_obj_ids) == 2
+
+
+def test_generate_model_input(data_pre_cleaned):
+    target_feature_name = 'nlp_model_input'
+    model_input_features = (
+        'VorgangsTypName',
+        'VorgangsBeschreibung',
+    )
+    data = data_pre_cleaned.copy()
+    (data_proc,) = tl.generate_model_input(
+        data,
+        target_feature_name=target_feature_name,
+        model_input_features=model_input_features,
+    )
+    feat1 = data.at[0, model_input_features[0]]
+    feat2 = data.at[0, model_input_features[1]]
+    test_result = f'{feat1} - {feat2}'
+    assert data_proc.at[0, target_feature_name] == test_result
+
+
+def test_filter_activities_per_obj_id(data_pre_cleaned):
+    activity_feature = 'VorgangsTypName'
+    relevant_activity_types = ('Störungsmeldung',)
+    feature_obj_id = 'ObjektID'
+    threshold_num_activities = 1  # at least 2 occurrences per ObjID
+
+    data = data_pre_cleaned.copy()
+    data = data.iloc[:5]
+
+    df_filtered, act_per_obj_id = tl.filter_activities_per_obj_id(
+        data,
+        activity_feature=activity_feature,
+        relevant_activity_types=relevant_activity_types,
+        feature_obj_id=feature_obj_id,
+        threshold_num_activities=threshold_num_activities,
+    )
+    assert len(df_filtered) == 2
+    assert df_filtered.iat[0, 1] == act_per_obj_id.index[0]
+    assert act_per_obj_id.iat[0] == 2
+
+
+def test_get_timeline_candidates_index(
+    data_timeline_filter_activities,
+    data_timeline_number_activities,
+    STFR_model,
+):
+    data = data_timeline_filter_activities.copy()
+    target_feature_name = 'nlp_model_input'
+    model_input_features = ('VorgangsBeschreibung',)
+    (data,) = tl.generate_model_input(
+        data,
+        target_feature_name=target_feature_name,
+        model_input_features=model_input_features,
+    )
+    data_num_act = data_timeline_number_activities.copy()
+    cos_sim_threshold = 0.8
+    tl_cands_iter = tl._get_timeline_candidates_index(
+        data=data,
+        num_activities_per_obj_id=data_num_act,
+        model=STFR_model,
+        cos_sim_threshold=cos_sim_threshold,
+        feature_obj_id='ObjektID',
+        model_input_feature=target_feature_name,
+    )
+    tl_cands_idx = tuple(tl_cands_iter)  # format tuple((ObjID, (Idx1, Idx2)))
+    assert len(tl_cands_idx) == 0
+
+    cos_sim_threshold = 0.0
+    tl_cands_iter = tl._get_timeline_candidates_index(
+        data=data,
+        num_activities_per_obj_id=data_num_act,
+        model=STFR_model,
+        cos_sim_threshold=cos_sim_threshold,
+        feature_obj_id='ObjektID',
+        model_input_feature=target_feature_name,
+    )
+    tl_cands_idx = tuple(tl_cands_iter)
+    assert len(tl_cands_idx) == 1
+    assert tl_cands_idx == ((3, (np.int64(3), np.int64(4))),)
+
+
+def test_transform_timeline_candidates(
+    data_timeline_filter_activities,
+    data_timeline_number_activities,
+    STFR_model,
+):
+    data = data_timeline_filter_activities.copy()
+    target_feature_name = 'nlp_model_input'
+    model_input_features = ('VorgangsBeschreibung',)
+    (data,) = tl.generate_model_input(
+        data,
+        target_feature_name=target_feature_name,
+        model_input_features=model_input_features,
+    )
+    data_num_act = data_timeline_number_activities.copy()
+    cos_sim_threshold = 0.0
+    tl_cands_iter = tl._get_timeline_candidates_index(
+        data=data,
+        num_activities_per_obj_id=data_num_act,
+        model=STFR_model,
+        cos_sim_threshold=cos_sim_threshold,
+        feature_obj_id='ObjektID',
+        model_input_feature=target_feature_name,
+    )
+    tl_cands_dict = tl._transform_timeline_candidates(tl_cands_iter)
+
+    assert 3 in tl_cands_dict
+    assert tl_cands_dict[3] == ((np.int64(3), np.int64(4)),)
+
+
+def test_map_obj_id_to_texts(data_pre_cleaned):
+    data = data_pre_cleaned.iloc[:5].copy()
+    feature_obj_id = 'ObjektID'
+    feature_obj_text = 'HObjektText'
+    map_text = tl._map_obj_id_to_texts(
+        data=data,
+        feature_obj_id=feature_obj_id,
+        feature_obj_text=feature_obj_text,
+    )
+    assert len(map_text) == 3  # three unique IDs
+    assert map_text[1] == 'Fräsmaschine-FS435X'
+    assert map_text[2] == 'Schleifmaschine-S4x87'
+    assert map_text[3] == 'Bohrbearbeitungszentrum-BBZ35'
+
+
+def test_get_timeline_candidates(
+    data_timeline_filter_activities,
+    data_timeline_number_activities,
+    STFR_model,
+):
+    data = data_timeline_filter_activities.copy()
+    target_feature_name = 'nlp_model_input'
+    model_input_features = ('VorgangsBeschreibung',)
+    (data,) = tl.generate_model_input(
+        data,
+        target_feature_name=target_feature_name,
+        model_input_features=model_input_features,
+    )
+    data_num_act = data_timeline_number_activities.copy()
+    cos_sim_threshold = 0.0
+    tl_cands_dict, map_text = tl.get_timeline_candidates(
+        data=data,
+        num_activities_per_obj_id=data_num_act,
+        model=STFR_model,
+        cos_sim_threshold=cos_sim_threshold,
+        feature_obj_id='ObjektID',
+        feature_obj_text='HObjektText',
+        model_input_feature=target_feature_name,
+    )
+    assert 3 in tl_cands_dict
+    assert tl_cands_dict[3] == ((np.int64(3), np.int64(4)),)
+    assert len(map_text) == 1  # only one ObjectID with timeline candidates
+    assert map_text[3] == 'Bohrbearbeitungszentrum-BBZ35'
+
+
+def test_filter_timeline_cands(data_pre_cleaned, data_timeline_cands_dict):
+    obj_id = 3
+    entry_idx = 0
+    sort_feature = 'ErstellungsDatum'
+    data_proc = tl.filter_timeline_cands(
+        data_pre_cleaned,
+        data_timeline_cands_dict,
+        obj_id=obj_id,
+        entry_idx=entry_idx,
+        sort_feature=sort_feature,
+    )
+    assert 3 in data_proc.index
+    assert 4 in data_proc.index
+    assert data_proc.at[3, 'ObjektID'] == 3
+    assert data_proc.at[4, 'ObjektID'] == 3
+    assert data_proc.at[3, 'VorgangsTypName'] == 'Störungsmeldung'
+    assert data_proc.at[4, 'VorgangsTypName'] == 'Störungsmeldung'
+    assert (
+        data_proc.at[3, 'ErledigungsBeschreibung']
+        == 'Beseitigung der Blockierung und Überprüfung des Antriebs'
+    )
+    assert (
+        data_proc.at[4, 'ErledigungsBeschreibung']
+        == 'Reinigung der Leitungen und Austausch des Kühlmittels'
+    )
+
+
+@pytest.mark.parametrize('convert_to_days', [True, False])
+def test_calc_delta_to_next_failure(
+    data_pre_cleaned,
+    data_timeline_cands_dict,
+    convert_to_days,
+):
+    obj_id = 3
+    entry_idx = 0
+    sort_feature = 'ErstellungsDatum'
+    data_tl_filtered = tl.filter_timeline_cands(
+        data_pre_cleaned,
+        data_timeline_cands_dict,
+        obj_id=obj_id,
+        entry_idx=entry_idx,
+        sort_feature=sort_feature,
+    )
+    name_delta_feature = 'test_delta'
+    data_proc = tl.calc_delta_to_next_failure(
+        data_tl_filtered,
+        date_feature=sort_feature,
+        name_delta_feature=name_delta_feature,
+        convert_to_days=convert_to_days,
+    )
+    test_date = data_proc.at[4, sort_feature] - data_pre_cleaned.at[3, sort_feature]
+    test_date_last = pd.Timedelta(0)
+    if convert_to_days:
+        assert test_date.days == data_proc.at[3, name_delta_feature]
+        assert test_date_last.days == data_proc.at[4, name_delta_feature]
+    else:
+        assert test_date == data_proc.at[3, name_delta_feature]
+        assert test_date_last == data_proc.at[4, name_delta_feature]
@@ -1,9 +1,10 @@
 from pathlib import Path
-from lang_main.analysis import graphs
 
 import pandas as pd
 import pytest
 
+from lang_main.analysis import graphs
+
 DATE_COLS: tuple[str, ...] = (
     'VorgangsDatum',
     'ErledigungsDatum',
@@ -25,6 +26,12 @@ def raw_data_date_cols():
     return DATE_COLS
 
 
+@pytest.fixture(scope='session')
+def data_pre_cleaned() -> pd.DataFrame:
+    pth_data = Path('./tests/_comparison_results/preprocess_pre_cleaned.pkl')
+    return pd.read_pickle(pth_data)
+
+
 @pytest.fixture(scope='session')
 def data_analyse_feature() -> pd.DataFrame:
     pth_data = Path('./tests/_comparison_results/analyse_feature.pkl')