added test cases

This commit is contained in:
Florian Förster
2025-01-22 16:54:15 +01:00
parent 30fe71e80a
commit fb28b8548b
28 changed files with 17721 additions and 17 deletions

View File

@@ -5,9 +5,6 @@ from typing import cast
import networkx as nx
import numpy as np
import numpy.typing as npt
# import sentence_transformers # TODO check removal
# import sentence_transformers.util # TODO check removal
from networkx import Graph
from pandas import DataFrame, Series
from sentence_transformers import SentenceTransformer

View File

@@ -47,7 +47,7 @@ def _non_relevant_obj_ids(
feats_per_obj_id = feats_per_obj_id.dropna()
unique_feats_per_obj_id = len(feats_per_obj_id.unique())
if unique_feats_per_obj_id > thresh_unique_feat_per_id:
if unique_feats_per_obj_id >= thresh_unique_feat_per_id:
ids_to_ignore.add(obj_id)
return tuple(ids_to_ignore)

View File

@@ -119,7 +119,7 @@ def _preprocess_STFR_model_name(
raise FileNotFoundError(
f'Target model >{model_name}< not found under {model_path}'
)
model_name_or_path = str(model_path)
model_name_or_path = str(model_path) # pragma: no cover
else:
model_name_or_path = model_name

View File

@@ -30,11 +30,12 @@ from lang_main.constants import (
DATE_COLS,
FEATURE_NAME_OBJ_ID,
FEATURE_NAME_OBJ_TEXT,
MAX_EDGE_NUMBER,
MODEL_INPUT_FEATURES,
NAME_DELTA_FEAT_TO_REPAIR,
SAVE_PATH_FOLDER,
TARGET_FEATURE,
THRESHOLD_AMOUNT_CHARACTERS,
THRESHOLD_EDGE_NUMBER,
THRESHOLD_NUM_ACTIVITIES,
THRESHOLD_SIMILARITY,
THRESHOLD_TIMELINE_SIMILARITY,
@@ -72,7 +73,7 @@ def build_base_target_feature_pipe() -> Pipeline:
pipe_target_feat.add(
entry_wise_cleansing,
{
'target_features': ('VorgangsBeschreibung',),
'target_features': (TARGET_FEATURE,),
'cleansing_func': clean_string_slim,
},
save_result=True,
@@ -81,7 +82,7 @@ def build_base_target_feature_pipe() -> Pipeline:
pipe_target_feat.add(
analyse_feature,
{
'target_feature': 'VorgangsBeschreibung',
'target_feature': TARGET_FEATURE,
},
save_result=True,
)
@@ -140,7 +141,7 @@ def build_tk_graph_post_pipe() -> Pipeline:
pipe_graph_postprocessing.add(
graphs.filter_graph_by_number_edges,
{
'limit': THRESHOLD_EDGE_NUMBER,
'limit': MAX_EDGE_NUMBER,
'property': 'weight',
},
)