update config usage

This commit is contained in:
Florian Förster 2025-01-22 16:53:30 +01:00
parent 0e36e78906
commit 27445a679b
3 changed files with 53 additions and 46 deletions

View File

@ -1,10 +1,8 @@
# lang_main: Config file # d-opt -- lang_main: config file
[paths] [paths]
inputs = './data/' inputs = '../lang-data/in/'
# results = './results/dummy_N_1000/' results = '../lang-data/out/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './data/'
models = '../lang-models' models = '../lang-models'
[logging] [logging]
@ -28,15 +26,15 @@ date_cols = [
"Arbeitsbeginn", "Arbeitsbeginn",
"ErstellungsDatum", "ErstellungsDatum",
] ]
target_feature = "VorgangsBeschreibung"
threshold_amount_characters = 5 threshold_amount_characters = 5
threshold_similarity = 0.8 threshold_similarity = 0.92
[graph_postprocessing] [graph_postprocessing]
threshold_edge_number = 330 max_edge_number = -1
# threshold_edge_weight = 150
[time_analysis.uniqueness] [time_analysis.uniqueness]
threshold_unique_texts = 4 threshold_unique_texts = 5
criterion_feature = 'HObjektText' criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID' feature_name_obj_id = 'ObjektID'
feature_name_obj_text = 'HObjektText' feature_name_obj_text = 'HObjektText'
@ -46,11 +44,6 @@ name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]' name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input] [time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [ input_features = [
'VorgangsBeschreibung', 'VorgangsBeschreibung',
] ]
@ -59,5 +52,5 @@ activity_types = [
'Reparaturauftrag (Portal)', 'Reparaturauftrag (Portal)',
'Störungsmeldung', 'Störungsmeldung',
] ]
threshold_num_acitivities = 1 threshold_num_activities = 1
threshold_similarity = 0.8 threshold_similarity = 0.8

View File

@ -123,19 +123,29 @@ TAG_OF_INTEREST: frozenset[str] = frozenset()
# ** export # ** export
# ** preprocessing # ** preprocessing
DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols'] DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols']
THRESHOLD_AMOUNT_CHARACTERS: Final[float] = CONFIG['preprocess'][ TARGET_FEATURE: Final[str] = CONFIG['preprocess']['target_feature']
'threshold_amount_characters' threshold_amount_characters: int = CONFIG['preprocess']['threshold_amount_characters']
] if threshold_amount_characters < 0:
THRESHOLD_SIMILARITY: Final[float] = CONFIG['preprocess']['threshold_similarity'] threshold_amount_characters = 0
THRESHOLD_AMOUNT_CHARACTERS: Final[int] = threshold_amount_characters
threshold_similarity: float = CONFIG['preprocess']['threshold_similarity']
if threshold_similarity < 0 or threshold_similarity > 1:
raise ValueError(
(
'[CONFIG][preprocess][threshold_similarity] Preprocessing similarity '
'threshold must be between 0 and 1.'
)
)
THRESHOLD_SIMILARITY: Final[float] = threshold_similarity
# ** token analysis # ** token analysis
# ** graph postprocessing # ** graph postprocessing
EDGE_WEIGHT_DECIMALS: Final[int] = 4 EDGE_WEIGHT_DECIMALS: Final[int] = 6
threshold_edge_number: int | None = None max_edge_number: int | None = None
cfg_threshold_edge_number: int = CONFIG['graph_postprocessing']['threshold_edge_number'] max_edge_number_cfg: int = CONFIG['graph_postprocessing']['max_edge_number']
if cfg_threshold_edge_number >= 0: if max_edge_number_cfg >= 0:
threshold_edge_number = cfg_threshold_edge_number max_edge_number = max_edge_number_cfg
THRESHOLD_EDGE_NUMBER: Final[int | None] = threshold_edge_number MAX_EDGE_NUMBER: Final[int | None] = max_edge_number
PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted' PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted'
PROPERTY_NAME_BETWEENNESS_CENTRALITY: Final[str] = 'betweenness_centrality' PROPERTY_NAME_BETWEENNESS_CENTRALITY: Final[str] = 'betweenness_centrality'
PROPERTY_NAME_IMPORTANCE: Final[str] = 'importance' PROPERTY_NAME_IMPORTANCE: Final[str] = 'importance'
@ -163,9 +173,10 @@ CYTO_ITER_NEIGHBOUR_DEPTH: Final[int] = 2
CYTO_NETWORK_ZOOM_FACTOR: Final[float] = 0.96 CYTO_NETWORK_ZOOM_FACTOR: Final[float] = 0.96
# ** time_analysis.uniqueness # ** time_analysis.uniqueness
THRESHOLD_UNIQUE_TEXTS: Final[int] = CONFIG['time_analysis']['uniqueness'][ threshold_unique_texts: int = CONFIG['time_analysis']['uniqueness']['threshold_unique_texts']
'threshold_unique_texts' if threshold_unique_texts < 0:
] threshold_unique_texts = 0
THRESHOLD_UNIQUE_TEXTS: Final[int] = threshold_unique_texts
UNIQUE_CRITERION_FEATURE: Final[str] = CONFIG['time_analysis']['uniqueness'][ UNIQUE_CRITERION_FEATURE: Final[str] = CONFIG['time_analysis']['uniqueness'][
'criterion_feature' 'criterion_feature'
] ]
@ -174,8 +185,6 @@ FEATURE_NAME_OBJ_TEXT: Final[str] = CONFIG['time_analysis']['uniqueness'][
'feature_name_obj_text' 'feature_name_obj_text'
] ]
# ** time_analysis.preparation # ** time_analysis.preparation
# NAME_DELTA_FEAT_TO_REPAIR: Final[str] = 'delta_to_repair'
CONFIG['time_analysis']['preparation']['name_delta_feat_to_repair']
NAME_DELTA_FEAT_TO_REPAIR: Final[str] = CONFIG['time_analysis']['preparation'][ NAME_DELTA_FEAT_TO_REPAIR: Final[str] = CONFIG['time_analysis']['preparation'][
'name_delta_feat_to_repair' 'name_delta_feat_to_repair'
] ]
@ -190,9 +199,21 @@ ACTIVITY_FEATURE: Final[str] = CONFIG['time_analysis']['model_input']['activity_
ACTIVITY_TYPES: Final[tuple[str, ...]] = tuple( ACTIVITY_TYPES: Final[tuple[str, ...]] = tuple(
CONFIG['time_analysis']['model_input']['activity_types'] CONFIG['time_analysis']['model_input']['activity_types']
) )
THRESHOLD_NUM_ACTIVITIES: Final[int] = CONFIG['time_analysis']['model_input'][ threshold_num_activities: int = CONFIG['time_analysis']['model_input'][
'threshold_num_acitivities' 'threshold_num_activities'
] ]
THRESHOLD_TIMELINE_SIMILARITY: Final[float] = CONFIG['time_analysis']['model_input'][ if threshold_num_activities < 0:
threshold_num_activities = 0
THRESHOLD_NUM_ACTIVITIES: Final[int] = threshold_num_activities
threshold_timeline_similarity: float = CONFIG['time_analysis']['model_input'][
'threshold_similarity' 'threshold_similarity'
] ]
if threshold_timeline_similarity < 0 or threshold_timeline_similarity > 1:
raise ValueError(
(
'[CONFIG][time_analysis.model_input][threshold_similarity] Timeline similarity '
'threshold must be between 0 and 1.'
)
)
THRESHOLD_TIMELINE_SIMILARITY: Final[float] = threshold_timeline_similarity

View File

@ -1,9 +1,7 @@
# lang_main: Config file # d-opt -- lang_main: config file
[paths] [paths]
inputs = '../data/in/' inputs = '../data/in/'
# results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = '../data/out/' results = '../data/out/'
models = './lang-models' models = './lang-models'
@ -28,15 +26,15 @@ date_cols = [
"Arbeitsbeginn", "Arbeitsbeginn",
"ErstellungsDatum", "ErstellungsDatum",
] ]
target_feature = "VorgangsBeschreibung"
threshold_amount_characters = 5 threshold_amount_characters = 5
threshold_similarity = 0.9 threshold_similarity = 0.92
[graph_postprocessing] [graph_postprocessing]
threshold_edge_number = 330 max_edge_number = -1
# threshold_edge_weight = 150
[time_analysis.uniqueness] [time_analysis.uniqueness]
threshold_unique_texts = 4 threshold_unique_texts = 5
criterion_feature = 'HObjektText' criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID' feature_name_obj_id = 'ObjektID'
feature_name_obj_text = 'HObjektText' feature_name_obj_text = 'HObjektText'
@ -46,11 +44,6 @@ name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]' name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input] [time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [ input_features = [
'VorgangsBeschreibung', 'VorgangsBeschreibung',
] ]
@ -59,5 +52,5 @@ activity_types = [
'Reparaturauftrag (Portal)', 'Reparaturauftrag (Portal)',
'Störungsmeldung', 'Störungsmeldung',
] ]
threshold_num_acitivities = 1 threshold_num_activities = 1
threshold_similarity = 0.8 threshold_similarity = 0.8