update config usage

This commit is contained in:
Florian Förster 2025-01-22 16:53:30 +01:00
parent 0e36e78906
commit 27445a679b
3 changed files with 53 additions and 46 deletions

View File

@@ -1,10 +1,8 @@
# lang_main: Config file
# d-opt -- lang_main: config file
[paths]
inputs = './data/'
# results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './data/'
inputs = '../lang-data/in/'
results = '../lang-data/out/'
models = '../lang-models'
[logging]
@@ -28,15 +26,15 @@ date_cols = [
"Arbeitsbeginn",
"ErstellungsDatum",
]
target_feature = "VorgangsBeschreibung"
threshold_amount_characters = 5
threshold_similarity = 0.8
threshold_similarity = 0.92
[graph_postprocessing]
threshold_edge_number = 330
# threshold_edge_weight = 150
max_edge_number = -1
[time_analysis.uniqueness]
threshold_unique_texts = 4
threshold_unique_texts = 5
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
feature_name_obj_text = 'HObjektText'
@@ -46,11 +44,6 @@ name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [
'VorgangsBeschreibung',
]
@@ -59,5 +52,5 @@ activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_num_activities = 1
threshold_similarity = 0.8

View File

@@ -123,19 +123,29 @@ TAG_OF_INTEREST: frozenset[str] = frozenset()
# ** export
# ** preprocessing
DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols']
THRESHOLD_AMOUNT_CHARACTERS: Final[float] = CONFIG['preprocess'][
'threshold_amount_characters'
]
THRESHOLD_SIMILARITY: Final[float] = CONFIG['preprocess']['threshold_similarity']
TARGET_FEATURE: Final[str] = CONFIG['preprocess']['target_feature']
threshold_amount_characters: int = CONFIG['preprocess']['threshold_amount_characters']
if threshold_amount_characters < 0:
threshold_amount_characters = 0
THRESHOLD_AMOUNT_CHARACTERS: Final[int] = threshold_amount_characters
threshold_similarity: float = CONFIG['preprocess']['threshold_similarity']
if threshold_similarity < 0 or threshold_similarity > 1:
raise ValueError(
(
'[CONFIG][preprocess][threshold_similarity] Preprocessing similarity '
'threshold must be between 0 and 1.'
)
)
THRESHOLD_SIMILARITY: Final[float] = threshold_similarity
# ** token analysis
# ** graph postprocessing
EDGE_WEIGHT_DECIMALS: Final[int] = 4
threshold_edge_number: int | None = None
cfg_threshold_edge_number: int = CONFIG['graph_postprocessing']['threshold_edge_number']
if cfg_threshold_edge_number >= 0:
threshold_edge_number = cfg_threshold_edge_number
THRESHOLD_EDGE_NUMBER: Final[int | None] = threshold_edge_number
EDGE_WEIGHT_DECIMALS: Final[int] = 6
max_edge_number: int | None = None
max_edge_number_cfg: int = CONFIG['graph_postprocessing']['max_edge_number']
if max_edge_number_cfg >= 0:
max_edge_number = max_edge_number_cfg
MAX_EDGE_NUMBER: Final[int | None] = max_edge_number
PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted'
PROPERTY_NAME_BETWEENNESS_CENTRALITY: Final[str] = 'betweenness_centrality'
PROPERTY_NAME_IMPORTANCE: Final[str] = 'importance'
@@ -163,9 +173,10 @@ CYTO_ITER_NEIGHBOUR_DEPTH: Final[int] = 2
CYTO_NETWORK_ZOOM_FACTOR: Final[float] = 0.96
# ** time_analysis.uniqueness
THRESHOLD_UNIQUE_TEXTS: Final[int] = CONFIG['time_analysis']['uniqueness'][
'threshold_unique_texts'
]
threshold_unique_texts: int = CONFIG['time_analysis']['uniqueness']['threshold_unique_texts']
if threshold_unique_texts < 0:
threshold_unique_texts = 0
THRESHOLD_UNIQUE_TEXTS: Final[int] = threshold_unique_texts
UNIQUE_CRITERION_FEATURE: Final[str] = CONFIG['time_analysis']['uniqueness'][
'criterion_feature'
]
@@ -174,8 +185,6 @@ FEATURE_NAME_OBJ_TEXT: Final[str] = CONFIG['time_analysis']['uniqueness'][
'feature_name_obj_text'
]
# ** time_analysis.preparation
# NAME_DELTA_FEAT_TO_REPAIR: Final[str] = 'delta_to_repair'
CONFIG['time_analysis']['preparation']['name_delta_feat_to_repair']
NAME_DELTA_FEAT_TO_REPAIR: Final[str] = CONFIG['time_analysis']['preparation'][
'name_delta_feat_to_repair'
]
@@ -190,9 +199,21 @@ ACTIVITY_FEATURE: Final[str] = CONFIG['time_analysis']['model_input']['activity_
ACTIVITY_TYPES: Final[tuple[str, ...]] = tuple(
CONFIG['time_analysis']['model_input']['activity_types']
)
THRESHOLD_NUM_ACTIVITIES: Final[int] = CONFIG['time_analysis']['model_input'][
'threshold_num_acitivities'
threshold_num_activities: int = CONFIG['time_analysis']['model_input'][
'threshold_num_activities'
]
THRESHOLD_TIMELINE_SIMILARITY: Final[float] = CONFIG['time_analysis']['model_input'][
if threshold_num_activities < 0:
threshold_num_activities = 0
THRESHOLD_NUM_ACTIVITIES: Final[int] = threshold_num_activities
threshold_timeline_similarity: float = CONFIG['time_analysis']['model_input'][
'threshold_similarity'
]
if threshold_timeline_similarity < 0 or threshold_timeline_similarity > 1:
raise ValueError(
(
'[CONFIG][time_analysis.model_input][threshold_similarity] Timeline similarity '
'threshold must be between 0 and 1.'
)
)
THRESHOLD_TIMELINE_SIMILARITY: Final[float] = threshold_timeline_similarity

View File

@@ -1,9 +1,7 @@
# lang_main: Config file
# d-opt -- lang_main: config file
[paths]
inputs = '../data/in/'
# results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = '../data/out/'
models = './lang-models'
@@ -28,15 +26,15 @@ date_cols = [
"Arbeitsbeginn",
"ErstellungsDatum",
]
target_feature = "VorgangsBeschreibung"
threshold_amount_characters = 5
threshold_similarity = 0.9
threshold_similarity = 0.92
[graph_postprocessing]
threshold_edge_number = 330
# threshold_edge_weight = 150
max_edge_number = -1
[time_analysis.uniqueness]
threshold_unique_texts = 4
threshold_unique_texts = 5
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
feature_name_obj_text = 'HObjektText'
@@ -46,11 +44,6 @@ name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [
'VorgangsBeschreibung',
]
@@ -59,5 +52,5 @@ activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_num_activities = 1
threshold_similarity = 0.8