lang-main/scripts/lang_main_config.toml
2024-07-17 18:30:26 +02:00

59 lines
1.4 KiB
TOML

# lang_main: Config file
[paths]
inputs = './inputs/'
results = './results/test_20240619/'
dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing_skip = true
token_analysis_skip = true
graph_postprocessing_skip = true
graph_rescaling_skip = true
graph_static_rendering_skip = false
time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 150
[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',
# 'VorgangsArtText',
# 'VorgangsBeschreibung',
# ]
input_features = [
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_similarity = 0.8