38 lines
1003 B
TOML
38 lines
1003 B
TOML
# lang_main: Config file
|
|
|
|
# Filesystem locations. All values are literal (single-quoted) strings,
# so no escape sequences are interpreted.
# The active entries are absolute paths on drive A: and therefore
# machine-specific. NOTE(review): how each path is consumed is defined by
# the application, not by this file -- confirm before restructuring.
[paths]
inputs = 'A:/Arbeitsaufgaben/lang-main/scripts'
results = 'A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/'
dataset = 'A:/Arbeitsaufgaben/lang-main/data/02_202307/Export4.csv'
# Disabled alternative results/dataset pairs (relative paths). Only one
# `results` and one `dataset` key may be active at a time: TOML forbids
# defining the same key twice in a table.
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'

# Boolean switches, given as one `<stage>` / `<stage>_skip` pair per stage
# (preprocessing, token_analysis, graph_postprocessing).
# In the current values each `_skip` flag is the negation of its stage flag.
# NOTE(review): the exact meaning of the two flags, and whether they may be
# set independently, is defined by the consuming application -- confirm
# before changing one without the other.
[control]
preprocessing = true
preprocessing_skip = false
token_analysis = false
token_analysis_skip = true
graph_postprocessing = false
graph_postprocessing_skip = true

#[export_filenames]
|
|
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
|
|
|
# Settings for the preprocessing stage.
[preprocess]
# File name without extension or directory.
# NOTE(review): presumably an export name for cosine-similarity filter
# candidates, judging by the key -- confirm against the application.
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
# Column names as they appear in the dataset (German identifiers).
# NOTE(review): presumably the columns to be parsed as dates -- confirm.
date_cols = [
    "VorgangsDatum",
    "ErledigungsDatum",
    "Arbeitsbeginn",
    "ErstellungsDatum",
]
# Integer threshold; NOTE(review): unit presumably characters per text
# entry, and whether the bound is inclusive is not visible here -- confirm.
threshold_amount_characters = 5
# Float threshold; NOTE(review): presumably a cosine-similarity cut-off
# (see the "CosSim" file name above) -- confirm range and direction.
threshold_similarity = 0.8

# Settings for the graph post-processing stage. This table is distinct from
# the boolean key `graph_postprocessing` inside [control]; the two live in
# different tables and do not collide.
[graph_postprocessing]
# Integer threshold on edge weights.
# NOTE(review): whether edges at, above, or below this value are kept is
# defined by the application -- confirm.
threshold_edge_weight = 150

# Settings for time analysis.
# NOTE(review): the visible [control] table has no `time_analysis` switch;
# confirm how this stage is enabled.
[time_analysis]
# Integer threshold on the number of unique texts.
# NOTE(review): direction and inclusiveness of the comparison are not
# visible here -- confirm.
threshold_unique_texts = 5