more robust graph filtering

This commit is contained in:
Florian Förster
2024-09-12 15:19:24 +02:00
parent e85334ab06
commit 27d40d5c99
13 changed files with 1332 additions and 133 deletions

View File

@@ -2,12 +2,10 @@
[paths]
inputs = './inputs/'
results = '../scripts/results/test_20240619/'
# results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './results/'
dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
# Debugging-only switches: production-ready pipelines should always
# be executed in full, with no step skipped.
@@ -19,21 +17,18 @@ graph_rescaling_skip = false
graph_static_rendering_skip = false
time_analysis_skip = true
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_number = 300
threshold_edge_weight = 150
[time_analysis.uniqueness]
@@ -41,6 +36,10 @@ threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.preparation]
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input]
# input_features = [
# 'VorgangsTypName',

File diff suppressed because one or more lines are too long