improved path_handling
This commit is contained in:
56
tests/lang_main_config.toml
Normal file
56
tests/lang_main_config.toml
Normal file
@@ -0,0 +1,56 @@
|
||||
# lang_main: Config file
|
||||
|
||||
[paths]
|
||||
inputs = '../inputs/'
|
||||
results = './results/test_new2/'
|
||||
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||
#results = './results/Export7/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||
#results = './results/Export7_trunc/'
|
||||
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||
|
||||
[control]
|
||||
preprocessing = true
|
||||
preprocessing_skip = false
|
||||
token_analysis = false
|
||||
token_analysis_skip = false
|
||||
graph_postprocessing = false
|
||||
graph_postprocessing_skip = false
|
||||
time_analysis = false
|
||||
time_analysis_skip = false
|
||||
|
||||
#[export_filenames]
|
||||
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
|
||||
[preprocess]
|
||||
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||
date_cols = [
|
||||
"VorgangsDatum",
|
||||
"ErledigungsDatum",
|
||||
"Arbeitsbeginn",
|
||||
"ErstellungsDatum",
|
||||
]
|
||||
threshold_amount_characters = 5
|
||||
threshold_similarity = 0.8
|
||||
|
||||
[graph_postprocessing]
|
||||
threshold_edge_weight = 150
|
||||
|
||||
[time_analysis.uniqueness]
|
||||
threshold_unique_texts = 4
|
||||
criterion_feature = 'HObjektText'
|
||||
feature_name_obj_id = 'ObjektID'
|
||||
|
||||
[time_analysis.model_input]
|
||||
input_features = [
|
||||
'VorgangsTypName',
|
||||
'VorgangsArtText',
|
||||
'VorgangsBeschreibung',
|
||||
]
|
||||
activity_feature = 'VorgangsTypName'
|
||||
activity_types = [
|
||||
'Reparaturauftrag (Portal)',
|
||||
'Störungsmeldung',
|
||||
]
|
||||
threshold_num_acitivities = 1
|
||||
threshold_similarity = 0.8
|
||||
@@ -1,15 +1,9 @@
|
||||
import re
|
||||
from lang_main.constants import (
|
||||
INPUT_PATH_FOLDER,
|
||||
PATH_TO_DATASET,
|
||||
SAVE_PATH_FOLDER,
|
||||
)
|
||||
|
||||
|
||||
# Sample text containing a dotted numeric date (01.01.2024).
string = """
Hallo mein Name ist Max Mustermann und ich bin am 01.01.2024 geboren.
"""

# patt: an optional "D."/"DD." group, a mandatory "D."/"DD." group,
# then an optional run of 2-4 digits.
patt = r'(\d{1,2}\.)?(\d{1,2}\.)([\d]{2,4})?'
# patt2: two or more consecutive space characters.
patt2 = r'[ ]{2,}'
pattern, pattern2 = re.compile(patt), re.compile(patt2)

# Remove every match of the first pattern, then collapse the space runs
# left behind into a single space.
res = pattern2.sub(' ', pattern.sub('', string))

print(res)
# Show each imported path constant, followed by a space and an extra newline.
for _constant in (SAVE_PATH_FOLDER, INPUT_PATH_FOLDER, PATH_TO_DATASET):
    print(_constant, '\n')
|
||||
|
||||
Reference in New Issue
Block a user