Improve path handling: resolve configured paths relative to the calling script's directory

This commit is contained in:
Florian Förster 2024-05-31 10:18:39 +02:00
parent 9cafc9fb97
commit b3cc012791
5 changed files with 75 additions and 22 deletions

View File

@ -1,9 +1,9 @@
# lang_main: Config file # lang_main: Config file
[paths] [paths]
inputs = 'A:/Arbeitsaufgaben/lang-main/scripts/inputs/' inputs = './inputs/'
results = 'A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/' results = './results/test_20240529/'
dataset = 'A:/Arbeitsaufgaben/lang-main/data/02_202307/Export4.csv' dataset = '../data/02_202307/Export4.csv'
#results = './results/Export7/' #results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv' #dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/' #results = './results/Export7_trunc/'

View File

@ -26,7 +26,7 @@ USE_INTERNAL_CONFIG: Final[bool] = False
pkg_dir = Path(__file__).parent pkg_dir = Path(__file__).parent
cfg_path_internal = pkg_dir / CONFIG_FILENAME cfg_path_internal = pkg_dir / CONFIG_FILENAME
caller_file = Path(inspect.stack()[-1].filename) caller_file = Path(inspect.stack()[-1].filename)
CALLER_PATH: Final[Path] = caller_file.parent CALLER_PATH: Final[Path] = caller_file.parent.resolve()
# load config data: internal/external # load config data: internal/external
if USE_INTERNAL_CONFIG: if USE_INTERNAL_CONFIG:

View File

@ -1,12 +1,15 @@
from pathlib import Path from pathlib import Path
from typing import Final from typing import Final
from lang_main import CONFIG from lang_main import CALLER_PATH, CONFIG
# ** paths # ** paths
INPUT_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['inputs']) input_path_conf = Path(CONFIG['paths']['inputs'])
SAVE_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['results']) INPUT_PATH_FOLDER: Final[Path] = (CALLER_PATH / input_path_conf).resolve()
PATH_TO_DATASET: Final[Path] = Path(CONFIG['paths']['dataset']) save_path_conf = Path(CONFIG['paths']['results'])
SAVE_PATH_FOLDER: Final[Path] = (CALLER_PATH / save_path_conf).resolve()
path_dataset_conf = Path(CONFIG['paths']['dataset'])
PATH_TO_DATASET: Final[Path] = (CALLER_PATH / path_dataset_conf).resolve()
# ** control # ** control
DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing'] DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip'] SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']

View File

@ -0,0 +1,56 @@
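# lang_main: configuration file
# Relative entries under [paths] are joined onto the directory of the calling
# script and then resolved to absolute paths (see lang_main.constants).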
[paths]
inputs = '../inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
[control]
preprocessing = true
preprocessing_skip = false
token_analysis = false
token_analysis_skip = false
graph_postprocessing = false
graph_postprocessing_skip = false
time_analysis = false
time_analysis_skip = false
#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8
[graph_postprocessing]
threshold_edge_weight = 150
[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
[time_analysis.model_input]
input_features = [
'VorgangsTypName',
'VorgangsArtText',
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
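# NOTE(review): "acitivities" looks like a misspelling of "activities". The key
# is left unchanged because the consuming code presumably looks it up by this
# exact name -- rename both together, and verify against the reader first.
threshold_num_acitivities = 1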
threshold_similarity = 0.8

View File

@ -1,15 +1,9 @@
import re from lang_main.constants import (
INPUT_PATH_FOLDER,
PATH_TO_DATASET,
SAVE_PATH_FOLDER,
)
print(SAVE_PATH_FOLDER, '\n')
string = """ print(INPUT_PATH_FOLDER, '\n')
Hallo mein Name ist Max Mustermann und ich bin am 01.01.2024 geboren. print(PATH_TO_DATASET, '\n')
"""
patt = r'(\d{1,2}\.)?(\d{1,2}\.)([\d]{2,4})?'
patt2 = r'[ ]{2,}'
pattern = re.compile(patt)
pattern2 = re.compile(patt2)
res = pattern.sub('', string)
res = pattern2.sub(' ', res)
print(res)