Improved path handling
This commit is contained in:
parent
9cafc9fb97
commit
b3cc012791
@ -1,9 +1,9 @@
|
|||||||
# lang_main: Config file
|
# lang_main: Config file
|
||||||
|
|
||||||
[paths]
|
[paths]
|
||||||
inputs = 'A:/Arbeitsaufgaben/lang-main/scripts/inputs/'
|
inputs = './inputs/'
|
||||||
results = 'A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/'
|
results = './results/test_20240529/'
|
||||||
dataset = 'A:/Arbeitsaufgaben/lang-main/data/02_202307/Export4.csv'
|
dataset = '../data/02_202307/Export4.csv'
|
||||||
#results = './results/Export7/'
|
#results = './results/Export7/'
|
||||||
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||||
#results = './results/Export7_trunc/'
|
#results = './results/Export7_trunc/'
|
||||||
|
|||||||
@ -26,7 +26,7 @@ USE_INTERNAL_CONFIG: Final[bool] = False
|
|||||||
pkg_dir = Path(__file__).parent
|
pkg_dir = Path(__file__).parent
|
||||||
cfg_path_internal = pkg_dir / CONFIG_FILENAME
|
cfg_path_internal = pkg_dir / CONFIG_FILENAME
|
||||||
caller_file = Path(inspect.stack()[-1].filename)
|
caller_file = Path(inspect.stack()[-1].filename)
|
||||||
CALLER_PATH: Final[Path] = caller_file.parent
|
CALLER_PATH: Final[Path] = caller_file.parent.resolve()
|
||||||
|
|
||||||
# load config data: internal/external
|
# load config data: internal/external
|
||||||
if USE_INTERNAL_CONFIG:
|
if USE_INTERNAL_CONFIG:
|
||||||
|
|||||||
@ -1,12 +1,15 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final
|
from typing import Final
|
||||||
|
|
||||||
from lang_main import CONFIG
|
from lang_main import CALLER_PATH, CONFIG
|
||||||
|
|
||||||
# ** paths
|
# ** paths
|
||||||
INPUT_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['inputs'])
|
input_path_conf = Path(CONFIG['paths']['inputs'])
|
||||||
SAVE_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['results'])
|
INPUT_PATH_FOLDER: Final[Path] = (CALLER_PATH / input_path_conf).resolve()
|
||||||
PATH_TO_DATASET: Final[Path] = Path(CONFIG['paths']['dataset'])
|
save_path_conf = Path(CONFIG['paths']['results'])
|
||||||
|
SAVE_PATH_FOLDER: Final[Path] = (CALLER_PATH / save_path_conf).resolve()
|
||||||
|
path_dataset_conf = Path(CONFIG['paths']['dataset'])
|
||||||
|
PATH_TO_DATASET: Final[Path] = (CALLER_PATH / path_dataset_conf).resolve()
|
||||||
# ** control
|
# ** control
|
||||||
DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
|
DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
|
||||||
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
|
SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip']
|
||||||
|
|||||||
56
tests/lang_main_config.toml
Normal file
56
tests/lang_main_config.toml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# lang_main: Config file
|
||||||
|
|
||||||
|
[paths]
|
||||||
|
inputs = '../inputs/'
|
||||||
|
results = './results/test_new2/'
|
||||||
|
dataset = './01_2_Rohdaten_neu/Export4.csv'
|
||||||
|
#results = './results/Export7/'
|
||||||
|
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
|
||||||
|
#results = './results/Export7_trunc/'
|
||||||
|
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
|
||||||
|
|
||||||
|
[control]
|
||||||
|
preprocessing = true
|
||||||
|
preprocessing_skip = false
|
||||||
|
token_analysis = false
|
||||||
|
token_analysis_skip = false
|
||||||
|
graph_postprocessing = false
|
||||||
|
graph_postprocessing_skip = false
|
||||||
|
time_analysis = false
|
||||||
|
time_analysis_skip = false
|
||||||
|
|
||||||
|
#[export_filenames]
|
||||||
|
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
|
|
||||||
|
[preprocess]
|
||||||
|
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
|
||||||
|
date_cols = [
|
||||||
|
"VorgangsDatum",
|
||||||
|
"ErledigungsDatum",
|
||||||
|
"Arbeitsbeginn",
|
||||||
|
"ErstellungsDatum",
|
||||||
|
]
|
||||||
|
threshold_amount_characters = 5
|
||||||
|
threshold_similarity = 0.8
|
||||||
|
|
||||||
|
[graph_postprocessing]
|
||||||
|
threshold_edge_weight = 150
|
||||||
|
|
||||||
|
[time_analysis.uniqueness]
|
||||||
|
threshold_unique_texts = 4
|
||||||
|
criterion_feature = 'HObjektText'
|
||||||
|
feature_name_obj_id = 'ObjektID'
|
||||||
|
|
||||||
|
[time_analysis.model_input]
|
||||||
|
input_features = [
|
||||||
|
'VorgangsTypName',
|
||||||
|
'VorgangsArtText',
|
||||||
|
'VorgangsBeschreibung',
|
||||||
|
]
|
||||||
|
activity_feature = 'VorgangsTypName'
|
||||||
|
activity_types = [
|
||||||
|
'Reparaturauftrag (Portal)',
|
||||||
|
'Störungsmeldung',
|
||||||
|
]
|
||||||
|
threshold_num_acitivities = 1
|
||||||
|
threshold_similarity = 0.8
|
||||||
@ -1,15 +1,9 @@
|
|||||||
import re
|
from lang_main.constants import (
|
||||||
|
INPUT_PATH_FOLDER,
|
||||||
|
PATH_TO_DATASET,
|
||||||
|
SAVE_PATH_FOLDER,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(SAVE_PATH_FOLDER, '\n')
|
||||||
string = """
|
print(INPUT_PATH_FOLDER, '\n')
|
||||||
Hallo mein Name ist Max Mustermann und ich bin am 01.01.2024 geboren.
|
print(PATH_TO_DATASET, '\n')
|
||||||
"""
|
|
||||||
|
|
||||||
patt = r'(\d{1,2}\.)?(\d{1,2}\.)([\d]{2,4})?'
|
|
||||||
patt2 = r'[ ]{2,}'
|
|
||||||
pattern = re.compile(patt)
|
|
||||||
pattern2 = re.compile(patt2)
|
|
||||||
res = pattern.sub('', string)
|
|
||||||
res = pattern2.sub(' ', res)
|
|
||||||
|
|
||||||
print(res)
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user