diff --git a/scripts/lang_main_config.toml b/scripts/lang_main_config.toml index 3d0fdd7..9a1fa04 100644 --- a/scripts/lang_main_config.toml +++ b/scripts/lang_main_config.toml @@ -1,9 +1,9 @@ # lang_main: Config file [paths] -inputs = 'A:/Arbeitsaufgaben/lang-main/scripts/inputs/' -results = 'A:/Arbeitsaufgaben/lang-main/scripts/results/test_20240529/' -dataset = 'A:/Arbeitsaufgaben/lang-main/data/02_202307/Export4.csv' +inputs = './inputs/' +results = './results/test_20240529/' +dataset = '../data/02_202307/Export4.csv' #results = './results/Export7/' #dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv' #results = './results/Export7_trunc/' diff --git a/src/lang_main/__init__.py b/src/lang_main/__init__.py index a8332ee..adf3a67 100644 --- a/src/lang_main/__init__.py +++ b/src/lang_main/__init__.py @@ -26,7 +26,7 @@ USE_INTERNAL_CONFIG: Final[bool] = False pkg_dir = Path(__file__).parent cfg_path_internal = pkg_dir / CONFIG_FILENAME caller_file = Path(inspect.stack()[-1].filename) -CALLER_PATH: Final[Path] = caller_file.parent +CALLER_PATH: Final[Path] = caller_file.parent.resolve() # load config data: internal/external if USE_INTERNAL_CONFIG: diff --git a/src/lang_main/constants.py b/src/lang_main/constants.py index c60439f..f70b329 100644 --- a/src/lang_main/constants.py +++ b/src/lang_main/constants.py @@ -1,12 +1,15 @@ from pathlib import Path from typing import Final -from lang_main import CONFIG +from lang_main import CALLER_PATH, CONFIG # ** paths -INPUT_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['inputs']) -SAVE_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['results']) -PATH_TO_DATASET: Final[Path] = Path(CONFIG['paths']['dataset']) +input_path_conf = Path(CONFIG['paths']['inputs']) +INPUT_PATH_FOLDER: Final[Path] = (CALLER_PATH / input_path_conf).resolve() +save_path_conf = Path(CONFIG['paths']['results']) +SAVE_PATH_FOLDER: Final[Path] = (CALLER_PATH / save_path_conf).resolve() +path_dataset_conf = Path(CONFIG['paths']['dataset']) +PATH_TO_DATASET: Final[Path] = (CALLER_PATH / path_dataset_conf).resolve() # ** control DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing'] SKIP_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing_skip'] diff --git a/tests/lang_main_config.toml b/tests/lang_main_config.toml new file mode 100644 index 0000000..9f85e7c --- /dev/null +++ b/tests/lang_main_config.toml @@ -0,0 +1,56 @@ +# lang_main: Config file + +[paths] +inputs = '../inputs/' +results = './results/test_new2/' +dataset = './01_2_Rohdaten_neu/Export4.csv' +#results = './results/Export7/' +#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv' +#results = './results/Export7_trunc/' +#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv' + +[control] +preprocessing = true +preprocessing_skip = false +token_analysis = false +token_analysis_skip = false +graph_postprocessing = false +graph_postprocessing_skip = false +time_analysis = false +time_analysis_skip = false + +#[export_filenames] +#filename_cossim_filter_candidates = 'CosSim-FilterCandidates' + +[preprocess] +filename_cossim_filter_candidates = 'CosSim-FilterCandidates' +date_cols = [ + "VorgangsDatum", + "ErledigungsDatum", + "Arbeitsbeginn", + "ErstellungsDatum", +] +threshold_amount_characters = 5 +threshold_similarity = 0.8 + +[graph_postprocessing] +threshold_edge_weight = 150 + +[time_analysis.uniqueness] +threshold_unique_texts = 4 +criterion_feature = 'HObjektText' +feature_name_obj_id = 'ObjektID' + +[time_analysis.model_input] +input_features = [ + 'VorgangsTypName', + 'VorgangsArtText', + 'VorgangsBeschreibung', +] +activity_feature = 'VorgangsTypName' +activity_types = [ + 'Reparaturauftrag (Portal)', + 'Störungsmeldung', +] +threshold_num_acitivities = 1 +threshold_similarity = 0.8 \ No newline at end of file diff --git a/tests/pre_test_examples.py b/tests/pre_test_examples.py index 85d3b5f..7b3b755 100644 --- a/tests/pre_test_examples.py +++ b/tests/pre_test_examples.py @@ -1,15 +1,9 @@ -import re +from lang_main.constants import ( + INPUT_PATH_FOLDER, + PATH_TO_DATASET, + SAVE_PATH_FOLDER, +) - -string = """ -Hallo mein Name ist Max Mustermann und ich bin am 01.01.2024 geboren. -""" - -patt = r'(\d{1,2}\.)?(\d{1,2}\.)([\d]{2,4})?' -patt2 = r'[ ]{2,}' -pattern = re.compile(patt) -pattern2 = re.compile(patt2) -res = pattern.sub('', string) -res = pattern2.sub(' ', res) - -print(res) +print(SAVE_PATH_FOLDER, '\n') +print(INPUT_PATH_FOLDER, '\n') +print(PATH_TO_DATASET, '\n')