diff --git a/lang_main_config.toml b/lang_main_config.toml index c50b611..38ce082 100644 --- a/lang_main_config.toml +++ b/lang_main_config.toml @@ -1,9 +1,12 @@ # d-opt -- lang_main: config file [paths] -inputs = '../lang-data/in/' -results = '../lang-data/out/' -models = '../lang-models' +inputs = './lang-data/in/' +results = './lang-data/out/' +models = './lang-models' + +[models] +use_large_model = true [logging] enabled = true diff --git a/lang_main_config_old.toml b/lang_main_config_old.toml new file mode 100644 index 0000000..c50b611 --- /dev/null +++ b/lang_main_config_old.toml @@ -0,0 +1,56 @@ +# d-opt -- lang_main: config file + +[paths] +inputs = '../lang-data/in/' +results = '../lang-data/out/' +models = '../lang-models' + +[logging] +enabled = true +stderr = true +file = true + +# control which pipelines are executed +[control] +preprocessing_skip = false +token_analysis_skip = false +graph_postprocessing_skip = false +graph_rescaling_skip = false +graph_static_rendering_skip = true +time_analysis_skip = true + +[preprocess] +date_cols = [ + "VorgangsDatum", + "ErledigungsDatum", + "Arbeitsbeginn", + "ErstellungsDatum", +] +target_feature = "VorgangsBeschreibung" +threshold_amount_characters = 5 +threshold_similarity = 0.92 + +[graph_postprocessing] +max_edge_number = -1 + +[time_analysis.uniqueness] +threshold_unique_texts = 5 +criterion_feature = 'HObjektText' +feature_name_obj_id = 'ObjektID' +feature_name_obj_text = 'HObjektText' + +[time_analysis.preparation] +name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]' +name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]' + +[time_analysis.model_input] +input_features = [ + 'VorgangsBeschreibung', +] +activity_feature = 'VorgangsTypName' +activity_types = [ + 'Reparaturauftrag (Portal)', + 'Störungsmeldung', +] +threshold_num_activities = 1 +threshold_similarity = 0.8 \ No newline at end of file diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 
0000000..882975a
--- /dev/null
+++ b/noxfile.py
@@ -0,0 +1,42 @@
+import os
+import shlex
+from typing import Final
+
+import nox
+from nox import Session
+
+# PDM recommends this setting for Nox: the interpreter saved for the project
+# is ignored, so PDM picks up the Python of the session's virtual environment
+os.environ['PDM_IGNORE_SAVED_PYTHON'] = '1'
+PYTHON_VERS: Final[list[str]] = ['3.11']
+# dependency installation shared by all sessions
+INSTALL_CMD: Final[str] = (
+    'pdm install -G cytoscape,spacy-sm,spacy-md,spacy-trf -dG tests'
+)
+
+
+@nox.session(name='min', python=PYTHON_VERS, reuse_venv=True)
+def tests_wo_models_cyto(session: Session) -> None:
+    """Run all tests except model loading and Cytoscape tests"""
+    session.run_install(*shlex.split(INSTALL_CMD), external=True)
+    # deselect everything marked as ``mload`` or ``cyto``
+    session.run(*shlex.split('pytest -m "not mload and not cyto"'))
+
+
+@nox.session(name='all', python=PYTHON_VERS, reuse_venv=True, default=False)
+def tests_all(session: Session) -> None:
+    """Run all tests, including Cytoscape tests using the Docker container
+
+    The container ``cyrest`` is stopped even if the Cytoscape tests fail.
+    """
+    session.run_install(*shlex.split(INSTALL_CMD), external=True)
+
+    session.run(*shlex.split('pytest -n 4'))
+
+    session.run('docker', 'desktop', 'start', external=True)
+    session.run('docker', 'start', 'cyrest', external=True)
+    try:
+        session.run(*shlex.split('pytest -m "cyto"'))
+    finally:
+        # a failed command raises, which would otherwise skip the cleanup
+        session.run('docker', 'stop', 'cyrest', external=True)
diff --git a/src/lang_main/config.py b/src/lang_main/config.py
index 42e4b00..6fd2f12 100644
--- a/src/lang_main/config.py
+++ b/src/lang_main/config.py
@@ -36,7 +36,7 @@ BASE_FOLDERNAME: Final[str] = os.environ.get('LANG_MAIN_BASE_FOLDERNAME', 'lang-
 CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
 CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
 PKG_DIR: Final[Path] = Path(__file__).parent
-STOP_FOLDER: Final[str] = os.environ.get('LANG_MAIN_STOP_SEARCH_FOLDERNAME', 'src')
+STOP_FOLDER: Final[str] = 
os.environ.get('LANG_MAIN_STOP_SEARCH_FOLDERNAME', 'lang-main') def load_toml_config( diff --git a/src/lang_main/model_loader.py b/src/lang_main/model_loader.py index 0ac98c0..94f3e7e 100644 --- a/src/lang_main/model_loader.py +++ b/src/lang_main/model_loader.py @@ -115,11 +115,11 @@ def _preprocess_STFR_model_name( # !! without any user names folder_name = model_name.split('/')[-1] model_path = MODEL_BASE_FOLDER / folder_name - if not model_path.exists(): + if not model_path.exists(): # pragma: no cover raise FileNotFoundError( f'Target model >{model_name}< not found under {model_path}' ) - model_name_or_path = str(model_path) # pragma: no cover + model_name_or_path = str(model_path) else: model_name_or_path = model_name diff --git a/tests/test_config.py b/tests/test_config.py index 70ec918..e5284bd 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -52,7 +52,7 @@ def test_load_cfg_func(monkeypatch, tmp_path): stop_folder_name=stop_folder, lookup_cwd=False, ) - assert loaded_cfg['paths']['models'] == '../lang-models' + assert loaded_cfg['paths']['models'] == './lang-models' loaded_cfg = config.load_cfg( starting_path=pkg_dir, @@ -60,4 +60,4 @@ def test_load_cfg_func(monkeypatch, tmp_path): stop_folder_name=stop_folder, lookup_cwd=True, ) - assert loaded_cfg['paths']['models'] == '../lang-models' + assert loaded_cfg['paths']['models'] == './lang-models' diff --git a/tests/test_model_loader.py b/tests/test_model_loader.py index 09179bc..e998030 100644 --- a/tests/test_model_loader.py +++ b/tests/test_model_loader.py @@ -26,7 +26,7 @@ from lang_main.types import LanguageModels 'model_name', [ STFRModelTypes.ALL_MINI_LM_L6_V2, - STFRModelTypes.ALL_MPNET_BASE_V2, + STFRModelTypes.ALL_MINI_LM_L12_V2, ], ) @pytest.mark.mload @@ -58,10 +58,9 @@ def test_preprocess_STFR_model_name() -> None: model_name_exist = STFRModelTypes.E5_BASE_STS_EN_DE backend_exist = STFRBackends.ONNX - with pytest.raises(FileNotFoundError): - _ = 
model_loader._preprocess_STFR_model_name( - model_name=model_name_exist, backend=backend_exist, force_download=False - ) + _ = model_loader._preprocess_STFR_model_name( + model_name=model_name_exist, backend=backend_exist, force_download=False + ) @pytest.mark.parametrize( @@ -75,7 +74,7 @@ def test_preprocess_STFR_model_name() -> None: 'model_name', [ STFRModelTypes.ALL_MINI_LM_L6_V2, - STFRModelTypes.ALL_MPNET_BASE_V2, + STFRModelTypes.ALL_MINI_LM_L12_V2, ], ) @pytest.mark.mload @@ -94,9 +93,7 @@ def test_load_sentence_transformer_onnx(model_name, similarity_func) -> None: @pytest.mark.parametrize( 'model_name', [ - SpacyModelTypes.DE_CORE_NEWS_SM, SpacyModelTypes.DE_CORE_NEWS_MD, - SpacyModelTypes.DE_CORE_NEWS_LG, SpacyModelTypes.DE_DEP_NEWS_TRF, ], ) @@ -111,7 +108,7 @@ def test_load_spacy_model(model_name): def test_load_spacy_model_fail(): model_name = 'not_existing' with pytest.raises(LanguageModelNotFoundError): - model = model_loader.load_spacy(model_name) + _ = model_loader.load_spacy(model_name) @pytest.mark.mload