updated tests and config nox

This commit is contained in:
Florian Förster 2025-01-23 11:57:52 +01:00
parent 4aec6aa6c4
commit add8da4c5c
7 changed files with 114 additions and 17 deletions

View File

@ -1,9 +1,12 @@
# d-opt -- lang_main: config file
[paths]
inputs = '../lang-data/in/'
results = '../lang-data/out/'
models = '../lang-models'
inputs = './lang-data/in/'
results = './lang-data/out/'
models = './lang-models'
[models]
use_large_model = true
[logging]
enabled = true

56
lang_main_config_old.toml Normal file
View File

@ -0,0 +1,56 @@
# d-opt -- lang_main: config file
# Layout: [paths] and [logging] come first, [control] toggles the pipelines,
# and the remaining sections hold per-pipeline parameters.
# NOTE(review): the path values are relative ('../' = one directory level up);
# which directory they are resolved against is not visible here -- confirm
[paths]
inputs = '../lang-data/in/'
results = '../lang-data/out/'
models = '../lang-models'
# logging switches: a general toggle plus separate toggles for stderr and file
[logging]
enabled = true
stderr = true
file = true
# control which pipelines are executed
# NOTE(review): every key ends in `_skip`, so `true` presumably disables the
# corresponding pipeline -- confirm against the code reading this section
[control]
preprocessing_skip = false
token_analysis_skip = false
graph_postprocessing_skip = false
graph_rescaling_skip = false
graph_static_rendering_skip = true
time_analysis_skip = true
[preprocess]
# NOTE(review): presumably names of dataset columns that hold dates -- confirm
date_cols = [
"VorgangsDatum",
"ErledigungsDatum",
"Arbeitsbeginn",
"ErstellungsDatum",
]
target_feature = "VorgangsBeschreibung"
threshold_amount_characters = 5
threshold_similarity = 0.92
[graph_postprocessing]
# NOTE(review): -1 looks like a "no limit" sentinel -- confirm
max_edge_number = -1
[time_analysis.uniqueness]
threshold_unique_texts = 5
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'
feature_name_obj_text = 'HObjektText'
[time_analysis.preparation]
name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
[time_analysis.model_input]
input_features = [
'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
'Reparaturauftrag (Portal)',
'Störungsmeldung',
]
threshold_num_activities = 1
# NOTE(review): separate from `threshold_similarity` in [preprocess] (0.92 there)
threshold_similarity = 0.8

41
noxfile.py Normal file
View File

@ -0,0 +1,41 @@
import os
import shlex
from typing import Final
import nox
from nox import Session
# Tell PDM to ignore the Python interpreter saved for the project, so that
# `pdm install` targets the virtualenv nox activates for each session
# (see PDM's documentation on usage with nox).
os.environ.update({'PDM_IGNORE_SAVED_PYTHON': '1'})

# Python versions passed to every `@nox.session(python=...)` below.
PYTHON_VERS: Final[list[str]] = ['3.11']
@nox.session(name='min', python=PYTHON_VERS, reuse_venv=True)
def tests_wo_models_cyto(session: Session) -> None:
    """Run the test suite except the model-loading and Cytoscape tests.

    Installs the dependencies with ``pdm install`` and then invokes pytest
    with a marker expression that deselects tests marked ``mload`` or
    ``cyto``.

    Args:
        session: The nox session the commands are executed in.
    """
    install_args = shlex.split(
        'pdm install -G cytoscape,spacy-sm,spacy-md,spacy-trf -dG tests'
    )
    # external=True: `pdm` is allowed to come from outside the session venv
    session.run_install(*install_args, external=True)

    pytest_args = shlex.split('pytest -m "not mload and not cyto"')
    session.run(*pytest_args)
@nox.session(name='all', python=PYTHON_VERS, reuse_venv=True, default=False)
def tests_all(session: Session) -> None:
    """Run all tests, including those needing the Cytoscape Docker container.

    Steps:
        1. Install the dependencies with ``pdm install``.
        2. Run pytest with ``-n 4``.
        3. Start Docker Desktop and the ``cyrest`` container, run the tests
           marked ``cyto``, and stop the container again.

    The container is stopped in a ``finally`` clause, so it does not keep
    running when the Cytoscape tests fail and ``session.run`` raises.

    Args:
        session: The nox session the commands are executed in.
    """
    cmd_raw = 'pdm install -G cytoscape,spacy-sm,spacy-md,spacy-trf -dG tests'
    cmd = shlex.split(cmd_raw)
    # external=True: `pdm` is allowed to come from outside the session venv
    session.run_install(*cmd, external=True)

    # NOTE(review): this run has no marker filter, so tests marked `cyto`
    # appear to be collected here as well, before the container is started
    # -- confirm this is intended (or deselected via the pytest config).
    cmd_tests_raw = 'pytest -n 4'
    cmd_tests = shlex.split(cmd_tests_raw)
    session.run(*cmd_tests)

    session.run('docker', 'desktop', 'start', external=True)
    session.run('docker', 'start', 'cyrest', external=True)
    try:
        cmd_tests_raw = 'pytest -m "cyto"'
        cmd_tests = shlex.split(cmd_tests_raw)
        session.run(*cmd_tests)
    finally:
        # always release the container, even if the tests above failed
        session.run('docker', 'stop', 'cyrest', external=True)

View File

@ -36,7 +36,7 @@ BASE_FOLDERNAME: Final[str] = os.environ.get('LANG_MAIN_BASE_FOLDERNAME', 'lang-
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
PKG_DIR: Final[Path] = Path(__file__).parent
STOP_FOLDER: Final[str] = os.environ.get('LANG_MAIN_STOP_SEARCH_FOLDERNAME', 'src')
STOP_FOLDER: Final[str] = os.environ.get('LANG_MAIN_STOP_SEARCH_FOLDERNAME', 'lang-main')
def load_toml_config(

View File

@ -115,11 +115,11 @@ def _preprocess_STFR_model_name(
# !! without any user names
folder_name = model_name.split('/')[-1]
model_path = MODEL_BASE_FOLDER / folder_name
if not model_path.exists():
if not model_path.exists(): # pragma: no cover
raise FileNotFoundError(
f'Target model >{model_name}< not found under {model_path}'
)
model_name_or_path = str(model_path) # pragma: no cover
model_name_or_path = str(model_path)
else:
model_name_or_path = model_name

View File

@ -52,7 +52,7 @@ def test_load_cfg_func(monkeypatch, tmp_path):
stop_folder_name=stop_folder,
lookup_cwd=False,
)
assert loaded_cfg['paths']['models'] == '../lang-models'
assert loaded_cfg['paths']['models'] == './lang-models'
loaded_cfg = config.load_cfg(
starting_path=pkg_dir,
@ -60,4 +60,4 @@ def test_load_cfg_func(monkeypatch, tmp_path):
stop_folder_name=stop_folder,
lookup_cwd=True,
)
assert loaded_cfg['paths']['models'] == '../lang-models'
assert loaded_cfg['paths']['models'] == './lang-models'

View File

@ -26,7 +26,7 @@ from lang_main.types import LanguageModels
'model_name',
[
STFRModelTypes.ALL_MINI_LM_L6_V2,
STFRModelTypes.ALL_MPNET_BASE_V2,
STFRModelTypes.ALL_MINI_LM_L12_V2,
],
)
@pytest.mark.mload
@ -58,10 +58,9 @@ def test_preprocess_STFR_model_name() -> None:
model_name_exist = STFRModelTypes.E5_BASE_STS_EN_DE
backend_exist = STFRBackends.ONNX
with pytest.raises(FileNotFoundError):
_ = model_loader._preprocess_STFR_model_name(
model_name=model_name_exist, backend=backend_exist, force_download=False
)
_ = model_loader._preprocess_STFR_model_name(
model_name=model_name_exist, backend=backend_exist, force_download=False
)
@pytest.mark.parametrize(
@ -75,7 +74,7 @@ def test_preprocess_STFR_model_name() -> None:
'model_name',
[
STFRModelTypes.ALL_MINI_LM_L6_V2,
STFRModelTypes.ALL_MPNET_BASE_V2,
STFRModelTypes.ALL_MINI_LM_L12_V2,
],
)
@pytest.mark.mload
@ -94,9 +93,7 @@ def test_load_sentence_transformer_onnx(model_name, similarity_func) -> None:
@pytest.mark.parametrize(
'model_name',
[
SpacyModelTypes.DE_CORE_NEWS_SM,
SpacyModelTypes.DE_CORE_NEWS_MD,
SpacyModelTypes.DE_CORE_NEWS_LG,
SpacyModelTypes.DE_DEP_NEWS_TRF,
],
)
@ -111,7 +108,7 @@ def test_load_spacy_model(model_name):
def test_load_spacy_model_fail():
model_name = 'not_existing'
with pytest.raises(LanguageModelNotFoundError):
model = model_loader.load_spacy(model_name)
_ = model_loader.load_spacy(model_name)
@pytest.mark.mload