diff --git a/src/lang_main/constants.py b/src/lang_main/constants.py index b0842d4..a19c92f 100644 --- a/src/lang_main/constants.py +++ b/src/lang_main/constants.py @@ -72,7 +72,6 @@ SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip'] # ** models # ** loading -# MODEL_BASE_FOLDER_NAME: Final[str] = 'lang-models' model_folder_cfg = LIB_PATH / Path(CONFIG['paths']['models']) MODEL_BASE_FOLDER: Final[Path] = model_folder_cfg.resolve() if not MODEL_BASE_FOLDER.exists(): @@ -84,9 +83,18 @@ os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(MODEL_BASE_FOLDER) # LANG_MAIN_STFR_MODEL : Sentence Transformer model used; if not provided, use constant value defined in library; more internal use # LANG_MAIN_STFR_BACKEND : STFR backend, choice between "torch" and "onnx" -SPACY_MODEL_NAME: Final[str | SpacyModelTypes] = os.environ.get( - 'LANG_MAIN_SPACY_MODEL', SpacyModelTypes.DE_DEP_NEWS_TRF -) +# config option to switch between the spaCy TRF and the medium model +# env variable takes priority: if LANG_MAIN_SPACY_MODEL is set, its value is used +SPACY_USE_LARGE_MODEL: Final[bool] = CONFIG['models']['use_large_model'] +spacy_model_name: str | SpacyModelTypes | None +spacy_model_name = os.environ.get('LANG_MAIN_SPACY_MODEL', None) +if spacy_model_name is None: + if SPACY_USE_LARGE_MODEL: + spacy_model_name = SpacyModelTypes.DE_DEP_NEWS_TRF + else: + spacy_model_name = SpacyModelTypes.DE_CORE_NEWS_MD + +SPACY_MODEL_NAME: Final[str | SpacyModelTypes] = spacy_model_name STFR_MODEL_NAME: Final[str | STFRModelTypes] = os.environ.get( 'LANG_MAIN_STFR_MODEL', STFRModelTypes.E5_BASE_STS_EN_DE ) diff --git a/src/lang_main/lang_main_config.toml b/src/lang_main/lang_main_config.toml index 278ecf7..258d08b 100644 --- a/src/lang_main/lang_main_config.toml +++ b/src/lang_main/lang_main_config.toml @@ -5,6 +5,9 @@ inputs = '../data/in/' results = '../data/out/' models = './lang-models' +[models] +use_large_model = true + [logging] enabled = true stderr = true