adaptation for deployment structure

Florian Förster 2024-12-11 18:15:07 +01:00
parent 38aa0739ad
commit 123869e203
11 changed files with 106 additions and 110 deletions

build.py: new file (no lines shown)


@@ -123,6 +123,7 @@ exclude_also = [
 [tool.coverage.html]
 directory = "reports/coverage"

 [dependency-groups]
 dev = [
     "cython>=3.0.10",


@@ -4,7 +4,7 @@ from lang_main.constants import (
     INPUT_PATH_FOLDER,
     PATH_TO_DATASET,
     SAVE_PATH_FOLDER,
-    input_path_conf,
+    input_path_cfg,
 )

 print(SAVE_PATH_FOLDER, '\n')
@@ -12,4 +12,4 @@ print(INPUT_PATH_FOLDER, '\n')
 print(PATH_TO_DATASET, '\n')
 print('------------------------')
-print(Path.cwd(), '\n', input_path_conf)
+print(Path.cwd(), '\n', input_path_cfg)


@@ -6,7 +6,6 @@ from lang_main.config import (
     CONFIG_FILENAME,
     CYTO_STYLESHEET_FILENAME,
     PKG_DIR,
-    PREFER_INTERNAL_CONFIG,
     STOP_FOLDER,
     get_config_paths,
     load_cfg,
@@ -23,13 +22,19 @@ CONFIG: Final[dict[str, Any]] = load_cfg(
     starting_path=PKG_DIR,
     glob_pattern=CONFIG_FILENAME,
     stop_folder_name=STOP_FOLDER,
-    cfg_path_internal=cfg_path_internal,
-    prefer_internal_config=PREFER_INTERNAL_CONFIG,
 )

-base_parent_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME)
-if base_parent_path is None:
-    raise FileNotFoundError('Could not resolve base path of library')
-BASE_PATH: Final[Path] = base_parent_path
+lib_path = search_base_path(PKG_DIR, stop_folder_name=STOP_FOLDER)
+if lib_path is None:
+    raise FileNotFoundError('Could not resolve library path of application')
+LIB_PATH: Final[Path] = lib_path
+print(f'Library path is: {LIB_PATH}', flush=True)
+
+root_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME)
+if root_path is None:
+    raise FileNotFoundError('Could not resolve root path of application')
+ROOT_PATH: Final[Path] = root_path
+print(f'Root path is: {ROOT_PATH}', flush=True)

 # ** Cytoscape configuration
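For orientation, search_base_path lives in lang_main.search and is not part of this diff. A minimal sketch of the upward walk it presumably performs, assuming it returns the first ancestor whose name matches stop_folder_name:

from pathlib import Path

def search_base_path_sketch(start: Path, stop_folder_name: str) -> Path | None:
    # Assumption: climb from `start` towards the filesystem root and return the
    # first directory whose name equals `stop_folder_name`; None if no match.
    for candidate in (start, *start.parents):
        if candidate.name == stop_folder_name:
            return candidate
    return None

# With STOP_FOLDER = 'python' this would yield the enclosing 'python' directory
# (LIB_PATH); with BASE_FOLDERNAME = 'lang-main' the project root (ROOT_PATH).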


@@ -2,11 +2,13 @@ from __future__ import annotations

 import logging
 import os
-import sys
 import tomllib
 from pathlib import Path
 from typing import Any, Final

+from lang_main.errors import LangMainConfigNotFoundError
+
+# from lang_main.loggers import logger_config as logger
 from lang_main.search import search_cwd, search_iterative

 _has_py4cyto: bool = True
@@ -29,10 +31,10 @@ if _has_py4cyto:
     p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())

 # ** lang-main config
-BASE_FOLDERNAME: Final[str] = 'lang-main'
+# ENV variable: LANG_MAIN_BASE_FOLDERNAME
+BASE_FOLDERNAME: Final[str] = os.environ.get('LANG_MAIN_BASE_FOLDERNAME', 'lang-main')
 CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
 CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
-PREFER_INTERNAL_CONFIG: Final[bool] = False
 PKG_DIR: Final[Path] = Path(__file__).parent
 STOP_FOLDER: Final[str] = 'python'
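Because BASE_FOLDERNAME is now read from the environment, a deployment can rename its root folder without code changes. A small sketch; the folder name 'my-deployment-root' is only an example:

import os

# Must be set before lang_main is imported (e.g. in a service unit file or
# container entrypoint); falls back to 'lang-main' when unset.
os.environ['LANG_MAIN_BASE_FOLDERNAME'] = 'my-deployment-root'

import lang_main  # ROOT_PATH is now resolved against the overridden folder name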
@@ -42,7 +44,7 @@ def load_toml_config(
 ) -> dict[str, Any]:
     with open(path_to_toml, 'rb') as f:
         data = tomllib.load(f)
-    print('Loaded TOML config file successfully.', file=sys.stderr, flush=True)
+    print('Loaded TOML config file successfully.', flush=True)

     return data
@@ -63,26 +65,54 @@ def load_cfg(
     starting_path: Path,
     glob_pattern: str,
     stop_folder_name: str | None,
-    cfg_path_internal: Path,
-    prefer_internal_config: bool = False,
 ) -> dict[str, Any]:
-    cfg_path: Path | None
-    # look for external config first, if not found use internal one
-    if prefer_internal_config:
-        cfg_path = cfg_path_internal
-    else:
-        cfg_path = search_cwd(glob_pattern)
-        if cfg_path is None:
-            cfg_path = search_iterative(
-                starting_path=starting_path,
-                glob_pattern=glob_pattern,
-                stop_folder_name=stop_folder_name,
-            )
-        # backup: use internal config
-        if cfg_path is None:
-            cfg_path = cfg_path_internal
+    """Look for a configuration file. Internal configs are not used any more because
+    the library behaviour is only guaranteed by external configurations.
+
+    Parameters
+    ----------
+    starting_path : Path
+        path at which to start the lookup
+    glob_pattern : str
+        pattern of the config file naming scheme
+    stop_folder_name : str | None
+        folder name at which the lookup should stop; the parent folder is also
+        searched, e.g. if starting_path is path/to/start/folder and
+        stop_folder_name is 'to', then path/ is also searched
+
+    Returns
+    -------
+    dict[str, Any]
+        loaded config file
+
+    Raises
+    ------
+    LangMainConfigNotFoundError
+        if no config file was found
+    """
+    cfg_path: Path | None
+    print('Looking for cfg file in CWD.', flush=True)
+    cfg_path = search_cwd(glob_pattern)
+    if cfg_path is None:
+        print(
+            (
+                f'Looking iteratively for config file. Start: {starting_path}, '
+                f'stop folder: {stop_folder_name}'
+            ),
+            flush=True,
+        )
+        cfg_path = search_iterative(
+            starting_path=starting_path,
+            glob_pattern=glob_pattern,
+            stop_folder_name=stop_folder_name,
+        )
+    if cfg_path is None:
+        raise LangMainConfigNotFoundError('Config file was not found.')

     config = load_toml_config(path_to_toml=cfg_path)
+    print(f'Loaded config from: >>{cfg_path}<<')

     return config.copy()
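Taken together with the new exception, the slimmed-down lookup is used as in the __init__.py hunk above; a stand-alone sketch of the call and its failure mode:

from lang_main.config import CONFIG_FILENAME, PKG_DIR, STOP_FOLDER, load_cfg
from lang_main.errors import LangMainConfigNotFoundError

try:
    # CWD is searched first, then the walk up from the package directory to the
    # 'python' stop folder; internal fallback configs are gone.
    CONFIG = load_cfg(
        starting_path=PKG_DIR,
        glob_pattern=CONFIG_FILENAME,
        stop_folder_name=STOP_FOLDER,
    )
except LangMainConfigNotFoundError:
    raise SystemExit('lang-main: no external lang_main_config.toml found')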


@@ -6,7 +6,11 @@ import os

 from sentence_transformers import SimilarityFunction

-from lang_main import CONFIG, CYTO_PATH_STYLESHEET, BASE_PATH
+from lang_main import (
+    CONFIG,
+    CYTO_PATH_STYLESHEET,
+    LIB_PATH,
+)
 from lang_main.types import (
     CytoLayoutProperties,
     CytoLayouts,
@@ -47,15 +51,18 @@ LOGGING_DEFAULT_GRAPHS: Final[bool] = False
 PICKLE_PROTOCOL_VERSION: Final[int] = 5

 # ** paths
-input_path_conf = Path.cwd() / Path(CONFIG['paths']['inputs'])
-INPUT_PATH_FOLDER: Final[Path] = input_path_conf.resolve()
+# config placed in library path of application (usually "bin")
+input_path_cfg = LIB_PATH / Path(CONFIG['paths']['inputs'])
+INPUT_PATH_FOLDER: Final[Path] = input_path_cfg.resolve()
 # TODO reactivate later
-# if not INPUT_PATH_FOLDER.exists():
-#     raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.')
-save_path_conf = Path.cwd() / Path(CONFIG['paths']['results'])
-SAVE_PATH_FOLDER: Final[Path] = save_path_conf.resolve()
-path_dataset_conf = Path.cwd() / Path(CONFIG['paths']['dataset'])
-PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
+if not INPUT_PATH_FOLDER.exists():
+    raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.')
+save_path_cfg = LIB_PATH / Path(CONFIG['paths']['results'])
+SAVE_PATH_FOLDER: Final[Path] = save_path_cfg.resolve()
+if not SAVE_PATH_FOLDER.exists():
+    raise FileNotFoundError(f'Output path >>{SAVE_PATH_FOLDER}<< does not exist.')
+path_dataset_cfg = LIB_PATH / Path(CONFIG['paths']['dataset'])
+PATH_TO_DATASET: Final[Path] = path_dataset_cfg.resolve()
 # if not PATH_TO_DATASET.exists():
 #     raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')

 # ** control
@@ -69,12 +76,13 @@ SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']

 # ** models
 # ** loading
-MODEL_BASE_FOLDER_NAME: Final[str] = 'lang-models'
-MODEL_BASE_FOLDER: Final[Path] = BASE_PATH / MODEL_BASE_FOLDER_NAME
+# MODEL_BASE_FOLDER_NAME: Final[str] = 'lang-models'
+model_folder_cfg = LIB_PATH / Path(CONFIG['paths']['models'])
+MODEL_BASE_FOLDER: Final[Path] = model_folder_cfg.resolve()
 if not MODEL_BASE_FOLDER.exists():
     raise FileNotFoundError('Language model folder not found.')
 os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(MODEL_BASE_FOLDER)

-SPACY_MODEL_NAME: Final[SpacyModelTypes] = SpacyModelTypes.DE_DEP_NEWS_TRF
+SPACY_MODEL_NAME: Final[SpacyModelTypes] = SpacyModelTypes.DE_CORE_NEWS_SM
 STFR_MODEL_NAME: Final[STFRModelTypes] = STFRModelTypes.ALL_MPNET_BASE_V2
 STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU
 STFR_SIMILARITY: Final[SimilarityFunction] = SimilarityFunction.COSINE
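For context, a hedged sketch of how these constants are consumed downstream; the loading code itself is not part of this diff, and the literal model string is an assumed value of STFR_MODEL_NAME:

from sentence_transformers import SentenceTransformer

# MODEL_BASE_FOLDER was exported as SENTENCE_TRANSFORMERS_HOME above, so the
# model is read from (and cached into) the deployed 'lang-models' folder
# instead of the user-wide default cache.
model = SentenceTransformer('all-mpnet-base-v2', device='cpu')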


@@ -1,4 +1,8 @@
 # ** meta exceptions
+class LangMainConfigNotFoundError(Exception):
+    """Error raised if a config file could not be found successfully"""
+
+
 class LanguageModelNotFoundError(Exception):
     """Error raised if a given language model could not be loaded successfully"""


@@ -3,11 +3,12 @@
 pkg = 'lang_main_internal'

 [paths]
-inputs = './inputs/'
+inputs = './data/in/'
 # results = './results/dummy_N_1000/'
 # dataset = '../data/Dummy_Dataset_N_1000.csv'
-results = './results/test_20240807/'
+results = './data/out/'
 dataset = '../data/02_202307/Export4.csv'
+models = '../../lang-models'

 [logging]
 enabled = true
@@ -17,11 +18,11 @@ file = true
 # only debugging features, production-ready pipelines should always
 # be fully executed
 [control]
-preprocessing_skip = true
+preprocessing_skip = false
 token_analysis_skip = false
 graph_postprocessing_skip = false
 graph_rescaling_skip = false
-graph_static_rendering_skip = false
+graph_static_rendering_skip = true
 time_analysis_skip = true

 [preprocess]
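The relative paths above are now resolved against LIB_PATH instead of the current working directory (see the constants.py hunk). A small arithmetic sketch, assuming purely for illustration that LIB_PATH is /opt/app/lang-main/python/bin:

from pathlib import Path

LIB_PATH = Path('/opt/app/lang-main/python/bin')  # assumed example location

print((LIB_PATH / './data/in/').resolve())         # /opt/app/lang-main/python/bin/data/in
print((LIB_PATH / './data/out/').resolve())        # /opt/app/lang-main/python/bin/data/out
print((LIB_PATH / '../../lang-models').resolve())  # /opt/app/lang-main/lang-models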


@@ -1,57 +0,0 @@
-# lang_main: Config file
-
-[paths]
-inputs = './inputs/'
-results = './results/test_new2/'
-dataset = './01_2_Rohdaten_neu/Export4.csv'
-#results = './results/Export7/'
-#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
-#results = './results/Export7_trunc/'
-#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'
-
-# only debugging features, production-ready pipelines should always
-# be fully executed
-[control]
-preprocessing_skip = false
-token_analysis_skip = false
-graph_postprocessing_skip = false
-time_analysis_skip = false
-
-#[export_filenames]
-#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
-
-[preprocess]
-filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
-date_cols = [
-    "VorgangsDatum",
-    "ErledigungsDatum",
-    "Arbeitsbeginn",
-    "ErstellungsDatum",
-]
-threshold_amount_characters = 5
-threshold_similarity = 0.8
-
-[graph_postprocessing]
-threshold_edge_weight = 150
-
-[time_analysis.uniqueness]
-threshold_unique_texts = 4
-criterion_feature = 'HObjektText'
-feature_name_obj_id = 'ObjektID'
-
-[time_analysis.model_input]
-# input_features = [
-#     'VorgangsTypName',
-#     'VorgangsArtText',
-#     'VorgangsBeschreibung',
-#     ]
-input_features = [
-    'VorgangsBeschreibung',
-]
-activity_feature = 'VorgangsTypName'
-activity_types = [
-    'Reparaturauftrag (Portal)',
-    'Störungsmeldung',
-]
-threshold_num_acitivities = 1
-threshold_similarity = 0.8


@@ -4,8 +4,8 @@ from pathlib import Path
 from time import gmtime
 from typing import Final

+from lang_main import LIB_PATH
 from lang_main.constants import (
-    BASE_PATH,
     ENABLE_LOGGING,
     LOGGING_TO_FILE,
     LOGGING_TO_STDERR,
@@ -16,11 +16,13 @@ from lang_main.types import LoggingLevels
 logging.Formatter.converter = gmtime
 LOG_FMT: Final[str] = '%(asctime)s | lang_main:%(module)s:%(levelname)s | %(message)s'
 LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
-LOG_FILE_FOLDER: Final[Path] = BASE_PATH / 'logs'
+LOG_FILE_FOLDER: Final[Path] = LIB_PATH / 'logs'
 if not LOG_FILE_FOLDER.exists():
     LOG_FILE_FOLDER.mkdir(parents=True)
 LOG_FILE_PATH: Final[Path] = LOG_FILE_FOLDER / 'lang-main.log'
+LOGGING_LEVEL_STDERR: Final[LoggingLevels] = LoggingLevels.INFO
+LOGGING_LEVEL_FILE: Final[LoggingLevels] = LoggingLevels.DEBUG

 # ** formatters
 logger_all_formater = logging.Formatter(fmt=LOG_FMT, datefmt=LOG_DATE_FMT)
@@ -29,7 +31,7 @@ logger_all_formater = logging.Formatter(fmt=LOG_FMT, datefmt=LOG_DATE_FMT)
 null_handler = logging.NullHandler()
 if ENABLE_LOGGING and LOGGING_TO_STDERR:
     logger_all_handler_stderr = logging.StreamHandler()
-    logger_all_handler_stderr.setLevel(LoggingLevels.WARNING)
+    logger_all_handler_stderr.setLevel(LOGGING_LEVEL_STDERR)
     logger_all_handler_stderr.setFormatter(logger_all_formater)
 else:
     logger_all_handler_stderr = null_handler
@@ -41,14 +43,13 @@ if ENABLE_LOGGING and LOGGING_TO_FILE:
         maxBytes=5_242_880,
         backupCount=1,
     )
-    logger_all_handler_file.setLevel(LoggingLevels.DEBUG)
+    logger_all_handler_file.setLevel(LOGGING_LEVEL_FILE)
     logger_all_handler_file.setFormatter(logger_all_formater)
 else:
     logger_all_handler_file = null_handler

 # ** logging levels
-LOGGING_LEVEL_ALL: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_PREPROCESS: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_PIPELINES: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_GRAPHS: Final[LoggingLevels] = LoggingLevels.DEBUG
@@ -56,12 +57,15 @@ LOGGING_LEVEL_TIMELINE: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_TOKEN_ANALYSIS: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_SHARED_HELPERS: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_RENDERING: Final[LoggingLevels] = LoggingLevels.DEBUG
+LOGGING_LEVEL_CONFIG: Final[LoggingLevels] = LoggingLevels.DEBUG

 # ** loggers and configuration
 logger_all = logging.getLogger('lang_main')
 logger_all.addHandler(logger_all_handler_stderr)
 logger_all.addHandler(logger_all_handler_file)
+logger_config = logging.getLogger('lang_main.config')
+logger_config.setLevel(LOGGING_LEVEL_CONFIG)
 logger_shared_helpers = logging.getLogger('lang_main.shared')
 logger_shared_helpers.setLevel(LOGGING_LEVEL_SHARED_HELPERS)
 logger_preprocess = logging.getLogger('lang_main.analysis.preprocessing')
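The commented-out logger_config import in config.py above hints at the intended follow-up: routing the temporary print() diagnostics through the new 'lang_main.config' logger. A hedged sketch of such a replacement (hypothetical helper, not part of this commit):

from pathlib import Path

from lang_main.loggers import logger_config as logger

def report_loaded_config(cfg_path: Path) -> None:
    # Would replace print(f'Loaded config from: >>{cfg_path}<<') in load_cfg.
    logger.info('Loaded config from: >>%s<<', cfg_path)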


@@ -1,5 +1,5 @@
-from lang_main import BASE_PATH
+from lang_main import ROOT_PATH


 def test_base_path():
-    assert BASE_PATH is not None
+    assert ROOT_PATH is not None