adaptation for deployment structure
parent 38aa0739ad
commit 123869e203
@@ -123,6 +123,7 @@ exclude_also = [

[tool.coverage.html]
directory = "reports/coverage"

[dependency-groups]
dev = [
    "cython>=3.0.10",
@@ -4,7 +4,7 @@ from lang_main.constants import (
    INPUT_PATH_FOLDER,
    PATH_TO_DATASET,
    SAVE_PATH_FOLDER,
    input_path_conf,
    input_path_cfg,
)

print(SAVE_PATH_FOLDER, '\n')
@@ -12,4 +12,4 @@ print(INPUT_PATH_FOLDER, '\n')
print(PATH_TO_DATASET, '\n')

print('------------------------')
print(Path.cwd(), '\n', input_path_conf)
print(Path.cwd(), '\n', input_path_cfg)
@@ -6,7 +6,6 @@ from lang_main.config import (
    CONFIG_FILENAME,
    CYTO_STYLESHEET_FILENAME,
    PKG_DIR,
    PREFER_INTERNAL_CONFIG,
    STOP_FOLDER,
    get_config_paths,
    load_cfg,
@@ -23,13 +22,19 @@ CONFIG: Final[dict[str, Any]] = load_cfg(
    starting_path=PKG_DIR,
    glob_pattern=CONFIG_FILENAME,
    stop_folder_name=STOP_FOLDER,
    cfg_path_internal=cfg_path_internal,
    prefer_internal_config=PREFER_INTERNAL_CONFIG,
)
base_parent_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME)
if base_parent_path is None:
    raise FileNotFoundError('Could not resolve base path of library')
BASE_PATH: Final[Path] = base_parent_path

lib_path = search_base_path(PKG_DIR, stop_folder_name=STOP_FOLDER)
if lib_path is None:
    raise FileNotFoundError('Could not resolve library path of application')
LIB_PATH: Final[Path] = lib_path
print(f'Library path is: {LIB_PATH}', flush=True)

root_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME)
if root_path is None:
    raise FileNotFoundError('Could not resolve root path of application')
ROOT_PATH: Final[Path] = root_path
print(f'Root path is: {ROOT_PATH}', flush=True)


# ** Cytoscape configuration
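The helper search_base_path used above is not part of this diff; as a rough sketch of the assumed behaviour, it walks upward from the starting path and returns the first ancestor whose name matches stop_folder_name, with None signalling the failure that the callers above turn into FileNotFoundError:

from pathlib import Path

def search_base_path_sketch(starting_path: Path, stop_folder_name: str) -> Path | None:
    # walk from the starting path up through all of its parents
    for candidate in (starting_path, *starting_path.parents):
        if candidate.name == stop_folder_name:
            return candidate
    # no ancestor matched: callers treat None as a fatal configuration error
    return None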
@@ -2,11 +2,13 @@ from __future__ import annotations

import logging
import os
import sys
import tomllib
from pathlib import Path
from typing import Any, Final

from lang_main.errors import LangMainConfigNotFoundError

# from lang_main.loggers import logger_config as logger
from lang_main.search import search_cwd, search_iterative

_has_py4cyto: bool = True
@@ -29,10 +31,10 @@ if _has_py4cyto:
    p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())

# ** lang-main config
BASE_FOLDERNAME: Final[str] = 'lang-main'
# ENV variable: LANG_MAIN_BASE_FOLDERNAME
BASE_FOLDERNAME: Final[str] = os.environ.get('LANG_MAIN_BASE_FOLDERNAME', 'lang-main')
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
PREFER_INTERNAL_CONFIG: Final[bool] = False
PKG_DIR: Final[Path] = Path(__file__).parent
STOP_FOLDER: Final[str] = 'python'
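Since BASE_FOLDERNAME is now read from the environment with a fallback, a deployment can rename its root folder without code changes; a minimal sketch, assuming the variable is set before lang_main.config is first imported (the folder name below is only an example):

import os

# must happen before the first import of lang_main.config,
# because the constant is evaluated once at import time
os.environ['LANG_MAIN_BASE_FOLDERNAME'] = 'my-deployment-root'

from lang_main.config import BASE_FOLDERNAME  # noqa: E402

print(BASE_FOLDERNAME)  # -> 'my-deployment-root'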
@@ -42,7 +44,7 @@ def load_toml_config(
) -> dict[str, Any]:
    with open(path_to_toml, 'rb') as f:
        data = tomllib.load(f)
    print('Loaded TOML config file successfully.', file=sys.stderr, flush=True)
    print('Loaded TOML config file successfully.', flush=True)

    return data
@@ -63,26 +65,54 @@ def load_cfg(
    starting_path: Path,
    glob_pattern: str,
    stop_folder_name: str | None,
    cfg_path_internal: Path,
    prefer_internal_config: bool = False,
) -> dict[str, Any]:
    """Look for configuration file. Internal configs are not used any more because
    the library behaviour is only guaranteed by external configurations.

    Parameters
    ----------
    starting_path : Path
        path to start for the lookup
    glob_pattern : str
        pattern of the config file naming scheme
    stop_folder_name : str | None
        folder name at which the lookup should stop, the parent folder
        is also searched, e.g.
        if starting_path is path/to/start/folder and stop_folder_name is 'to',
        then path/ is also searched

    Returns
    -------
    dict[str, Any]
        loaded config file

    Raises
    ------
    LangMainConfigNotFoundError
        if no config file was found
    """
    cfg_path: Path | None
    # look for external config first, if not found use internal one
    if prefer_internal_config:
        cfg_path = cfg_path_internal
    else:
        print('Looking for cfg file in CWD.', flush=True)
        cfg_path = search_cwd(glob_pattern)

    if cfg_path is None:
        print(
            (
                f'Looking iteratively for config file. Start: {starting_path}, '
                f'stop folder: {stop_folder_name}'
            ),
            flush=True,
        )
        cfg_path = search_iterative(
            starting_path=starting_path,
            glob_pattern=glob_pattern,
            stop_folder_name=stop_folder_name,
        )
    # backup: use internal config

    if cfg_path is None:
        cfg_path = cfg_path_internal
        raise LangMainConfigNotFoundError('Config file was not found.')

    config = load_toml_config(path_to_toml=cfg_path)
    print(f'Loaded config from: >>{cfg_path}<<')

    return config.copy()
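The resulting lookup order for a configuration file is therefore: the current working directory, then the iterative search from starting_path up to stop_folder_name, and finally LangMainConfigNotFoundError; the internal path is only used when prefer_internal_config is set. A usage sketch with illustrative arguments:

from lang_main.config import CONFIG_FILENAME, PKG_DIR, STOP_FOLDER, load_cfg
from lang_main.errors import LangMainConfigNotFoundError

try:
    cfg = load_cfg(
        starting_path=PKG_DIR,
        glob_pattern=CONFIG_FILENAME,
        stop_folder_name=STOP_FOLDER,
        cfg_path_internal=PKG_DIR / CONFIG_FILENAME,  # illustrative internal path
        prefer_internal_config=False,
    )
except LangMainConfigNotFoundError:
    # no external config found anywhere along the search path
    raise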
@@ -6,7 +6,11 @@ import os

from sentence_transformers import SimilarityFunction

from lang_main import CONFIG, CYTO_PATH_STYLESHEET, BASE_PATH
from lang_main import (
    CONFIG,
    CYTO_PATH_STYLESHEET,
    LIB_PATH,
)
from lang_main.types import (
    CytoLayoutProperties,
    CytoLayouts,
@@ -47,15 +51,18 @@ LOGGING_DEFAULT_GRAPHS: Final[bool] = False
PICKLE_PROTOCOL_VERSION: Final[int] = 5

# ** paths
input_path_conf = Path.cwd() / Path(CONFIG['paths']['inputs'])
INPUT_PATH_FOLDER: Final[Path] = input_path_conf.resolve()
# config placed in library path of application (usually "bin")
input_path_cfg = LIB_PATH / Path(CONFIG['paths']['inputs'])
INPUT_PATH_FOLDER: Final[Path] = input_path_cfg.resolve()
# TODO reactivate later
# if not INPUT_PATH_FOLDER.exists():
#     raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.')
save_path_conf = Path.cwd() / Path(CONFIG['paths']['results'])
SAVE_PATH_FOLDER: Final[Path] = save_path_conf.resolve()
path_dataset_conf = Path.cwd() / Path(CONFIG['paths']['dataset'])
PATH_TO_DATASET: Final[Path] = path_dataset_conf.resolve()
if not INPUT_PATH_FOLDER.exists():
    raise FileNotFoundError(f'Input path >>{INPUT_PATH_FOLDER}<< does not exist.')
save_path_cfg = LIB_PATH / Path(CONFIG['paths']['results'])
SAVE_PATH_FOLDER: Final[Path] = save_path_cfg.resolve()
if not SAVE_PATH_FOLDER.exists():
    raise FileNotFoundError(f'Output path >>{SAVE_PATH_FOLDER}<< does not exist.')
path_dataset_cfg = LIB_PATH / Path(CONFIG['paths']['dataset'])
PATH_TO_DATASET: Final[Path] = path_dataset_cfg.resolve()
# if not PATH_TO_DATASET.exists():
#     raise FileNotFoundError(f'Dataset path >>{PATH_TO_DATASET}<< does not exist.')
# ** control
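All configured folders are now resolved against LIB_PATH instead of the current working directory, so the resolved locations no longer depend on where the process is started. A small sketch of the resolution with placeholder values (the real LIB_PATH comes from search_base_path and the relative paths from the [paths] config section):

from pathlib import Path

LIB_PATH = Path('/opt/app/python')  # placeholder for the resolved library path
paths_cfg = {'inputs': './data/in/', 'results': './data/out/'}

INPUT_PATH_FOLDER = (LIB_PATH / Path(paths_cfg['inputs'])).resolve()
SAVE_PATH_FOLDER = (LIB_PATH / Path(paths_cfg['results'])).resolve()
print(INPUT_PATH_FOLDER)  # /opt/app/python/data/in
print(SAVE_PATH_FOLDER)   # /opt/app/python/data/out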
@@ -69,12 +76,13 @@ SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']

# ** models
# ** loading
MODEL_BASE_FOLDER_NAME: Final[str] = 'lang-models'
MODEL_BASE_FOLDER: Final[Path] = BASE_PATH / MODEL_BASE_FOLDER_NAME
# MODEL_BASE_FOLDER_NAME: Final[str] = 'lang-models'
model_folder_cfg = LIB_PATH / Path(CONFIG['paths']['models'])
MODEL_BASE_FOLDER: Final[Path] = model_folder_cfg.resolve()
if not MODEL_BASE_FOLDER.exists():
    raise FileNotFoundError('Language model folder not found.')
os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(MODEL_BASE_FOLDER)
SPACY_MODEL_NAME: Final[SpacyModelTypes] = SpacyModelTypes.DE_DEP_NEWS_TRF
SPACY_MODEL_NAME: Final[SpacyModelTypes] = SpacyModelTypes.DE_CORE_NEWS_SM
STFR_MODEL_NAME: Final[STFRModelTypes] = STFRModelTypes.ALL_MPNET_BASE_V2
STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU
STFR_SIMILARITY: Final[SimilarityFunction] = SimilarityFunction.COSINE
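Exporting SENTENCE_TRANSFORMERS_HOME before the first model load points the sentence-transformers cache at the deployed model folder, so models are read from MODEL_BASE_FOLDER rather than the default per-user cache. A minimal sketch; the folder is a placeholder and the model is expected to already be present there:

import os
from pathlib import Path

MODEL_BASE_FOLDER = Path('/opt/app/lang-models')  # placeholder deployment path
os.environ['SENTENCE_TRANSFORMERS_HOME'] = str(MODEL_BASE_FOLDER)

# the environment variable must be set before the model is instantiated
from sentence_transformers import SentenceTransformer  # noqa: E402

model = SentenceTransformer('all-mpnet-base-v2')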
@@ -1,4 +1,8 @@
# ** meta exceptions
class LangMainConfigNotFoundError(Exception):
    """Error raised if a config file could not be found successfully"""


class LanguageModelNotFoundError(Exception):
    """Error raised if a given language model could not be loaded successfully"""
@@ -3,11 +3,12 @@
pkg = 'lang_main_internal'

[paths]
inputs = './inputs/'
inputs = './data/in/'
# results = './results/dummy_N_1000/'
# dataset = '../data/Dummy_Dataset_N_1000.csv'
results = './results/test_20240807/'
results = './data/out/'
dataset = '../data/02_202307/Export4.csv'
models = '../../lang-models'

[logging]
enabled = true
@@ -17,11 +18,11 @@ file = true
# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing_skip = true
preprocessing_skip = false
token_analysis_skip = false
graph_postprocessing_skip = false
graph_rescaling_skip = false
graph_static_rendering_skip = false
graph_static_rendering_skip = true
time_analysis_skip = true

[preprocess]
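With the updated [paths] section the dataset lives one level above the library path and the models two levels above it; a sketch of the layout this implies, using a placeholder library location:

from pathlib import Path

LIB_PATH = Path('/opt/app/python')  # placeholder for the resolved library path
# dataset = '../data/02_202307/Export4.csv' -> one level above the library path
# models  = '../../lang-models'             -> two levels above the library path
print((LIB_PATH / '../data/02_202307/Export4.csv').resolve())  # /opt/app/data/02_202307/Export4.csv
print((LIB_PATH / '../../lang-models').resolve())              # /opt/lang-models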
@@ -1,57 +0,0 @@
# lang_main: Config file

[paths]
inputs = './inputs/'
results = './results/test_new2/'
dataset = './01_2_Rohdaten_neu/Export4.csv'
#results = './results/Export7/'
#dataset = './01_03_Rohdaten_202403/Export7_59499_Zeilen.csv'
#results = './results/Export7_trunc/'
#dataset = './01_03_Rohdaten_202403/Export7_trunc.csv'

# only debugging features, production-ready pipelines should always
# be fully executed
[control]
preprocessing_skip = false
token_analysis_skip = false
graph_postprocessing_skip = false
time_analysis_skip = false

#[export_filenames]
#filename_cossim_filter_candidates = 'CosSim-FilterCandidates'

[preprocess]
filename_cossim_filter_candidates = 'CosSim-FilterCandidates'
date_cols = [
    "VorgangsDatum",
    "ErledigungsDatum",
    "Arbeitsbeginn",
    "ErstellungsDatum",
]
threshold_amount_characters = 5
threshold_similarity = 0.8

[graph_postprocessing]
threshold_edge_weight = 150

[time_analysis.uniqueness]
threshold_unique_texts = 4
criterion_feature = 'HObjektText'
feature_name_obj_id = 'ObjektID'

[time_analysis.model_input]
# input_features = [
#     'VorgangsTypName',
#     'VorgangsArtText',
#     'VorgangsBeschreibung',
# ]
input_features = [
    'VorgangsBeschreibung',
]
activity_feature = 'VorgangsTypName'
activity_types = [
    'Reparaturauftrag (Portal)',
    'Störungsmeldung',
]
threshold_num_acitivities = 1
threshold_similarity = 0.8
@@ -4,8 +4,8 @@ from pathlib import Path
from time import gmtime
from typing import Final

from lang_main import LIB_PATH
from lang_main.constants import (
    BASE_PATH,
    ENABLE_LOGGING,
    LOGGING_TO_FILE,
    LOGGING_TO_STDERR,
@@ -16,11 +16,13 @@ from lang_main.types import LoggingLevels
logging.Formatter.converter = gmtime
LOG_FMT: Final[str] = '%(asctime)s | lang_main:%(module)s:%(levelname)s | %(message)s'
LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
LOG_FILE_FOLDER: Final[Path] = BASE_PATH / 'logs'
LOG_FILE_FOLDER: Final[Path] = LIB_PATH / 'logs'
if not LOG_FILE_FOLDER.exists():
    LOG_FILE_FOLDER.mkdir(parents=True)

LOG_FILE_PATH: Final[Path] = LOG_FILE_FOLDER / 'lang-main.log'
LOGGING_LEVEL_STDERR: Final[LoggingLevels] = LoggingLevels.INFO
LOGGING_LEVEL_FILE: Final[LoggingLevels] = LoggingLevels.DEBUG

# ** formatters
logger_all_formater = logging.Formatter(fmt=LOG_FMT, datefmt=LOG_DATE_FMT)
@@ -29,7 +31,7 @@ logger_all_formater = logging.Formatter(fmt=LOG_FMT, datefmt=LOG_DATE_FMT)
null_handler = logging.NullHandler()
if ENABLE_LOGGING and LOGGING_TO_STDERR:
    logger_all_handler_stderr = logging.StreamHandler()
    logger_all_handler_stderr.setLevel(LoggingLevels.WARNING)
    logger_all_handler_stderr.setLevel(LOGGING_LEVEL_STDERR)
    logger_all_handler_stderr.setFormatter(logger_all_formater)
else:
    logger_all_handler_stderr = null_handler
@@ -41,14 +43,13 @@ if ENABLE_LOGGING and LOGGING_TO_FILE:
        maxBytes=5_242_880,
        backupCount=1,
    )
    logger_all_handler_file.setLevel(LoggingLevels.DEBUG)
    logger_all_handler_file.setLevel(LOGGING_LEVEL_FILE)
    logger_all_handler_file.setFormatter(logger_all_formater)
else:
    logger_all_handler_file = null_handler


# ** logging levels
LOGGING_LEVEL_ALL: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_PREPROCESS: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_PIPELINES: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_GRAPHS: Final[LoggingLevels] = LoggingLevels.DEBUG
@@ -56,12 +57,15 @@ LOGGING_LEVEL_TIMELINE: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_TOKEN_ANALYSIS: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_SHARED_HELPERS: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_RENDERING: Final[LoggingLevels] = LoggingLevels.DEBUG
LOGGING_LEVEL_CONFIG: Final[LoggingLevels] = LoggingLevels.DEBUG

# ** loggers and configuration
logger_all = logging.getLogger('lang_main')
logger_all.addHandler(logger_all_handler_stderr)
logger_all.addHandler(logger_all_handler_file)

logger_config = logging.getLogger('lang_main.config')
logger_config.setLevel(LOGGING_LEVEL_CONFIG)
logger_shared_helpers = logging.getLogger('lang_main.shared')
logger_shared_helpers.setLevel(LOGGING_LEVEL_SHARED_HELPERS)
logger_preprocess = logging.getLogger('lang_main.analysis.preprocessing')
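Handlers are attached only to the parent 'lang_main' logger; the per-module loggers just set their own level and rely on propagation to reach the stderr and file handlers. A short sketch of how a module obtains its logger (the module name is taken from the diff above):

import logging

# records propagate up to 'lang_main', which owns the stderr and
# rotating-file handlers configured above
logger = logging.getLogger('lang_main.analysis.preprocessing')
logger.debug('preprocessing started')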
@@ -1,5 +1,5 @@
from lang_main import BASE_PATH
from lang_main import ROOT_PATH


def test_base_path():
    assert BASE_PATH is not None
    assert ROOT_PATH is not None