major changes in several aspects

commit a0ca71ea87
parent 27d40d5c99
lang-main.code-workspace (new file, 8 lines added)
@@ -0,0 +1,8 @@
+{
+    "folders": [
+        {
+            "path": "."
+        }
+    ],
+    "settings": {}
+}
@@ -1236,7 +1236,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
 "nbformat": 4,
@@ -11,6 +11,251 @@
     "%autoreload 2"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e1c76cc-6f99-484a-b73c-c99d9d567bbd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "f34b343b-a7b6-4a6d-8db4-7f2f71addc54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from importlib.util import find_spec"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "59c32d49-5bad-4f48-b91f-f069487ff543",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "importlib.util.find_spec('nunpy')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "dd3d9434-374d-42a4-9c08-230495d0f5a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "_has_dash: bool = True if (find_spec('dash') and find_spec('kalido')) else False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "4da22bea-d086-4097-86f6-37784b59e02b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "_has_dash"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "dfc51145-759e-43ee-a850-580149679c5b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "False"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "_has_py4cyto"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dbdbc7c1-4dd6-49d6-b570-3fc7eb60938f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "36aa5c9c-c29a-4a05-8854-30b086a9240e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "b8afbf49-e61c-49ce-af53-694b01f90702",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "curr_path = Path.cwd()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "c353967a-2702-4370-b4f2-914ac3b950a3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[]"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(curr_path.glob('./lang_main_consdfig.toml'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "48ee9b11-60fa-43d4-87de-c9a2172a563b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pkg_dir = curr_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "02d404e1-a352-44f6-a4e9-96b6aed0c3a3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "WindowsPath('A:/Arbeitsaufgaben/lang-main/notebooks')"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pkg_dir"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "fc9142c2-9e59-4113-811c-7a03068857f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[WindowsPath('A:/Arbeitsaufgaben/lang-main/notebooks'),\n",
+       " WindowsPath('A:/Arbeitsaufgaben/lang-main'),\n",
+       " WindowsPath('A:/Arbeitsaufgaben'),\n",
+       " WindowsPath('A:/')]"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list((pkg_dir/'home.py').parents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "ac1312bf-332f-4008-8951-c53e35f37990",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cfg_found = False\n",
+    "for it in range(len(pkg_dir.parents)):\n",
+    "    search_path = pkg_dir.parents[it]\n",
+    "    res = tuple(search_path.glob(f'lang_main*.toml'))\n",
+    "    if res:\n",
+    "        cfg_found = True\n",
+    "        target = res[0]\n",
+    "        break\n",
+    "    if search_path.name == 'python':\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "a9bb96cc-f8a7-4749-ae2b-62e363d18f54",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cfg_found"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "58472aba-e887-4c50-857a-894c1c5c9003",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "WindowsPath('A:/Arbeitsaufgaben/lang-main/lang_main_config.toml')"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 2,
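The cells added above prototype optional-dependency detection with importlib.util.find_spec. A minimal sketch of that check, assuming the intended package names are 'numpy' and 'kaleido' (the cells probe the misspelled 'nunpy' and 'kalido', so find_spec returns None and the flags stay False):

from importlib.util import find_spec

# find_spec returns a ModuleSpec if the package is importable, else None
_has_dash: bool = find_spec('dash') is not None and find_spec('kaleido') is not None
print(_has_dash)  # False unless both dash and kaleido are installed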
@@ -11562,7 +11807,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
 "nbformat": 4,
@@ -8,21 +8,31 @@ authors = [
 dependencies = [
     "pandas>=2.2.2",
     "networkx>=3.3",
-    "spacy[lookups,transformers]>=3.7.4",
-    "sentence-transformers>=2.7.0",
-    "numpy<=1.26.4",
+    "spacy>=3.7.4",
+    "sentence-transformers[onnx]>=3.2.0",
+    "numpy>=1.26.4",
     "pip>=24.0",
     "typing-extensions>=4.12.2",
-    "plotly>=5.22.0",
-    "dash>=2.17.0",
-    "dash-cytoscape>=1.0.1",
-    "py4cytoscape>=1.9.0",
-    "kaleido==0.2.1",
+    "tqdm>=4.67.0",
+    "python-dateutil>=2.9.0.post0",
 ]
 requires-python = ">=3.11"
 readme = "README.md"
 license = {text = "LicenseRef-Proprietary"}
 
+[project.optional-dependencies]
+dash = [
+    "dash-cytoscape>=1.0.2",
+    "dash>=2.18.2",
+    "kaleido==0.2.1",
+]
+plot = [
+    "kaleido==0.2.1",
+    "plotly>=5.24.1",
+]
+cytoscape = [
+    "py4cytoscape>=1.11.0",
+]
 [build-system]
 requires = ["pdm-backend"]
 build-backend = "pdm.backend"
@@ -1,9 +1,9 @@
 import time
 import webbrowser
 from collections.abc import Collection, Iterable
+from pathlib import Path
 from threading import Thread
 from typing import Any, Final, cast
-from pathlib import Path
 
 # import dash_cytoscape as cyto
 import plotly.express as px
scripts/lang_main_config.old.toml (new file, 58 lines added)
@@ -0,0 +1,58 @@
+# lang_main: Config file
+
+[paths]
+inputs = './inputs/'
+# results = './results/dummy_N_1000/'
+# dataset = '../data/Dummy_Dataset_N_1000.csv'
+results = './results/test_20240807/'
+dataset = '../data/02_202307/Export4.csv'
+
+# only debugging features, production-ready pipelines should always
+# be fully executed
+[control]
+preprocessing_skip = true
+token_analysis_skip = true
+graph_postprocessing_skip = false
+graph_rescaling_skip = false
+graph_static_rendering_skip = true
+time_analysis_skip = true
+
+[preprocess]
+date_cols = [
+    "VorgangsDatum",
+    "ErledigungsDatum",
+    "Arbeitsbeginn",
+    "ErstellungsDatum",
+]
+threshold_amount_characters = 5
+threshold_similarity = 0.8
+
+[graph_postprocessing]
+threshold_edge_number = 330
+# threshold_edge_weight = 150
+
+[time_analysis.uniqueness]
+threshold_unique_texts = 4
+criterion_feature = 'HObjektText'
+feature_name_obj_id = 'ObjektID'
+
+[time_analysis.preparation]
+name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]'
+name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]'
+
+[time_analysis.model_input]
+# input_features = [
+#     'VorgangsTypName',
+#     'VorgangsArtText',
+#     'VorgangsBeschreibung',
+# ]
+input_features = [
+    'VorgangsBeschreibung',
+]
+activity_feature = 'VorgangsTypName'
+activity_types = [
+    'Reparaturauftrag (Portal)',
+    'Störungsmeldung',
+]
+threshold_num_acitivities = 1
+threshold_similarity = 0.8
@@ -7,14 +7,19 @@ inputs = './inputs/'
 results = './results/test_20240807/'
 dataset = '../data/02_202307/Export4.csv'
 
+[logging]
+enabled = true
+stderr = true
+file = true
+
 # only debugging features, production-ready pipelines should always
 # be fully executed
 [control]
-preprocessing_skip = true
+preprocessing_skip = false
 token_analysis_skip = true
-graph_postprocessing_skip = false
-graph_rescaling_skip = false
-graph_static_rendering_skip = false
+graph_postprocessing_skip = true
+graph_rescaling_skip = true
+graph_static_rendering_skip = true
 time_analysis_skip = true
 
 [preprocess]
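The new [logging] table is read like any other section of the TOML config. A short sketch of how it can be consumed, assuming Python 3.11+ (matching requires-python) and the stdlib tomllib; the flag names mirror the constants added later in this commit:

import tomllib  # stdlib TOML reader, Python 3.11+

with open('lang_main_config.toml', 'rb') as fh:
    cfg = tomllib.load(fh)

enable_logging: bool = cfg['logging']['enabled']
log_to_stderr: bool = cfg['logging']['stderr']
log_to_file: bool = cfg['logging']['file']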
@@ -1,56 +1,127 @@
 import logging
-import os
-import shutil
-import sys
 from pathlib import Path
-from time import gmtime
 from typing import Any, Final
 
-import py4cytoscape as p4c
+_has_py4cyto: bool = True
+try:
+    import py4cytoscape as p4c
+except ImportError:
+    _has_py4cyto = False
 
 from lang_main.io import load_toml_config
 
 # ** py4cytoscape config
-p4c.set_summary_logger(False)
-p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
-p4c.py4cytoscape_logger.detail_logger.removeHandler(p4c.py4cytoscape_logger.detail_handler)
-p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
+if _has_py4cyto:
+    p4c.set_summary_logger(False)
+    p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
+    p4c.py4cytoscape_logger.detail_logger.removeHandler(
+        p4c.py4cytoscape_logger.detail_handler
+    )
+    p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
 
 # ** lang-main config
-logging.Formatter.converter = gmtime
-LOG_FMT: Final[str] = '%(asctime)s | lang_main:%(module)s:%(levelname)s | %(message)s'
-LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
-logging.basicConfig(
-    stream=sys.stdout,
-    format=LOG_FMT,
-    datefmt=LOG_DATE_FMT,
-)
-
 CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
 CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
-USE_INTERNAL_CONFIG: Final[bool] = False
+PREFER_INTERNAL_CONFIG: Final[bool] = False
 pkg_dir = Path(__file__).parent
 cfg_path_internal = (pkg_dir / CONFIG_FILENAME).resolve()
 cyto_stylesheet_path = (pkg_dir / CYTO_STYLESHEET_FILENAME).resolve()
 
-# ** load config data: internal/external
-if USE_INTERNAL_CONFIG:
-    loaded_cfg = load_toml_config(path_to_toml=cfg_path_internal)
-else:
-    cfg_path_external = (Path.cwd() / CONFIG_FILENAME).resolve()
-    if not cfg_path_external.exists():
-        shutil.copy(cfg_path_internal, cfg_path_external)
-        sys.exit(
-            (
-                'No config file was found. A new one with default values was created '
-                'in the execution path. Please fill in the necessary values and '
-                'restart the programm.'
-            )
-        )
-        # raise NotImplementedError("External config data not implemented yet.")
-    loaded_cfg = load_toml_config(path_to_toml=cfg_path_external)
-
-CONFIG: Final[dict[str, Any]] = loaded_cfg.copy()
+# ** load config data: internal/external
+# look for external config first, if not found use internal one
+def search_cwd(
+    glob_pattern: str = CONFIG_FILENAME,
+) -> Path | None:
+    """Searches the current working directory and looks for files
+    matching the glob pattern.
+    Returns the first match encountered.
+
+    Parameters
+    ----------
+    glob_pattern : str, optional
+        pattern to look for, first match will be returned,
+        by default CONFIG_FILENAME
+
+    Returns
+    -------
+    Path | None
+        Path if corresponding object was found, None otherwise
+    """
+    cfg_path: Path | None = None
+    res = tuple(Path.cwd().glob(glob_pattern))
+    if res:
+        cfg_path = res[0]
+
+    return cfg_path
+
+
+def search_iterative(
+    starting_path: Path,
+    glob_pattern: str = CONFIG_FILENAME,
+    stop_folder_name: str | None = None,
+) -> Path | None:
+    """Iteratively searches the parent directories of the starting path
+    and look for files matching the glob pattern. The starting path is not
+    searched, only its parents. Therefore the starting path can also point
+    to a file. The folder in which it is placed in will be searched.
+    Returns the first match encountered.
+
+    Parameters
+    ----------
+    starting_path : Path
+        non-inclusive starting path
+    glob_pattern : str, optional
+        pattern to look for, first match will be returned,
+        by default CONFIG_FILENAME
+    stop_folder_name : str, optional
+        name of the last folder in the directory tree to search, by default 'python'
+
+    Returns
+    -------
+    Path | None
+        Path if corresponding object was found, None otherwise
+    """
+    cfg_path: Path | None = None
+    for it in range(len(starting_path.parents)):
+        search_path = starting_path.parents[it]  # do not look in library folder
+        res = tuple(search_path.glob(glob_pattern))
+        if res:
+            cfg_path = res[0]
+            break
+
+        if stop_folder_name is not None and search_path.name == stop_folder_name:
+            # library is placed inside a whole python installation for deployment
+            # only look up to this folder
+            break
+
+    return cfg_path
+
+
+def load_cfg() -> dict[str, Any]:
+    cfg_path: Path | None
+    if PREFER_INTERNAL_CONFIG:
+        cfg_path = cfg_path_internal
+    else:
+        cfg_path = search_cwd(glob_pattern=CONFIG_FILENAME)
+
+    if cfg_path is None:
+        cfg_path = search_iterative(
+            starting_path=pkg_dir,
+            glob_pattern=CONFIG_FILENAME,
+            stop_folder_name='python',
+        )
+        # backup: use internal config
+        if cfg_path is None:
+            cfg_path = cfg_path_internal
+
+    config = load_toml_config(path_to_toml=cfg_path)
+
+    return config.copy()
+
+
+CONFIG: Final[dict[str, Any]] = load_cfg()
 
 # ** Cytoscape configuration
 # stylesheet
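The lookup order introduced above is: the packaged config when PREFER_INTERNAL_CONFIG is set, else the current working directory, else each parent of the package directory (stopping once a folder named 'python' is reached), else the packaged default as a backup. A self-contained sketch of that cascade; find_config is a hypothetical helper, only the file name and the search order mirror the diff:

from pathlib import Path

CONFIG_FILENAME = 'lang_main_config.toml'

def find_config(pkg_dir: Path, prefer_internal: bool = False) -> Path:
    internal = pkg_dir / CONFIG_FILENAME
    if prefer_internal:
        return internal
    # 1) current working directory
    hits = tuple(Path.cwd().glob(CONFIG_FILENAME))
    if hits:
        return hits[0]
    # 2) parents of the package directory, bounded by a 'python' folder
    for parent in pkg_dir.parents:
        hits = tuple(parent.glob(CONFIG_FILENAME))
        if hits:
            return hits[0]
        if parent.name == 'python':
            break
    # 3) backup: the config file shipped with the package
    return internal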
@@ -64,7 +135,7 @@ CYTO_PATH_STYLESHEET: Final[Path] = cyto_stylesheet_path
 
 # TODO check removal
 # append Graphviz binary folder to system path if not already contained
-if sys.platform == 'win32':
-    path = Path(r'C:\Program Files\Graphviz\bin')
-    if path.is_dir() and str(path).lower() not in os.environ['PATH'].lower():
-        os.environ['PATH'] += f';{path}'
+# if sys.platform == 'win32':
+#     path = Path(r'C:\Program Files\Graphviz\bin')
+#     if path.is_dir() and str(path).lower() not in os.environ['PATH'].lower():
+#         os.environ['PATH'] += f';{path}'
@@ -386,7 +386,7 @@ def pipe_rescale_graph_edge_weights(
 
 
 def normalise_array_linear(
-    array: npt.NDArray[np.float_],
+    array: npt.NDArray[np.float32],
 ) -> npt.NDArray[np.float32]:
     """apply standard linear normalisation
 
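np.float_ is the NumPy 1.x alias for float64 and was removed in NumPy 2.0, which is why the annotation moves to the concrete np.float32 (and why pyproject.toml flips numpy from <=1.26.4 to >=1.26.4). The function body is not part of the hunk; a minimal linear normalisation consistent with the new signature would look like:

import numpy as np
import numpy.typing as npt

def normalise_array_linear(
    array: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]:
    # rescale values into [0, 1]; assumes a non-constant array
    span = array.max() - array.min()
    return ((array - array.min()) / span).astype(np.float32)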
@@ -445,7 +445,7 @@ def verify_property(
     graph: Graph | DiGraph,
     property: str,
 ) -> None:
-    for idx, (node_1, node_2) in enumerate(graph.edges):
+    for node_1, node_2 in graph.edges:
         if property not in graph[node_1][node_2]:
             raise EdgePropertyNotContainedError(
                 (
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from collections.abc import Collection
 from itertools import combinations
 from math import factorial
 from pathlib import Path
-from typing import cast
+from typing import TYPE_CHECKING, cast
 
 import numpy as np
 import pandas as pd
@@ -21,15 +23,10 @@ from lang_main.analysis.shared import (
     similar_index_groups,
 )
 from lang_main.loggers import logger_preprocess as logger
-from lang_main.pipelines.base import Pipeline
 from lang_main.types import Embedding, PandasIndex
 
-# TODO removal
-# pattern_escape_seq = re.compile(r'[\t\n\r\f\v]+')
-# pattern_escape_seq_sentences = re.compile(r' *[\t\n\r\f\v]+')
-# pattern_repeated_chars = re.compile(r'([,;.:!?-_\+]){2,}')
-# pattern_dates = re.compile(r'(\d{1,2}\.)?(\d{1,2}\.)([\d]{2,4})?')
-# pattern_whitespace = re.compile(r'[ ]{2,}')
+if TYPE_CHECKING:
+    from lang_main.pipelines.base import Pipeline
 
 
 # ** (1) dataset preparation: loading and simple preprocessing
@@ -5,8 +5,9 @@ from typing import cast
 import networkx as nx
 import numpy as np
 import numpy.typing as npt
-import sentence_transformers
-import sentence_transformers.util
+# import sentence_transformers  # TODO check removal
+# import sentence_transformers.util  # TODO check removal
 from networkx import Graph
 from pandas import DataFrame, Series
 from sentence_transformers import SentenceTransformer
@@ -76,7 +77,6 @@ def candidates_by_index(
     data_model_input: Series,
     model: SentenceTransformer,
     cos_sim_threshold: float = 0.5,
-    # ) -> Iterator[tuple[PandasIndex, PandasIndex]]:
 ) -> Iterator[tuple[PandasIndex, PandasIndex]]:
     """function to filter candidate indices based on cosine similarity
     using SentenceTransformer model in batch mode,
@@ -111,7 +111,9 @@ def candidates_by_index(
         ),
     )
     # cosine similarity
-    cos_sim = cast(npt.NDArray, sentence_transformers.util.cos_sim(embds, embds).numpy())
+    cos_sim = cast(npt.NDArray, model.similarity(embds, embds).numpy())
+    # TODO check removal
+    # cos_sim = cast(npt.NDArray, sentence_transformers.util.cos_sim(embds, embds).numpy())
     np.fill_diagonal(cos_sim, 0.0)
     cos_sim = np.triu(cos_sim)
     cos_sim_idx = np.argwhere(cos_sim >= cos_sim_threshold)
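SentenceTransformer.similarity (sentence-transformers 3.x) applies the similarity function the model was configured with, cosine by default, so it can stand in for the hard-coded util.cos_sim call. A short equivalence sketch with an example model:

import numpy as np
import sentence_transformers.util
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')  # example model
embds = model.encode(['pump failure', 'motor defect'])

old = sentence_transformers.util.cos_sim(embds, embds).numpy()
new = model.similarity(embds, embds).numpy()
assert np.allclose(old, new)  # identical for the default cosine setting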
@@ -11,6 +11,11 @@ from lang_main.analysis.graphs import (
     TokenGraph,
     update_graph,
 )
+from lang_main.constants import (
+    POS_INDIRECT,
+    POS_OF_INTEREST,
+    TAG_OF_INTEREST,
+)
 from lang_main.loggers import logger_token_analysis as logger
 from lang_main.types import (
     PandasIndex,
@@ -19,24 +24,8 @@ from lang_main.types import (
     SpacyToken,
 )
 
-# ** POS
-# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'ADJ', 'VERB', 'AUX'])
-# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'ADJ', 'VERB', 'AUX'])
-# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN'])
-# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'VERB', 'AUX'])
-POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'VERB', 'AUX', 'ADV'])
-
-# POS_INDIRECT: frozenset[str] = frozenset(['AUX', 'VERB'])
-POS_INDIRECT: frozenset[str] = frozenset(['AUX'])
-
-# ** TAG
-# TAG_OF_INTEREST: frozenset[str] = frozenset(['ADJD'])
-TAG_OF_INTEREST: frozenset[str] = frozenset()
-
 # ** obtaining connection in texts
 
 
 def pre_clean_word(string: str) -> str:
     pattern = r'[^A-Za-zäöüÄÖÜ]+'
     string = re.sub(pattern, '', string)
@@ -49,7 +38,6 @@ def is_str_date(
     string: str,
     fuzzy: bool = False,
 ) -> bool:
-    # print(string)
     try:
         # check if string is a number
         # if length is greater than 8, it is not a date
@@ -1,6 +1,10 @@
+from enum import Enum  # noqa: I001
+from importlib.util import find_spec
 from pathlib import Path
 from typing import Final
 
+from sentence_transformers import SimilarityFunction
+
 from lang_main import CONFIG, CYTO_PATH_STYLESHEET
 from lang_main import model_loader as m_load
 from lang_main.types import (
@@ -8,7 +12,12 @@ from lang_main.types import (
     CytoLayouts,
     LanguageModels,
     ModelLoaderMap,
+    ONNXExecutionProvider,  # noqa: F401
+    STFRBackends,
     STFRDeviceTypes,
+    STFRModelArgs,
+    STFRModels,
+    STFRQuantFilenames,  # noqa: F401
 )
 
 __all__ = [
@@ -16,8 +25,23 @@ __all__ = [
     'CYTO_PATH_STYLESHEET',
 ]
 
+# ** dependencies
+_has_py4cyto: bool = True if find_spec('py4cytoscape') else False
+_has_dash: bool = True if (find_spec('dash') and find_spec('kaleido')) else False
+_has_plotly: bool = True if (find_spec('plotly') and find_spec('kaleido')) else False
+
+
+class Dependencies(Enum):
+    PY4C = _has_py4cyto
+    DASH = _has_dash
+    PLOT = _has_plotly
+
+
 # ** logging
 # graphs
+ENABLE_LOGGING: Final[bool] = CONFIG['logging']['enabled']
+LOGGING_TO_FILE: Final[bool] = CONFIG['logging']['file']
+LOGGING_TO_STDERR: Final[bool] = CONFIG['logging']['stderr']
 LOGGING_DEFAULT_GRAPHS: Final[bool] = False
 
 # ** paths
@@ -44,14 +68,25 @@ SKIP_TIME_ANALYSIS: Final[bool] = CONFIG['control']['time_analysis_skip']
 # ** models
 # ** loading
 SPACY_MODEL_NAME: Final[str] = 'de_dep_news_trf'
-STFR_MODEL_NAME: Final[str] = 'sentence-transformers/all-mpnet-base-v2'
+STFR_MODEL_NAME: Final[STFRModels] = STFRModels.ALL_MPNET_BASE_V2
 STFR_DEVICE: Final[STFRDeviceTypes] = STFRDeviceTypes.CPU
+STFR_SIMILARITY: Final[SimilarityFunction] = SimilarityFunction.COSINE
+STFR_BACKEND: Final[STFRBackends] = STFRBackends.TORCH
+STFR_MODEL_ARGS: Final[STFRModelArgs] = {}
+# STFR_MODEL_ARGS: Final[STFRModelArgs] = {
+#     'file_name': STFRQuantFilenames.ONNX_Q_UINT8,
+#     'provider': ONNXExecutionProvider.CPU,
+#     'export': False,
+# }
 MODEL_LOADER_MAP: Final[ModelLoaderMap] = {
     LanguageModels.SENTENCE_TRANSFORMER: {
         'func': m_load.load_sentence_transformer,
         'kwargs': {
             'model_name': STFR_MODEL_NAME,
+            'similarity_func': STFR_SIMILARITY,
+            'backend': STFR_BACKEND,
             'device': STFR_DEVICE,
+            'model_kwargs': STFR_MODEL_ARGS,
         },
     },
     LanguageModels.SPACY: {
@@ -61,6 +96,19 @@ MODEL_LOADER_MAP: Final[ModelLoaderMap] = {
         },
     },
 }
+
+# ** language dependency analysis
+# ** POS
+# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'ADJ', 'VERB', 'AUX'])
+# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'ADJ', 'VERB', 'AUX'])
+# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN'])
+# POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'VERB', 'AUX'])
+POS_OF_INTEREST: frozenset[str] = frozenset(['NOUN', 'PROPN', 'VERB', 'AUX', 'ADV'])
+# POS_INDIRECT: frozenset[str] = frozenset(['AUX', 'VERB'])
+POS_INDIRECT: frozenset[str] = frozenset(['AUX'])
+# ** TAG
+# TAG_OF_INTEREST: frozenset[str] = frozenset(['ADJD'])
+TAG_OF_INTEREST: frozenset[str] = frozenset()
 
 # ** export
 # ** preprocessing
@@ -74,7 +122,6 @@ THRESHOLD_SIMILARITY: Final[float] = CONFIG['preprocess']['threshold_similarity']
 # ** graph postprocessing
 EDGE_WEIGHT_DECIMALS: Final[int] = 4
 THRESHOLD_EDGE_NUMBER: Final[int] = CONFIG['graph_postprocessing']['threshold_edge_number']
-# THRESHOLD_EDGE_WEIGHT: Final[int] = CONFIG['graph_postprocessing']['threshold_edge_weight']
 PROPERTY_NAME_DEGREE_WEIGHTED: Final[str] = 'degree_weighted'
 
 # ** graph exports (Cytoscape)
@@ -14,3 +14,25 @@ class EmptyEdgesError(EmptyGraphError):
 
 class GraphRenderError(Exception):
     """Error raised if a graph object can not be rendered"""
+
+
+class DependencyMissingError(Exception):
+    """Error raised if needed dependency could not be found"""
+
+
+# ** pipelines to perform given actions on dataset in a customisable manner
+
+
+class NoPerformableActionError(Exception):
+    """Error describing that no action is available in the current pipeline"""
+
+
+class WrongActionTypeError(Exception):
+    """Error raised if added action type is not supported by corresponding pipeline"""
+
+
+class OutputInPipelineContainerError(Exception):
+    """Error raised if an output was detected by one of the performed
+    actions in a PipelineContainer. Each action in a PipelineContainer is itself a
+    procedure which does not have any parameters or return values and should therefore not
+    return any values."""
@@ -7,6 +7,11 @@ inputs = './inputs/'
 results = './results/test_20240807/'
 dataset = '../data/02_202307/Export4.csv'
 
+[logging]
+enabled = true
+stderr = true
+file = true
+
 # only debugging features, production-ready pipelines should always
 # be fully executed
 [control]
@@ -1,16 +1,65 @@
 import logging
+import logging.handlers
+from pathlib import Path
+from time import gmtime
 from typing import Final
 
+from lang_main.constants import (
+    ENABLE_LOGGING,
+    LOGGING_TO_FILE,
+    LOGGING_TO_STDERR,
+)
 from lang_main.types import LoggingLevels
 
-# ** logging
-LOGGING_LEVEL_PREPROCESS: Final[LoggingLevels] = LoggingLevels.INFO
-LOGGING_LEVEL_PIPELINES: Final[LoggingLevels] = LoggingLevels.INFO
-LOGGING_LEVEL_GRAPHS: Final[LoggingLevels] = LoggingLevels.INFO
+# ** config
+logging.Formatter.converter = gmtime
+LOG_FMT: Final[str] = '%(asctime)s | lang_main:%(module)s:%(levelname)s | %(message)s'
+LOG_DATE_FMT: Final[str] = '%Y-%m-%d %H:%M:%S +0000'
+LOG_FILE_PATH: Final[Path] = Path.cwd() / 'lang-main.log'
+# logging.basicConfig(
+#     format=LOG_FMT,
+#     datefmt=LOG_DATE_FMT,
+# )
+
+# ** formatters
+logger_all_formater = logging.Formatter(fmt=LOG_FMT, datefmt=LOG_DATE_FMT)
+
+# ** handlers
+null_handler = logging.NullHandler()
+if ENABLE_LOGGING and LOGGING_TO_STDERR:
+    logger_all_handler_stderr = logging.StreamHandler()
+    logger_all_handler_stderr.setLevel(LoggingLevels.WARNING)
+    logger_all_handler_stderr.setFormatter(logger_all_formater)
+else:
+    logger_all_handler_stderr = null_handler
+
+if ENABLE_LOGGING and LOGGING_TO_FILE:
+    logger_all_handler_file = logging.handlers.RotatingFileHandler(
+        LOG_FILE_PATH,
+        encoding='utf-8',
+        maxBytes=5_242_880,
+        backupCount=1,
+    )
+    logger_all_handler_file.setLevel(LoggingLevels.DEBUG)
+    logger_all_handler_file.setFormatter(logger_all_formater)
+else:
+    logger_all_handler_file = null_handler
+
+
+# ** logging levels
+LOGGING_LEVEL_ALL: Final[LoggingLevels] = LoggingLevels.DEBUG
+LOGGING_LEVEL_PREPROCESS: Final[LoggingLevels] = LoggingLevels.DEBUG
+LOGGING_LEVEL_PIPELINES: Final[LoggingLevels] = LoggingLevels.DEBUG
+LOGGING_LEVEL_GRAPHS: Final[LoggingLevels] = LoggingLevels.DEBUG
 LOGGING_LEVEL_TIMELINE: Final[LoggingLevels] = LoggingLevels.DEBUG
-LOGGING_LEVEL_TOKEN_ANALYSIS: Final[LoggingLevels] = LoggingLevels.INFO
-LOGGING_LEVEL_SHARED_HELPERS: Final[LoggingLevels] = LoggingLevels.INFO
-LOGGING_LEVEL_RENDERING: Final[LoggingLevels] = LoggingLevels.INFO
+LOGGING_LEVEL_TOKEN_ANALYSIS: Final[LoggingLevels] = LoggingLevels.DEBUG
+LOGGING_LEVEL_SHARED_HELPERS: Final[LoggingLevels] = LoggingLevels.DEBUG
+LOGGING_LEVEL_RENDERING: Final[LoggingLevels] = LoggingLevels.DEBUG
+
+# ** loggers and configuration
+logger_all = logging.getLogger('lang_main')
+logger_all.addHandler(logger_all_handler_stderr)
+logger_all.addHandler(logger_all_handler_file)
+
 logger_shared_helpers = logging.getLogger('lang_main.shared')
 logger_shared_helpers.setLevel(LOGGING_LEVEL_SHARED_HELPERS)
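The child loggers only carry levels; their records propagate up to the 'lang_main' logger, which owns the stderr and rotating-file handlers. A minimal sketch of that propagation:

import logging

root = logging.getLogger('lang_main')          # owns the handlers
root.addHandler(logging.StreamHandler())       # stderr, WARNING+ in the diff

child = logging.getLogger('lang_main.shared')  # no handlers of its own
child.setLevel(logging.DEBUG)
child.warning('emitted via the parent handlers through propagation')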
@@ -1,18 +1,28 @@
 from __future__ import annotations
 
-from typing import Literal, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Literal,
+    overload,
+)
 
 import spacy
 from sentence_transformers import SentenceTransformer
 
+from lang_main.constants import STFR_SIMILARITY
 from lang_main.types import (
     LanguageModels,
     Model,
     ModelLoaderMap,
     SpacyModel,
+    STFRBackends,
     STFRDeviceTypes,
 )
 
+if TYPE_CHECKING:
+    from sentence_transformers import SimilarityFunction
+
 
 @overload
 def instantiate_model(
@@ -48,6 +58,15 @@ def load_spacy(
 
 def load_sentence_transformer(
     model_name: str,
-    device: STFRDeviceTypes,
+    similarity_func: SimilarityFunction = STFR_SIMILARITY,
+    backend: STFRBackends = STFRBackends.TORCH,
+    device: STFRDeviceTypes = STFRDeviceTypes.CPU,
+    model_kwargs: dict[str, Any] | None = None,
 ) -> SentenceTransformer:
-    return SentenceTransformer(model_name_or_path=model_name, device=device)
+    return SentenceTransformer(
+        model_name_or_path=model_name,
+        similarity_fn_name=similarity_func,
+        backend=backend,  # type: ignore Literal matches Enum
+        device=device,
+        model_kwargs=model_kwargs,
+    )
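With the extended signature, the commented-out STFR_MODEL_ARGS block in constants.py corresponds to a call roughly like the following sketch, assuming sentence-transformers[onnx] is installed; the string literals match the enum values defined in types.py in this commit:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    model_name_or_path='all-mpnet-base-v2',
    similarity_fn_name='cosine',
    backend='onnx',
    device='cpu',
    model_kwargs={
        'file_name': 'onnx/model_quint8_avx2.onnx',  # quantized weights
        'provider': 'CPUExecutionProvider',
        'export': False,
    },
)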
@@ -6,27 +6,15 @@ from pathlib import Path
 from typing import Any, Never, cast
 from typing_extensions import override
 
+from lang_main.errors import (
+    NoPerformableActionError,
+    OutputInPipelineContainerError,
+    WrongActionTypeError,
+)
 from lang_main.io import load_pickle, save_pickle
 from lang_main.loggers import logger_pipelines as logger
 from lang_main.types import ResultHandling
 
-# ** pipelines to perform given actions on dataset in a customisable manner
-
-
-class NoPerformableActionError(Exception):
-    """Error describing that no action is available in the current pipeline"""
-
-
-class WrongActionTypeError(Exception):
-    """Error raised if added action type is not supported by corresponding pipeline"""
-
-
-class OutputInPipelineContainerError(Exception):
-    """Error raised if an output was detected by one of the performed
-    actions in a PipelineContainer. Each action in a PipelineContainer is itself a
-    procedure which does not have any parameters or return values and should therefore not
-    return any values."""
-
 
 class BasePipeline(ABC):
     def __init__(
@@ -42,7 +42,6 @@ from lang_main.constants import (
     UNIQUE_CRITERION_FEATURE,
 )
 from lang_main.pipelines.base import Pipeline
-from lang_main.render import cytoscape as cyto
 from lang_main.types import EntryPoints, LanguageModels
 
 # ** Models
@@ -137,13 +136,6 @@ def build_tk_graph_post_pipe() -> Pipeline:
     pipe_graph_postprocessing = Pipeline(
         name='Graph_Postprocessing', working_dir=SAVE_PATH_FOLDER
     )
-    # pipe_graph_postprocessing.add(
-    #     graphs.filter_graph_by_edge_weight,
-    #     {
-    #         'bound_lower': THRESHOLD_EDGE_WEIGHT,
-    #         'bound_upper': None,
-    #     },
-    # )
     pipe_graph_postprocessing.add(
         graphs.filter_graph_by_number_edges,
         {
@@ -190,6 +182,10 @@ def build_tk_graph_render_pipe(
     export_folder: Path = SAVE_PATH_FOLDER,
     base_network_name: str = CYTO_BASE_NETWORK_NAME,
 ) -> Pipeline:
+    # optional dependency: late import
+    # raises exception if necessary modules are not found
+    from lang_main.render import cytoscape as cyto
+
     pipe_graph_rendering = Pipeline(
         name='Graph_Static-Rendering',
         working_dir=SAVE_PATH_FOLDER,
@@ -0,0 +1,7 @@
+from lang_main.constants import Dependencies
+from lang_main.errors import DependencyMissingError
+
+if not Dependencies.PY4C.value:
+    raise DependencyMissingError(
+        'The module >>render<< needs the package >>Py4Cytoscape<<. Package not found.'
+    )
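Because the guard runs at import time, callers that can work without Cytoscape can treat the missing extra as a feature switch. A sketch of a tolerant call site:

from lang_main.errors import DependencyMissingError

try:
    from lang_main.render import cytoscape as cyto
except DependencyMissingError:
    cyto = None  # rendering disabled; py4cytoscape extra not installed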
@@ -3,6 +3,7 @@ from collections.abc import Callable, Hashable
 from typing import (
     Any,
     Literal,
+    NotRequired,
     Required,
     TypeAlias,
     TypedDict,
@@ -40,6 +41,27 @@ class LanguageModels(enum.StrEnum):
     SPACY = enum.auto()
 
 
+class ONNXExecutionProvider(enum.StrEnum):
+    CPU = 'CPUExecutionProvider'
+
+
+class STFRModels(enum.StrEnum):
+    ALL_MPNET_BASE_V2 = 'all-mpnet-base-v2'
+    ALL_DISTILROBERTA_V1 = 'all-distilroberta-v1'
+    ALL_MINI_LM_L12_V2 = 'all-MiniLM-L12-v2'
+    ALL_MINI_LM_L6_V2 = 'all-MiniLM-L6-v2'
+
+
+class STFRQuantFilenames(enum.StrEnum):
+    ONNX_Q_UINT8 = 'onnx/model_quint8_avx2.onnx'
+
+
+class STFRModelArgs(TypedDict):
+    provider: NotRequired[ONNXExecutionProvider]
+    file_name: NotRequired[STFRQuantFilenames]
+    export: NotRequired[bool]
+
+
 Model: TypeAlias = SentenceTransformer | SpacyModel
 ModelLoaderFunc: TypeAlias = Callable[..., Model]
 
@@ -52,7 +74,12 @@ class ModelLoaderInfo(TypedDict):
 ModelLoaderMap: TypeAlias = dict[LanguageModels, ModelLoaderInfo]
 
 
-# ** devices
+class STFRBackends(enum.StrEnum):
+    TORCH = enum.auto()
+    ONNX = enum.auto()
+    OPENVINO = enum.auto()
+
+
 class STFRDeviceTypes(enum.StrEnum):
     CPU = enum.auto()
     GPU = enum.auto()
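Every key of STFRModelArgs is NotRequired, so the empty dict used for the default torch backend and a partial dict for the ONNX backend both type-check. A short usage sketch against the types defined above:

from lang_main.types import (
    ONNXExecutionProvider,
    STFRModelArgs,
    STFRQuantFilenames,
)

args_torch: STFRModelArgs = {}  # torch backend needs no extra kwargs
args_onnx: STFRModelArgs = {
    'file_name': STFRQuantFilenames.ONNX_Q_UINT8,
    'provider': ONNXExecutionProvider.CPU,
    'export': False,
}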