lang-main/src/lang_main/__init__.py
Florian Förster 9edcd5be4e initial commit
2024-05-08 14:46:43 +02:00

58 lines
1.9 KiB
Python

from typing import Final, Any
import inspect
from pathlib import Path
from lang_main.shared import (
save_pickle,
load_pickle,
create_saving_folder,
load_toml_config,
)
from lang_main.analysis.preprocessing import Embedding, PandasIndex
from lang_main.analysis.graphs import TokenGraph
# Names exported by ``from lang_main import *``.
__all__ = [
    # helpers imported from lang_main.shared
    'save_pickle',
    'load_pickle',
    'create_saving_folder',
    # types imported from lang_main.analysis.preprocessing
    'Embedding',
    'PandasIndex',
    # type imported from lang_main.analysis.graphs
    'TokenGraph',
]
# Selects the configuration source: the ``config.toml`` located next to this
# module (True) or an externally supplied one (False, not implemented yet).
USE_INTERNAL_CONFIG: Final[bool] = True
# load config data: internal/external
if USE_INTERNAL_CONFIG:
    # ``__file__`` is the path of this module. It replaces the former
    # ``inspect.getfile(inspect.currentframe())`` lookup, which required a
    # ``type: ignore`` because ``inspect.currentframe()`` may return ``None``.
    # NOTE: despite its name, ``curr_file_dir`` holds the path of this file;
    # the containing directory is ``pkg_dir``. The name is kept so that
    # existing references to this module attribute keep working.
    curr_file_dir = Path(__file__)
    pkg_dir = curr_file_dir.parent
    config_path = Path(pkg_dir, 'config.toml')
    loaded_config = load_toml_config(path_to_toml=config_path)
    # ``.copy()`` detaches only the top-level mapping from ``loaded_config``
    # (presumably a plain dict, so nested tables stay shared -- TODO confirm).
    CONFIG: Final[dict[str, Any]] = loaded_config.copy()
else:
    raise NotImplementedError("External config data not implemented yet.")
# Module-level constants read from CONFIG, one group per config table.
# A missing table or key raises ``KeyError`` while the package is imported.

# ** paths
_paths_cfg = CONFIG['paths']
SAVE_PATH_FOLDER: Final[Path] = Path(_paths_cfg['results'])
PATH_TO_DATASET: Final[Path] = Path(_paths_cfg['dataset'])

# ** control
_control_cfg = CONFIG['control']
DO_PREPROCESSING: Final[bool] = _control_cfg['preprocessing']
DO_TOKEN_ANALYSIS: Final[bool] = _control_cfg['token_analysis']
DO_GRAPH_POSTPROCESSING: Final[bool] = _control_cfg['graph_postprocessing']

# ** export
# (no constants defined for this section yet)

# ** preprocessing
_preprocess_cfg = CONFIG['preprocess']
FILENAME_COSSIM_FILTER_CANDIDATES: Final[str] = _preprocess_cfg[
    'filename_cossim_filter_candidates'
]
DATE_COLS: Final[list[str]] = _preprocess_cfg['date_cols']
THRESHOLD_AMOUNT_CHARACTERS: Final[float] = _preprocess_cfg[
    'threshold_amount_characters'
]
THRESHOLD_SIMILARITY: Final[float] = _preprocess_cfg['threshold_similarity']

# ** token analysis
# (no constants defined for this section yet)

# ** graph postprocessing
_graph_post_cfg = CONFIG['graph_postprocessing']
THRESHOLD_EDGE_WEIGHT: Final[int] = _graph_post_cfg['threshold_edge_weight']

# ** time analysis
_time_analysis_cfg = CONFIG['time_analysis']
THRESHOLD_UNIQUE_TEXTS: Final[int] = _time_analysis_cfg['threshold_unique_texts']