"""Shared re-exports and configuration constants.

Reads ``config.toml`` from the directory that contains this module and
exposes selected entries of it as module-level constants. Also re-exports
the helpers and types listed in ``__all__``.
"""

import inspect  # no longer needed for the config lookup; import left in place
from pathlib import Path
from typing import Any, Final

from lang_main.shared import (
    save_pickle,
    load_pickle,
    create_saving_folder,
    load_toml_config,
)
from lang_main.analysis.preprocessing import Embedding, PandasIndex
from lang_main.analysis.graphs import TokenGraph

__all__ = [
    'save_pickle',
    'load_pickle',
    'create_saving_folder',
    'Embedding',
    'PandasIndex',
    'TokenGraph',
]

USE_INTERNAL_CONFIG: Final[bool] = True

# load config data: internal/external
if USE_INTERNAL_CONFIG:
    # Use the module's own ``__file__`` instead of frame introspection:
    # ``inspect.currentframe()`` may return ``None`` on interpreters without
    # stack-frame support, which would make the lookup fail at import time.
    # NOTE: despite its name, ``curr_file_dir`` holds the path of this file;
    # the name is kept so existing references keep working.
    curr_file_dir = Path(__file__)
    pkg_dir = curr_file_dir.parent
    config_path = Path(pkg_dir, 'config.toml')
    loaded_config = load_toml_config(path_to_toml=config_path)
    # Copy of the loaded top-level mapping (``.copy()`` on the loader result).
    CONFIG: Final[dict[str, Any]] = loaded_config.copy()
else:
    raise NotImplementedError("External config data not implemented yet.")

# ** paths
SAVE_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['results'])
PATH_TO_DATASET: Final[Path] = Path(CONFIG['paths']['dataset'])

# ** control
DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
DO_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis']
DO_GRAPH_POSTPROCESSING: Final[bool] = CONFIG['control']['graph_postprocessing']

# ** export

# ** preprocessing
FILENAME_COSSIM_FILTER_CANDIDATES: Final[str] = (
    CONFIG['preprocess']['filename_cossim_filter_candidates']
)
DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols']
THRESHOLD_AMOUNT_CHARACTERS: Final[float] = (
    CONFIG['preprocess']['threshold_amount_characters']
)
THRESHOLD_SIMILARITY: Final[float] = CONFIG['preprocess']['threshold_similarity']

# ** token analysis

# ** graph postprocessing
THRESHOLD_EDGE_WEIGHT: Final[int] = (
    CONFIG['graph_postprocessing']['threshold_edge_weight']
)

# ** time analysis
THRESHOLD_UNIQUE_TEXTS: Final[int] = CONFIG['time_analysis']['threshold_unique_texts']