58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
from typing import Final, Any
|
|
import inspect
|
|
from pathlib import Path
|
|
|
|
from lang_main.shared import (
|
|
save_pickle,
|
|
load_pickle,
|
|
create_saving_folder,
|
|
load_toml_config,
|
|
)
|
|
from lang_main.analysis.preprocessing import Embedding, PandasIndex
|
|
from lang_main.analysis.graphs import TokenGraph
|
|
|
|
|
|
# Names exported by ``from <this package> import *``.
# ``load_toml_config`` is imported above but is not part of this list.
__all__ = [
    # helpers imported from lang_main.shared
    'save_pickle',
    'load_pickle',
    'create_saving_folder',
    # types imported from lang_main.analysis.preprocessing
    'Embedding',
    'PandasIndex',
    # type imported from lang_main.analysis.graphs
    'TokenGraph',
]
|
|
|
|
# Switch between the configuration file located next to this module
# (internal) and an external one. Only the internal variant is implemented.
USE_INTERNAL_CONFIG: Final[bool] = True

# load config data: internal/external
if USE_INTERNAL_CONFIG:
    # Path of this module file. ``__file__`` replaces the former
    # ``inspect.getfile(inspect.currentframe())`` lookup: ``currentframe()``
    # may return ``None`` on interpreters without stack-frame support, in
    # which case ``getfile`` raises (this was hidden by ``# type: ignore``).
    # NOTE: despite its name this holds the path of the file itself, not of
    # a directory; the name is kept in case other modules reference it.
    curr_file_dir = Path(__file__)
    pkg_dir = curr_file_dir.parent
    # The internal configuration is the ``config.toml`` beside this module.
    config_path = pkg_dir / 'config.toml'
    loaded_config = load_toml_config(path_to_toml=config_path)
    # NOTE(review): assuming ``load_toml_config`` returns a dict, ``.copy()``
    # is shallow -- nested sections are still shared with ``loaded_config``.
    CONFIG: Final[dict[str, Any]] = loaded_config.copy()
else:
    raise NotImplementedError("External config data not implemented yet.")
|
|
|
|
# Module-level constants derived from CONFIG. Values are taken over exactly
# as loaded; the ``Final[...]`` annotations are not enforced at runtime.

# ** paths
# Entries of the 'paths' section, wrapped in ``Path`` objects.
SAVE_PATH_FOLDER: Final[Path] = Path(CONFIG['paths']['results'])
PATH_TO_DATASET: Final[Path] = Path(CONFIG['paths']['dataset'])

# ** control
# Entries of the 'control' section.
DO_PREPROCESSING: Final[bool] = CONFIG['control']['preprocessing']
DO_TOKEN_ANALYSIS: Final[bool] = CONFIG['control']['token_analysis']
DO_GRAPH_POSTPROCESSING: Final[bool] = (
    CONFIG['control']['graph_postprocessing']
)

# ** export
# (no constants defined for this section yet)

# ** preprocessing
# Entries of the 'preprocess' section.
FILENAME_COSSIM_FILTER_CANDIDATES: Final[str] = (
    CONFIG['preprocess']['filename_cossim_filter_candidates']
)
DATE_COLS: Final[list[str]] = CONFIG['preprocess']['date_cols']
THRESHOLD_AMOUNT_CHARACTERS: Final[float] = (
    CONFIG['preprocess']['threshold_amount_characters']
)
THRESHOLD_SIMILARITY: Final[float] = (
    CONFIG['preprocess']['threshold_similarity']
)

# ** token analysis
# (no constants defined for this section yet)

# ** graph postprocessing
# Entry of the 'graph_postprocessing' section.
THRESHOLD_EDGE_WEIGHT: Final[int] = (
    CONFIG['graph_postprocessing']['threshold_edge_weight']
)

# ** time analysis
# Entry of the 'time_analysis' section.
THRESHOLD_UNIQUE_TEXTS: Final[int] = (
    CONFIG['time_analysis']['threshold_unique_texts']
)
|