"""Session-scoped pytest fixtures that expose test data locations and contents.

All file locations below are written relative to the current working
directory (``./tests/_comparison_results/...``), so the fixtures only resolve
when pytest is started from a directory that contains ``tests/``.
"""

from pathlib import Path

import pandas as pd
import pytest

from lang_main.analysis import graphs

# Column names handed out by the ``raw_data_date_cols`` fixture.
DATE_COLS: tuple[str, ...] = (
    'VorgangsDatum',
    'ErledigungsDatum',
    'Arbeitsbeginn',
    'ErstellungsDatum',
)


@pytest.fixture(scope='session')
def raw_data_path() -> Path:
    """Return the path of the dummy CSV dataset.

    An ``AssertionError`` is raised during fixture setup when the file is
    not present on disk.
    """
    csv_file = Path('./tests/_comparison_results/Dummy_Dataset_N_1000.csv')
    assert csv_file.exists()
    return csv_file


@pytest.fixture(scope='session')
def raw_data_date_cols() -> tuple[str, ...]:
    """Return the module-level ``DATE_COLS`` tuple."""
    return DATE_COLS


@pytest.fixture(scope='session')
def data_pre_cleaned() -> pd.DataFrame:
    """Unpickle and return the content of ``preprocess_pre_cleaned.pkl``."""
    return pd.read_pickle(
        Path('./tests/_comparison_results/preprocess_pre_cleaned.pkl')
    )


@pytest.fixture(scope='session')
def data_analyse_feature() -> pd.DataFrame:
    """Unpickle and return the content of ``analyse_feature.pkl``."""
    return pd.read_pickle(
        Path('./tests/_comparison_results/analyse_feature.pkl')
    )


@pytest.fixture(scope='session')
def data_numeric_pre_filter_feature() -> pd.DataFrame:
    """Unpickle and return the content of ``numeric_pre_filter.pkl``."""
    return pd.read_pickle(
        Path('./tests/_comparison_results/numeric_pre_filter.pkl')
    )


@pytest.fixture(scope='session')
def data_merge_similarity_duplicates() -> pd.DataFrame:
    """Unpickle and return the content of ``merge_similarity_candidates.pkl``."""
    return pd.read_pickle(
        Path('./tests/_comparison_results/merge_similarity_candidates.pkl')
    )


@pytest.fixture(scope='session')
def data_tk_graph_built():
    """Return the result of ``graphs.TokenGraph.from_file`` for ``tk_graph_built.pkl``."""
    graph_file = Path('./tests/_comparison_results/tk_graph_built.pkl')
    return graphs.TokenGraph.from_file(graph_file)