From b136b74f4ea6e4ad668edb114bb30bc3dd8fa2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20F=C3=B6rster?= Date: Wed, 29 Jan 2025 11:19:18 +0100 Subject: [PATCH] usage of cfg TK-Graph export filename --- lang_main_config.toml | 5 +-- lang_main_config_old.toml | 3 ++ lang_main_config_old2.toml | 63 -------------------------------------- pdm.lock | 28 +++++------------ pyproject.toml | 6 ++-- src/tom_plugin/pipeline.py | 3 +- 6 files changed, 19 insertions(+), 89 deletions(-) delete mode 100644 lang_main_config_old2.toml diff --git a/lang_main_config.toml b/lang_main_config.toml index a50886c..9dd7da8 100644 --- a/lang_main_config.toml +++ b/lang_main_config.toml @@ -3,10 +3,11 @@ [paths] inputs = './lang-data/in/' results = './lang-data/out/' -models = './lang-models/converted' +models = './lang-models' +graph_export_filename = 'EXPORT-TokenGraph' [models] -use_large_model = false +use_large_model = true [logging] enabled = true diff --git a/lang_main_config_old.toml b/lang_main_config_old.toml index 5011212..a50886c 100644 --- a/lang_main_config_old.toml +++ b/lang_main_config_old.toml @@ -5,6 +5,9 @@ inputs = './lang-data/in/' results = './lang-data/out/' models = './lang-models/converted' +[models] +use_large_model = false + [logging] enabled = true stderr = true diff --git a/lang_main_config_old2.toml b/lang_main_config_old2.toml deleted file mode 100644 index 075f4de..0000000 --- a/lang_main_config_old2.toml +++ /dev/null @@ -1,63 +0,0 @@ -# lang_main: Config file - -[paths] -inputs = './lang-data/in' -# results = './results/dummy_N_1000/' -# dataset = '../data/Dummy_Dataset_N_1000.csv' -results = './lang-data/out' -models = './lang-models/converted' - -[logging] -enabled = true -stderr = true -file = true - -# control which pipelines are executed -[control] -preprocessing_skip = false -token_analysis_skip = false -graph_postprocessing_skip = false -graph_rescaling_skip = false -graph_static_rendering_skip = false -time_analysis_skip = true - -[preprocess] -date_cols = [ - "VorgangsDatum", - "ErledigungsDatum", - "Arbeitsbeginn", - "ErstellungsDatum", -] -threshold_amount_characters = 5 -threshold_similarity = 0.8 - -[graph_postprocessing] -threshold_edge_number = 500 -# threshold_edge_weight = 150 - -[time_analysis.uniqueness] -threshold_unique_texts = 4 -criterion_feature = 'HObjektText' -feature_name_obj_id = 'ObjektID' -feature_name_obj_text = 'HObjektText' - -[time_analysis.preparation] -name_delta_feat_to_repair = 'Zeitspanne bis zur Behebung [Tage]' -name_delta_feat_to_next_failure = 'Zeitspanne bis zum nächsten Ereignis [Tage]' - -[time_analysis.model_input] -# input_features = [ -# 'VorgangsTypName', -# 'VorgangsArtText', -# 'VorgangsBeschreibung', -# ] -input_features = [ - 'VorgangsBeschreibung', -] -activity_feature = 'VorgangsTypName' -activity_types = [ - 'Reparaturauftrag (Portal)', - 'Störungsmeldung', -] -threshold_num_acitivities = 1 -threshold_similarity = 0.8 \ No newline at end of file diff --git a/pdm.lock b/pdm.lock index daffc0d..64cc33a 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:3ab6b2982d5f572e18f99aaeb49ab8e52ec4a9ddc44855cbed6157dadabfdc88" +content_hash = "sha256:6f65fda39d1b2fcc5a02c37e264b4b55594c7c6123a2615133ce3513cff731dc" [[metadata.targets]] requires_python = ">=3.11" @@ -1383,7 +1383,7 @@ files = [ [[package]] name = "lang-main" -version = "0.1.0" +version = "0.1.1" requires_python = ">=3.11" summary = "Several tools to analyse TOM's data with strong focus on language processing" groups = ["default"] @@ -1392,7 +1392,6 @@ dependencies = [ "numpy>=1.26.4", "onnx==1.16.1", "pandas>=2.2.2", - "pip>=24.0", "python-dateutil>=2.9.0.post0", "sentence-transformers[onnx]>=3.2.0", "spacy>=3.7.4", @@ -1400,13 +1399,13 @@ dependencies = [ "typing-extensions>=4.12.2", ] files = [ - {file = "lang_main-0.1.0-py3-none-any.whl", hash = "sha256:82e742d1bb62ce8aa160002258c1be4121b34c60e8f229e383388b9399ea22e3"}, - {file = "lang_main-0.1.0.tar.gz", hash = "sha256:71ebb7b40121d5e18c6aa7e9053d85ccb4cff258427638bb93c9fcb90a6378e7"}, + {file = "lang_main-0.1.1-py3-none-any.whl", hash = "sha256:bb8e038bb6b604f877ee2932b6d1c28f7443448cd4bbf3cbe5454381f1c12714"}, + {file = "lang_main-0.1.1.tar.gz", hash = "sha256:d532109d7cc2614bc67e6e938c0c8f97b5987f2c1552be01e9fadef8714b3af7"}, ] [[package]] name = "lang-main" -version = "0.1.0" +version = "0.1.1" extras = ["spacy-md", "spacy-trf"] requires_python = ">=3.11" summary = "Several tools to analyse TOM's data with strong focus on language processing" @@ -1414,11 +1413,11 @@ groups = ["default"] dependencies = [ "de-core-news-md @ https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.8.0/de_core_news_md-3.8.0-py3-none-any.whl", "de-dep-news-trf @ https://github.com/explosion/spacy-models/releases/download/de_dep_news_trf-3.8.0/de_dep_news_trf-3.8.0-py3-none-any.whl", - "lang-main==0.1.0", + "lang-main==0.1.1", ] files = [ - {file = "lang_main-0.1.0-py3-none-any.whl", hash = "sha256:82e742d1bb62ce8aa160002258c1be4121b34c60e8f229e383388b9399ea22e3"}, - {file = "lang_main-0.1.0.tar.gz", hash = "sha256:71ebb7b40121d5e18c6aa7e9053d85ccb4cff258427638bb93c9fcb90a6378e7"}, + {file = "lang_main-0.1.1-py3-none-any.whl", hash = "sha256:bb8e038bb6b604f877ee2932b6d1c28f7443448cd4bbf3cbe5454381f1c12714"}, + {file = "lang_main-0.1.1.tar.gz", hash = "sha256:d532109d7cc2614bc67e6e938c0c8f97b5987f2c1552be01e9fadef8714b3af7"}, ] [[package]] @@ -2256,17 +2255,6 @@ files = [ {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, ] -[[package]] -name = "pip" -version = "24.3.1" -requires_python = ">=3.8" -summary = "The PyPA recommended tool for installing Python packages." -groups = ["default"] -files = [ - {file = "pip-24.3.1-py3-none-any.whl", hash = "sha256:3790624780082365f47549d032f3770eeb2b1e8bd1f7b2e02dace1afa361b4ed"}, - {file = "pip-24.3.1.tar.gz", hash = "sha256:ebcb60557f2aefabc2e0f918751cd24ea0d56d8ec5445fe1807f1d2109660b99"}, -] - [[package]] name = "platformdirs" version = "4.3.6" diff --git a/pyproject.toml b/pyproject.toml index 07b2936..59f82b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [project] name = "tom-plugin" -version = "0.1.0" +version = "0.1.1" description = "Wrapper for TOM plugins with different helper CLIs, primarily integration testing" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["lang-main[spacy-md,spacy-trf]>=0.1.0"] +dependencies = ["lang-main[spacy-md,spacy-trf]>=0.1.1"] requires-python = ">=3.11" readme = "README.md" license = {text = "MIT"} @@ -97,7 +97,7 @@ dev = [ ] [tool.bumpversion] -current_version = "0.1.0" +current_version = "0.1.1" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/src/tom_plugin/pipeline.py b/src/tom_plugin/pipeline.py index cf6d8ae..82381da 100644 --- a/src/tom_plugin/pipeline.py +++ b/src/tom_plugin/pipeline.py @@ -20,6 +20,7 @@ from lang_main.constants import ( SKIP_PREPROCESSING, SKIP_TIME_ANALYSIS, SKIP_TOKEN_ANALYSIS, + TK_GRAPH_EXPORT_FILENAME, ) from lang_main.errors import DependencyMissingError from lang_main.io import create_saving_folder, get_entry_point, load_pickle @@ -204,7 +205,7 @@ def _run_graph_edge_rescaling( save_to_GraphML( tk_graph_rescaled_undirected, saving_path=target_folder, - filename='TokenGraph-undirected-rescaled', + filename=TK_GRAPH_EXPORT_FILENAME, ) return target_folder