diff --git a/pyproject.toml b/pyproject.toml index 9c5be83..3a0772f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lang-main" -version = "0.1.2dev1" +version = "0.1.2" description = "Several tools to analyse TOM's data with strong focus on language processing" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, @@ -132,7 +132,7 @@ directory = "reports/coverage" [tool.bumpversion] -current_version = "0.1.2dev1" +current_version = "0.1.2" parse = """(?x) (?P<major>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\. diff --git a/src/lang_main/analysis/graphs.py b/src/lang_main/analysis/graphs.py index 64fb6d3..11a9595 100644 --- a/src/lang_main/analysis/graphs.py +++ b/src/lang_main/analysis/graphs.py @@ -467,6 +467,27 @@ def pipe_rescale_graph_edge_weights( return graph.rescale_edge_weights() +def pipe_graph_split( + graph: TokenGraph, +) -> tuple[TokenGraph, Graph]: + """splits TokenGraph instance into itself and its undirected version + + Parameters + ---------- + graph : TokenGraph + token graph pushed through pipeline + + Returns + ------- + tuple[TokenGraph, Graph] + token graph (directed) and undirected version with no changes made + """ + graph = graph.copy() + graph_undir = graph.to_undirected(inplace=False) + + return graph, graph_undir + + def normalise_array_linear( array: npt.NDArray[np.float32], ) -> npt.NDArray[np.float32]: diff --git a/src/lang_main/pipelines/predefined.py b/src/lang_main/pipelines/predefined.py index 4a399f3..2ca9262 100644 --- a/src/lang_main/pipelines/predefined.py +++ b/src/lang_main/pipelines/predefined.py @@ -164,11 +164,15 @@ def build_tk_graph_post_pipe() -> Pipeline: def build_tk_graph_rescaling_pipe( save_result: bool, exit_point: EntryPoints, + enable_rescaling: bool = True, ) -> Pipeline: pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER) - pipe_graph_rescaling.add( - graphs.pipe_rescale_graph_edge_weights, - ) + if enable_rescaling: + 
pipe_graph_rescaling.add( + graphs.pipe_rescale_graph_edge_weights, + ) + else: + pipe_graph_rescaling.add(graphs.pipe_graph_split) pipe_graph_rescaling.add( graphs.pipe_add_graph_metrics, save_result=save_result, diff --git a/tests/analysis/test_graphs.py b/tests/analysis/test_graphs.py index 929c679..8e87c5f 100644 --- a/tests/analysis/test_graphs.py +++ b/tests/analysis/test_graphs.py @@ -326,6 +326,15 @@ def test_pipe_rescale_graph_edge_weights(tk_graph): assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0) +def test_pipe_graph_split(tk_graph): + graph_copy, graph_undir = graphs.pipe_graph_split(tk_graph) + assert len(graph_copy.nodes) == len(tk_graph.nodes) + assert len(graph_copy.edges) == len(tk_graph.edges) + assert len(graph_copy.nodes) == len(graph_undir.nodes) + assert len(graph_undir.nodes) == len(tk_graph.nodes) + assert len(graph_undir.edges) != len(tk_graph.edges) + + @pytest.mark.parametrize('import_graph', ['graph', 'tk_graph']) def test_rescale_edge_weights(import_graph, request): test_graph = request.getfixturevalue(import_graph) diff --git a/tests/analysis/test_preprocessing.py b/tests/analysis/test_preprocessing.py index c52754d..7690610 100644 --- a/tests/analysis/test_preprocessing.py +++ b/tests/analysis/test_preprocessing.py @@ -29,13 +29,6 @@ def test_remove_na(raw_data_path, raw_data_date_cols): assert len(data) == 998 -# def test_string_cleansing(): -# string = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!' -# cleaned_string = shared.clean_string_slim(string) -# target_string = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!' 
-# assert cleaned_string == target_string - - def test_entry_wise_cleansing(raw_data_path, raw_data_date_cols): (data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols) (data,) = ppc.remove_duplicates(data) diff --git a/tests/pipelines/test_predefined.py b/tests/pipelines/test_predefined.py index b00c024..531074f 100644 --- a/tests/pipelines/test_predefined.py +++ b/tests/pipelines/test_predefined.py @@ -28,14 +28,26 @@ def test_build_tk_graph_post_pipe(): assert len(pipe.actions) == 3 -def test_build_tk_graph_rescaling_pipe(): +def test_build_tk_graph_rescaling_pipe_with_edge_rescaling(): pipe = pre.build_tk_graph_rescaling_pipe( - save_result=False, exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED + save_result=False, + exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED, + enable_rescaling=True, ) assert pipe.name == 'Graph_Rescaling' assert len(pipe.actions) == 2 +def test_build_tk_graph_rescaling_pipe_without_edge_rescaling(): + pipe = pre.build_tk_graph_rescaling_pipe( + save_result=False, + exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED, + enable_rescaling=False, + ) + assert pipe.name == 'Graph_Rescaling' + assert len(pipe.actions) == 1 + + @pytest.mark.parametrize('with_subgraphs', [True, False]) def test_build_tk_graph_render_pipe(with_subgraphs): pipe = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs) diff --git a/tests/work_dir/Pipe-test_Step-1_valid_action.pkl b/tests/work_dir/Pipe-test_Step-1_valid_action.pkl index 9de24f9..fed48a4 100644 Binary files a/tests/work_dir/Pipe-test_Step-1_valid_action.pkl and b/tests/work_dir/Pipe-test_Step-1_valid_action.pkl differ