add option to disable edge rescaling to provide edge weights as integers, closes #1

This commit is contained in:
Florian Förster 2025-06-20 09:06:51 +02:00
parent a7718c12cd
commit 63eb274975
7 changed files with 53 additions and 14 deletions

View File

@ -1,6 +1,6 @@
[project] [project]
name = "lang-main" name = "lang-main"
version = "0.1.2dev1" version = "0.1.2"
description = "Several tools to analyse TOM's data with strong focus on language processing" description = "Several tools to analyse TOM's data with strong focus on language processing"
authors = [ authors = [
{name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"},
@ -132,7 +132,7 @@ directory = "reports/coverage"
[tool.bumpversion] [tool.bumpversion]
current_version = "0.1.2dev1" current_version = "0.1.2"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@ -467,6 +467,27 @@ def pipe_rescale_graph_edge_weights(
return graph.rescale_edge_weights() return graph.rescale_edge_weights()
def pipe_graph_split(
    graph: TokenGraph,
) -> tuple[TokenGraph, Graph]:
    """Return a copy of the token graph together with its undirected version.

    Only copies and converts the graph; no rescaling method is called here.

    Parameters
    ----------
    graph : TokenGraph
        token graph pushed through the pipeline

    Returns
    -------
    tuple[TokenGraph, Graph]
        copy of the token graph and the undirected version derived from
        that copy
    """
    directed = graph.copy()
    # the undirected graph is taken from the return value (`inplace=False`)
    undirected = directed.to_undirected(inplace=False)

    return directed, undirected
def normalise_array_linear( def normalise_array_linear(
array: npt.NDArray[np.float32], array: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]: ) -> npt.NDArray[np.float32]:

View File

@ -164,11 +164,15 @@ def build_tk_graph_post_pipe() -> Pipeline:
def build_tk_graph_rescaling_pipe( def build_tk_graph_rescaling_pipe(
save_result: bool, save_result: bool,
exit_point: EntryPoints, exit_point: EntryPoints,
enable_rescaling: bool = True,
) -> Pipeline: ) -> Pipeline:
pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER) pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
pipe_graph_rescaling.add( if enable_rescaling:
graphs.pipe_rescale_graph_edge_weights, pipe_graph_rescaling.add(
) graphs.pipe_rescale_graph_edge_weights,
)
else:
pipe_graph_rescaling.add(graphs.pipe_graph_split)
pipe_graph_rescaling.add( pipe_graph_rescaling.add(
graphs.pipe_add_graph_metrics, graphs.pipe_add_graph_metrics,
save_result=save_result, save_result=save_result,

View File

@ -326,6 +326,15 @@ def test_pipe_rescale_graph_edge_weights(tk_graph):
assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0) assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0)
def test_pipe_graph_split(tk_graph):
    """Compare node and edge counts of the split result with the input graph."""
    directed, undirected = graphs.pipe_graph_split(tk_graph)

    n_nodes = len(tk_graph.nodes)
    n_edges = len(tk_graph.edges)

    # the returned token graph has the same size as the input
    assert len(directed.nodes) == n_nodes
    assert len(directed.edges) == n_edges
    # the undirected graph keeps the nodes ...
    assert len(directed.nodes) == len(undirected.nodes)
    assert len(undirected.nodes) == n_nodes
    # ... but its edge count differs from the input for this fixture
    assert len(undirected.edges) != n_edges
@pytest.mark.parametrize('import_graph', ['graph', 'tk_graph']) @pytest.mark.parametrize('import_graph', ['graph', 'tk_graph'])
def test_rescale_edge_weights(import_graph, request): def test_rescale_edge_weights(import_graph, request):
test_graph = request.getfixturevalue(import_graph) test_graph = request.getfixturevalue(import_graph)

View File

@ -29,13 +29,6 @@ def test_remove_na(raw_data_path, raw_data_date_cols):
assert len(data) == 998 assert len(data) == 998
# def test_string_cleansing():
# string = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
# cleaned_string = shared.clean_string_slim(string)
# target_string = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
# assert cleaned_string == target_string
def test_entry_wise_cleansing(raw_data_path, raw_data_date_cols): def test_entry_wise_cleansing(raw_data_path, raw_data_date_cols):
(data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols) (data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
(data,) = ppc.remove_duplicates(data) (data,) = ppc.remove_duplicates(data)

View File

@ -28,14 +28,26 @@ def test_build_tk_graph_post_pipe():
assert len(pipe.actions) == 3 assert len(pipe.actions) == 3
def test_build_tk_graph_rescaling_pipe(): def test_build_tk_graph_rescaling_pipe_with_edge_rescaling():
pipe = pre.build_tk_graph_rescaling_pipe( pipe = pre.build_tk_graph_rescaling_pipe(
save_result=False, exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED save_result=False,
exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
enable_rescaling=True,
) )
assert pipe.name == 'Graph_Rescaling' assert pipe.name == 'Graph_Rescaling'
assert len(pipe.actions) == 2 assert len(pipe.actions) == 2
def test_build_tk_graph_rescaling_pipe_without_edge_rescaling():
    """Check the rescaling pipeline built with edge rescaling switched off."""
    pipe = pre.build_tk_graph_rescaling_pipe(
        save_result=False,
        exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
        enable_rescaling=False,
    )

    assert pipe.name == 'Graph_Rescaling'
    # with rescaling disabled the builder adds `graphs.pipe_graph_split` in
    # place of the rescaling step and still adds `graphs.pipe_add_graph_metrics`,
    # so the pipeline holds two actions, same as in the rescaling case
    assert len(pipe.actions) == 2
@pytest.mark.parametrize('with_subgraphs', [True, False]) @pytest.mark.parametrize('with_subgraphs', [True, False])
def test_build_tk_graph_render_pipe(with_subgraphs): def test_build_tk_graph_render_pipe(with_subgraphs):
pipe = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs) pipe = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs)