Add an option to disable edge rescaling so that edge weights can be provided as integers, closes #1

This commit is contained in:
Florian Förster 2025-06-20 09:06:51 +02:00
parent a7718c12cd
commit 63eb274975
7 changed files with 53 additions and 14 deletions

View File

@ -1,6 +1,6 @@
[project]
name = "lang-main"
version = "0.1.2dev1"
version = "0.1.2"
description = "Several tools to analyse TOM's data with strong focus on language processing"
authors = [
{name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"},
@ -132,7 +132,7 @@ directory = "reports/coverage"
[tool.bumpversion]
current_version = "0.1.2dev1"
current_version = "0.1.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@ -467,6 +467,27 @@ def pipe_rescale_graph_edge_weights(
return graph.rescale_edge_weights()
def pipe_graph_split(
    graph: TokenGraph,
) -> tuple[TokenGraph, Graph]:
    """Pair a copy of the token graph with its undirected counterpart.

    No rescaling call is made here; the graph is only copied and converted.

    Parameters
    ----------
    graph : TokenGraph
        token graph pushed through the pipeline

    Returns
    -------
    tuple[TokenGraph, Graph]
        copy of the token graph and the undirected graph obtained from that
        copy via ``to_undirected(inplace=False)``
    """
    # the undirected graph is derived from the copy, not from the caller's instance
    directed_copy = graph.copy()
    return directed_copy, directed_copy.to_undirected(inplace=False)
def normalise_array_linear(
array: npt.NDArray[np.float32],
) -> npt.NDArray[np.float32]:

View File

@ -164,11 +164,15 @@ def build_tk_graph_post_pipe() -> Pipeline:
def build_tk_graph_rescaling_pipe(
save_result: bool,
exit_point: EntryPoints,
enable_rescaling: bool = True,
) -> Pipeline:
pipe_graph_rescaling = Pipeline(name='Graph_Rescaling', working_dir=SAVE_PATH_FOLDER)
pipe_graph_rescaling.add(
graphs.pipe_rescale_graph_edge_weights,
)
if enable_rescaling:
pipe_graph_rescaling.add(
graphs.pipe_rescale_graph_edge_weights,
)
else:
pipe_graph_rescaling.add(graphs.pipe_graph_split)
pipe_graph_rescaling.add(
graphs.pipe_add_graph_metrics,
save_result=save_result,

View File

@ -326,6 +326,15 @@ def test_pipe_rescale_graph_edge_weights(tk_graph):
assert rescaled_undir[1][2]['weight'] == pytest.approx(1.0)
def test_pipe_graph_split(tk_graph):
    """Check node and edge counts of both graphs returned by ``pipe_graph_split``."""
    directed, undirected = graphs.pipe_graph_split(tk_graph)
    expected_nodes = len(tk_graph.nodes)
    expected_edges = len(tk_graph.edges)

    # the first returned graph must match the input in size
    assert len(directed.nodes) == expected_nodes
    assert len(directed.edges) == expected_edges
    # both returned graphs share the same node count as the input
    assert len(directed.nodes) == len(undirected.nodes)
    assert len(undirected.nodes) == expected_nodes
    # for this fixture the undirected edge count is expected to differ
    assert len(undirected.edges) != expected_edges
@pytest.mark.parametrize('import_graph', ['graph', 'tk_graph'])
def test_rescale_edge_weights(import_graph, request):
test_graph = request.getfixturevalue(import_graph)

View File

@ -29,13 +29,6 @@ def test_remove_na(raw_data_path, raw_data_date_cols):
assert len(data) == 998
# def test_string_cleansing():
# string = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
# cleaned_string = shared.clean_string_slim(string)
# target_string = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
# assert cleaned_string == target_string
def test_entry_wise_cleansing(raw_data_path, raw_data_date_cols):
(data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
(data,) = ppc.remove_duplicates(data)

View File

@ -28,14 +28,26 @@ def test_build_tk_graph_post_pipe():
assert len(pipe.actions) == 3
def test_build_tk_graph_rescaling_pipe():
def test_build_tk_graph_rescaling_pipe_with_edge_rescaling():
pipe = pre.build_tk_graph_rescaling_pipe(
save_result=False, exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED
save_result=False,
exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
enable_rescaling=True,
)
assert pipe.name == 'Graph_Rescaling'
assert len(pipe.actions) == 2
def test_build_tk_graph_rescaling_pipe_without_edge_rescaling():
    """Build the rescaling pipeline with ``enable_rescaling=False`` and check its shape."""
    builder_kwargs = {
        'save_result': False,
        'exit_point': EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
        'enable_rescaling': False,
    }
    pipeline = pre.build_tk_graph_rescaling_pipe(**builder_kwargs)

    assert pipeline.name == 'Graph_Rescaling'
    # NOTE(review): a single action is expected here, whereas the variant with
    # rescaling enabled expects two -- confirm this matches the steps the
    # builder actually adds when ``enable_rescaling=False``.
    assert len(pipeline.actions) == 1
@pytest.mark.parametrize('with_subgraphs', [True, False])
def test_build_tk_graph_render_pipe(with_subgraphs):
pipe = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs)