started adding comprehensive unit tests
This commit is contained in:
0
tests/analysis/__init__.py
Normal file
0
tests/analysis/__init__.py
Normal file
168
tests/analysis/test_graphs.py
Normal file
168
tests/analysis/test_graphs.py
Normal file
@@ -0,0 +1,168 @@
|
||||
import networkx as nx
|
||||
import pytest
|
||||
|
||||
from lang_main.analysis import graphs
|
||||
|
||||
TK_GRAPH_NAME = 'TEST_TOKEN_GRAPH'
|
||||
|
||||
|
||||
def build_init_graph(token_graph: bool):
    """Build the small weighted, directed reference graph used by the tests.

    The graph consists of the nodes 1-4 and six directed edges. The edge
    weights run from 1 to 6 in the order in which the edges are listed.

    Args:
        token_graph: if True a ``graphs.TokenGraph`` named ``TK_GRAPH_NAME``
            (logging disabled) is populated, otherwise a plain
            ``networkx.DiGraph``.

    Returns:
        The graph object after all weighted edges have been added.
    """
    node_pairs = ((1, 2), (1, 3), (2, 4), (3, 4), (1, 4), (2, 1))
    # weights follow the listing order of the edges: 1, 2, ..., 6
    weighted_edges = [
        (source, target, {'weight': weight})
        for weight, (source, target) in enumerate(node_pairs, start=1)
    ]

    G = (
        graphs.TokenGraph(name=TK_GRAPH_NAME, enable_logging=False)
        if token_graph
        else nx.DiGraph()
    )
    G.add_edges_from(weighted_edges)

    return G
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def graph():
    """Provide the plain directed reference graph (module-scoped fixture)."""
    directed_graph = build_init_graph(token_graph=False)
    return directed_graph
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def tk_graph():
    """Provide the reference graph as a token graph (module-scoped fixture)."""
    token_graph = build_init_graph(token_graph=True)
    return token_graph
|
||||
|
||||
|
||||
def test_graph_size(graph):
    """The reference graph must contain four nodes and six edges."""
    node_count = len(graph.nodes)
    assert node_count == 4
    edge_count = len(graph.edges)
    assert edge_count == 6
|
||||
|
||||
|
||||
def test_save_to_GraphML(graph, tmp_path):
    """Saving the graph must create a ``.graphml`` file in the target folder."""
    filename = 'test_graphML'
    # path at which the exported file is expected to show up
    expected_file = (tmp_path / filename).with_suffix('.graphml')

    graphs.save_to_GraphML(graph, saving_path=tmp_path, filename=filename)

    assert expected_file.exists()
|
||||
|
||||
|
||||
def test_metadata_retrieval(graph):
    """The metadata of the reference graph must match the known figures."""
    # NOTE(review): the memory figures are hard-coded; presumably they depend
    # on the interpreter's object sizes -- confirm when changing platforms.
    expected = {
        'num_nodes': 4,
        'num_edges': 6,
        'min_edge_weight': 1,
        'max_edge_weight': 6,
        'node_memory': 112,
        'edge_memory': 336,
        'total_memory': 448,
    }
    metadata = graphs.get_graph_metadata(graph)
    for key, value in expected.items():
        assert metadata[key] == value
|
||||
|
||||
|
||||
def test_graph_update_batch():
    """A batch update with two new edges must extend nodes and edges."""
    # a fresh graph is built so the update does not touch the shared fixtures
    fresh_graph = build_init_graph(token_graph=False)
    new_edges = ((4, 5), (5, 6))
    graphs.update_graph(fresh_graph, batch=new_edges, weight_connection=8)

    expected = {
        'num_nodes': 6,
        'num_edges': 8,
        'min_edge_weight': 1,
        'max_edge_weight': 8,
    }
    metadata = graphs.get_graph_metadata(fresh_graph)
    for key, value in expected.items():
        assert metadata[key] == value
|
||||
|
||||
|
||||
def test_graph_update_single_new():
    """Adding one previously unknown edge must add one node and one edge."""
    # a fresh graph is built so the update does not touch the shared fixtures
    fresh_graph = build_init_graph(token_graph=False)
    graphs.update_graph(fresh_graph, parent=4, child=5, weight_connection=7)

    expected = {
        'num_nodes': 5,
        'num_edges': 7,
        'min_edge_weight': 1,
        'max_edge_weight': 7,
    }
    metadata = graphs.get_graph_metadata(fresh_graph)
    for key, value in expected.items():
        assert metadata[key] == value
|
||||
|
||||
|
||||
def test_graph_update_single_existing():
    """Updating an existing edge must change its weight, not the graph size."""
    # a fresh graph is built so the update does not touch the shared fixtures
    fresh_graph = build_init_graph(token_graph=False)
    # edge (1, 4) is created with weight 5; after the update with
    # weight_connection=5 the maximum edge weight is expected to be 10
    graphs.update_graph(fresh_graph, parent=1, child=4, weight_connection=5)

    expected = {
        'num_nodes': 4,
        'num_edges': 6,
        'min_edge_weight': 1,
        'max_edge_weight': 10,
    }
    metadata = graphs.get_graph_metadata(fresh_graph)
    for key, value in expected.items():
        assert metadata[key] == value
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cast_int', [True, False])
def test_graph_undirected_conversion(graph, cast_int):
    """The undirected edge (1, 2) must carry the weight 7 for both settings."""
    undirected = graphs.convert_graph_to_undirected(graph, cast_int=cast_int)
    # edges: (1, 2, w=1) and (2, 1, w=6) --> undirected: (1, 2, w=7)
    merged_weight = undirected[1][2]['weight']
    assert merged_weight == pytest.approx(7.0)
|
||||
|
||||
|
||||
def test_graph_cytoscape_conversion(graph):
    """Check first node, last edge and weight range of the Cytoscape export."""
    elements, weight_data = graphs.convert_graph_to_cytoscape(graph)
    # the test expects a node as first and an edge as last element
    first_node, last_edge = elements[0], elements[-1]

    assert first_node['data']['id'] == 1  # type: ignore

    edge_data = last_edge['data']  # type: ignore
    assert edge_data['source'] == 3
    assert edge_data['target'] == 4
    assert edge_data['weight'] == 4

    assert weight_data['min'] == 1
    assert weight_data['max'] == 6
|
||||
|
||||
|
||||
def test_tk_graph_properties(tk_graph):
    """Check name, graph types and both metadata sets of the token graph."""
    assert tk_graph.name == TK_GRAPH_NAME
    assert isinstance(tk_graph.directed, graphs.TokenGraph)
    assert isinstance(tk_graph.undirected, nx.Graph)

    tk_graph.update_metadata()

    # NOTE(review): the memory figures are hard-coded; presumably they depend
    # on the interpreter's object sizes -- confirm when changing platforms.
    expected_directed = {
        'num_nodes': 4,
        'num_edges': 6,
        'min_edge_weight': 1,
        'max_edge_weight': 6,
        'node_memory': 112,
        'edge_memory': 336,
        'total_memory': 448,
    }
    actual_directed = tk_graph.metadata_directed
    for key, value in expected_directed.items():
        assert actual_directed[key] == value

    expected_undirected = {
        'num_nodes': 4,
        'num_edges': 5,
        'min_edge_weight': 2,
        'max_edge_weight': 7,
        'node_memory': 112,
        'edge_memory': 280,
        'total_memory': 392,
    }
    actual_undirected = tk_graph.metadata_undirected
    for key, value in expected_undirected.items():
        assert actual_undirected[key] == value
|
||||
|
||||
|
||||
def test_graph_degree_filter(tk_graph):
    """Filtering with lower and upper degree bound 3 must leave two nodes."""
    degree_bounds = {'bound_lower': 3, 'bound_upper': 3}
    filtered_graph = graphs.filter_graph_by_node_degree(tk_graph, **degree_bounds)
    remaining_nodes = len(filtered_graph.nodes)
    assert remaining_nodes == 2
|
||||
|
||||
|
||||
def test_graph_edge_number_filter(tk_graph):
    """Limit the graph to one edge, then apply the degree filter on it.

    After the edge limit exactly one edge must remain; the subsequent degree
    filter (lower bound 1, no upper bound) must leave the two nodes of it.
    """
    number_edges_limit = 1
    edge_limited = graphs.filter_graph_by_number_edges(
        tk_graph, limit=number_edges_limit
    )
    assert len(edge_limited.edges) == number_edges_limit

    degree_filtered = graphs.filter_graph_by_node_degree(
        edge_limited, bound_lower=1, bound_upper=None
    )
    remaining_nodes = len(degree_filtered.nodes)
    assert remaining_nodes == 2, 'one edge should result in only two nodes'
|
||||
73
tests/analysis/test_preprocessing.py
Normal file
73
tests/analysis/test_preprocessing.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""testing each function in a consecutive way like each one is
|
||||
executed in a pipeline
|
||||
"""
|
||||
|
||||
from lang_main.analysis import preprocessing as ppc
|
||||
from lang_main.analysis import shared
|
||||
|
||||
|
||||
def test_load_data(raw_data_path, raw_data_date_cols):
    """Loading the raw data set must yield 1000 entries."""
    # the loader returns a one-element container, hence the unpacking
    (raw_data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    num_entries = len(raw_data)
    assert num_entries == 1000
|
||||
|
||||
|
||||
def test_remove_simple_duplicates(raw_data_path, raw_data_date_cols):
    """After duplicate removal 999 entries must remain."""
    (raw_data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    (deduplicated,) = ppc.remove_duplicates(raw_data)
    num_entries = len(deduplicated)
    assert num_entries == 999
|
||||
|
||||
|
||||
def test_remove_na(raw_data_path, raw_data_date_cols):
    """After duplicate and NA removal 998 entries must remain."""
    target_features: tuple[str] = ('VorgangsBeschreibung',)

    (raw_data,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    (deduplicated,) = ppc.remove_duplicates(raw_data)
    (without_na,) = ppc.remove_NA(deduplicated, target_features)

    num_entries = len(without_na)
    assert num_entries == 998
|
||||
|
||||
|
||||
# def test_string_cleansing():
|
||||
# string = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
|
||||
# cleaned_string = shared.clean_string_slim(string)
|
||||
# target_string = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
|
||||
# assert cleaned_string == target_string
|
||||
|
||||
|
||||
def test_entry_wise_cleansing(raw_data_path, raw_data_date_cols):
    """Check the string cleansing on a literal and on the first data entry."""
    feature = 'VorgangsBeschreibung'
    target_features: tuple[str] = (feature,)

    # previous pipeline steps: load, drop duplicates, drop NA entries
    (frame,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    (frame,) = ppc.remove_duplicates(frame)
    (frame,) = ppc.remove_NA(frame, target_features)

    # stand-alone check of the cleansing function on a literal string
    raw_text = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
    expected_text = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
    assert shared.clean_string_slim(raw_text) == expected_text

    # the first entry must hold the uncleansed text before the cleansing ...
    stored_text = 'Ölleckage durch\nundichten Ölsumpf,, aber Dichtung intakt??!!!'
    assert frame.at[0, feature] == stored_text

    # ... and the expected cleansed text afterwards
    (frame,) = shared.entry_wise_cleansing(
        frame,
        target_features=target_features,
        cleansing_func=shared.clean_string_slim,
    )
    assert frame.at[0, feature] == expected_text
|
||||
|
||||
|
||||
def test_analyse_feature(raw_data_path, raw_data_date_cols):
    """Run all previous pipeline steps, then analyse the target feature.

    The feature analysis of the cleansed data must yield 139 entries.
    """
    feature = 'VorgangsBeschreibung'
    target_features: tuple[str] = (feature,)

    # previous pipeline steps: load, drop duplicates, drop NA entries
    (frame,) = ppc.load_raw_data(raw_data_path, raw_data_date_cols)
    (frame,) = ppc.remove_duplicates(frame)
    (frame,) = ppc.remove_NA(frame, target_features)

    # stand-alone check of the cleansing function on a literal string
    raw_text = 'Ölleckage durch\nundichten \t Ölsumpf,, aber Dichtung intakt??!!!'
    expected_text = 'Ölleckage durch undichten Ölsumpf, aber Dichtung intakt!'
    assert shared.clean_string_slim(raw_text) == expected_text

    # the first entry must hold the uncleansed text before the cleansing ...
    stored_text = 'Ölleckage durch\nundichten Ölsumpf,, aber Dichtung intakt??!!!'
    assert frame.at[0, feature] == stored_text

    # ... and the expected cleansed text afterwards
    (frame,) = shared.entry_wise_cleansing(
        frame,
        target_features=target_features,
        cleansing_func=shared.clean_string_slim,
    )
    assert frame.at[0, feature] == expected_text

    (analysed,) = ppc.analyse_feature(frame, target_feature=target_features[0])
    num_entries = len(analysed)
    assert num_entries == 139
|
||||
Reference in New Issue
Block a user