Improved test coverage and automation scripts

This commit is contained in:
Florian Förster
2024-11-26 16:11:25 +01:00
parent 9291b53f93
commit 38aa0739ad
33 changed files with 979 additions and 297 deletions

View File

@@ -1,5 +1,3 @@
from pathlib import Path
import pytest
from lang_main import model_loader
@@ -44,14 +42,14 @@ def test_obtain_relevant_descendants(spacy_model):
doc = spacy_model(SENTENCE)
sent1 = tuple(doc.sents)[0] # first sentence
word1 = sent1[1] # word "ging" (POS:VERB)
descendants1 = ('0912393', 'schnell', 'Wiese', 'Menschen')
descendants1 = ('ID', '0912393', 'schnell', 'Wiese', 'Menschen')
rel_descs = tokens.obtain_relevant_descendants(word1)
rel_descs = tuple((token.text for token in rel_descs))
assert descendants1 == rel_descs
sent2 = tuple(doc.sents)[1] # first sentence
word2 = sent2[1] # word "konnte" (POS:AUX)
descendants2 = ('mit', 'Probleme', 'Tragen', 'Tasche')
descendants2 = ('Probleme', 'Tragen', 'Tasche')
rel_descs = tokens.obtain_relevant_descendants(word2)
rel_descs = tuple((token.text for token in rel_descs))
assert descendants2 == rel_descs
@@ -62,7 +60,7 @@ def test_add_doc_info_to_graph(spacy_model):
tk_graph = graphs.TokenGraph()
tokens.add_doc_info_to_graph(tk_graph, doc, weight=2)
assert len(tk_graph.nodes) == 11
assert len(tk_graph.edges) == 17
assert len(tk_graph.edges) == 16
assert '0912393' in tk_graph.nodes

View File

@@ -0,0 +1,227 @@
from pathlib import Path
import pytest
from lang_main import io
from lang_main.errors import (
NoPerformableActionError,
OutputInPipelineContainerError,
WrongActionTypeError,
)
from lang_main.pipelines import base
PIPELINE_NAME = 'test'
@pytest.fixture(scope='module')
def working_dir() -> Path:
    """Return the directory ``<cwd>/tests/work_dir``, creating it on demand.

    Returns:
        Path to the working directory used by the pipeline fixtures.
    """
    work_dir = Path.cwd() / 'tests/work_dir'
    # ``exist_ok`` avoids the check-then-create race; ``parents`` keeps the
    # fixture usable when ``tests/`` does not exist below the current directory.
    work_dir.mkdir(parents=True, exist_ok=True)
    return work_dir
@pytest.fixture(scope='function')
def pipeline_container(working_dir) -> base.PipelineContainer:
    """Provide a fresh, empty ``PipelineContainer`` for every test."""
    container = base.PipelineContainer(
        name=PIPELINE_NAME,
        working_dir=working_dir,
    )
    return container
@pytest.fixture(scope='function')
def pipeline(working_dir) -> base.Pipeline:
    """Provide a fresh, empty ``Pipeline`` for every test."""
    pipe = base.Pipeline(
        name=PIPELINE_NAME,
        working_dir=working_dir,
    )
    return pipe
def test_empty_pipeline_container(pipeline_container, working_dir):
    """Check the initial state of a container without any actions."""
    assert pipeline_container.name == PIPELINE_NAME
    assert pipeline_container.working_dir == working_dir

    # all bookkeeping collections start out empty
    for attr_name in ('actions', 'action_names', 'action_skip'):
        assert len(getattr(pipeline_container, attr_name)) == 0
    assert pipeline_container.curr_proc_idx == 1

    # nothing was added, so preparing a run must fail
    with pytest.raises(NoPerformableActionError):
        pipeline_container.prep_run()

    assert pipeline_container.post_run() is None
@pytest.mark.parametrize('skip', [True, False])
def test_pipeline_container_valid(pipeline_container, skip):
    """Run a container with one output-free action, skipped or not."""
    test_string = 'test'

    def valid_action():  # pragma: no cover
        nonlocal test_string
        test_string = test_string + '_2'

    pipeline_container.add(valid_action, skip=skip)
    for attr_name in ('actions', 'action_names', 'action_skip'):
        assert len(getattr(pipeline_container, attr_name)) == 1

    result = pipeline_container.run()
    assert pipeline_container.curr_proc_idx == 2
    assert result is None

    # a skipped action must leave the captured string untouched
    expected = 'test' if skip else 'test_2'
    assert test_string == expected

    pipeline_container.prep_run()
    assert pipeline_container.curr_proc_idx == 1
def test_pipeline_container_invalid_action(pipeline_container):
    """Reject non-callable actions and actions that return a value."""
    test_string = 'test'

    def invalid_action():
        nonlocal test_string
        test_string = test_string + '_2'
        return 'ret'

    # a plain string is not an action
    with pytest.raises(WrongActionTypeError):
        pipeline_container.add(test_string, skip=False)

    # adding works, but running fails because the action returns output
    pipeline_container.add(invalid_action, skip=False)
    with pytest.raises(OutputInPipelineContainerError):
        pipeline_container.run()
def test_empty_pipeline(pipeline, working_dir):
    """Check the initial state of a pipeline without any actions."""
    assert pipeline.name == PIPELINE_NAME
    assert pipeline.working_dir == working_dir

    # all bookkeeping collections start out empty
    empty_attrs = (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    )
    for attr_name in empty_attrs:
        assert len(getattr(pipeline, attr_name)) == 0

    assert pipeline.curr_proc_idx == 1
    assert pipeline._intermediate_result is None

    # nothing was added, so preparing a run must fail
    with pytest.raises(NoPerformableActionError):
        pipeline.prep_run()

    assert pipeline.post_run() is None
@pytest.mark.parametrize('alter_content', [True, False])
def test_pipeline_valid(pipeline, alter_content):
    """Exercise a single-action pipeline: bookkeeping, paths, run and load."""
    test_string = 'test'

    # action preparation
    def valid_action(string, add_content=False):
        return string + '_2' if add_content else string

    pipeline.add(valid_action, {'add_content': alter_content})
    for attr_name in (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    ):
        assert len(getattr(pipeline, attr_name)) == 1
    assert pipeline.save_results[0] == (False, None)
    assert pipeline.load_results[0] == (False, None)

    # filenames and saving/loading
    default_name = f'Pipe-{pipeline.name}_Step-{pipeline.curr_proc_idx}_valid_action'
    expected_pth = (pipeline.working_dir / default_name).with_suffix('.pkl')
    result_pth, action_name = pipeline.get_result_path(0, filename=None)
    assert result_pth == expected_pth
    assert action_name == 'valid_action'

    custom_name = 'test'
    result_pth, action_name = pipeline.get_result_path(0, filename=custom_name)
    expected_pth = (pipeline.working_dir / custom_name).with_suffix('.pkl')
    assert result_pth == expected_pth
    assert action_name == 'valid_action'

    # load non-existing files
    with pytest.raises(FileNotFoundError):
        pipeline.load_step(0, 'non_existing')

    # running
    ret = pipeline.run(starting_values=(test_string,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 2
    assert ret is not None
    expected_content = 'test_2' if alter_content else 'test'
    assert ret[0] == expected_content

    pipeline.prep_run()
    assert pipeline.curr_proc_idx == 1

    # load existing files
    # NOTE(review): the action was added without saving its result, so this
    # presumably relies on a pickle already present in the working directory
    # -- confirm.
    loaded = pipeline.load_step(0, None)
    assert loaded is not None
    assert isinstance(loaded, tuple)
    assert loaded[0] == ret[0]
def test_pipeline_valid_action_load(pipeline, working_dir):
    """Run an action flagged with ``load_result`` and load a malformed pickle."""
    test_string = 'test'

    # action preparation
    def valid_action(string, add_content=False):
        return string + '_2' if add_content else string

    pipeline.add(valid_action, {'add_content': False}, load_result=True)
    for attr_name in (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    ):
        assert len(getattr(pipeline, attr_name)) == 1
    assert pipeline.save_results[0] == (False, None)
    assert pipeline.load_results[0] == (True, None)

    ret = pipeline.run(starting_values=(test_string,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 2
    assert ret is not None

    # load non-tuple result: a pickled bare string must be rejected
    filename = 'non_tuple.pkl'
    io.save_pickle(test_string, working_dir / filename)
    with pytest.raises(TypeError):
        pipeline.load_step(0, filename)
def test_pipeline_multiple_actions(pipeline):
    """Chain two actions and check that the second receives the first's output."""
    test_string = 'test'

    # action preparation
    def valid_action(string, add_content=True):
        return string + '_2' if add_content else string

    def valid_action_2(string, add_content=True):
        return string + '_3' if add_content else string

    pipeline.add(valid_action, {'add_content': True})
    pipeline.add(valid_action_2)
    for attr_name in (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    ):
        assert len(getattr(pipeline, attr_name)) == 2
    assert pipeline.save_results[1] == (False, None)
    assert pipeline.load_results[1] == (False, None)

    ret = pipeline.run(starting_values=(test_string,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 3
    assert ret is not None
    assert ret[0] == 'test_2_3'

View File

@@ -0,0 +1,52 @@
import pytest
from lang_main.pipelines import predefined as pre
from lang_main.types import EntryPoints
def test_build_base_target_feature_pipe():
    """The target-feature pipeline has the expected name and five actions."""
    built = pre.build_base_target_feature_pipe()
    num_actions = len(built.actions)
    assert built.name == 'Target_Feature'
    assert num_actions == 5
def test_build_merge_duplicates_pipe():
    """The merge-duplicates pipeline has the expected name and two actions."""
    built = pre.build_merge_duplicates_pipe()
    num_actions = len(built.actions)
    assert built.name == 'Merge_Duplicates'
    assert num_actions == 2
def test_build_tk_graph_pipe():
    """The token-analysis pipeline has the expected name and one action."""
    built = pre.build_tk_graph_pipe()
    num_actions = len(built.actions)
    assert built.name == 'Token_Analysis'
    assert num_actions == 1
def test_build_tk_graph_post_pipe():
    """The graph post-processing pipeline has the expected name and three actions."""
    built = pre.build_tk_graph_post_pipe()
    num_actions = len(built.actions)
    assert built.name == 'Graph_Postprocessing'
    assert num_actions == 3
def test_build_tk_graph_rescaling_pipe():
    """The graph rescaling pipeline has the expected name and two actions."""
    exit_point = EntryPoints.TK_GRAPH_ANALYSIS_RESCALED
    built = pre.build_tk_graph_rescaling_pipe(save_result=False, exit_point=exit_point)
    num_actions = len(built.actions)
    assert built.name == 'Graph_Rescaling'
    assert num_actions == 2
@pytest.mark.parametrize('with_subgraphs', [True, False])
def test_build_tk_graph_render_pipe(with_subgraphs):
    """The render pipeline has six actions with subgraphs, four without."""
    built = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs)
    assert built.name == 'Graph_Static-Rendering'
    expected_actions = 6 if with_subgraphs else 4
    assert len(built.actions) == expected_actions
def test_build_timeline_pipe():
    """The timeline pipeline has the expected name and six actions."""
    built = pre.build_timeline_pipe()
    num_actions = len(built.actions)
    assert built.name == 'Timeline_Analysis'
    assert num_actions == 6

0
tests/render/__init__.py Normal file
View File

View File

@@ -0,0 +1,227 @@
"""Tests for Cytoscape API requests.

Full validation needs a running Cytoscape server. When no Cytoscape instance is
reachable, the tests only check that the requests fail as expected.
The validation of the correct behaviour can only be done with a running instance,
especially for layout and formatting tasks. A static test suite is not helpful in
this case.
"""
import py4cytoscape as p4c
import pytest
from py4cytoscape.exceptions import CyError
from requests.exceptions import RequestException
from lang_main.constants import CYTO_BASE_NETWORK_NAME, CYTO_SELECTION_PROPERTY
from lang_main.errors import GraphRenderError
from lang_main.render import cytoscape as cyto
# Probe the Cytoscape server once at import time; the tests below branch on
# the outcome.
try:
    p4c.cytoscape_ping()
except RequestException:
    _cyto_available: bool = False
else:
    _cyto_available = True
@pytest.fixture(scope='module')
def avail() -> bool:
    """Expose the module-level Cytoscape availability flag to the tests."""
    is_available = _cyto_available
    return is_available
@pytest.mark.cyto
def test_verify_connection(avail):
    """Connection check succeeds with a server and raises without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.verify_connection()
        return

    cyto.verify_connection()
def test_verify_graph_render_size(data_tk_graph_built):
    """No limits pass; a zero node or edge limit raises ``GraphRenderError``."""
    cyto.verify_graph_render_size(
        data_tk_graph_built, max_node_count=None, max_edge_count=None
    )

    # first the node limit, then the edge limit is set to zero
    violating_limits = (
        {'max_node_count': 0, 'max_edge_count': None},
        {'max_node_count': None, 'max_edge_count': 0},
    )
    for limits in violating_limits:
        with pytest.raises(GraphRenderError):
            cyto.verify_graph_render_size(data_tk_graph_built, **limits)
@pytest.mark.cyto
def test_change_default_layout(avail):
    """Changing the default layout works with a server and raises without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.change_default_layout()
        return

    cyto.change_default_layout()
@pytest.mark.cyto
def test_import_to_cytoscape(avail, data_tk_graph_built):
    """Importing the graph works with a server and raises without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.import_to_cytoscape(data_tk_graph_built)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
@pytest.mark.cyto
def test_verify_table_property(avail, data_tk_graph_built):
    """Known node-table properties are found, an unknown one is not."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.verify_table_property(property='TEST', table_type='node')
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    assert not cyto.verify_table_property(property='TEST', table_type='node')
    assert cyto.verify_table_property(property='name', table_type='node')
    assert cyto.verify_table_property(property='degree_weighted', table_type='node')
@pytest.mark.cyto
def test_analyse_network(avail, data_tk_graph_built):
    """Analyse the imported network and check the resulting node properties.

    With a server, the node table must contain ``name`` and the selection
    property after the analysis. Without one, the call must raise
    ``RequestException``.
    """
    if avail:
        cyto.import_to_cytoscape(data_tk_graph_built)
        cyto.analyse_network()
        contained = cyto.verify_table_property(property='name', table_type='node')
        assert contained
        contained = cyto.verify_table_property(
            property=CYTO_SELECTION_PROPERTY, table_type='node'
        )
        assert contained
    else:
        with pytest.raises(RequestException):
            # Same call as in the available branch: the graph is not an
            # argument of ``analyse_network``.
            cyto.analyse_network()
@pytest.mark.cyto
def test_reset_current_network_to_base(avail):
    """Resetting to the base network works with a server and raises without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.reset_current_network_to_base()
        return

    cyto.reset_current_network_to_base()
@pytest.mark.cyto
def test_fit_content(avail, data_tk_graph_built):
    """Fitting the content works after an import and raises without a server."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.fit_content()
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.fit_content()
@pytest.mark.cyto
def test_export_network_to_image(avail, tmp_path, data_tk_graph_built):
    """Exporting writes ``<filename>.svg`` to the target folder."""
    filename = 'test_export'
    if not avail:
        with pytest.raises(RequestException):
            cyto.export_network_to_image(filename=filename, target_folder=tmp_path)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.export_network_to_image(filename=filename, target_folder=tmp_path)
    expected_name = f'{filename}.svg'
    exported = tmp_path / expected_name
    assert exported.exists()
    assert exported.name == expected_name
@pytest.mark.cyto
def test_layout_network(avail, data_tk_graph_built):
    """Laying out the network works after an import and raises without a server."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.layout_network()
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.layout_network()
@pytest.mark.cyto
def test_apply_style_to_network(avail, data_tk_graph_built, tmp_path):
    """Cover missing stylesheet, unknown style name and the default style."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.apply_style_to_network()
        return

    unknown_style = 'testing'
    missing_stylesheet = tmp_path / 'test.xml'
    with pytest.raises(FileNotFoundError):
        cyto.apply_style_to_network(
            style_name=unknown_style,
            pth_to_stylesheet=missing_stylesheet,
        )

    cyto.import_to_cytoscape(data_tk_graph_built)
    # not existing: so transfer necessary,
    # but fails nevertheless because style is imported
    # using the name provided by this style configuration
    with pytest.raises(CyError):
        cyto.apply_style_to_network(style_name=unknown_style)

    cyto.apply_style_to_network()
@pytest.mark.cyto
def test_get_subgraph_node_selection(avail, data_tk_graph_built):
    """Node selection after import and analysis yields at least one SUID."""
    num_subgraphs = 2
    if not avail:
        with pytest.raises(RequestException):
            cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.analyse_network()
    selected = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
    assert len(selected) > 0
@pytest.mark.cyto
def test_select_neighbours_of_node(avail, data_tk_graph_built):
    """Neighbour selection runs for the first selected node."""
    num_subgraphs = 2
    if not avail:
        with pytest.raises(RequestException):
            cyto.select_neighbours_of_node(123, neighbour_iter_depth=2)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
    assert len(selected) > 0
    first_suid = selected[0]
    cyto.select_neighbours_of_node(first_suid, neighbour_iter_depth=2)
@pytest.mark.cyto
def test_make_subnetwork(avail, data_tk_graph_built, tmp_path):
    """Creating a subnetwork adds a network and exports its SVG image."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected = cyto.get_subgraph_node_selection(num_subgraphs=2)
    assert len(selected) > 0
    cyto.select_neighbours_of_node(selected[0], neighbour_iter_depth=2)
    cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True)

    subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1'
    # base network plus the new subnetwork
    assert len(p4c.get_network_list()) > 1
    exported = (tmp_path / subnetwork_name).with_suffix('.svg')
    assert exported.exists()
@pytest.mark.cyto
def test_build_subnetworks(avail, data_tk_graph_built, tmp_path):
    """Building subnetworks adds a network and exports its SVG image."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.build_subnetworks([123], export_image=True, target_folder=tmp_path)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected = cyto.get_subgraph_node_selection(num_subgraphs=1)
    assert len(selected) > 0
    cyto.build_subnetworks(selected, export_image=True, target_folder=tmp_path)

    subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1'
    # base network plus the new subnetwork
    assert len(p4c.get_network_list()) > 1
    exported = (tmp_path / subnetwork_name).with_suffix('.svg')
    assert exported.exists()

View File

@@ -1,7 +1,64 @@
from lang_main import config, pkg_dir
import sys
from importlib import reload
from pathlib import Path
from unittest.mock import patch
from lang_main import config
def test_p4c_dependency():
    """Check that ``config`` detects a missing ``py4cytoscape`` on reload.

    The module is reloaded while ``py4cytoscape`` is masked in
    ``sys.modules``. It is reloaded again afterwards so the changed
    ``_has_py4cyto`` flag does not leak into other tests.
    """
    assert config._has_py4cyto
    try:
        # a ``None`` entry in ``sys.modules`` makes the import fail
        with patch.dict(sys.modules, {'py4cytoscape': None}):
            reload(sys.modules['lang_main.config'])
            assert not config._has_py4cyto
    finally:
        # ``patch.dict`` has restored ``sys.modules``; re-run the module
        # body to bring the flag back to its original state
        reload(config)
def test_load_config():
    """Load the packaged TOML config and check its package marker."""
    # The earlier assignment via the legacy ``pkg_dir`` import was a dead
    # store, overwritten immediately; only ``config.PKG_DIR`` is used.
    toml_path = config.PKG_DIR / 'lang_main_config.toml'
    loaded_cfg = config.load_toml_config(toml_path)
    assert loaded_cfg['info']['pkg'] == 'lang_main_internal'
def test_get_config_path():
    """Resolved config and stylesheet paths point into the package directory."""
    root = config.PKG_DIR
    cfg_name = config.CONFIG_FILENAME
    stylesheet_name = config.CYTO_STYLESHEET_FILENAME

    resolved_cfg, resolved_stylesheet = config.get_config_paths(
        root_folder=root,
        cfg_name=cfg_name,
        cyto_stylesheet_name=stylesheet_name,
    )

    assert resolved_cfg == (root / cfg_name).resolve()
    assert resolved_stylesheet == (root / stylesheet_name).resolve()
def test_load_cfg(monkeypatch, tmp_path):
    """Load the config with and without preference for the internal file."""
    monkeypatch.setattr(Path, 'cwd', lambda: tmp_path)
    root = config.PKG_DIR
    cfg_name = config.CONFIG_FILENAME
    internal_cfg_pth = (root / cfg_name).resolve()

    reference = config.load_toml_config(internal_cfg_pth)
    assert reference['info']['pkg'] == 'lang_main_internal'

    # (prefer_internal_config, expected package marker)
    cases = (
        (True, 'lang_main_internal'),
        (False, 'lang_main'),
    )
    for prefer_internal, expected_pkg in cases:
        loaded = config.load_cfg(
            starting_path=root,
            glob_pattern=cfg_name,
            stop_folder_name=config.STOP_FOLDER,
            cfg_path_internal=internal_cfg_pth,
            prefer_internal_config=prefer_internal,
        )
        assert loaded['info']['pkg'] == expected_pkg

View File

@@ -15,6 +15,9 @@ def test_create_saving_folder(tmp_path, overwrite):
io.create_saving_folder(target_dir, overwrite_existing=overwrite)
assert target_dir.exists()
assert target_dir.is_dir()
io.create_saving_folder(str(target_dir), overwrite_existing=overwrite)
assert target_dir.exists()
assert target_dir.is_dir()
def test_save_load(tmp_path):

View File

@@ -11,6 +11,7 @@ from lang_main.constants import (
STFRDeviceTypes,
STFRModelTypes,
)
from lang_main.errors import LanguageModelNotFoundError
from lang_main.types import LanguageModels
@@ -62,10 +63,7 @@ def test_load_sentence_transformer(
],
)
@pytest.mark.mload
def test_load_sentence_transformer_onnx(
model_name,
similarity_func,
) -> None:
def test_load_sentence_transformer_onnx(model_name, similarity_func) -> None:
model = model_loader.load_sentence_transformer(
model_name=model_name,
similarity_func=similarity_func,
@@ -86,15 +84,19 @@ def test_load_sentence_transformer_onnx(
],
)
@pytest.mark.mload
def test_load_spacy_model(
model_name,
):
def test_load_spacy_model(model_name):
model = model_loader.load_spacy(
model_name=model_name,
)
assert isinstance(model, Language)
def test_load_spacy_model_fail():
    """Loading an unknown spaCy model raises ``LanguageModelNotFoundError``."""
    model_name = 'not_existing'
    with pytest.raises(LanguageModelNotFoundError):
        # the result is not bound: the call is expected to raise
        model_loader.load_spacy(model_name)
@pytest.mark.mload
def test_instantiate_spacy_model():
model = model_loader.instantiate_model(

64
tests/test_search.py Normal file
View File

@@ -0,0 +1,64 @@
from pathlib import Path
import pytest
from lang_main import search
FILE_SEARCH = 'test.txt'
@pytest.fixture(scope='module')
def base_folder(tmp_path_factory) -> Path:
    """Create ``path/to/base/folder`` in a temporary directory and return it."""
    tmp_root = tmp_path_factory.mktemp('search')
    nested = tmp_root / 'path/to/base/folder/'
    nested.mkdir(parents=True, exist_ok=True)
    return nested
@pytest.fixture(scope='module')
def target_file_pth(base_folder) -> Path:
    """Write the search target three levels above ``base_folder`` and return it."""
    # place in folder 'path' of TMP path
    target_file = base_folder.parents[2] / FILE_SEARCH
    target_file.write_text('TEST')
    return target_file
def test_search_base_path(base_folder):
    """An unknown stop folder yields ``None``; ``'to'`` yields folder ``path``."""
    # '123' should not exist anywhere in the hierarchy
    missing = search.search_base_path(base_folder, stop_folder_name='123')
    assert missing is None

    located = search.search_base_path(base_folder, stop_folder_name='to')
    assert located is not None
    assert located.name == 'path'
@pytest.mark.parametrize('stop_folder_name', ['to', 'base', None])
def test_search_iterative(base_folder, target_file_pth, stop_folder_name):
    """The target is found for stop folder ``'to'`` or ``None``, not for ``'base'``."""
    # target in parent of 'to': 'path'
    result = search.search_iterative(base_folder, FILE_SEARCH, stop_folder_name)
    if stop_folder_name is None or stop_folder_name == 'to':
        assert result is not None
        assert result.name == FILE_SEARCH
        assert result == target_file_pth
    elif stop_folder_name == 'base':
        assert result is None
def test_search_cwd(monkeypatch, base_folder, target_file_pth):
    """The file is found only when the patched cwd is its containing folder."""
    # cwd without the file
    monkeypatch.setattr(Path, 'cwd', lambda: base_folder)
    assert Path.cwd() == base_folder
    assert search.search_cwd(FILE_SEARCH) is None

    # cwd containing the file
    containing_folder = target_file_pth.parent
    monkeypatch.setattr(Path, 'cwd', lambda: containing_folder)
    assert Path.cwd() == containing_folder
    found = search.search_cwd(FILE_SEARCH)
    assert found is not None
    assert found == target_file_pth

Binary file not shown.

Binary file not shown.

Binary file not shown.