improved test coverage, automation scripts

This commit is contained in:
Florian Förster 2024-11-26 16:11:25 +01:00
parent 9291b53f93
commit 38aa0739ad
33 changed files with 979 additions and 297 deletions

View File

@ -1 +1,3 @@
# lang-main # lang-main
Alpha release stage, no documentation yet

1
build.ps1 Normal file
View File

@ -0,0 +1 @@
pdm build -d build/

View File

@ -5,7 +5,7 @@
FROM ubuntu:22.04 FROM ubuntu:22.04
# environment variables # environment variables
ENV CYTOSCAPE_VERSION=3.10.2 ENV CYTOSCAPE_VERSION=3.10.3
ENV VIRTUAL_SCREEN_WIDTH=1920 ENV VIRTUAL_SCREEN_WIDTH=1920
ENV VIRTUAL_SCREEN_HEIGHT=1080 ENV VIRTUAL_SCREEN_HEIGHT=1080
ENV CYREST_PORT=1234 ENV CYREST_PORT=1234

View File

@ -0,0 +1,3 @@
$container_name = "cyrest"
docker start $container_name

View File

@ -0,0 +1,3 @@
$container_name = "cyrest"
docker stop $container_name

View File

@ -0,0 +1,12 @@
# Rebuilds and publishes the Cytoscape Docker image and recreates the local
# container from it. Both version tags must be supplied by the caller:
#   old_cytoscape_version - tag of the local image that is removed
#   new_cytoscape_version - tag applied to the freshly built image
param(
[Parameter(Mandatory=$true)]$old_cytoscape_version,
[Parameter(Mandatory=$true)]$new_cytoscape_version
)
$container_name = "cyrest"
# build from the current directory and tag the result with the new version and "latest"
docker build -t snoringsloth/cytoscape:$new_cytoscape_version -t snoringsloth/cytoscape:latest .
# publish both tags to the registry
docker push snoringsloth/cytoscape:latest
docker push snoringsloth/cytoscape:$new_cytoscape_version
# drop the existing container so its name can be reused below
# NOTE(review): presumably the container must be stopped beforehand - confirm
docker rm $container_name
# remove the local image carrying the old version tag
docker image rm snoringsloth/cytoscape:$old_cytoscape_version
# create (but do not start) a new container from "latest", publishing port 1234
docker create -p 1234:1234 --name $container_name snoringsloth/cytoscape:latest

28
pdm.lock generated
View File

@ -5,7 +5,7 @@
groups = ["default", "cytoscape", "dash", "dev", "notebooks", "plot", "spacy-lg", "spacy-md", "spacy-sm", "trials"] groups = ["default", "cytoscape", "dash", "dev", "notebooks", "plot", "spacy-lg", "spacy-md", "spacy-sm", "trials"]
strategy = ["inherit_metadata"] strategy = ["inherit_metadata"]
lock_version = "4.5.0" lock_version = "4.5.0"
content_hash = "sha256:3fda7527ac9298ec38cef4c36dc495defec823f631affe62daf9aedd3611000a" content_hash = "sha256:bd78b1a41d8bc73b5112c999a6fc6cfcb512e7b9ef6f6bf437a9363df9296961"
[[metadata.targets]] [[metadata.targets]]
requires_python = ">=3.11" requires_python = ">=3.11"
@ -1015,6 +1015,17 @@ files = [
{file = "evaluate-0.4.3.tar.gz", hash = "sha256:3a5700cf83aabee9549264e1e5666f116367c61dbd4d38352015e859a5e2098d"}, {file = "evaluate-0.4.3.tar.gz", hash = "sha256:3a5700cf83aabee9549264e1e5666f116367c61dbd4d38352015e859a5e2098d"},
] ]
[[package]]
name = "execnet"
version = "2.1.1"
requires_python = ">=3.8"
summary = "execnet: rapid multi-Python deployment"
groups = ["dev"]
files = [
{file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"},
{file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"},
]
[[package]] [[package]]
name = "executing" name = "executing"
version = "2.1.0" version = "2.1.0"
@ -3124,6 +3135,21 @@ files = [
{file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"},
] ]
[[package]]
name = "pytest-xdist"
version = "3.6.1"
requires_python = ">=3.8"
summary = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
groups = ["dev"]
dependencies = [
"execnet>=2.1",
"pytest>=7.0.0",
]
files = [
{file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"},
{file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"},
]
[[package]] [[package]]
name = "python-dateutil" name = "python-dateutil"
version = "2.9.0.post0" version = "2.9.0.post0"

View File

@ -1,7 +1,7 @@
[project] [project]
name = "lang-main" name = "lang-main"
version = "0.1.0dev1" version = "0.1.0a1"
description = "Several tools to analyse maintenance data with strong focus on language processing" description = "Several tools to analyse TOM's data with strong focus on language processing"
authors = [ authors = [
{name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"},
] ]
@ -54,6 +54,9 @@ build-backend = "pdm.backend"
[tool.pdm] [tool.pdm]
distribution = true distribution = true
[tool.pdm.build]
package-dir = "src"
[tool.pdm.dev-dependencies] [tool.pdm.dev-dependencies]
notebooks = [ notebooks = [
"jupyterlab>=4.2.0", "jupyterlab>=4.2.0",
@ -66,13 +69,6 @@ notebooks = [
# --config-setting="--global-option=build_ext" --config-setting="--global-option=-IC:\Program Files\Graphviz\include" --config-setting="--global-option=-LC:\Program Files\Graphviz\lib" # --config-setting="--global-option=build_ext" --config-setting="--global-option=-IC:\Program Files\Graphviz\include" --config-setting="--global-option=-LC:\Program Files\Graphviz\lib"
trials = [ trials = [
] ]
dev = [
"cython>=3.0.10",
"openpyxl>=3.1.5",
"seaborn>=0.13.2",
"pytest>=8.3.3",
"pytest-cov>=6.0.0",
]
[tool.ruff] [tool.ruff]
line-length = 94 line-length = 94
@ -103,6 +99,7 @@ filterwarnings = [
] ]
markers = [ markers = [
"mload: marks tests with loading of language models (deselect with '-m \"not mload\"')", "mload: marks tests with loading of language models (deselect with '-m \"not mload\"')",
"cyto: marks tests which interact with Cytoscape (deselect with '-m \"not cyto\"')",
] ]
log_cli = true log_cli = true
@ -120,7 +117,22 @@ exclude_also = [
"@overload", "@overload",
"if logging", "if logging",
"if TYPE_CHECKING", "if TYPE_CHECKING",
"@pytest.fixture",
# entries are regexes: "." stands in for either quote style around __main__,
# so the pattern matches the usual `if __name__ == '__main__':` guard
"if __name__ == .__main__.:",
] ]
[tool.coverage.html] [tool.coverage.html]
directory = "reports/coverage" directory = "reports/coverage"
[dependency-groups]
dev = [
"cython>=3.0.10",
"openpyxl>=3.1.5",
"seaborn>=0.13.2",
"pytest>=8.3.3",
"pytest-cov>=6.0.0",
"pytest-xdist>=3.6.1",
]
notebooks = [
"jupyterlab>=4.2.0",
"ipywidgets>=8.1.2",
]

8
run_tests.ps1 Normal file
View File

@ -0,0 +1,8 @@
# Full test run: parallel pytest pass, then the Cytoscape-marked tests against
# the "cyrest" container, then the HTML coverage report.

# first pass: pytest with coverage, distributed over 4 worker processes (-n 4)
# NOTE(review): presumably the "cyto" tests are deselected in this pass - confirm
pdm run pytest --cov -n 4
# run docker desktop (hard-coded default installation path)
. "C:\Program Files\Docker\Docker\Docker Desktop.exe"
# NOTE(review): the container is started right after launching Docker Desktop;
# presumably the Docker engine has to be ready by then - confirm
docker start cyrest
# run Cytoscape tests in single process
# "-a" appends to the coverage data collected by the first pass
pdm run coverage run -a -m pytest -m "cyto"
docker stop cyrest
# render the combined coverage data as an HTML report
pdm run coverage html

View File

@ -1,177 +1,32 @@
import logging
import os
from pathlib import Path from pathlib import Path
from typing import Any, Final from typing import Any, Final
from lang_main.config import load_toml_config from lang_main.config import (
BASE_FOLDERNAME,
_has_py4cyto: bool = True CONFIG_FILENAME,
try: CYTO_STYLESHEET_FILENAME,
import py4cytoscape as p4c PKG_DIR,
except ImportError: PREFER_INTERNAL_CONFIG,
_has_py4cyto = False STOP_FOLDER,
get_config_paths,
# ** external packages config load_cfg,
# ** Huggingface Hub caching )
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'set' from lang_main.search import search_base_path
# ** py4cytoscape config
if _has_py4cyto:
p4c.set_summary_logger(False)
p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
p4c.py4cytoscape_logger.detail_logger.removeHandler(
p4c.py4cytoscape_logger.detail_handler
)
p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
# ** lang-main config # ** lang-main config
BASE_FOLDERNAME: Final[str] = 'lang-main' cfg_path_internal, cyto_stylesheet_path = get_config_paths(
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml' PKG_DIR, CONFIG_FILENAME, CYTO_STYLESHEET_FILENAME
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml' )
PREFER_INTERNAL_CONFIG: Final[bool] = False
pkg_dir = Path(__file__).parent
cfg_path_internal = (pkg_dir / CONFIG_FILENAME).resolve()
cyto_stylesheet_path = (pkg_dir / CYTO_STYLESHEET_FILENAME).resolve()
# ** load config data: internal/external CONFIG: Final[dict[str, Any]] = load_cfg(
# look for external config first, if not found use internal one starting_path=PKG_DIR,
def search_cwd(
glob_pattern: str = CONFIG_FILENAME,
) -> Path | None:
"""Searches the current working directory and looks for files
matching the glob pattern.
Returns the first match encountered.
Parameters
----------
glob_pattern : str, optional
pattern to look for, first match will be returned,
by default CONFIG_FILENAME
Returns
-------
Path | None
Path if corresponding object was found, None otherwise
"""
cfg_path: Path | None = None
res = tuple(Path.cwd().glob(glob_pattern))
if res:
cfg_path = res[0]
return cfg_path
def search_iterative(
starting_path: Path,
glob_pattern: str = CONFIG_FILENAME,
stop_folder_name: str | None = None,
) -> Path | None:
"""Iteratively searches the parent directories of the starting path
and look for files matching the glob pattern. The starting path is not
searched, only its parents. Therefore the starting path can also point
to a file. The folder in which it is placed in will be searched.
Returns the first match encountered.
Parameters
----------
starting_path : Path
non-inclusive starting path
glob_pattern : str, optional
pattern to look for, first match will be returned,
by default CONFIG_FILENAME
stop_folder_name : str, optional
name of the last folder in the directory tree to search, by default None
Returns
-------
Path | None
Path if corresponding object was found, None otherwise
"""
file_path: Path | None = None
stop_folder_reached: bool = False
for it in range(len(starting_path.parents)):
search_path = starting_path.parents[it] # do not look in library folder
res = tuple(search_path.glob(glob_pattern))
if res:
file_path = res[0]
break
elif stop_folder_reached:
break
if stop_folder_name is not None and search_path.name == stop_folder_name:
# library is placed inside a whole python installation for deployment
# if this folder is reached, only look up one parent above
stop_folder_reached = True
return file_path
def search_base_path(
starting_path: Path,
stop_folder_name: str | None = None,
) -> Path | None:
"""Iteratively searches the parent directories of the starting path
and look for folders matching the given name. If a match is encountered,
the parent path will be returned.
Example:
starting_path = path/to/start/folder
stop_folder_name = 'to'
returned path = 'path/'
Parameters
----------
starting_path : Path
non-inclusive starting path
stop_folder_name : str, optional
name of the last folder in the directory tree to search, by default None
Returns
-------
Path | None
Path if corresponding base path was found, None otherwise
"""
stop_folder_path: Path | None = None
base_path: Path | None = None
for it in range(len(starting_path.parents)):
search_path = starting_path.parents[it] # do not look in library folder
if stop_folder_name is not None and search_path.name == stop_folder_name:
# library is placed inside a whole python installation for deployment
# only look up to this folder
stop_folder_path = search_path
break
if stop_folder_path is not None:
base_path = stop_folder_path.parent
return base_path
def load_cfg() -> dict[str, Any]:
cfg_path: Path | None
if PREFER_INTERNAL_CONFIG:
cfg_path = cfg_path_internal
else:
cfg_path = search_cwd(glob_pattern=CONFIG_FILENAME)
if cfg_path is None:
cfg_path = search_iterative(
starting_path=pkg_dir,
glob_pattern=CONFIG_FILENAME, glob_pattern=CONFIG_FILENAME,
stop_folder_name='python', stop_folder_name=STOP_FOLDER,
) cfg_path_internal=cfg_path_internal,
# backup: use internal config prefer_internal_config=PREFER_INTERNAL_CONFIG,
if cfg_path is None: )
cfg_path = cfg_path_internal base_parent_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME)
config = load_toml_config(path_to_toml=cfg_path)
return config.copy()
CONFIG: Final[dict[str, Any]] = load_cfg()
base_parent_path = search_base_path(pkg_dir, stop_folder_name=BASE_FOLDERNAME)
if base_parent_path is None: if base_parent_path is None:
raise FileNotFoundError('Could not resolve base path of library') raise FileNotFoundError('Could not resolve base path of library')
BASE_PATH: Final[Path] = base_parent_path BASE_PATH: Final[Path] = base_parent_path
@ -185,11 +40,3 @@ if not cyto_stylesheet_path.exists():
) )
CYTO_PATH_STYLESHEET: Final[Path] = cyto_stylesheet_path CYTO_PATH_STYLESHEET: Final[Path] = cyto_stylesheet_path
# TODO check removal
# append Graphviz binary folder to system path if not already contained
# if sys.platform == 'win32':
# path = Path(r'C:\Program Files\Graphviz\bin')
# if path.is_dir() and str(path).lower() not in os.environ['PATH'].lower():
# os.environ['PATH'] += f';{path}'

View File

@ -3,7 +3,7 @@ from typing import cast
from pandas import DataFrame, Series from pandas import DataFrame, Series
from sentence_transformers import SentenceTransformer from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm # TODO: check deletion from tqdm.auto import tqdm
from lang_main.analysis.shared import ( from lang_main.analysis.shared import (
candidates_by_index, candidates_by_index,

View File

@ -241,35 +241,36 @@ def build_token_graph(
return graph, docs_mapping return graph, docs_mapping
def build_token_graph_simple( # TODO check removal
data: DataFrame, # def build_token_graph_simple(
model: SpacyModel, # data: DataFrame,
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]: # model: SpacyModel,
graph = TokenGraph() # ) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
model_input = cast(tuple[str], tuple(data['entry'].to_list())) # graph = TokenGraph()
weights = cast(tuple[int], tuple(data['num_occur'].to_list())) # model_input = cast(tuple[str], tuple(data['entry'].to_list()))
indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list())) # weights = cast(tuple[int], tuple(data['num_occur'].to_list()))
index: int = 0 # indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list()))
docs_mapping: dict[PandasIndex, SpacyDoc] = {} # index: int = 0
# docs_mapping: dict[PandasIndex, SpacyDoc] = {}
for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)): # for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)):
add_doc_info_to_graph( # add_doc_info_to_graph(
graph=graph, # graph=graph,
doc=doc, # doc=doc,
weight=weights[index], # weight=weights[index],
) # )
corresponding_indices = indices[index] # corresponding_indices = indices[index]
for idx in corresponding_indices: # for idx in corresponding_indices:
docs_mapping[idx] = doc # docs_mapping[idx] = doc
index += 1 # index += 1
# metadata # # metadata
graph.update_metadata() # graph.update_metadata()
# convert to undirected # # convert to undirected
graph.to_undirected(logging=False) # graph.to_undirected(logging=False)
return graph, docs_mapping # return graph, docs_mapping
# TODO check removal # TODO check removal

View File

@ -1,11 +1,40 @@
from __future__ import annotations from __future__ import annotations
import logging
import os
import sys import sys
import tomllib import tomllib
from typing import TYPE_CHECKING, Any from pathlib import Path
from typing import Any, Final
if TYPE_CHECKING: from lang_main.search import search_cwd, search_iterative
from pathlib import Path
_has_py4cyto: bool = True
try:
import py4cytoscape as p4c
except ImportError:
_has_py4cyto = False
# ** external packages config
# ** Huggingface Hub caching
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'set'
# ** py4cytoscape config
if _has_py4cyto:
p4c.set_summary_logger(False)
p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
p4c.py4cytoscape_logger.detail_logger.removeHandler(
p4c.py4cytoscape_logger.detail_handler
)
p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
# ** lang-main config
BASE_FOLDERNAME: Final[str] = 'lang-main'
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
PREFER_INTERNAL_CONFIG: Final[bool] = False
PKG_DIR: Final[Path] = Path(__file__).parent
STOP_FOLDER: Final[str] = 'python'
def load_toml_config( def load_toml_config(
@ -14,4 +43,46 @@ def load_toml_config(
with open(path_to_toml, 'rb') as f: with open(path_to_toml, 'rb') as f:
data = tomllib.load(f) data = tomllib.load(f)
print('Loaded TOML config file successfully.', file=sys.stderr, flush=True) print('Loaded TOML config file successfully.', file=sys.stderr, flush=True)
return data return data
# ** load config data: internal/external
def get_config_paths(
    root_folder: Path,
    cfg_name: str,
    cyto_stylesheet_name: str,
) -> tuple[Path, Path]:
    """Build the resolved paths of the internal config file and the
    Cytoscape stylesheet, both taken relative to the given root folder.

    Parameters
    ----------
    root_folder : Path
        folder the two relative names are joined onto
    cfg_name : str
        relative name of the config file
    cyto_stylesheet_name : str
        relative name of the Cytoscape stylesheet

    Returns
    -------
    tuple[Path, Path]
        resolved config path, resolved stylesheet path (in this order)
    """
    internal_cfg, stylesheet = (
        (root_folder / relative_name).resolve()
        for relative_name in (cfg_name, cyto_stylesheet_name)
    )
    return internal_cfg, stylesheet
def load_cfg(
    starting_path: Path,
    glob_pattern: str,
    stop_folder_name: str | None,
    cfg_path_internal: Path,
    prefer_internal_config: bool = False,
) -> dict[str, Any]:
    """Locate a config file and load it as TOML.

    Unless the internal config is preferred, the current working directory
    is searched first, then the parent folders of ``starting_path``. If
    neither search yields a path, the internal config is used.

    Parameters
    ----------
    starting_path : Path
        starting path handed to the iterative parent search
    glob_pattern : str
        pattern used by both searches to find the config file
    stop_folder_name : str | None
        stop folder name handed to the iterative parent search
    cfg_path_internal : Path
        path of the internal config, used as preference or as fallback
    prefer_internal_config : bool, optional
        use the internal config without searching, by default False

    Returns
    -------
    dict[str, Any]
        copy of the loaded configuration
    """
    # external config takes precedence unless the internal one is requested
    candidate: Path | None = (
        cfg_path_internal if prefer_internal_config else search_cwd(glob_pattern)
    )
    if candidate is None:
        candidate = search_iterative(
            starting_path=starting_path,
            glob_pattern=glob_pattern,
            stop_folder_name=stop_folder_name,
        )
    # fallback: nothing found externally
    if candidate is None:
        candidate = cfg_path_internal

    loaded = load_toml_config(path_to_toml=candidate)
    return loaded.copy()

View File

@ -1,6 +1,6 @@
# lang_main: Config file # lang_main: Config file
[info] [info]
pkg = 'lang_main' pkg = 'lang_main_internal'
[paths] [paths]
inputs = './inputs/' inputs = './inputs/'

View File

@ -60,7 +60,7 @@ def load_spacy(
model_name: str, model_name: str,
) -> SpacyModel: ) -> SpacyModel:
try: try:
spacy_model_obj = importlib.import_module(SPACY_MODEL_NAME) spacy_model_obj = importlib.import_module(model_name)
except ModuleNotFoundError: except ModuleNotFoundError:
raise LanguageModelNotFoundError( raise LanguageModelNotFoundError(
( (

View File

@ -148,14 +148,10 @@ class Pipeline(BasePipeline):
) -> None: ) -> None:
# init base class # init base class
super().__init__(name=name, working_dir=working_dir) super().__init__(name=name, working_dir=working_dir)
# name of pipeline # name of pipeline
self.name = name self.name = name
# working directory for pipeline == output path # working directory for pipeline == output path
self.working_dir = working_dir self.working_dir = working_dir
# if not self.working_dir.exists():
# self.working_dir.mkdir(parents=True)
# container for actions to perform during pass # container for actions to perform during pass
self.actions_kwargs: list[dict[str, Any]] = [] self.actions_kwargs: list[dict[str, Any]] = []
self.save_results: ResultHandling = [] self.save_results: ResultHandling = []
@ -192,28 +188,6 @@ class Pipeline(BasePipeline):
else: else:
self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__) self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
# TODO: add multiple entries by utilising simple add method
"""
def add_multi(
self,
action: FunctionType | Sequence[FunctionType],
action_kwargs: dict[str, Any] | Sequence[dict[str, Any]],
) -> None:
if isinstance(action, Sequence):
if len(action_kwargs) != len(action):
raise ValueError(("Sequences for actions and corresponding keyword "
"arguments must have the same length."))
self.actions.extend(action)
self.actions_kwargs.extend(action_kwargs)
elif isinstance(action, FunctionType):
self.actions.append(action)
self.actions_kwargs.append(action_kwargs)
else:
raise TypeError(("Action must be function or sequence of functions, "
f"but is of type >>{type(action)}<<."))
"""
def get_result_path( def get_result_path(
self, self,
action_idx: int, action_idx: int,
@ -253,11 +227,7 @@ class Pipeline(BasePipeline):
action_idx: int, action_idx: int,
filename: str | None, filename: str | None,
) -> None: ) -> None:
# target_filename = f'Pipe-{self.name}_Step-{self.curr_proc_idx}_' + filename
# target_path = self.working_dir.joinpath(target_filename)
# target_path = target_path.with_suffix('.pkl')
target_path, _ = self.get_result_path(action_idx, filename) target_path, _ = self.get_result_path(action_idx, filename)
# saving file locally
save_pickle(obj=self._intermediate_result, path=target_path) save_pickle(obj=self._intermediate_result, path=target_path)
@override @override
@ -270,6 +240,7 @@ class Pipeline(BasePipeline):
if self.load_results[idx][0]: if self.load_results[idx][0]:
filename = self.load_results[idx][1] filename = self.load_results[idx][1]
ret = self.load_step(action_idx=idx, filename=filename) ret = self.load_step(action_idx=idx, filename=filename)
self._intermediate_result = ret
logger.info( logger.info(
'[No Calculation] Loaded result for action >>%s<< successfully', '[No Calculation] Loaded result for action >>%s<< successfully',
self.action_names[idx], self.action_names[idx],
@ -279,18 +250,12 @@ class Pipeline(BasePipeline):
# calculation # calculation
if idx == 0: if idx == 0:
args = starting_values args = starting_values
# ret = action(*starting_values, **action_kwargs)
else: else:
args = ret args = ret
if args is not None: if args is not None:
ret = action(*args, **action_kwargs) ret = action(*args, **action_kwargs)
# elif args is not None:
# ret = action(*args)
# elif args is None and action_kwargs:
# ret = action(**action_kwargs)
else: else:
# ret = action()
ret = action(**action_kwargs) ret = action(**action_kwargs)
if ret is not None and not isinstance(ret, tuple): if ret is not None and not isinstance(ret, tuple):

View File

@ -60,7 +60,7 @@ SPACY_MODEL = m_load.instantiate_model(
# ** pipeline configuration # ** pipeline configuration
# ** target feature preparation # ** target feature preparation
def build_base_target_feature_pipe() -> Pipeline: def build_base_target_feature_pipe() -> Pipeline:
pipe_target_feat = Pipeline(name='TargetFeature', working_dir=SAVE_PATH_FOLDER) pipe_target_feat = Pipeline(name='Target_Feature', working_dir=SAVE_PATH_FOLDER)
pipe_target_feat.add( pipe_target_feat.add(
load_raw_data, load_raw_data,
{ {
@ -185,7 +185,15 @@ def build_tk_graph_render_pipe(
) -> Pipeline: ) -> Pipeline:
# optional dependency: late import # optional dependency: late import
# raises exception if necessary modules are not found # raises exception if necessary modules are not found
try:
from lang_main.render import cytoscape as cyto from lang_main.render import cytoscape as cyto
except ImportError:
raise ImportError(
(
'Dependencies for Cytoscape interaction not found.'
'Install package with optional dependencies.'
)
)
pipe_graph_rendering = Pipeline( pipe_graph_rendering = Pipeline(
name='Graph_Static-Rendering', name='Graph_Static-Rendering',

View File

@ -60,7 +60,7 @@ def verify_connection() -> None:
""" """
try: try:
p4c.cytoscape_ping() p4c.cytoscape_ping()
except CyError as error: except CyError as error: # pragma: no cover
logger.error('[CyError] CyREST or Cytoscape version not supported.') logger.error('[CyError] CyREST or Cytoscape version not supported.')
raise error raise error
except RequestException as error: except RequestException as error:
@ -164,6 +164,7 @@ def verify_table_property(
network_name: str = CYTO_BASE_NETWORK_NAME, network_name: str = CYTO_BASE_NETWORK_NAME,
) -> bool: ) -> bool:
table = p4c.get_table_columns(table=table_type, network=network_name) table = p4c.get_table_columns(table=table_type, network=network_name)
logger.debug('Table >>%s<< wiht columns: %s', table, table.columns)
return property in table.columns return property in table.columns
@ -174,7 +175,7 @@ def analyse_network(
) -> None: ) -> None:
node_table = p4c.get_table_columns(table='node', network=network_name) node_table = p4c.get_table_columns(table='node', network=network_name)
net_analyse_possible: bool = True net_analyse_possible: bool = True
if len(node_table) < 4: if len(node_table) < 4: # pragma: no cover
net_analyse_possible = False net_analyse_possible = False
if net_analyse_possible: if net_analyse_possible:
@ -186,7 +187,7 @@ def analyse_network(
* node_table['BetweennessCentrality'] * node_table['BetweennessCentrality']
* node_table['stress_norm'] * node_table['stress_norm']
) )
else: else: # pragma: no cover
node_table[CYTO_SELECTION_PROPERTY] = 1 node_table[CYTO_SELECTION_PROPERTY] = 1
p4c.load_table_data(node_table, data_key_column='name', network=network_name) p4c.load_table_data(node_table, data_key_column='name', network=network_name)
@ -231,7 +232,7 @@ def export_network_to_image(
by default 'A4' by default 'A4'
""" """
logger.debug('Exporting image to file...') logger.debug('Exporting image to file...')
if not target_folder.exists(): if not target_folder.exists(): # pragma: no cover
target_folder.mkdir(parents=True) target_folder.mkdir(parents=True)
dst_file_pth = (target_folder / filename).with_suffix(f'.{filetype.lower()}') dst_file_pth = (target_folder / filename).with_suffix(f'.{filetype.lower()}')
@ -252,13 +253,6 @@ def export_network_to_image(
export_text_as_font=text_as_font, export_text_as_font=text_as_font,
page_size=pdf_export_page_size, page_size=pdf_export_page_size,
) )
# TODO remove if Cytoscape >= 3.10.* is running in container
# p4c.export_image(
# filename=filename,
# type=filetype,
# network=network_name,
# overwrite_file=True,
# )
logger.debug('Exported image to sandbox.') logger.debug('Exported image to sandbox.')
logger.debug('Transferring image from sandbox to target destination...') logger.debug('Transferring image from sandbox to target destination...')
sandbox_filename = f'{filename}.{filetype.lower()}' sandbox_filename = f'{filename}.{filetype.lower()}'
@ -328,6 +322,7 @@ def apply_style_to_network(
""" """
logger.debug('Applying style to network...') logger.debug('Applying style to network...')
styles_avail = cast(list[str], p4c.get_visual_style_names()) styles_avail = cast(list[str], p4c.get_visual_style_names())
logger.debug('Available styles: %s', styles_avail)
if style_name not in styles_avail: if style_name not in styles_avail:
if not pth_to_stylesheet.exists(): if not pth_to_stylesheet.exists():
# existence for standard path verified at import, but not for other # existence for standard path verified at import, but not for other
@ -348,12 +343,6 @@ def apply_style_to_network(
p4c.set_visual_style(style_name, network=network_name) p4c.set_visual_style(style_name, network=network_name)
# node size mapping, only if needed property is available # node size mapping, only if needed property is available
# TODO check removal
# size_prop_available = verify_table_property(
# property=node_size_property,
# network_name=network_name,
# )
# if size_prop_available:
scheme = p4c.scheme_c_number_continuous( scheme = p4c.scheme_c_number_continuous(
start_value=min_node_size, end_value=max_node_size start_value=min_node_size, end_value=max_node_size
) )
@ -365,13 +354,6 @@ def apply_style_to_network(
default_number=min_node_size, default_number=min_node_size,
) )
p4c.set_node_size_mapping(**node_size_map) p4c.set_node_size_mapping(**node_size_map)
# TODO removal
# else:
# node_table = p4c.get_table_columns(table='node', network=network_name)
# nodes_SUID = node_table['SUID'].to_list()
# p4c.set_node_size_bypass(nodes_SUID, new_sizes=min_node_size, network=network_name)
# p4c.set_visual_style(style_name, network=network_name)
# time.sleep(1) # if not waited image export could be without applied style
fit_content(network_name=network_name) fit_content(network_name=network_name)
logger.debug('Style application to network successful.') logger.debug('Style application to network successful.')
@ -402,7 +384,7 @@ def get_subgraph_node_selection(
node_table = p4c.get_table_columns(table='node', network=network_name) node_table = p4c.get_table_columns(table='node', network=network_name)
node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False) node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False)
p4c.load_table_data(node_table, data_key_column='name', network=network_name) p4c.load_table_data(node_table, data_key_column='name', network=network_name)
node_table_choice = node_table.iloc[:num_subgraphs, :] node_table_choice = node_table.iloc[:num_subgraphs]
logger.debug('Selection of nodes for subgraph generation successful.') logger.debug('Selection of nodes for subgraph generation successful.')
return node_table_choice['SUID'].to_list() return node_table_choice['SUID'].to_list()

View File

@ -9,8 +9,8 @@ re_parenthesis_1 = re.compile(r'[(]+')
re_parenthesis_2 = re.compile(r'[)]+') re_parenthesis_2 = re.compile(r'[)]+')
@cy_log @cy_log # pragma: no cover
def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405 def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405 # pragma: no cover
"""Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes. """Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.
Any edges selected beforehand are deselected before any new edges are selected Any edges selected beforehand are deselected before any new edges are selected

110
src/lang_main/search.py Normal file
View File

@ -0,0 +1,110 @@
from pathlib import Path
def search_cwd(
glob_pattern: str,
) -> Path | None:
"""Searches the current working directory and looks for files
matching the glob pattern.
Returns the first match encountered.
Parameters
----------
glob_pattern : str, optional
pattern to look for, first match will be returned
Returns
-------
Path | None
Path if corresponding object was found, None otherwise
"""
path_found: Path | None = None
res = tuple(Path.cwd().glob(glob_pattern))
if res:
path_found = res[0]
return path_found
def search_iterative(
starting_path: Path,
glob_pattern: str,
stop_folder_name: str | None = None,
) -> Path | None:
"""Iteratively searches the parent directories of the starting path
and look for files matching the glob pattern. The starting path is not
searched, only its parents. Therefore the starting path can also point
to a file. The folder in which it is placed in will be searched.
Returns the first match encountered.
The parent of the stop folder will be searched if it exists.
Parameters
----------
starting_path : Path
non-inclusive starting path
glob_pattern : str, optional
pattern to look for, first match will be returned
stop_folder_name : str, optional
name of the last folder in the directory tree to search, by default None
Returns
-------
Path | None
Path if corresponding object was found, None otherwise
"""
file_path: Path | None = None
stop_folder_reached: bool = False
for search_path in starting_path.parents:
res = tuple(search_path.glob(glob_pattern))
if res:
file_path = res[0]
break
elif stop_folder_reached:
break
if stop_folder_name is not None and search_path.name == stop_folder_name:
# library is placed inside a whole python installation for deployment
# if this folder is reached, only look up one parent above
stop_folder_reached = True
return file_path
def search_base_path(
starting_path: Path,
stop_folder_name: str | None = None,
) -> Path | None:
"""Iteratively searches the parent directories of the starting path
and look for folders matching the given name. If a match is encountered,
the parent path will be returned.
Example:
starting_path = path/to/start/folder
stop_folder_name = 'to'
returned path = 'path/'
Parameters
----------
starting_path : Path
non-inclusive starting path
stop_folder_name : str, optional
name of the last folder in the directory tree to search, by default None
Returns
-------
Path | None
Path if corresponding base path was found, None otherwise
"""
stop_folder_path: Path | None = None
base_path: Path | None = None
for search_path in starting_path.parents:
if stop_folder_name is not None and search_path.name == stop_folder_name:
# library is placed inside a whole python installation for deployment
# only look up to this folder
stop_folder_path = search_path
break
if stop_folder_path is not None:
base_path = stop_folder_path.parent
return base_path

1
test.ps1 Normal file
View File

@ -0,0 +1 @@
pdm run coverage run -p -m pytest -n 6

View File

@ -1,5 +1,3 @@
from pathlib import Path
import pytest import pytest
from lang_main import model_loader from lang_main import model_loader
@ -44,14 +42,14 @@ def test_obtain_relevant_descendants(spacy_model):
doc = spacy_model(SENTENCE) doc = spacy_model(SENTENCE)
sent1 = tuple(doc.sents)[0] # first sentence sent1 = tuple(doc.sents)[0] # first sentence
word1 = sent1[1] # word "ging" (POS:VERB) word1 = sent1[1] # word "ging" (POS:VERB)
descendants1 = ('0912393', 'schnell', 'Wiese', 'Menschen') descendants1 = ('ID', '0912393', 'schnell', 'Wiese', 'Menschen')
rel_descs = tokens.obtain_relevant_descendants(word1) rel_descs = tokens.obtain_relevant_descendants(word1)
rel_descs = tuple((token.text for token in rel_descs)) rel_descs = tuple((token.text for token in rel_descs))
assert descendants1 == rel_descs assert descendants1 == rel_descs
sent2 = tuple(doc.sents)[1] # first sentence sent2 = tuple(doc.sents)[1] # first sentence
word2 = sent2[1] # word "konnte" (POS:AUX) word2 = sent2[1] # word "konnte" (POS:AUX)
descendants2 = ('mit', 'Probleme', 'Tragen', 'Tasche') descendants2 = ('Probleme', 'Tragen', 'Tasche')
rel_descs = tokens.obtain_relevant_descendants(word2) rel_descs = tokens.obtain_relevant_descendants(word2)
rel_descs = tuple((token.text for token in rel_descs)) rel_descs = tuple((token.text for token in rel_descs))
assert descendants2 == rel_descs assert descendants2 == rel_descs
@ -62,7 +60,7 @@ def test_add_doc_info_to_graph(spacy_model):
tk_graph = graphs.TokenGraph() tk_graph = graphs.TokenGraph()
tokens.add_doc_info_to_graph(tk_graph, doc, weight=2) tokens.add_doc_info_to_graph(tk_graph, doc, weight=2)
assert len(tk_graph.nodes) == 11 assert len(tk_graph.nodes) == 11
assert len(tk_graph.edges) == 17 assert len(tk_graph.edges) == 16
assert '0912393' in tk_graph.nodes assert '0912393' in tk_graph.nodes

View File

@ -0,0 +1,227 @@
from pathlib import Path
import pytest
from lang_main import io
from lang_main.errors import (
NoPerformableActionError,
OutputInPipelineContainerError,
WrongActionTypeError,
)
from lang_main.pipelines import base
PIPELINE_NAME = 'test'
@pytest.fixture(scope='module')
def working_dir() -> Path:
    """Provide the working directory handed to the pipelines under test.

    The directory is ``tests/work_dir`` relative to the current working
    directory and is created on demand.

    Returns
    -------
    Path
        path of the existing working directory
    """
    work_dir = Path.cwd() / 'tests/work_dir'
    # ``exist_ok`` replaces the former exists()/mkdir() pair: with several
    # test workers (the suite is started with ``-n 6``) two of them can pass
    # the existence check at the same time and the slower one would fail.
    # ``parents`` keeps the fixture usable if ``tests`` is missing in the CWD.
    work_dir.mkdir(parents=True, exist_ok=True)
    return work_dir
@pytest.fixture(scope='function')
def pipeline_container(working_dir) -> base.PipelineContainer:
    """Build a new ``PipelineContainer`` for each test function."""
    container = base.PipelineContainer(
        name=PIPELINE_NAME,
        working_dir=working_dir,
    )
    return container
@pytest.fixture(scope='function')
def pipeline(working_dir) -> base.Pipeline:
    """Build a new ``Pipeline`` for each test function."""
    pipe = base.Pipeline(
        name=PIPELINE_NAME,
        working_dir=working_dir,
    )
    return pipe
def test_empty_pipeline_container(pipeline_container, working_dir):
    """Check the initial state of a container without any action."""
    # constructor arguments are exposed unchanged
    assert pipeline_container.name == PIPELINE_NAME
    assert pipeline_container.working_dir == working_dir

    # nothing was added yet, the step counter starts at 1
    assert len(pipeline_container.actions) == 0
    assert len(pipeline_container.action_names) == 0
    assert len(pipeline_container.action_skip) == 0
    assert pipeline_container.curr_proc_idx == 1

    # preparing a run without actions is rejected
    with pytest.raises(NoPerformableActionError):
        pipeline_container.prep_run()

    assert pipeline_container.post_run() is None
@pytest.mark.parametrize('skip', [True, False])
def test_pipeline_container_valid(pipeline_container, skip):
    """Run a container with one action, once skipped and once executed."""
    test_string = 'test'

    def valid_action():  # pragma: no cover
        # side effect only, container actions must not return anything
        nonlocal test_string
        test_string = test_string + '_2'

    pipeline_container.add(valid_action, skip=skip)
    assert len(pipeline_container.actions) == 1
    assert len(pipeline_container.action_names) == 1
    assert len(pipeline_container.action_skip) == 1

    ret = pipeline_container.run()
    assert pipeline_container.curr_proc_idx == 2
    assert ret is None

    # a skipped action must leave the captured string untouched
    expected = 'test' if skip else 'test_2'
    assert test_string == expected

    pipeline_container.prep_run()
    assert pipeline_container.curr_proc_idx == 1
def test_pipeline_container_invalid_action(pipeline_container):
    """Check that non-callables and value-returning actions are rejected."""
    test_string = 'test'

    def invalid_action():
        # returns a value, which the test expects the container to refuse
        nonlocal test_string
        test_string = test_string + '_2'
        return 'ret'

    # a plain string is not an action
    with pytest.raises(WrongActionTypeError):
        pipeline_container.add(test_string, skip=False)

    # adding succeeds, the returned value is only detected while running
    pipeline_container.add(invalid_action, skip=False)
    with pytest.raises(OutputInPipelineContainerError):
        pipeline_container.run()
def test_empty_pipeline(pipeline, working_dir):
    """Check the initial state of a pipeline without any action."""
    # constructor arguments are exposed unchanged
    assert pipeline.name == PIPELINE_NAME
    assert pipeline.working_dir == working_dir

    # nothing was added yet
    assert len(pipeline.actions) == 0
    assert len(pipeline.action_names) == 0
    assert len(pipeline.actions_kwargs) == 0
    assert len(pipeline.save_results) == 0
    assert len(pipeline.load_results) == 0

    # step counter starts at 1 and no result has been produced
    assert pipeline.curr_proc_idx == 1
    assert pipeline._intermediate_result is None

    # preparing a run without actions is rejected
    with pytest.raises(NoPerformableActionError):
        pipeline.prep_run()

    assert pipeline.post_run() is None
@pytest.mark.parametrize('alter_content', [True, False])
def test_pipeline_valid(pipeline, alter_content):
    """Add one action, check result paths, run it and load the result."""
    initial_value = 'test'

    # action preparation
    def valid_action(string, add_content=False):
        return string + '_2' if add_content else string

    pipeline.add(valid_action, {'add_content': alter_content})
    assert len(pipeline.actions) == 1
    assert len(pipeline.action_names) == 1
    assert len(pipeline.actions_kwargs) == 1
    assert len(pipeline.save_results) == 1
    assert len(pipeline.load_results) == 1
    assert pipeline.save_results[0] == (False, None)
    assert pipeline.load_results[0] == (False, None)

    # filenames and saving/loading
    # without an explicit filename the path is derived from pipeline name,
    # step index and action name
    default_name = f'Pipe-{pipeline.name}_Step-{pipeline.curr_proc_idx}_valid_action'
    expected_pth = (pipeline.working_dir / default_name).with_suffix('.pkl')
    ret_pth, action_name = pipeline.get_result_path(0, filename=None)
    assert ret_pth == expected_pth
    assert action_name == 'valid_action'

    # an explicit filename replaces the derived one
    filename = 'test'
    expected_pth = (pipeline.working_dir / filename).with_suffix('.pkl')
    ret_pth, action_name = pipeline.get_result_path(0, filename=filename)
    assert ret_pth == expected_pth
    assert action_name == 'valid_action'

    # load non-existing files
    with pytest.raises(FileNotFoundError):
        pipeline.load_step(0, 'non_existing')

    # running
    ret = pipeline.run(starting_values=(initial_value,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 2
    assert ret is not None
    expected_value = 'test_2' if alter_content else 'test'
    assert ret[0] == expected_value

    pipeline.prep_run()
    assert pipeline.curr_proc_idx == 1

    # load existing files
    loaded_res = pipeline.load_step(0, None)
    assert loaded_res is not None
    assert isinstance(loaded_res, tuple)
    assert loaded_res[0] == ret[0]
def test_pipeline_valid_action_load(pipeline, working_dir):
    """Run an action registered with ``load_result=True`` and check that
    loading a pickled non-tuple object is rejected."""
    initial_value = 'test'

    # action preparation
    def valid_action(string, add_content=False):
        return string + '_2' if add_content else string

    pipeline.add(valid_action, {'add_content': False}, load_result=True)
    assert len(pipeline.actions) == 1
    assert len(pipeline.action_names) == 1
    assert len(pipeline.actions_kwargs) == 1
    assert len(pipeline.save_results) == 1
    assert len(pipeline.load_results) == 1
    assert pipeline.save_results[0] == (False, None)
    assert pipeline.load_results[0] == (True, None)

    ret = pipeline.run(starting_values=(initial_value,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 2
    assert ret is not None

    # load non-tuple result
    filename = 'non_tuple.pkl'
    io.save_pickle(initial_value, working_dir / filename)
    with pytest.raises(TypeError):
        pipeline.load_step(0, filename)
def test_pipeline_multiple_actions(pipeline):
    """Chain two actions and check that the second receives the output
    of the first."""
    initial_value = 'test'

    # action preparation
    def valid_action(string, add_content=True):
        return string + '_2' if add_content else string

    def valid_action_2(string, add_content=True):
        return string + '_3' if add_content else string

    pipeline.add(valid_action, {'add_content': True})
    pipeline.add(valid_action_2)
    assert len(pipeline.actions) == 2
    assert len(pipeline.action_names) == 2
    assert len(pipeline.actions_kwargs) == 2
    assert len(pipeline.save_results) == 2
    assert len(pipeline.load_results) == 2
    assert pipeline.save_results[1] == (False, None)
    assert pipeline.load_results[1] == (False, None)

    ret = pipeline.run(starting_values=(initial_value,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 3
    assert ret is not None
    # both suffixes present, in the order the actions were added
    assert ret[0] == 'test_2_3'

View File

@ -0,0 +1,52 @@
import pytest
from lang_main.pipelines import predefined as pre
from lang_main.types import EntryPoints
def test_build_base_target_feature_pipe():
    """The target feature pipeline has the expected name and 5 actions."""
    built = pre.build_base_target_feature_pipe()

    assert built.name == 'Target_Feature'
    assert len(built.actions) == 5
def test_build_merge_duplicates_pipe():
    """The merge duplicates pipeline has the expected name and 2 actions."""
    built = pre.build_merge_duplicates_pipe()

    assert built.name == 'Merge_Duplicates'
    assert len(built.actions) == 2
def test_build_tk_graph_pipe():
    """The token analysis pipeline has the expected name and 1 action."""
    built = pre.build_tk_graph_pipe()

    assert built.name == 'Token_Analysis'
    assert len(built.actions) == 1
def test_build_tk_graph_post_pipe():
    """The graph postprocessing pipeline has the expected name and 3 actions."""
    built = pre.build_tk_graph_post_pipe()

    assert built.name == 'Graph_Postprocessing'
    assert len(built.actions) == 3
def test_build_tk_graph_rescaling_pipe():
    """The graph rescaling pipeline has the expected name and 2 actions."""
    built = pre.build_tk_graph_rescaling_pipe(
        save_result=False,
        exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )

    assert built.name == 'Graph_Rescaling'
    assert len(built.actions) == 2
@pytest.mark.parametrize('with_subgraphs', [True, False])
def test_build_tk_graph_render_pipe(with_subgraphs):
    """The render pipeline has 6 actions with subgraphs and 4 without."""
    built = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs)

    assert built.name == 'Graph_Static-Rendering'
    expected_actions = 6 if with_subgraphs else 4
    assert len(built.actions) == expected_actions
def test_build_timeline_pipe():
    """The timeline analysis pipeline has the expected name and 6 actions."""
    built = pre.build_timeline_pipe()

    assert built.name == 'Timeline_Analysis'
    assert len(built.actions) == 6

0
tests/render/__init__.py Normal file
View File

View File

@ -0,0 +1,227 @@
"""tests for Cytoscape API requests, needs running Cytoscape server;
If no Cytoscape instance is running, the tests only check that the calls fail with a request error.
The validation of the correct behaviour can only be done with a running instance,
especially for layout and formatting tasks. A static test suite is not helpful in
this case.
"""
import py4cytoscape as p4c
import pytest
from py4cytoscape.exceptions import CyError
from requests.exceptions import RequestException
from lang_main.constants import CYTO_BASE_NETWORK_NAME, CYTO_SELECTION_PROPERTY
from lang_main.errors import GraphRenderError
from lang_main.render import cytoscape as cyto
# Ping Cytoscape once while the module is imported; the outcome decides which
# branch every test below takes.
_cyto_available: bool
try:
    p4c.cytoscape_ping()
except RequestException:
    _cyto_available = False
else:
    _cyto_available = True
@pytest.fixture(scope='module')
def avail() -> bool:
    """Hand the result of the import-time Cytoscape ping to the tests."""
    reachable = _cyto_available
    return reachable
@pytest.mark.cyto
def test_verify_connection(avail):
    """Connection check passes with Cytoscape and raises without it."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.verify_connection()
        return

    cyto.verify_connection()
def test_verify_graph_render_size(data_tk_graph_built):
    """No limits pass, a node or edge limit of 0 raises."""
    # both limits disabled: must not raise
    cyto.verify_graph_render_size(
        data_tk_graph_built,
        max_node_count=None,
        max_edge_count=None,
    )

    # first the node limit alone, then the edge limit alone
    violated_limits = (
        {'max_node_count': 0, 'max_edge_count': None},
        {'max_node_count': None, 'max_edge_count': 0},
    )
    for limits in violated_limits:
        with pytest.raises(GraphRenderError):
            cyto.verify_graph_render_size(data_tk_graph_built, **limits)
@pytest.mark.cyto
def test_change_default_layout(avail):
    """Changing the default layout works with Cytoscape, raises without."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.change_default_layout()
        return

    cyto.change_default_layout()
@pytest.mark.cyto
def test_import_to_cytoscape(avail, data_tk_graph_built):
    """Importing the built graph works with Cytoscape, raises without."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.import_to_cytoscape(data_tk_graph_built)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
@pytest.mark.cyto
def test_verify_table_property(avail, data_tk_graph_built):
    """Look up node table properties after importing the built graph.

    With Cytoscape available the property 'TEST' must be reported as
    missing while 'name' and 'degree_weighted' must be reported as present.
    Without Cytoscape the lookup has to fail with a request error.
    """
    if avail:
        cyto.import_to_cytoscape(data_tk_graph_built)
        contained = cyto.verify_table_property(property='TEST', table_type='node')
        assert not contained
        contained = cyto.verify_table_property(property='name', table_type='node')
        assert contained
        contained = cyto.verify_table_property(property='degree_weighted', table_type='node')
        assert contained
    else:
        with pytest.raises(RequestException):
            # the call is expected to raise, so there is no result to keep
            cyto.verify_table_property(property='TEST', table_type='node')
@pytest.mark.cyto
def test_analyse_network(avail, data_tk_graph_built):
    """Analyse the imported network and check the resulting node columns.

    With Cytoscape available the node table must contain 'name' and the
    selection property after the analysis. Without Cytoscape the analysis
    has to fail with a request error.
    """
    if avail:
        cyto.import_to_cytoscape(data_tk_graph_built)
        cyto.analyse_network()
        contained = cyto.verify_table_property(property='name', table_type='node')
        assert contained
        contained = cyto.verify_table_property(
            property=CYTO_SELECTION_PROPERTY, table_type='node'
        )
        assert contained
    else:
        with pytest.raises(RequestException):
            # same call as in the branch above; the graph object was passed
            # here before although the success path calls it without arguments
            cyto.analyse_network()
@pytest.mark.cyto
def test_reset_current_network_to_base(avail):
    """Resetting to the base network works with Cytoscape, raises without."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.reset_current_network_to_base()
        return

    cyto.reset_current_network_to_base()
@pytest.mark.cyto
def test_fit_content(avail, data_tk_graph_built):
    """Fitting the view works on an imported graph, raises without Cytoscape."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.fit_content()
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.fit_content()
@pytest.mark.cyto
def test_export_network_to_image(avail, tmp_path, data_tk_graph_built):
    """Exporting writes ``<filename>.svg`` into the target folder."""
    filename = 'test_export'
    if not avail:
        with pytest.raises(RequestException):
            cyto.export_network_to_image(filename=filename, target_folder=tmp_path)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.export_network_to_image(filename=filename, target_folder=tmp_path)

    svg_name = f'{filename}.svg'
    target_file = tmp_path / svg_name
    assert target_file.exists()
    assert target_file.name == svg_name
@pytest.mark.cyto
def test_layout_network(avail, data_tk_graph_built):
    """Applying the layout works on an imported graph, raises without Cytoscape."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.layout_network()
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.layout_network()
@pytest.mark.cyto
def test_apply_style_to_network(avail, data_tk_graph_built, tmp_path):
    """Apply styles: missing stylesheet, unknown style name, default call."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.apply_style_to_network()
        return

    unknown_style = 'testing'
    missing_stylesheet = tmp_path / 'test.xml'

    # the stylesheet file was never created
    with pytest.raises(FileNotFoundError):
        cyto.apply_style_to_network(
            style_name=unknown_style,
            pth_to_stylesheet=missing_stylesheet,
        )

    cyto.import_to_cytoscape(data_tk_graph_built)
    # not existing: so transfer necessary,
    # but fails nevertheless because style is imported
    # using the name provided by this style configuration
    with pytest.raises(CyError):
        cyto.apply_style_to_network(style_name=unknown_style)

    cyto.apply_style_to_network()
@pytest.mark.cyto
def test_get_subgraph_node_selection(avail, data_tk_graph_built):
    """After import and analysis at least one node is selected."""
    num_subgraphs = 2
    if not avail:
        with pytest.raises(RequestException):
            cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.analyse_network()
    selected = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
    assert len(selected) > 0
@pytest.mark.cyto
def test_select_neighbours_of_node(avail, data_tk_graph_built):
    """Select the neighbours of the first node returned by the selection."""
    num_subgraphs = 2
    if not avail:
        with pytest.raises(RequestException):
            cyto.select_neighbours_of_node(123, neighbour_iter_depth=2)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
    assert len(selected) > 0
    first_node = selected[0]
    cyto.select_neighbours_of_node(first_node, neighbour_iter_depth=2)
@pytest.mark.cyto
def test_make_subnetwork(avail, data_tk_graph_built, tmp_path):
    """Create one subnetwork from a selection and export it as SVG."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected = cyto.get_subgraph_node_selection(num_subgraphs=2)
    assert len(selected) > 0
    cyto.select_neighbours_of_node(selected[0], neighbour_iter_depth=2)
    cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True)

    # base network plus the new subnetwork
    assert len(p4c.get_network_list()) > 1

    subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1'
    exported = (tmp_path / subnetwork_name).with_suffix('.svg')
    assert exported.exists()
@pytest.mark.cyto
def test_build_subnetworks(avail, data_tk_graph_built, tmp_path):
    """Build subnetworks for the selected nodes and export them as SVG."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.build_subnetworks([123], export_image=True, target_folder=tmp_path)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected = cyto.get_subgraph_node_selection(num_subgraphs=1)
    assert len(selected) > 0
    cyto.build_subnetworks(selected, export_image=True, target_folder=tmp_path)

    # base network plus the new subnetwork
    assert len(p4c.get_network_list()) > 1

    subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1'
    exported = (tmp_path / subnetwork_name).with_suffix('.svg')
    assert exported.exists()

View File

@ -1,7 +1,64 @@
from lang_main import config, pkg_dir import sys
from importlib import reload
from pathlib import Path
from unittest.mock import patch
from lang_main import config
def test_p4c_dependency():
    """Check that the config module detects a missing ``py4cytoscape``.

    The config module is reloaded while ``py4cytoscape`` is masked in
    ``sys.modules`` and must then report the dependency as unavailable.
    Afterwards the module is reloaded once more so the flipped flag does
    not stay in place for tests that run later in the same process.
    """
    assert config._has_py4cyto
    try:
        # a ``None`` entry in sys.modules makes the import statement fail
        with patch.dict(sys.modules, {'py4cytoscape': None}):
            reload(sys.modules['lang_main.config'])
            assert not config._has_py4cyto
    finally:
        # patch.dict only restores sys.modules, the reloaded module object
        # would otherwise keep ``_has_py4cyto == False``
        reload(sys.modules['lang_main.config'])
    assert config._has_py4cyto
def test_load_config(): def test_load_config():
toml_path = pkg_dir / 'lang_main_config.toml' toml_path = config.PKG_DIR / 'lang_main_config.toml'
loaded_cfg = config.load_toml_config(toml_path) loaded_cfg = config.load_toml_config(toml_path)
assert loaded_cfg['info']['pkg'] == 'lang_main_internal'
def test_get_config_path():
    """Both returned paths are the resolved files inside the package folder."""
    root = config.PKG_DIR
    cfg_name = config.CONFIG_FILENAME
    stylesheet_name = config.CYTO_STYLESHEET_FILENAME

    expected_cfg = (root / cfg_name).resolve()
    expected_stylesheet = (root / stylesheet_name).resolve()

    cfg_internal, cyto_internal = config.get_config_paths(
        root_folder=root,
        cfg_name=cfg_name,
        cyto_stylesheet_name=stylesheet_name,
    )

    assert cfg_internal == expected_cfg
    assert cyto_internal == expected_stylesheet
def test_load_cfg(monkeypatch, tmp_path):
    """Load the config once preferring the internal file and once not."""
    # point the CWD at an empty temporary folder
    monkeypatch.setattr(Path, 'cwd', lambda: tmp_path)

    pkg_dir = config.PKG_DIR
    filename = config.CONFIG_FILENAME
    cfg_pth_internal = (pkg_dir / filename).resolve()

    # the internal file identifies itself via its package entry
    ref_config = config.load_toml_config(cfg_pth_internal)
    assert ref_config['info']['pkg'] == 'lang_main_internal'

    # arguments shared by both calls, only the preference differs
    search_args = {
        'starting_path': pkg_dir,
        'glob_pattern': filename,
        'stop_folder_name': config.STOP_FOLDER,
        'cfg_path_internal': cfg_pth_internal,
    }

    loaded_cfg = config.load_cfg(prefer_internal_config=True, **search_args)
    assert loaded_cfg['info']['pkg'] == 'lang_main_internal'

    loaded_cfg = config.load_cfg(prefer_internal_config=False, **search_args)
    assert loaded_cfg['info']['pkg'] == 'lang_main'

View File

@ -15,6 +15,9 @@ def test_create_saving_folder(tmp_path, overwrite):
io.create_saving_folder(target_dir, overwrite_existing=overwrite) io.create_saving_folder(target_dir, overwrite_existing=overwrite)
assert target_dir.exists() assert target_dir.exists()
assert target_dir.is_dir() assert target_dir.is_dir()
io.create_saving_folder(str(target_dir), overwrite_existing=overwrite)
assert target_dir.exists()
assert target_dir.is_dir()
def test_save_load(tmp_path): def test_save_load(tmp_path):

View File

@ -11,6 +11,7 @@ from lang_main.constants import (
STFRDeviceTypes, STFRDeviceTypes,
STFRModelTypes, STFRModelTypes,
) )
from lang_main.errors import LanguageModelNotFoundError
from lang_main.types import LanguageModels from lang_main.types import LanguageModels
@ -62,10 +63,7 @@ def test_load_sentence_transformer(
], ],
) )
@pytest.mark.mload @pytest.mark.mload
def test_load_sentence_transformer_onnx( def test_load_sentence_transformer_onnx(model_name, similarity_func) -> None:
model_name,
similarity_func,
) -> None:
model = model_loader.load_sentence_transformer( model = model_loader.load_sentence_transformer(
model_name=model_name, model_name=model_name,
similarity_func=similarity_func, similarity_func=similarity_func,
@ -86,15 +84,19 @@ def test_load_sentence_transformer_onnx(
], ],
) )
@pytest.mark.mload @pytest.mark.mload
def test_load_spacy_model( def test_load_spacy_model(model_name):
model_name,
):
model = model_loader.load_spacy( model = model_loader.load_spacy(
model_name=model_name, model_name=model_name,
) )
assert isinstance(model, Language) assert isinstance(model, Language)
def test_load_spacy_model_fail():
    """Loading a spaCy model under an unknown name raises
    ``LanguageModelNotFoundError``."""
    model_name = 'not_existing'
    with pytest.raises(LanguageModelNotFoundError):
        # the call is expected to raise, so there is no result to keep
        model_loader.load_spacy(model_name)
@pytest.mark.mload @pytest.mark.mload
def test_instantiate_spacy_model(): def test_instantiate_spacy_model():
model = model_loader.instantiate_model( model = model_loader.instantiate_model(

64
tests/test_search.py Normal file
View File

@ -0,0 +1,64 @@
from pathlib import Path
import pytest
from lang_main import search
FILE_SEARCH = 'test.txt'
@pytest.fixture(scope='module')
def base_folder(tmp_path_factory) -> Path:
    """Create ``path/to/base/folder`` below a temporary directory and
    return the innermost folder."""
    tmp_root = tmp_path_factory.mktemp('search')
    nested = tmp_root / 'path/to/base/folder/'
    nested.mkdir(parents=True, exist_ok=True)
    return nested
@pytest.fixture(scope='module')
def target_file_pth(base_folder) -> Path:
    """Write the file to search for three levels above ``base_folder``."""
    # place in folder 'path' of TMP path
    target_file = base_folder.parents[2] / FILE_SEARCH
    target_file.write_text('TEST')
    return target_file
def test_search_base_path(base_folder):
    """An unknown stop folder gives None, 'to' gives its parent 'path'."""
    # should not exist
    missing = search.search_base_path(base_folder, stop_folder_name='123')
    assert missing is None

    found = search.search_base_path(base_folder, stop_folder_name='to')
    assert found is not None
    assert found.name == 'path'
@pytest.mark.parametrize('stop_folder_name', ['to', 'base', None])
def test_search_iterative(base_folder, target_file_pth, stop_folder_name):
    """The file in 'path' is found unless the search stops at 'base'."""
    # target in parent of 'to': 'path'
    ret = search.search_iterative(base_folder, FILE_SEARCH, stop_folder_name)

    if stop_folder_name == 'base':
        # search ends before the folder holding the file is reached
        assert ret is None
    elif stop_folder_name in ('to', None):
        assert ret is not None
        assert ret.name == FILE_SEARCH
        assert ret == target_file_pth
def test_search_cwd(monkeypatch, base_folder, target_file_pth):
    """The file is only found when the CWD is the folder containing it."""
    # CWD without the file
    monkeypatch.setattr(Path, 'cwd', lambda: base_folder)
    assert Path.cwd() == base_folder
    assert search.search_cwd(FILE_SEARCH) is None

    # CWD holding the file
    file_folder = target_file_pth.parent
    monkeypatch.setattr(Path, 'cwd', lambda: file_folder)
    assert Path.cwd() == file_folder
    found = search.search_cwd(FILE_SEARCH)
    assert found is not None
    assert found == target_file_pth

Binary file not shown.

Binary file not shown.

Binary file not shown.