improved test coverage, automation scripts
This commit is contained in:
parent
9291b53f93
commit
38aa0739ad
@ -5,7 +5,7 @@
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# environment variables
|
||||
ENV CYTOSCAPE_VERSION=3.10.2
|
||||
ENV CYTOSCAPE_VERSION=3.10.3
|
||||
ENV VIRTUAL_SCREEN_WIDTH=1920
|
||||
ENV VIRTUAL_SCREEN_HEIGHT=1080
|
||||
ENV CYREST_PORT=1234
|
||||
|
||||
3
cytoscape_docker/start.ps1
Normal file
3
cytoscape_docker/start.ps1
Normal file
@ -0,0 +1,3 @@
|
||||
# Start the already created Docker container named "cyrest".
$cyrest_container = "cyrest"

docker start $cyrest_container
|
||||
3
cytoscape_docker/stop.ps1
Normal file
3
cytoscape_docker/stop.ps1
Normal file
@ -0,0 +1,3 @@
|
||||
# Stop the running Docker container named "cyrest".
$cyrest_container = "cyrest"

docker stop $cyrest_container
|
||||
12
cytoscape_docker/update.ps1
Normal file
12
cytoscape_docker/update.ps1
Normal file
@ -0,0 +1,12 @@
|
||||
<#
.SYNOPSIS
Builds and publishes a new Cytoscape image and recreates the local container.

.PARAMETER old_cytoscape_version
Tag of the local image that is removed once the new image has been published.

.PARAMETER new_cytoscape_version
Tag applied to the freshly built image (in addition to "latest").
#>
param(
    [Parameter(Mandatory=$true)]$old_cytoscape_version,
    [Parameter(Mandatory=$true)]$new_cytoscape_version
)
$container_name = "cyrest"
$image_name = "snoringsloth/cytoscape"

# Native commands do not throw on failure; check the exit code explicitly so
# that a failed step aborts the script before anything is pushed or deleted.
function Assert-DockerSuccess([string]$step) {
    if ($LASTEXITCODE -ne 0) {
        throw "docker $step failed with exit code $LASTEXITCODE"
    }
}

docker build -t "${image_name}:$new_cytoscape_version" -t "${image_name}:latest" .
Assert-DockerSuccess "build"
docker push "${image_name}:latest"
Assert-DockerSuccess "push (latest)"
docker push "${image_name}:$new_cytoscape_version"
Assert-DockerSuccess "push ($new_cytoscape_version)"

# Force the removal so a still running container does not block the
# re-creation below. A missing container is not an error for this script,
# therefore the exit code is deliberately not checked here.
docker rm -f $container_name

# Removing the old tag is pure clean-up. Skip it when both versions are equal,
# otherwise the tag that was just built and pushed would be removed again.
if ($old_cytoscape_version -ne $new_cytoscape_version) {
    docker image rm "${image_name}:$old_cytoscape_version"
}

docker create -p 1234:1234 --name $container_name "${image_name}:latest"
Assert-DockerSuccess "create"
|
||||
28
pdm.lock
generated
28
pdm.lock
generated
@ -5,7 +5,7 @@
|
||||
groups = ["default", "cytoscape", "dash", "dev", "notebooks", "plot", "spacy-lg", "spacy-md", "spacy-sm", "trials"]
|
||||
strategy = ["inherit_metadata"]
|
||||
lock_version = "4.5.0"
|
||||
content_hash = "sha256:3fda7527ac9298ec38cef4c36dc495defec823f631affe62daf9aedd3611000a"
|
||||
content_hash = "sha256:bd78b1a41d8bc73b5112c999a6fc6cfcb512e7b9ef6f6bf437a9363df9296961"
|
||||
|
||||
[[metadata.targets]]
|
||||
requires_python = ">=3.11"
|
||||
@ -1015,6 +1015,17 @@ files = [
|
||||
{file = "evaluate-0.4.3.tar.gz", hash = "sha256:3a5700cf83aabee9549264e1e5666f116367c61dbd4d38352015e859a5e2098d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "execnet"
|
||||
version = "2.1.1"
|
||||
requires_python = ">=3.8"
|
||||
summary = "execnet: rapid multi-Python deployment"
|
||||
groups = ["dev"]
|
||||
files = [
|
||||
{file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"},
|
||||
{file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "executing"
|
||||
version = "2.1.0"
|
||||
@ -3124,6 +3135,21 @@ files = [
|
||||
{file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-xdist"
|
||||
version = "3.6.1"
|
||||
requires_python = ">=3.8"
|
||||
summary = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
|
||||
groups = ["dev"]
|
||||
dependencies = [
|
||||
"execnet>=2.1",
|
||||
"pytest>=7.0.0",
|
||||
]
|
||||
files = [
|
||||
{file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"},
|
||||
{file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
[project]
|
||||
name = "lang-main"
|
||||
version = "0.1.0dev1"
|
||||
description = "Several tools to analyse maintenance data with strong focus on language processing"
|
||||
version = "0.1.0a1"
|
||||
description = "Several tools to analyse TOM's data with strong focus on language processing"
|
||||
authors = [
|
||||
{name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"},
|
||||
]
|
||||
@ -54,6 +54,9 @@ build-backend = "pdm.backend"
|
||||
[tool.pdm]
|
||||
distribution = true
|
||||
|
||||
[tool.pdm.build]
|
||||
package-dir = "src"
|
||||
|
||||
[tool.pdm.dev-dependencies]
|
||||
notebooks = [
|
||||
"jupyterlab>=4.2.0",
|
||||
@ -66,13 +69,6 @@ notebooks = [
|
||||
# --config-setting="--global-option=build_ext" --config-setting="--global-option=-IC:\Program Files\Graphviz\include" --config-setting="--global-option=-LC:\Program Files\Graphviz\lib"
|
||||
trials = [
|
||||
]
|
||||
dev = [
|
||||
"cython>=3.0.10",
|
||||
"openpyxl>=3.1.5",
|
||||
"seaborn>=0.13.2",
|
||||
"pytest>=8.3.3",
|
||||
"pytest-cov>=6.0.0",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 94
|
||||
@ -103,6 +99,7 @@ filterwarnings = [
|
||||
]
|
||||
markers = [
|
||||
"mload: marks tests with loading of language models (deselect with '-m \"not mload\"')",
|
||||
"cyto: marks tests which interact with Cytoscape (deselect with '-m \"not cyto\"')",
|
||||
]
|
||||
log_cli = true
|
||||
|
||||
@ -120,7 +117,22 @@ exclude_also = [
|
||||
"@overload",
|
||||
"if logging",
|
||||
"if TYPE_CHECKING",
|
||||
"@pytest.fixture",
|
||||
"if __name__ == __main__:",
|
||||
]
|
||||
|
||||
[tool.coverage.html]
|
||||
directory = "reports/coverage"
|
||||
directory = "reports/coverage"
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"cython>=3.0.10",
|
||||
"openpyxl>=3.1.5",
|
||||
"seaborn>=0.13.2",
|
||||
"pytest>=8.3.3",
|
||||
"pytest-cov>=6.0.0",
|
||||
"pytest-xdist>=3.6.1",
|
||||
]
|
||||
notebooks = [
|
||||
"jupyterlab>=4.2.0",
|
||||
"ipywidgets>=8.1.2",
|
||||
]
|
||||
8
run_tests.ps1
Normal file
8
run_tests.ps1
Normal file
@ -0,0 +1,8 @@
|
||||
# Run the test suite in parallel (4 workers) while collecting coverage.
pdm run pytest --cov -n 4

# Make sure the Docker daemon is reachable before the container is started.
# Docker Desktop returns control immediately after being launched, so the
# daemon is polled until it answers or the timeout expires.
docker info *> $null
if ($LASTEXITCODE -ne 0) {
    # run docker desktop
    Start-Process -FilePath "C:\Program Files\Docker\Docker\Docker Desktop.exe"
    $deadline = (Get-Date).AddMinutes(3)
    do {
        Start-Sleep -Seconds 2
        docker info *> $null
    } until (($LASTEXITCODE -eq 0) -or ((Get-Date) -gt $deadline))
    if ($LASTEXITCODE -ne 0) {
        throw "Docker daemon did not become ready within the timeout."
    }
}
docker start cyrest
# run Cytoscape tests in single process, appending to the existing coverage data
pdm run coverage run -a -m pytest -m "cyto"
docker stop cyrest
# build the HTML coverage report from the combined data
pdm run coverage html
|
||||
@ -1,177 +1,32 @@
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Final
|
||||
|
||||
from lang_main.config import load_toml_config
|
||||
|
||||
_has_py4cyto: bool = True
|
||||
try:
|
||||
import py4cytoscape as p4c
|
||||
except ImportError:
|
||||
_has_py4cyto = False
|
||||
|
||||
# ** external packages config
|
||||
# ** Huggingface Hub caching
|
||||
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'set'
|
||||
|
||||
# ** py4cytoscape config
|
||||
if _has_py4cyto:
|
||||
p4c.set_summary_logger(False)
|
||||
p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
|
||||
p4c.py4cytoscape_logger.detail_logger.removeHandler(
|
||||
p4c.py4cytoscape_logger.detail_handler
|
||||
)
|
||||
p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
|
||||
from lang_main.config import (
|
||||
BASE_FOLDERNAME,
|
||||
CONFIG_FILENAME,
|
||||
CYTO_STYLESHEET_FILENAME,
|
||||
PKG_DIR,
|
||||
PREFER_INTERNAL_CONFIG,
|
||||
STOP_FOLDER,
|
||||
get_config_paths,
|
||||
load_cfg,
|
||||
)
|
||||
from lang_main.search import search_base_path
|
||||
|
||||
# ** lang-main config
|
||||
BASE_FOLDERNAME: Final[str] = 'lang-main'
|
||||
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
|
||||
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
|
||||
PREFER_INTERNAL_CONFIG: Final[bool] = False
|
||||
pkg_dir = Path(__file__).parent
|
||||
cfg_path_internal = (pkg_dir / CONFIG_FILENAME).resolve()
|
||||
cyto_stylesheet_path = (pkg_dir / CYTO_STYLESHEET_FILENAME).resolve()
|
||||
cfg_path_internal, cyto_stylesheet_path = get_config_paths(
|
||||
PKG_DIR, CONFIG_FILENAME, CYTO_STYLESHEET_FILENAME
|
||||
)
|
||||
|
||||
|
||||
# ** load config data: internal/external
|
||||
# look for external config first, if not found use internal one
|
||||
def search_cwd(
    glob_pattern: str = CONFIG_FILENAME,
) -> Path | None:
    """Look for an entry matching the glob pattern relative to the
    current working directory and hand back the first hit.

    Parameters
    ----------
    glob_pattern : str, optional
        pattern evaluated against ``Path.cwd()``, by default CONFIG_FILENAME

    Returns
    -------
    Path | None
        first matching path, None if nothing matched
    """
    matches = list(Path.cwd().glob(glob_pattern))

    return matches[0] if matches else None
|
||||
|
||||
|
||||
def search_iterative(
    starting_path: Path,
    glob_pattern: str = CONFIG_FILENAME,
    stop_folder_name: str | None = None,
) -> Path | None:
    """Walk up the parents of the starting path and return the first entry
    matching the glob pattern. The starting path itself is never searched,
    so it may also point to a file; its containing folder is searched first.

    Parameters
    ----------
    starting_path : Path
        non-inclusive starting path
    glob_pattern : str, optional
        pattern evaluated in every visited parent, by default CONFIG_FILENAME
    stop_folder_name : str, optional
        once a parent with this name was visited, only one further parent
        is searched, by default None

    Returns
    -------
    Path | None
        first matching path, None if nothing matched
    """
    past_stop_folder = False
    for ancestor in starting_path.parents:
        hits = list(ancestor.glob(glob_pattern))
        if hits:
            return hits[0]
        if past_stop_folder:
            # the parent of the stop folder was searched without success
            return None

        if stop_folder_name is not None and ancestor.name == stop_folder_name:
            # library is placed inside a whole python installation for deployment
            # allow exactly one more level above this folder
            past_stop_folder = True

    return None
|
||||
|
||||
|
||||
def search_base_path(
|
||||
starting_path: Path,
|
||||
stop_folder_name: str | None = None,
|
||||
) -> Path | None:
|
||||
"""Iteratively searches the parent directories of the starting path
|
||||
and look for folders matching the given name. If a match is encountered,
|
||||
the parent path will be returned.
|
||||
|
||||
Example:
|
||||
starting_path = path/to/start/folder
|
||||
stop_folder_name = 'to'
|
||||
returned path = 'path/'
|
||||
|
||||
Parameters
|
||||
----------
|
||||
starting_path : Path
|
||||
non-inclusive starting path
|
||||
stop_folder_name : str, optional
|
||||
name of the last folder in the directory tree to search, by default None
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path if corresponding base path was found, None otherwise
|
||||
"""
|
||||
stop_folder_path: Path | None = None
|
||||
base_path: Path | None = None
|
||||
for it in range(len(starting_path.parents)):
|
||||
search_path = starting_path.parents[it] # do not look in library folder
|
||||
if stop_folder_name is not None and search_path.name == stop_folder_name:
|
||||
# library is placed inside a whole python installation for deployment
|
||||
# only look up to this folder
|
||||
stop_folder_path = search_path
|
||||
break
|
||||
|
||||
if stop_folder_path is not None:
|
||||
base_path = stop_folder_path.parent
|
||||
|
||||
return base_path
|
||||
|
||||
|
||||
def load_cfg() -> dict[str, Any]:
    """Locate the configuration file and load it.

    With PREFER_INTERNAL_CONFIG set, the internal config path is used.
    Otherwise the current working directory is searched first, then the
    parents of the package directory, and the internal config serves as
    the fallback.

    Returns
    -------
    dict[str, Any]
        copy of the loaded configuration
    """
    chosen: Path | None = (
        cfg_path_internal
        if PREFER_INTERNAL_CONFIG
        else search_cwd(glob_pattern=CONFIG_FILENAME)
    )

    if chosen is None:
        chosen = search_iterative(
            starting_path=pkg_dir,
            glob_pattern=CONFIG_FILENAME,
            stop_folder_name='python',
        )
    if chosen is None:
        # backup: use internal config
        chosen = cfg_path_internal

    loaded = load_toml_config(path_to_toml=chosen)

    return loaded.copy()
|
||||
|
||||
|
||||
CONFIG: Final[dict[str, Any]] = load_cfg()
|
||||
base_parent_path = search_base_path(pkg_dir, stop_folder_name=BASE_FOLDERNAME)
|
||||
CONFIG: Final[dict[str, Any]] = load_cfg(
|
||||
starting_path=PKG_DIR,
|
||||
glob_pattern=CONFIG_FILENAME,
|
||||
stop_folder_name=STOP_FOLDER,
|
||||
cfg_path_internal=cfg_path_internal,
|
||||
prefer_internal_config=PREFER_INTERNAL_CONFIG,
|
||||
)
|
||||
base_parent_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME)
|
||||
if base_parent_path is None:
|
||||
raise FileNotFoundError('Could not resolve base path of library')
|
||||
BASE_PATH: Final[Path] = base_parent_path
|
||||
@ -185,11 +40,3 @@ if not cyto_stylesheet_path.exists():
|
||||
)
|
||||
|
||||
CYTO_PATH_STYLESHEET: Final[Path] = cyto_stylesheet_path
|
||||
|
||||
|
||||
# TODO check removal
|
||||
# append Graphviz binary folder to system path if not already contained
|
||||
# if sys.platform == 'win32':
|
||||
# path = Path(r'C:\Program Files\Graphviz\bin')
|
||||
# if path.is_dir() and str(path).lower() not in os.environ['PATH'].lower():
|
||||
# os.environ['PATH'] += f';{path}'
|
||||
|
||||
@ -3,7 +3,7 @@ from typing import cast
|
||||
|
||||
from pandas import DataFrame, Series
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from tqdm.auto import tqdm # TODO: check deletion
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from lang_main.analysis.shared import (
|
||||
candidates_by_index,
|
||||
|
||||
@ -241,35 +241,36 @@ def build_token_graph(
|
||||
return graph, docs_mapping
|
||||
|
||||
|
||||
def build_token_graph_simple(
|
||||
data: DataFrame,
|
||||
model: SpacyModel,
|
||||
) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
|
||||
graph = TokenGraph()
|
||||
model_input = cast(tuple[str], tuple(data['entry'].to_list()))
|
||||
weights = cast(tuple[int], tuple(data['num_occur'].to_list()))
|
||||
indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list()))
|
||||
index: int = 0
|
||||
docs_mapping: dict[PandasIndex, SpacyDoc] = {}
|
||||
# TODO check removal
|
||||
# def build_token_graph_simple(
|
||||
# data: DataFrame,
|
||||
# model: SpacyModel,
|
||||
# ) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]:
|
||||
# graph = TokenGraph()
|
||||
# model_input = cast(tuple[str], tuple(data['entry'].to_list()))
|
||||
# weights = cast(tuple[int], tuple(data['num_occur'].to_list()))
|
||||
# indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list()))
|
||||
# index: int = 0
|
||||
# docs_mapping: dict[PandasIndex, SpacyDoc] = {}
|
||||
|
||||
for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)):
|
||||
add_doc_info_to_graph(
|
||||
graph=graph,
|
||||
doc=doc,
|
||||
weight=weights[index],
|
||||
)
|
||||
corresponding_indices = indices[index]
|
||||
for idx in corresponding_indices:
|
||||
docs_mapping[idx] = doc
|
||||
# for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)):
|
||||
# add_doc_info_to_graph(
|
||||
# graph=graph,
|
||||
# doc=doc,
|
||||
# weight=weights[index],
|
||||
# )
|
||||
# corresponding_indices = indices[index]
|
||||
# for idx in corresponding_indices:
|
||||
# docs_mapping[idx] = doc
|
||||
|
||||
index += 1
|
||||
# index += 1
|
||||
|
||||
# metadata
|
||||
graph.update_metadata()
|
||||
# convert to undirected
|
||||
graph.to_undirected(logging=False)
|
||||
# # metadata
|
||||
# graph.update_metadata()
|
||||
# # convert to undirected
|
||||
# graph.to_undirected(logging=False)
|
||||
|
||||
return graph, docs_mapping
|
||||
# return graph, docs_mapping
|
||||
|
||||
|
||||
# TODO check removal
|
||||
|
||||
@ -1,11 +1,40 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import tomllib
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from pathlib import Path
|
||||
from typing import Any, Final
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathlib import Path
|
||||
from lang_main.search import search_cwd, search_iterative
|
||||
|
||||
_has_py4cyto: bool = True
|
||||
try:
|
||||
import py4cytoscape as p4c
|
||||
except ImportError:
|
||||
_has_py4cyto = False
|
||||
|
||||
# ** external packages config
|
||||
# ** Huggingface Hub caching
|
||||
os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'set'
|
||||
|
||||
# ** py4cytoscape config
|
||||
if _has_py4cyto:
|
||||
p4c.set_summary_logger(False)
|
||||
p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR')
|
||||
p4c.py4cytoscape_logger.detail_logger.removeHandler(
|
||||
p4c.py4cytoscape_logger.detail_handler
|
||||
)
|
||||
p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler())
|
||||
|
||||
# ** lang-main config
|
||||
BASE_FOLDERNAME: Final[str] = 'lang-main'
|
||||
CONFIG_FILENAME: Final[str] = 'lang_main_config.toml'
|
||||
CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml'
|
||||
PREFER_INTERNAL_CONFIG: Final[bool] = False
|
||||
PKG_DIR: Final[Path] = Path(__file__).parent
|
||||
STOP_FOLDER: Final[str] = 'python'
|
||||
|
||||
|
||||
def load_toml_config(
|
||||
@ -14,4 +43,46 @@ def load_toml_config(
|
||||
with open(path_to_toml, 'rb') as f:
|
||||
data = tomllib.load(f)
|
||||
print('Loaded TOML config file successfully.', file=sys.stderr, flush=True)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
# ** load config data: internal/external
|
||||
def get_config_paths(
    root_folder: Path,
    cfg_name: str,
    cyto_stylesheet_name: str,
) -> tuple[Path, Path]:
    """Build the resolved paths of the internal config file and the
    Cytoscape stylesheet, both relative to the given root folder.

    Parameters
    ----------
    root_folder : Path
        folder both names are joined to
    cfg_name : str
        relative name of the config file
    cyto_stylesheet_name : str
        relative name of the Cytoscape stylesheet

    Returns
    -------
    tuple[Path, Path]
        resolved config path, resolved stylesheet path
    """
    internal_cfg, stylesheet = (
        (root_folder / relative_name).resolve()
        for relative_name in (cfg_name, cyto_stylesheet_name)
    )

    return internal_cfg, stylesheet
|
||||
|
||||
|
||||
def load_cfg(
    starting_path: Path,
    glob_pattern: str,
    stop_folder_name: str | None,
    cfg_path_internal: Path,
    prefer_internal_config: bool = False,
) -> dict[str, Any]:
    """Locate a configuration file and load it.

    Parameters
    ----------
    starting_path : Path
        path whose parents are searched if the working directory holds no match
    glob_pattern : str
        pattern used to find the config file
    stop_folder_name : str | None
        forwarded to the search through the parent folders
    cfg_path_internal : Path
        internal config, used when preferred or when no other file was found
    prefer_internal_config : bool, optional
        use the internal config without searching, by default False

    Returns
    -------
    dict[str, Any]
        copy of the loaded configuration
    """
    # look for external config first, if not found use internal one
    candidate: Path | None = (
        cfg_path_internal if prefer_internal_config else search_cwd(glob_pattern)
    )

    if candidate is None:
        candidate = search_iterative(
            starting_path=starting_path,
            glob_pattern=glob_pattern,
            stop_folder_name=stop_folder_name,
        )
    if candidate is None:
        # backup: use internal config
        candidate = cfg_path_internal

    toml_data = load_toml_config(path_to_toml=candidate)

    return toml_data.copy()
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# lang_main: Config file
|
||||
[info]
|
||||
pkg = 'lang_main'
|
||||
pkg = 'lang_main_internal'
|
||||
|
||||
[paths]
|
||||
inputs = './inputs/'
|
||||
|
||||
@ -60,7 +60,7 @@ def load_spacy(
|
||||
model_name: str,
|
||||
) -> SpacyModel:
|
||||
try:
|
||||
spacy_model_obj = importlib.import_module(SPACY_MODEL_NAME)
|
||||
spacy_model_obj = importlib.import_module(model_name)
|
||||
except ModuleNotFoundError:
|
||||
raise LanguageModelNotFoundError(
|
||||
(
|
||||
|
||||
@ -148,14 +148,10 @@ class Pipeline(BasePipeline):
|
||||
) -> None:
|
||||
# init base class
|
||||
super().__init__(name=name, working_dir=working_dir)
|
||||
|
||||
# name of pipeline
|
||||
self.name = name
|
||||
# working directory for pipeline == output path
|
||||
self.working_dir = working_dir
|
||||
# if not self.working_dir.exists():
|
||||
# self.working_dir.mkdir(parents=True)
|
||||
|
||||
# container for actions to perform during pass
|
||||
self.actions_kwargs: list[dict[str, Any]] = []
|
||||
self.save_results: ResultHandling = []
|
||||
@ -192,28 +188,6 @@ class Pipeline(BasePipeline):
|
||||
else:
|
||||
self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__)
|
||||
|
||||
# TODO: add multiple entries by utilising simple add method
|
||||
"""
|
||||
def add_multi(
|
||||
self,
|
||||
action: FunctionType | Sequence[FunctionType],
|
||||
action_kwargs: dict[str, Any] | Sequence[dict[str, Any]],
|
||||
) -> None:
|
||||
|
||||
if isinstance(action, Sequence):
|
||||
if len(action_kwargs) != len(action):
|
||||
raise ValueError(("Sequences for actions and corresponding keyword "
|
||||
"arguments must have the same length."))
|
||||
self.actions.extend(action)
|
||||
self.actions_kwargs.extend(action_kwargs)
|
||||
elif isinstance(action, FunctionType):
|
||||
self.actions.append(action)
|
||||
self.actions_kwargs.append(action_kwargs)
|
||||
else:
|
||||
raise TypeError(("Action must be function or sequence of functions, "
|
||||
f"but is of type >>{type(action)}<<."))
|
||||
"""
|
||||
|
||||
def get_result_path(
|
||||
self,
|
||||
action_idx: int,
|
||||
@ -253,11 +227,7 @@ class Pipeline(BasePipeline):
|
||||
action_idx: int,
|
||||
filename: str | None,
|
||||
) -> None:
|
||||
# target_filename = f'Pipe-{self.name}_Step-{self.curr_proc_idx}_' + filename
|
||||
# target_path = self.working_dir.joinpath(target_filename)
|
||||
# target_path = target_path.with_suffix('.pkl')
|
||||
target_path, _ = self.get_result_path(action_idx, filename)
|
||||
# saving file locally
|
||||
save_pickle(obj=self._intermediate_result, path=target_path)
|
||||
|
||||
@override
|
||||
@ -270,6 +240,7 @@ class Pipeline(BasePipeline):
|
||||
if self.load_results[idx][0]:
|
||||
filename = self.load_results[idx][1]
|
||||
ret = self.load_step(action_idx=idx, filename=filename)
|
||||
self._intermediate_result = ret
|
||||
logger.info(
|
||||
'[No Calculation] Loaded result for action >>%s<< successfully',
|
||||
self.action_names[idx],
|
||||
@ -279,18 +250,12 @@ class Pipeline(BasePipeline):
|
||||
# calculation
|
||||
if idx == 0:
|
||||
args = starting_values
|
||||
# ret = action(*starting_values, **action_kwargs)
|
||||
else:
|
||||
args = ret
|
||||
|
||||
if args is not None:
|
||||
ret = action(*args, **action_kwargs)
|
||||
# elif args is not None:
|
||||
# ret = action(*args)
|
||||
# elif args is None and action_kwargs:
|
||||
# ret = action(**action_kwargs)
|
||||
else:
|
||||
# ret = action()
|
||||
ret = action(**action_kwargs)
|
||||
|
||||
if ret is not None and not isinstance(ret, tuple):
|
||||
|
||||
@ -60,7 +60,7 @@ SPACY_MODEL = m_load.instantiate_model(
|
||||
# ** pipeline configuration
|
||||
# ** target feature preparation
|
||||
def build_base_target_feature_pipe() -> Pipeline:
|
||||
pipe_target_feat = Pipeline(name='TargetFeature', working_dir=SAVE_PATH_FOLDER)
|
||||
pipe_target_feat = Pipeline(name='Target_Feature', working_dir=SAVE_PATH_FOLDER)
|
||||
pipe_target_feat.add(
|
||||
load_raw_data,
|
||||
{
|
||||
@ -185,7 +185,15 @@ def build_tk_graph_render_pipe(
|
||||
) -> Pipeline:
|
||||
# optional dependency: late import
|
||||
# raises exception if necessary modules are not found
|
||||
from lang_main.render import cytoscape as cyto
|
||||
try:
    from lang_main.render import cytoscape as cyto
except ImportError as error:
    # re-raise with an actionable hint and keep the original failure as cause;
    # the trailing space keeps the two sentences apart after concatenation
    raise ImportError(
        (
            'Dependencies for Cytoscape interaction not found. '
            'Install package with optional dependencies.'
        )
    ) from error
|
||||
|
||||
pipe_graph_rendering = Pipeline(
|
||||
name='Graph_Static-Rendering',
|
||||
|
||||
@ -60,7 +60,7 @@ def verify_connection() -> None:
|
||||
"""
|
||||
try:
|
||||
p4c.cytoscape_ping()
|
||||
except CyError as error:
|
||||
except CyError as error: # pragma: no cover
|
||||
logger.error('[CyError] CyREST or Cytoscape version not supported.')
|
||||
raise error
|
||||
except RequestException as error:
|
||||
@ -164,6 +164,7 @@ def verify_table_property(
|
||||
network_name: str = CYTO_BASE_NETWORK_NAME,
|
||||
) -> bool:
|
||||
table = p4c.get_table_columns(table=table_type, network=network_name)
|
||||
# log the retrieved table together with its columns for debugging
logger.debug('Table >>%s<< with columns: %s', table, table.columns)
|
||||
|
||||
return property in table.columns
|
||||
|
||||
@ -174,7 +175,7 @@ def analyse_network(
|
||||
) -> None:
|
||||
node_table = p4c.get_table_columns(table='node', network=network_name)
|
||||
net_analyse_possible: bool = True
|
||||
if len(node_table) < 4:
|
||||
if len(node_table) < 4: # pragma: no cover
|
||||
net_analyse_possible = False
|
||||
|
||||
if net_analyse_possible:
|
||||
@ -186,7 +187,7 @@ def analyse_network(
|
||||
* node_table['BetweennessCentrality']
|
||||
* node_table['stress_norm']
|
||||
)
|
||||
else:
|
||||
else: # pragma: no cover
|
||||
node_table[CYTO_SELECTION_PROPERTY] = 1
|
||||
|
||||
p4c.load_table_data(node_table, data_key_column='name', network=network_name)
|
||||
@ -231,7 +232,7 @@ def export_network_to_image(
|
||||
by default 'A4'
|
||||
"""
|
||||
logger.debug('Exporting image to file...')
|
||||
if not target_folder.exists():
|
||||
if not target_folder.exists(): # pragma: no cover
|
||||
target_folder.mkdir(parents=True)
|
||||
dst_file_pth = (target_folder / filename).with_suffix(f'.{filetype.lower()}')
|
||||
|
||||
@ -252,13 +253,6 @@ def export_network_to_image(
|
||||
export_text_as_font=text_as_font,
|
||||
page_size=pdf_export_page_size,
|
||||
)
|
||||
# TODO remove if Cytoscape >= 3.10.* is running in container
|
||||
# p4c.export_image(
|
||||
# filename=filename,
|
||||
# type=filetype,
|
||||
# network=network_name,
|
||||
# overwrite_file=True,
|
||||
# )
|
||||
logger.debug('Exported image to sandbox.')
|
||||
logger.debug('Transferring image from sandbox to target destination...')
|
||||
sandbox_filename = f'{filename}.{filetype.lower()}'
|
||||
@ -328,6 +322,7 @@ def apply_style_to_network(
|
||||
"""
|
||||
logger.debug('Applying style to network...')
|
||||
styles_avail = cast(list[str], p4c.get_visual_style_names())
|
||||
logger.debug('Available styles: %s', styles_avail)
|
||||
if style_name not in styles_avail:
|
||||
if not pth_to_stylesheet.exists():
|
||||
# existence for standard path verified at import, but not for other
|
||||
@ -348,12 +343,6 @@ def apply_style_to_network(
|
||||
|
||||
p4c.set_visual_style(style_name, network=network_name)
|
||||
# node size mapping, only if needed property is available
|
||||
# TODO check removal
|
||||
# size_prop_available = verify_table_property(
|
||||
# property=node_size_property,
|
||||
# network_name=network_name,
|
||||
# )
|
||||
# if size_prop_available:
|
||||
scheme = p4c.scheme_c_number_continuous(
|
||||
start_value=min_node_size, end_value=max_node_size
|
||||
)
|
||||
@ -365,13 +354,6 @@ def apply_style_to_network(
|
||||
default_number=min_node_size,
|
||||
)
|
||||
p4c.set_node_size_mapping(**node_size_map)
|
||||
# TODO removal
|
||||
# else:
|
||||
# node_table = p4c.get_table_columns(table='node', network=network_name)
|
||||
# nodes_SUID = node_table['SUID'].to_list()
|
||||
# p4c.set_node_size_bypass(nodes_SUID, new_sizes=min_node_size, network=network_name)
|
||||
# p4c.set_visual_style(style_name, network=network_name)
|
||||
# time.sleep(1) # if not waited image export could be without applied style
|
||||
fit_content(network_name=network_name)
|
||||
logger.debug('Style application to network successful.')
|
||||
|
||||
@ -402,7 +384,7 @@ def get_subgraph_node_selection(
|
||||
node_table = p4c.get_table_columns(table='node', network=network_name)
|
||||
node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False)
|
||||
p4c.load_table_data(node_table, data_key_column='name', network=network_name)
|
||||
node_table_choice = node_table.iloc[:num_subgraphs, :]
|
||||
node_table_choice = node_table.iloc[:num_subgraphs]
|
||||
logger.debug('Selection of nodes for subgraph generation successful.')
|
||||
|
||||
return node_table_choice['SUID'].to_list()
|
||||
|
||||
@ -9,8 +9,8 @@ re_parenthesis_1 = re.compile(r'[(]+')
|
||||
re_parenthesis_2 = re.compile(r'[)]+')
|
||||
|
||||
|
||||
@cy_log
|
||||
def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405
|
||||
@cy_log # pragma: no cover
|
||||
def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405 # pragma: no cover
|
||||
"""Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes.
|
||||
|
||||
Any edges selected beforehand are deselected before any new edges are selected
|
||||
|
||||
110
src/lang_main/search.py
Normal file
110
src/lang_main/search.py
Normal file
@ -0,0 +1,110 @@
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def search_cwd(
|
||||
glob_pattern: str,
|
||||
) -> Path | None:
|
||||
"""Searches the current working directory and looks for files
|
||||
matching the glob pattern.
|
||||
Returns the first match encountered.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
glob_pattern : str, optional
|
||||
pattern to look for, first match will be returned
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path if corresponding object was found, None otherwise
|
||||
"""
|
||||
path_found: Path | None = None
|
||||
res = tuple(Path.cwd().glob(glob_pattern))
|
||||
if res:
|
||||
path_found = res[0]
|
||||
|
||||
return path_found
|
||||
|
||||
|
||||
def search_iterative(
|
||||
starting_path: Path,
|
||||
glob_pattern: str,
|
||||
stop_folder_name: str | None = None,
|
||||
) -> Path | None:
|
||||
"""Iteratively searches the parent directories of the starting path
|
||||
and look for files matching the glob pattern. The starting path is not
|
||||
searched, only its parents. Therefore the starting path can also point
|
||||
to a file. The folder in which it is placed in will be searched.
|
||||
Returns the first match encountered.
|
||||
The parent of the stop folder will be searched if it exists.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
starting_path : Path
|
||||
non-inclusive starting path
|
||||
glob_pattern : str, optional
|
||||
pattern to look for, first match will be returned
|
||||
stop_folder_name : str, optional
|
||||
name of the last folder in the directory tree to search, by default None
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path if corresponding object was found, None otherwise
|
||||
"""
|
||||
file_path: Path | None = None
|
||||
stop_folder_reached: bool = False
|
||||
for search_path in starting_path.parents:
|
||||
res = tuple(search_path.glob(glob_pattern))
|
||||
if res:
|
||||
file_path = res[0]
|
||||
break
|
||||
elif stop_folder_reached:
|
||||
break
|
||||
|
||||
if stop_folder_name is not None and search_path.name == stop_folder_name:
|
||||
# library is placed inside a whole python installation for deployment
|
||||
# if this folder is reached, only look up one parent above
|
||||
stop_folder_reached = True
|
||||
|
||||
return file_path
|
||||
|
||||
|
||||
def search_base_path(
|
||||
starting_path: Path,
|
||||
stop_folder_name: str | None = None,
|
||||
) -> Path | None:
|
||||
"""Iteratively searches the parent directories of the starting path
|
||||
and look for folders matching the given name. If a match is encountered,
|
||||
the parent path will be returned.
|
||||
|
||||
Example:
|
||||
starting_path = path/to/start/folder
|
||||
stop_folder_name = 'to'
|
||||
returned path = 'path/'
|
||||
|
||||
Parameters
|
||||
----------
|
||||
starting_path : Path
|
||||
non-inclusive starting path
|
||||
stop_folder_name : str, optional
|
||||
name of the last folder in the directory tree to search, by default None
|
||||
|
||||
Returns
|
||||
-------
|
||||
Path | None
|
||||
Path if corresponding base path was found, None otherwise
|
||||
"""
|
||||
stop_folder_path: Path | None = None
|
||||
base_path: Path | None = None
|
||||
for search_path in starting_path.parents:
|
||||
if stop_folder_name is not None and search_path.name == stop_folder_name:
|
||||
# library is placed inside a whole python installation for deployment
|
||||
# only look up to this folder
|
||||
stop_folder_path = search_path
|
||||
break
|
||||
|
||||
if stop_folder_path is not None:
|
||||
base_path = stop_folder_path.parent
|
||||
|
||||
return base_path
|
||||
@ -1,5 +1,3 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from lang_main import model_loader
|
||||
@ -44,14 +42,14 @@ def test_obtain_relevant_descendants(spacy_model):
|
||||
doc = spacy_model(SENTENCE)
|
||||
sent1 = tuple(doc.sents)[0] # first sentence
|
||||
word1 = sent1[1] # word "ging" (POS:VERB)
|
||||
descendants1 = ('0912393', 'schnell', 'Wiese', 'Menschen')
|
||||
descendants1 = ('ID', '0912393', 'schnell', 'Wiese', 'Menschen')
|
||||
rel_descs = tokens.obtain_relevant_descendants(word1)
|
||||
rel_descs = tuple((token.text for token in rel_descs))
|
||||
assert descendants1 == rel_descs
|
||||
|
||||
sent2 = tuple(doc.sents)[1] # first sentence
|
||||
word2 = sent2[1] # word "konnte" (POS:AUX)
|
||||
descendants2 = ('mit', 'Probleme', 'Tragen', 'Tasche')
|
||||
descendants2 = ('Probleme', 'Tragen', 'Tasche')
|
||||
rel_descs = tokens.obtain_relevant_descendants(word2)
|
||||
rel_descs = tuple((token.text for token in rel_descs))
|
||||
assert descendants2 == rel_descs
|
||||
@ -62,7 +60,7 @@ def test_add_doc_info_to_graph(spacy_model):
|
||||
tk_graph = graphs.TokenGraph()
|
||||
tokens.add_doc_info_to_graph(tk_graph, doc, weight=2)
|
||||
assert len(tk_graph.nodes) == 11
|
||||
assert len(tk_graph.edges) == 17
|
||||
assert len(tk_graph.edges) == 16
|
||||
assert '0912393' in tk_graph.nodes
|
||||
|
||||
|
||||
|
||||
227
tests/pipelines/test_base.py
Normal file
227
tests/pipelines/test_base.py
Normal file
@ -0,0 +1,227 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from lang_main import io
|
||||
from lang_main.errors import (
|
||||
NoPerformableActionError,
|
||||
OutputInPipelineContainerError,
|
||||
WrongActionTypeError,
|
||||
)
|
||||
from lang_main.pipelines import base
|
||||
|
||||
PIPELINE_NAME = 'test'
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def working_dir() -> Path:
    """Provide the ``tests/work_dir`` folder below the current working directory.

    The folder is created on demand. ``parents=True`` lets the fixture work
    when ``tests`` itself is missing, and ``exist_ok=True`` avoids the
    check-then-create race of a separate ``exists()`` test.
    """
    work_dir = Path.cwd() / 'tests/work_dir'
    work_dir.mkdir(parents=True, exist_ok=True)
    return work_dir
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def pipeline_container(working_dir) -> base.PipelineContainer:
    """Fresh, empty ``PipelineContainer`` for every test function."""
    container = base.PipelineContainer(
        name=PIPELINE_NAME,
        working_dir=working_dir,
    )
    return container
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def pipeline(working_dir) -> base.Pipeline:
    """Fresh, empty ``Pipeline`` for every test function."""
    pipe = base.Pipeline(
        name=PIPELINE_NAME,
        working_dir=working_dir,
    )
    return pipe
|
||||
|
||||
|
||||
def test_empty_pipeline_container(pipeline_container, working_dir):
    """A container without actions exposes empty bookkeeping and refuses to prepare a run."""
    assert pipeline_container.name == PIPELINE_NAME
    assert pipeline_container.working_dir == working_dir
    for attr_name in ('actions', 'action_names', 'action_skip'):
        assert len(getattr(pipeline_container, attr_name)) == 0
    assert pipeline_container.curr_proc_idx == 1

    # nothing was added, so preparing a run is expected to fail
    with pytest.raises(NoPerformableActionError):
        pipeline_container.prep_run()

    outcome = pipeline_container.post_run()
    assert outcome is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('skip', [True, False])
def test_pipeline_container_valid(pipeline_container, skip):
    """A single output-free action is registered and only executed when not skipped."""
    test_string = 'test'

    def valid_action():  # pragma: no cover
        nonlocal test_string
        test_string += '_2'

    pipeline_container.add(valid_action, skip=skip)
    for attr_name in ('actions', 'action_names', 'action_skip'):
        assert len(getattr(pipeline_container, attr_name)) == 1

    ret = pipeline_container.run()
    assert pipeline_container.curr_proc_idx == 2
    assert ret is None
    # the action mutates the enclosing string, unless it was skipped
    expected_string = 'test' if skip else 'test_2'
    assert test_string == expected_string

    # preparing another run resets the step counter
    pipeline_container.prep_run()
    assert pipeline_container.curr_proc_idx == 1
|
||||
|
||||
|
||||
def test_pipeline_container_invalid_action(pipeline_container):
    """Non-callables are rejected on ``add``; actions returning a value fail on ``run``."""
    test_string = 'test'

    def invalid_action():
        nonlocal test_string
        test_string += '_2'
        new = 'ret'
        return new

    # a plain string is not a valid action
    with pytest.raises(WrongActionTypeError):
        pipeline_container.add(test_string, skip=False)

    # adding works, but the returned value is detected when running
    pipeline_container.add(invalid_action, skip=False)
    with pytest.raises(OutputInPipelineContainerError):
        pipeline_container.run()
|
||||
|
||||
|
||||
def test_empty_pipeline(pipeline, working_dir):
    """A pipeline without actions exposes empty bookkeeping and refuses to prepare a run."""
    assert pipeline.name == PIPELINE_NAME
    assert pipeline.working_dir == working_dir
    empty_attrs = (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    )
    for attr_name in empty_attrs:
        assert len(getattr(pipeline, attr_name)) == 0
    assert pipeline.curr_proc_idx == 1
    assert pipeline._intermediate_result is None

    # nothing was added, so preparing a run is expected to fail
    with pytest.raises(NoPerformableActionError):
        pipeline.prep_run()

    outcome = pipeline.post_run()
    assert outcome is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize('alter_content', [True, False])
def test_pipeline_valid(pipeline, alter_content):
    """Single-action pipeline: registration, result paths, running and loading a step."""
    start_value = 'test'

    # action preparation
    def valid_action(string, add_content=False):
        if add_content:
            string += '_2'
        return string

    pipeline.add(valid_action, {'add_content': alter_content})
    registered_attrs = (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    )
    for attr_name in registered_attrs:
        assert len(getattr(pipeline, attr_name)) == 1
    assert pipeline.save_results[0] == (False, None)
    assert pipeline.load_results[0] == (False, None)

    # filenames and saving/loading
    default_stem = f'Pipe-{pipeline.name}_Step-{pipeline.curr_proc_idx}_valid_action'
    expected_path = (pipeline.working_dir / default_stem).with_suffix('.pkl')
    result_path, action_name = pipeline.get_result_path(0, filename=None)
    assert result_path == expected_path
    assert action_name == 'valid_action'

    custom_name = 'test'
    result_path, action_name = pipeline.get_result_path(0, filename=custom_name)
    expected_path = (pipeline.working_dir / custom_name).with_suffix('.pkl')
    assert result_path == expected_path
    assert action_name == 'valid_action'

    # load non-existing files
    with pytest.raises(FileNotFoundError):
        pipeline.load_step(0, 'non_existing')

    # running
    ret = pipeline.run(starting_values=(start_value,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 2
    assert ret is not None
    expected_first = 'test_2' if alter_content else 'test'
    assert ret[0] == expected_first

    pipeline.prep_run()
    assert pipeline.curr_proc_idx == 1

    # load existing files
    loaded_res = pipeline.load_step(0, None)
    assert loaded_res is not None
    assert isinstance(loaded_res, tuple)
    assert loaded_res[0] == ret[0]
|
||||
|
||||
|
||||
def test_pipeline_valid_action_load(pipeline, working_dir):
    """Pipeline step registered with ``load_result=True``; non-tuple pickles are rejected."""
    start_value = 'test'

    # action preparation
    def valid_action(string, add_content=False):
        if add_content:
            string += '_2'
        return string

    pipeline.add(valid_action, {'add_content': False}, load_result=True)
    registered_attrs = (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    )
    for attr_name in registered_attrs:
        assert len(getattr(pipeline, attr_name)) == 1
    assert pipeline.save_results[0] == (False, None)
    assert pipeline.load_results[0] == (True, None)

    ret = pipeline.run(starting_values=(start_value,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 2
    assert ret is not None

    # load non-tuple result: a bare string is pickled and must be refused
    filename = 'non_tuple.pkl'
    io.save_pickle(start_value, working_dir / filename)
    with pytest.raises(TypeError):
        pipeline.load_step(0, filename)
|
||||
|
||||
|
||||
def test_pipeline_multiple_actions(pipeline):
    """Two chained actions: the output of the first is fed into the second."""
    start_value = 'test'

    # action preparation
    def valid_action(string, add_content=True):
        if add_content:
            string += '_2'
        return string

    def valid_action_2(string, add_content=True):
        if add_content:
            string += '_3'
        return string

    pipeline.add(valid_action, {'add_content': True})
    pipeline.add(valid_action_2)
    registered_attrs = (
        'actions',
        'action_names',
        'actions_kwargs',
        'save_results',
        'load_results',
    )
    for attr_name in registered_attrs:
        assert len(getattr(pipeline, attr_name)) == 2
    assert pipeline.save_results[1] == (False, None)
    assert pipeline.load_results[1] == (False, None)

    ret = pipeline.run(starting_values=(start_value,))
    assert isinstance(ret, tuple)
    assert pipeline._intermediate_result == ret
    assert pipeline.curr_proc_idx == 3
    assert ret is not None
    # both suffixes were appended in registration order
    assert ret[0] == 'test_2_3'
|
||||
52
tests/pipelines/test_predefined.py
Normal file
52
tests/pipelines/test_predefined.py
Normal file
@ -0,0 +1,52 @@
|
||||
import pytest
|
||||
|
||||
from lang_main.pipelines import predefined as pre
|
||||
from lang_main.types import EntryPoints
|
||||
|
||||
|
||||
def test_build_base_target_feature_pipe():
    """The target feature pipeline has the expected name and five actions."""
    built = pre.build_base_target_feature_pipe()
    summary = (built.name, len(built.actions))
    assert summary == ('Target_Feature', 5)
|
||||
|
||||
|
||||
def test_build_merge_duplicates_pipe():
    """The duplicate merging pipeline has the expected name and two actions."""
    built = pre.build_merge_duplicates_pipe()
    summary = (built.name, len(built.actions))
    assert summary == ('Merge_Duplicates', 2)
|
||||
|
||||
|
||||
def test_build_tk_graph_pipe():
    """The token analysis pipeline has the expected name and one action."""
    built = pre.build_tk_graph_pipe()
    summary = (built.name, len(built.actions))
    assert summary == ('Token_Analysis', 1)
|
||||
|
||||
|
||||
def test_build_tk_graph_post_pipe():
    """The graph postprocessing pipeline has the expected name and three actions."""
    built = pre.build_tk_graph_post_pipe()
    summary = (built.name, len(built.actions))
    assert summary == ('Graph_Postprocessing', 3)
|
||||
|
||||
|
||||
def test_build_tk_graph_rescaling_pipe():
    """The graph rescaling pipeline has the expected name and two actions."""
    built = pre.build_tk_graph_rescaling_pipe(
        save_result=False,
        exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED,
    )
    summary = (built.name, len(built.actions))
    assert summary == ('Graph_Rescaling', 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('with_subgraphs', [True, False])
def test_build_tk_graph_render_pipe(with_subgraphs):
    """The static rendering pipeline has six actions with subgraphs, four without."""
    built = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs)
    assert built.name == 'Graph_Static-Rendering'
    expected_action_count = 6 if with_subgraphs else 4
    assert len(built.actions) == expected_action_count
|
||||
|
||||
|
||||
def test_build_timeline_pipe():
    """The timeline analysis pipeline has the expected name and six actions."""
    built = pre.build_timeline_pipe()
    summary = (built.name, len(built.actions))
    assert summary == ('Timeline_Analysis', 6)
|
||||
0
tests/render/__init__.py
Normal file
0
tests/render/__init__.py
Normal file
227
tests/render/test_cytoscape.py
Normal file
227
tests/render/test_cytoscape.py
Normal file
@ -0,0 +1,227 @@
|
||||
"""Tests for Cytoscape API requests.

The tests cover two situations: if a Cytoscape server answers the ping at
import time, the real API calls are exercised; if no Cytoscape instance is
running, the tests only check that the calls fail with a request error.
The validation of the correct behaviour can only be done with a running instance,
especially for layout and formatting tasks. A static test suite is not helpful in
this case.
"""
|
||||
|
||||
import py4cytoscape as p4c
|
||||
import pytest
|
||||
from py4cytoscape.exceptions import CyError
|
||||
from requests.exceptions import RequestException
|
||||
|
||||
from lang_main.constants import CYTO_BASE_NETWORK_NAME, CYTO_SELECTION_PROPERTY
|
||||
from lang_main.errors import GraphRenderError
|
||||
from lang_main.render import cytoscape as cyto
|
||||
|
||||
# Probe once at import time whether a Cytoscape instance answers the ping;
# the result decides which branch the tests below take.
_cyto_available: bool
try:
    p4c.cytoscape_ping()
except RequestException:
    _cyto_available = False
else:
    _cyto_available = True
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def avail() -> bool:
    """Expose the module-level ping result (``_cyto_available``) to the tests."""
    return _cyto_available
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_verify_connection(avail):
    """Connection check passes with a server and raises a request error without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.verify_connection()
        return

    cyto.verify_connection()
|
||||
|
||||
|
||||
def test_verify_graph_render_size(data_tk_graph_built):
    """No limits pass; a zero node or a zero edge limit raises ``GraphRenderError``."""
    cyto.verify_graph_render_size(
        data_tk_graph_built,
        max_node_count=None,
        max_edge_count=None,
    )

    # first the node limit alone, then the edge limit alone
    violating_limits = (
        {'max_node_count': 0, 'max_edge_count': None},
        {'max_node_count': None, 'max_edge_count': 0},
    )
    for limits in violating_limits:
        with pytest.raises(GraphRenderError):
            cyto.verify_graph_render_size(data_tk_graph_built, **limits)
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_change_default_layout(avail):
    """Changing the default layout works with a server and raises a request error without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.change_default_layout()
        return

    cyto.change_default_layout()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_import_to_cytoscape(avail, data_tk_graph_built):
    """Graph import works with a server and raises a request error without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.import_to_cytoscape(data_tk_graph_built)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_verify_table_property(avail, data_tk_graph_built):
    """Property lookup in the node table: unknown names are absent, known ones present."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.verify_table_property(property='TEST', table_type='node')
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    expectations = (
        ('TEST', False),
        ('name', True),
        ('degree_weighted', True),
    )
    for property_name, should_exist in expectations:
        contained = cyto.verify_table_property(
            property=property_name, table_type='node'
        )
        assert bool(contained) == should_exist
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_analyse_network(avail, data_tk_graph_built):
    """Network analysis adds the selection property to the node table.

    Without a reachable Cytoscape instance the call is expected to fail with
    a ``RequestException``.
    """
    if avail:
        cyto.import_to_cytoscape(data_tk_graph_built)
        cyto.analyse_network()
        contained = cyto.verify_table_property(property='name', table_type='node')
        assert contained
        contained = cyto.verify_table_property(
            property=CYTO_SELECTION_PROPERTY, table_type='node'
        )
        assert contained
    else:
        with pytest.raises(RequestException):
            # called without arguments, exactly as in the online branch; the
            # graph is handed over via import_to_cytoscape, not analyse_network
            cyto.analyse_network()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_reset_current_network_to_base(avail):
    """Resetting to the base network works with a server and raises a request error without one."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.reset_current_network_to_base()
        return

    cyto.reset_current_network_to_base()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_fit_content(avail, data_tk_graph_built):
    """Fitting the view works after an import and raises a request error without a server."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.fit_content()
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.fit_content()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_export_network_to_image(avail, tmp_path, data_tk_graph_built):
    """Image export writes ``<filename>.svg`` into the target folder."""
    filename = 'test_export'
    if not avail:
        with pytest.raises(RequestException):
            cyto.export_network_to_image(filename=filename, target_folder=tmp_path)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.export_network_to_image(filename=filename, target_folder=tmp_path)
    exported = tmp_path / f'{filename}.svg'
    assert exported.exists()
    assert exported.name == f'{filename}.svg'
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_layout_network(avail, data_tk_graph_built):
    """Applying the layout works after an import and raises a request error without a server."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.layout_network()
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.layout_network()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_apply_style_to_network(avail, data_tk_graph_built, tmp_path):
    """Style application: missing stylesheet file, unknown style name, default style."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.apply_style_to_network()
        return

    unknown_style = 'testing'
    missing_stylesheet = tmp_path / 'test.xml'
    with pytest.raises(FileNotFoundError):
        cyto.apply_style_to_network(
            style_name=unknown_style,
            pth_to_stylesheet=missing_stylesheet,
        )

    cyto.import_to_cytoscape(data_tk_graph_built)
    # not existing: so transfer necessary,
    # but fails nevertheless because style is imported
    # using the name provided by this style configuration
    with pytest.raises(CyError):
        cyto.apply_style_to_network(style_name=unknown_style)
    cyto.apply_style_to_network()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_get_subgraph_node_selection(avail, data_tk_graph_built):
    """After import and analysis, the node selection for subgraphs is not empty."""
    num_subgraphs = 2
    if not avail:
        with pytest.raises(RequestException):
            cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    cyto.analyse_network()
    selected_suids = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
    assert len(selected_suids) > 0
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_select_neighbours_of_node(avail, data_tk_graph_built):
    """Neighbour selection runs for the first node of the subgraph selection."""
    num_subgraphs = 2
    if not avail:
        with pytest.raises(RequestException):
            cyto.select_neighbours_of_node(123, neighbour_iter_depth=2)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected_suids = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs)
    assert len(selected_suids) > 0
    first_suid = selected_suids[0]
    cyto.select_neighbours_of_node(first_suid, neighbour_iter_depth=2)
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_make_subnetwork(avail, data_tk_graph_built, tmp_path):
    """Creating a subnetwork adds a network and exports its image as SVG."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected_suids = cyto.get_subgraph_node_selection(num_subgraphs=2)
    assert len(selected_suids) > 0
    cyto.select_neighbours_of_node(selected_suids[0], neighbour_iter_depth=2)
    cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True)

    subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1'
    # base network plus at least the new subnetwork
    assert len(p4c.get_network_list()) > 1
    exported_image = (tmp_path / subnetwork_name).with_suffix('.svg')
    assert exported_image.exists()
|
||||
|
||||
|
||||
@pytest.mark.cyto
def test_build_subnetworks(avail, data_tk_graph_built, tmp_path):
    """Building subnetworks from a node selection adds a network and exports an SVG."""
    if not avail:
        with pytest.raises(RequestException):
            cyto.build_subnetworks([123], export_image=True, target_folder=tmp_path)
        return

    cyto.import_to_cytoscape(data_tk_graph_built)
    selected_suids = cyto.get_subgraph_node_selection(num_subgraphs=1)
    assert len(selected_suids) > 0
    cyto.build_subnetworks(selected_suids, export_image=True, target_folder=tmp_path)

    subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1'
    # base network plus at least the new subnetwork
    assert len(p4c.get_network_list()) > 1
    exported_image = (tmp_path / subnetwork_name).with_suffix('.svg')
    assert exported_image.exists()
|
||||
@ -1,7 +1,64 @@
|
||||
from lang_main import config, pkg_dir
|
||||
import sys
|
||||
from importlib import reload
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from lang_main import config
|
||||
|
||||
|
||||
def test_p4c_dependency():
    """The config module flags a missing ``py4cytoscape`` installation.

    The module is reloaded while ``py4cytoscape`` is masked in ``sys.modules``
    and afterwards reloaded once more, so that the flag does not stay False
    for tests that run later in the same session.
    """
    assert config._has_py4cyto
    try:
        # a None entry in sys.modules makes the import fail
        with patch.dict(sys.modules, {'py4cytoscape': None}):
            reload(sys.modules['lang_main.config'])
            assert not config._has_py4cyto
    finally:
        # restore the original module state for subsequent tests
        reload(sys.modules['lang_main.config'])
|
||||
|
||||
|
||||
def test_load_config():
    """The packaged TOML configuration can be loaded and identifies itself as internal."""
    # path is derived from the config module's package directory constant
    toml_path = config.PKG_DIR / 'lang_main_config.toml'
    loaded_cfg = config.load_toml_config(toml_path)
    assert loaded_cfg['info']['pkg'] == 'lang_main_internal'
|
||||
|
||||
|
||||
def test_get_config_path():
    """``get_config_paths`` returns the resolved config and stylesheet paths of the package."""
    root = config.PKG_DIR
    cfg_name = config.CONFIG_FILENAME
    stylesheet_name = config.CYTO_STYLESHEET_FILENAME

    expected_cfg = (root / cfg_name).resolve()
    expected_stylesheet = (root / stylesheet_name).resolve()

    found_cfg, found_stylesheet = config.get_config_paths(
        root_folder=root,
        cfg_name=cfg_name,
        cyto_stylesheet_name=stylesheet_name,
    )
    assert found_cfg == expected_cfg
    assert found_stylesheet == expected_stylesheet
|
||||
|
||||
|
||||
def test_load_cfg(monkeypatch, tmp_path):
    """``load_cfg`` yields the internal or the external config depending on the preference flag."""
    # current working directory is redirected to an empty temporary folder
    monkeypatch.setattr(Path, 'cwd', lambda: tmp_path)
    root = config.PKG_DIR
    cfg_name = config.CONFIG_FILENAME
    internal_cfg_path = (root / cfg_name).resolve()

    ref_config = config.load_toml_config(internal_cfg_path)
    assert ref_config['info']['pkg'] == 'lang_main_internal'

    shared_kwargs = {
        'starting_path': root,
        'glob_pattern': cfg_name,
        'stop_folder_name': config.STOP_FOLDER,
        'cfg_path_internal': internal_cfg_path,
    }
    # (prefer internal config, expected package identifier)
    cases = (
        (True, 'lang_main_internal'),
        (False, 'lang_main'),
    )
    for prefer_internal, expected_pkg in cases:
        loaded_cfg = config.load_cfg(
            prefer_internal_config=prefer_internal,
            **shared_kwargs,
        )
        assert loaded_cfg['info']['pkg'] == expected_pkg
|
||||
|
||||
@ -15,6 +15,9 @@ def test_create_saving_folder(tmp_path, overwrite):
|
||||
io.create_saving_folder(target_dir, overwrite_existing=overwrite)
|
||||
assert target_dir.exists()
|
||||
assert target_dir.is_dir()
|
||||
io.create_saving_folder(str(target_dir), overwrite_existing=overwrite)
|
||||
assert target_dir.exists()
|
||||
assert target_dir.is_dir()
|
||||
|
||||
|
||||
def test_save_load(tmp_path):
|
||||
|
||||
@ -11,6 +11,7 @@ from lang_main.constants import (
|
||||
STFRDeviceTypes,
|
||||
STFRModelTypes,
|
||||
)
|
||||
from lang_main.errors import LanguageModelNotFoundError
|
||||
from lang_main.types import LanguageModels
|
||||
|
||||
|
||||
@ -62,10 +63,7 @@ def test_load_sentence_transformer(
|
||||
],
|
||||
)
|
||||
@pytest.mark.mload
|
||||
def test_load_sentence_transformer_onnx(
|
||||
model_name,
|
||||
similarity_func,
|
||||
) -> None:
|
||||
def test_load_sentence_transformer_onnx(model_name, similarity_func) -> None:
|
||||
model = model_loader.load_sentence_transformer(
|
||||
model_name=model_name,
|
||||
similarity_func=similarity_func,
|
||||
@ -86,15 +84,19 @@ def test_load_sentence_transformer_onnx(
|
||||
],
|
||||
)
|
||||
@pytest.mark.mload
|
||||
def test_load_spacy_model(
|
||||
model_name,
|
||||
):
|
||||
def test_load_spacy_model(model_name):
|
||||
model = model_loader.load_spacy(
|
||||
model_name=model_name,
|
||||
)
|
||||
assert isinstance(model, Language)
|
||||
|
||||
|
||||
def test_load_spacy_model_fail():
    """Loading an unknown spaCy model name raises ``LanguageModelNotFoundError``."""
    model_name = 'not_existing'
    with pytest.raises(LanguageModelNotFoundError):
        # the return value is irrelevant because the call is expected to raise
        model_loader.load_spacy(model_name)
|
||||
|
||||
|
||||
@pytest.mark.mload
|
||||
def test_instantiate_spacy_model():
|
||||
model = model_loader.instantiate_model(
|
||||
|
||||
64
tests/test_search.py
Normal file
64
tests/test_search.py
Normal file
@ -0,0 +1,64 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from lang_main import search
|
||||
|
||||
FILE_SEARCH = 'test.txt'
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def base_folder(tmp_path_factory) -> Path:
    """Create the nested folders ``path/to/base/folder`` in a temporary directory
    and return the innermost one."""
    tmp_root = tmp_path_factory.mktemp('search')
    innermost = tmp_root / 'path/to/base/folder/'
    innermost.mkdir(parents=True, exist_ok=True)

    return innermost
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def target_file_pth(base_folder) -> Path:
    """Write the search target file three levels above ``base_folder`` and return its path."""
    # place in folder 'path' of TMP path
    target_file = base_folder.parents[2] / FILE_SEARCH
    target_file.write_text('TEST')

    return target_file
|
||||
|
||||
|
||||
def test_search_base_path(base_folder):
    """Unknown stop folders yield None; a known one yields its parent directory."""
    # '123' should not exist among the ancestors
    missing = search.search_base_path(base_folder, stop_folder_name='123')
    assert missing is None

    located = search.search_base_path(base_folder, stop_folder_name='to')
    assert located is not None
    assert located.name == 'path'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('stop_folder_name', ['to', 'base', None])
def test_search_iterative(base_folder, target_file_pth, stop_folder_name):
    """The file in 'path' is found unless the search stops at 'base'."""
    # target in parent of 'to': 'path'
    found = search.search_iterative(base_folder, FILE_SEARCH, stop_folder_name)
    if stop_folder_name == 'base':
        assert found is None
    elif stop_folder_name == 'to' or stop_folder_name is None:
        assert found is not None
        assert found.name == FILE_SEARCH
        assert found == target_file_pth
|
||||
|
||||
|
||||
def test_search_cwd(monkeypatch, base_folder, target_file_pth):
    """``search_cwd`` only finds the file when the working directory contains it."""
    # working directory without the target file
    monkeypatch.setattr(Path, 'cwd', lambda: base_folder)
    assert Path.cwd() == base_folder
    assert search.search_cwd(FILE_SEARCH) is None

    # working directory that holds the target file
    containing_folder = target_file_pth.parent
    monkeypatch.setattr(Path, 'cwd', lambda: containing_folder)
    assert Path.cwd() == containing_folder
    found = search.search_cwd(FILE_SEARCH)
    assert found is not None
    assert found == target_file_pth
|
||||
BIN
tests/work_dir/Pipe-test_Step-1_valid_action.pkl
Normal file
BIN
tests/work_dir/Pipe-test_Step-1_valid_action.pkl
Normal file
Binary file not shown.
BIN
tests/work_dir/Pipe-test_Step-2_valid_action_2.pkl
Normal file
BIN
tests/work_dir/Pipe-test_Step-2_valid_action_2.pkl
Normal file
Binary file not shown.
BIN
tests/work_dir/non_tuple.pkl
Normal file
BIN
tests/work_dir/non_tuple.pkl
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user