diff --git a/README.md b/README.md index 051e128..fadfb53 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ # lang-main + +Alpha release stage, no documentation yet \ No newline at end of file diff --git a/build.ps1 b/build.ps1 new file mode 100644 index 0000000..edb0a24 --- /dev/null +++ b/build.ps1 @@ -0,0 +1 @@ +pdm build -d build/ \ No newline at end of file diff --git a/cytoscape_docker/Dockerfile b/cytoscape_docker/Dockerfile index 8df3fe6..10e85cd 100644 --- a/cytoscape_docker/Dockerfile +++ b/cytoscape_docker/Dockerfile @@ -5,7 +5,7 @@ FROM ubuntu:22.04 # environment variables -ENV CYTOSCAPE_VERSION=3.10.2 +ENV CYTOSCAPE_VERSION=3.10.3 ENV VIRTUAL_SCREEN_WIDTH=1920 ENV VIRTUAL_SCREEN_HEIGHT=1080 ENV CYREST_PORT=1234 diff --git a/cytoscape_docker/start.ps1 b/cytoscape_docker/start.ps1 new file mode 100644 index 0000000..d0df03a --- /dev/null +++ b/cytoscape_docker/start.ps1 @@ -0,0 +1,3 @@ +$container_name = "cyrest" + +docker start $container_name \ No newline at end of file diff --git a/cytoscape_docker/stop.ps1 b/cytoscape_docker/stop.ps1 new file mode 100644 index 0000000..3cb4ab3 --- /dev/null +++ b/cytoscape_docker/stop.ps1 @@ -0,0 +1,3 @@ +$container_name = "cyrest" + +docker stop $container_name \ No newline at end of file diff --git a/cytoscape_docker/update.ps1 b/cytoscape_docker/update.ps1 new file mode 100644 index 0000000..6565c58 --- /dev/null +++ b/cytoscape_docker/update.ps1 @@ -0,0 +1,12 @@ +param( + [Parameter(Mandatory=$true)]$old_cytoscape_version, + [Parameter(Mandatory=$true)]$new_cytoscape_version +) +$container_name = "cyrest" + +docker build -t snoringsloth/cytoscape:$new_cytoscape_version -t snoringsloth/cytoscape:latest .
+docker push snoringsloth/cytoscape:latest +docker push snoringsloth/cytoscape:$new_cytoscape_version +docker rm $container_name +docker image rm snoringsloth/cytoscape:$old_cytoscape_version +docker create -p 1234:1234 --name $container_name snoringsloth/cytoscape:latest \ No newline at end of file diff --git a/pdm.lock b/pdm.lock index 815a3e3..30dc78b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "cytoscape", "dash", "dev", "notebooks", "plot", "spacy-lg", "spacy-md", "spacy-sm", "trials"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:3fda7527ac9298ec38cef4c36dc495defec823f631affe62daf9aedd3611000a" +content_hash = "sha256:bd78b1a41d8bc73b5112c999a6fc6cfcb512e7b9ef6f6bf437a9363df9296961" [[metadata.targets]] requires_python = ">=3.11" @@ -1015,6 +1015,17 @@ files = [ {file = "evaluate-0.4.3.tar.gz", hash = "sha256:3a5700cf83aabee9549264e1e5666f116367c61dbd4d38352015e859a5e2098d"}, ] +[[package]] +name = "execnet" +version = "2.1.1" +requires_python = ">=3.8" +summary = "execnet: rapid multi-Python deployment" +groups = ["dev"] +files = [ + {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, + {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, +] + [[package]] name = "executing" version = "2.1.0" @@ -3124,6 +3135,21 @@ files = [ {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, ] +[[package]] +name = "pytest-xdist" +version = "3.6.1" +requires_python = ">=3.8" +summary = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +groups = ["dev"] +dependencies = [ + "execnet>=2.1", + "pytest>=7.0.0", +] +files = [ + {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, + {file = 
"pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" diff --git a/pyproject.toml b/pyproject.toml index f40a111..14766f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "lang-main" -version = "0.1.0dev1" -description = "Several tools to analyse maintenance data with strong focus on language processing" +version = "0.1.0a1" +description = "Several tools to analyse TOM's data with strong focus on language processing" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, ] @@ -54,6 +54,9 @@ build-backend = "pdm.backend" [tool.pdm] distribution = true +[tool.pdm.build] +package-dir = "src" + [tool.pdm.dev-dependencies] notebooks = [ "jupyterlab>=4.2.0", @@ -66,13 +69,6 @@ notebooks = [ # --config-setting="--global-option=build_ext" --config-setting="--global-option=-IC:\Program Files\Graphviz\include" --config-setting="--global-option=-LC:\Program Files\Graphviz\lib" trials = [ ] -dev = [ - "cython>=3.0.10", - "openpyxl>=3.1.5", - "seaborn>=0.13.2", - "pytest>=8.3.3", - "pytest-cov>=6.0.0", -] [tool.ruff] line-length = 94 @@ -103,6 +99,7 @@ filterwarnings = [ ] markers = [ "mload: marks tests with loading of language models (deselect with '-m \"not mload\"')", + "cyto: marks tests which interact with Cytoscape (deselect with '-m \"not cyto\"')", ] log_cli = true @@ -120,7 +117,22 @@ exclude_also = [ "@overload", "if logging", "if TYPE_CHECKING", + "@pytest.fixture", + "if __name__ == __main__:", ] [tool.coverage.html] -directory = "reports/coverage" \ No newline at end of file +directory = "reports/coverage" +[dependency-groups] +dev = [ + "cython>=3.0.10", + "openpyxl>=3.1.5", + "seaborn>=0.13.2", + "pytest>=8.3.3", + "pytest-cov>=6.0.0", + "pytest-xdist>=3.6.1", +] +notebooks = [ + "jupyterlab>=4.2.0", + "ipywidgets>=8.1.2", +] \ No newline at end of file diff --git 
a/run_tests.ps1 b/run_tests.ps1 new file mode 100644 index 0000000..17271f8 --- /dev/null +++ b/run_tests.ps1 @@ -0,0 +1,8 @@ +pdm run pytest --cov -n 4 +# run docker desktop +. "C:\Program Files\Docker\Docker\Docker Desktop.exe" +docker start cyrest +# run Cytoscape tests in single process +pdm run coverage run -a -m pytest -m "cyto" +docker stop cyrest +pdm run coverage html \ No newline at end of file diff --git a/src/lang_main/__init__.py b/src/lang_main/__init__.py index d89379e..0fd67a0 100644 --- a/src/lang_main/__init__.py +++ b/src/lang_main/__init__.py @@ -1,177 +1,32 @@ -import logging -import os from pathlib import Path from typing import Any, Final -from lang_main.config import load_toml_config - -_has_py4cyto: bool = True -try: - import py4cytoscape as p4c -except ImportError: - _has_py4cyto = False - -# ** external packages config -# ** Huggingface Hub caching -os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'set' - -# ** py4cytoscape config -if _has_py4cyto: - p4c.set_summary_logger(False) - p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR') - p4c.py4cytoscape_logger.detail_logger.removeHandler( - p4c.py4cytoscape_logger.detail_handler - ) - p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler()) +from lang_main.config import ( + BASE_FOLDERNAME, + CONFIG_FILENAME, + CYTO_STYLESHEET_FILENAME, + PKG_DIR, + PREFER_INTERNAL_CONFIG, + STOP_FOLDER, + get_config_paths, + load_cfg, +) +from lang_main.search import search_base_path # ** lang-main config -BASE_FOLDERNAME: Final[str] = 'lang-main' -CONFIG_FILENAME: Final[str] = 'lang_main_config.toml' -CYTO_STYLESHEET_FILENAME: Final[str] = r'cytoscape_config/lang_main.xml' -PREFER_INTERNAL_CONFIG: Final[bool] = False -pkg_dir = Path(__file__).parent -cfg_path_internal = (pkg_dir / CONFIG_FILENAME).resolve() -cyto_stylesheet_path = (pkg_dir / CYTO_STYLESHEET_FILENAME).resolve() +cfg_path_internal, cyto_stylesheet_path = get_config_paths( + PKG_DIR, CONFIG_FILENAME, 
CYTO_STYLESHEET_FILENAME +) -# ** load config data: internal/external -# look for external config first, if not found use internal one -def search_cwd( - glob_pattern: str = CONFIG_FILENAME, -) -> Path | None: - """Searches the current working directory and looks for files - matching the glob pattern. - Returns the first match encountered. - - Parameters - ---------- - glob_pattern : str, optional - pattern to look for, first match will be returned, - by default CONFIG_FILENAME - - Returns - ------- - Path | None - Path if corresponding object was found, None otherwise - """ - cfg_path: Path | None = None - res = tuple(Path.cwd().glob(glob_pattern)) - if res: - cfg_path = res[0] - - return cfg_path - - -def search_iterative( - starting_path: Path, - glob_pattern: str = CONFIG_FILENAME, - stop_folder_name: str | None = None, -) -> Path | None: - """Iteratively searches the parent directories of the starting path - and look for files matching the glob pattern. The starting path is not - searched, only its parents. Therefore the starting path can also point - to a file. The folder in which it is placed in will be searched. - Returns the first match encountered. 
- - Parameters - ---------- - starting_path : Path - non-inclusive starting path - glob_pattern : str, optional - pattern to look for, first match will be returned, - by default CONFIG_FILENAME - stop_folder_name : str, optional - name of the last folder in the directory tree to search, by default None - - Returns - ------- - Path | None - Path if corresponding object was found, None otherwise - """ - file_path: Path | None = None - stop_folder_reached: bool = False - for it in range(len(starting_path.parents)): - search_path = starting_path.parents[it] # do not look in library folder - res = tuple(search_path.glob(glob_pattern)) - if res: - file_path = res[0] - break - elif stop_folder_reached: - break - - if stop_folder_name is not None and search_path.name == stop_folder_name: - # library is placed inside a whole python installation for deployment - # if this folder is reached, only look up one parent above - stop_folder_reached = True - - return file_path - - -def search_base_path( - starting_path: Path, - stop_folder_name: str | None = None, -) -> Path | None: - """Iteratively searches the parent directories of the starting path - and look for folders matching the given name. If a match is encountered, - the parent path will be returned. 
- - Example: - starting_path = path/to/start/folder - stop_folder_name = 'to' - returned path = 'path/' - - Parameters - ---------- - starting_path : Path - non-inclusive starting path - stop_folder_name : str, optional - name of the last folder in the directory tree to search, by default None - - Returns - ------- - Path | None - Path if corresponding base path was found, None otherwise - """ - stop_folder_path: Path | None = None - base_path: Path | None = None - for it in range(len(starting_path.parents)): - search_path = starting_path.parents[it] # do not look in library folder - if stop_folder_name is not None and search_path.name == stop_folder_name: - # library is placed inside a whole python installation for deployment - # only look up to this folder - stop_folder_path = search_path - break - - if stop_folder_path is not None: - base_path = stop_folder_path.parent - - return base_path - - -def load_cfg() -> dict[str, Any]: - cfg_path: Path | None - if PREFER_INTERNAL_CONFIG: - cfg_path = cfg_path_internal - else: - cfg_path = search_cwd(glob_pattern=CONFIG_FILENAME) - - if cfg_path is None: - cfg_path = search_iterative( - starting_path=pkg_dir, - glob_pattern=CONFIG_FILENAME, - stop_folder_name='python', - ) - # backup: use internal config - if cfg_path is None: - cfg_path = cfg_path_internal - - config = load_toml_config(path_to_toml=cfg_path) - - return config.copy() - - -CONFIG: Final[dict[str, Any]] = load_cfg() -base_parent_path = search_base_path(pkg_dir, stop_folder_name=BASE_FOLDERNAME) +CONFIG: Final[dict[str, Any]] = load_cfg( + starting_path=PKG_DIR, + glob_pattern=CONFIG_FILENAME, + stop_folder_name=STOP_FOLDER, + cfg_path_internal=cfg_path_internal, + prefer_internal_config=PREFER_INTERNAL_CONFIG, +) +base_parent_path = search_base_path(PKG_DIR, stop_folder_name=BASE_FOLDERNAME) if base_parent_path is None: raise FileNotFoundError('Could not resolve base path of library') BASE_PATH: Final[Path] = base_parent_path @@ -185,11 +40,3 @@ if not 
cyto_stylesheet_path.exists(): ) CYTO_PATH_STYLESHEET: Final[Path] = cyto_stylesheet_path - - -# TODO check removal -# append Graphviz binary folder to system path if not already contained -# if sys.platform == 'win32': -# path = Path(r'C:\Program Files\Graphviz\bin') -# if path.is_dir() and str(path).lower() not in os.environ['PATH'].lower(): -# os.environ['PATH'] += f';{path}' diff --git a/src/lang_main/analysis/timeline.py b/src/lang_main/analysis/timeline.py index d5f582d..0c8c0cd 100644 --- a/src/lang_main/analysis/timeline.py +++ b/src/lang_main/analysis/timeline.py @@ -3,7 +3,7 @@ from typing import cast from pandas import DataFrame, Series from sentence_transformers import SentenceTransformer -from tqdm.auto import tqdm # TODO: check deletion +from tqdm.auto import tqdm from lang_main.analysis.shared import ( candidates_by_index, diff --git a/src/lang_main/analysis/tokens.py b/src/lang_main/analysis/tokens.py index f9009e7..2082d3c 100644 --- a/src/lang_main/analysis/tokens.py +++ b/src/lang_main/analysis/tokens.py @@ -241,35 +241,36 @@ def build_token_graph( return graph, docs_mapping -def build_token_graph_simple( - data: DataFrame, - model: SpacyModel, -) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]: - graph = TokenGraph() - model_input = cast(tuple[str], tuple(data['entry'].to_list())) - weights = cast(tuple[int], tuple(data['num_occur'].to_list())) - indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list())) - index: int = 0 - docs_mapping: dict[PandasIndex, SpacyDoc] = {} +# TODO check removal +# def build_token_graph_simple( +# data: DataFrame, +# model: SpacyModel, +# ) -> tuple[TokenGraph, dict[PandasIndex, SpacyDoc]]: +# graph = TokenGraph() +# model_input = cast(tuple[str], tuple(data['entry'].to_list())) +# weights = cast(tuple[int], tuple(data['num_occur'].to_list())) +# indices = cast(tuple[list[PandasIndex]], tuple(data['batched_idxs'].to_list())) +# index: int = 0 +# docs_mapping: dict[PandasIndex, SpacyDoc] = {} - 
for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)): - add_doc_info_to_graph( - graph=graph, - doc=doc, - weight=weights[index], - ) - corresponding_indices = indices[index] - for idx in corresponding_indices: - docs_mapping[idx] = doc +# for doc in tqdm(model.pipe(model_input, batch_size=50), total=len(model_input)): +# add_doc_info_to_graph( +# graph=graph, +# doc=doc, +# weight=weights[index], +# ) +# corresponding_indices = indices[index] +# for idx in corresponding_indices: +# docs_mapping[idx] = doc - index += 1 +# index += 1 - # metadata - graph.update_metadata() - # convert to undirected - graph.to_undirected(logging=False) +# # metadata +# graph.update_metadata() +# # convert to undirected +# graph.to_undirected(logging=False) - return graph, docs_mapping +# return graph, docs_mapping # TODO check removal diff --git a/src/lang_main/config.py b/src/lang_main/config.py index a7d3a55..afcb819 100644 --- a/src/lang_main/config.py +++ b/src/lang_main/config.py @@ -1,11 +1,40 @@ from __future__ import annotations +import logging +import os import sys import tomllib -from typing import TYPE_CHECKING, Any +from pathlib import Path +from typing import Any, Final -if TYPE_CHECKING: - from pathlib import Path +from lang_main.search import search_cwd, search_iterative + +_has_py4cyto: bool = True +try: + import py4cytoscape as p4c +except ImportError: + _has_py4cyto = False + +# ** external packages config +# ** Huggingface Hub caching +os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = 'set' + +# ** py4cytoscape config +if _has_py4cyto: + p4c.set_summary_logger(False) + p4c.py4cytoscape_logger.detail_logger.setLevel('ERROR') + p4c.py4cytoscape_logger.detail_logger.removeHandler( + p4c.py4cytoscape_logger.detail_handler + ) + p4c.py4cytoscape_logger.detail_logger.addHandler(logging.NullHandler()) + +# ** lang-main config +BASE_FOLDERNAME: Final[str] = 'lang-main' +CONFIG_FILENAME: Final[str] = 'lang_main_config.toml' +CYTO_STYLESHEET_FILENAME: 
Final[str] = r'cytoscape_config/lang_main.xml' +PREFER_INTERNAL_CONFIG: Final[bool] = False +PKG_DIR: Final[Path] = Path(__file__).parent +STOP_FOLDER: Final[str] = 'python' def load_toml_config( @@ -14,4 +43,46 @@ def load_toml_config( with open(path_to_toml, 'rb') as f: data = tomllib.load(f) print('Loaded TOML config file successfully.', file=sys.stderr, flush=True) + return data + + +# ** load config data: internal/external +def get_config_paths( + root_folder: Path, + cfg_name: str, + cyto_stylesheet_name: str, +) -> tuple[Path, Path]: + cfg_path_internal = (root_folder / cfg_name).resolve() + cyto_stylesheet_path = (root_folder / cyto_stylesheet_name).resolve() + + return cfg_path_internal, cyto_stylesheet_path + + +def load_cfg( + starting_path: Path, + glob_pattern: str, + stop_folder_name: str | None, + cfg_path_internal: Path, + prefer_internal_config: bool = False, +) -> dict[str, Any]: + cfg_path: Path | None + # look for external config first, if not found use internal one + if prefer_internal_config: + cfg_path = cfg_path_internal + else: + cfg_path = search_cwd(glob_pattern) + + if cfg_path is None: + cfg_path = search_iterative( + starting_path=starting_path, + glob_pattern=glob_pattern, + stop_folder_name=stop_folder_name, + ) + # backup: use internal config + if cfg_path is None: + cfg_path = cfg_path_internal + + config = load_toml_config(path_to_toml=cfg_path) + + return config.copy() diff --git a/src/lang_main/lang_main_config.toml b/src/lang_main/lang_main_config.toml index c3d3e6e..729e8b8 100644 --- a/src/lang_main/lang_main_config.toml +++ b/src/lang_main/lang_main_config.toml @@ -1,6 +1,6 @@ # lang_main: Config file [info] -pkg = 'lang_main' +pkg = 'lang_main_internal' [paths] inputs = './inputs/' diff --git a/src/lang_main/model_loader.py b/src/lang_main/model_loader.py index cfc0eb9..7dc0cf6 100644 --- a/src/lang_main/model_loader.py +++ b/src/lang_main/model_loader.py @@ -60,7 +60,7 @@ def load_spacy( model_name: str, ) -> SpacyModel: 
try: - spacy_model_obj = importlib.import_module(SPACY_MODEL_NAME) + spacy_model_obj = importlib.import_module(model_name) except ModuleNotFoundError: raise LanguageModelNotFoundError( ( diff --git a/src/lang_main/pipelines/base.py b/src/lang_main/pipelines/base.py index 590e7ed..322269c 100644 --- a/src/lang_main/pipelines/base.py +++ b/src/lang_main/pipelines/base.py @@ -148,14 +148,10 @@ class Pipeline(BasePipeline): ) -> None: # init base class super().__init__(name=name, working_dir=working_dir) - # name of pipeline self.name = name # working directory for pipeline == output path self.working_dir = working_dir - # if not self.working_dir.exists(): - # self.working_dir.mkdir(parents=True) - # container for actions to perform during pass self.actions_kwargs: list[dict[str, Any]] = [] self.save_results: ResultHandling = [] @@ -192,28 +188,6 @@ class Pipeline(BasePipeline): else: self.panic_wrong_action_type(action=action, compatible_type=Callable.__name__) - # TODO: add multiple entries by utilising simple add method - """ - def add_multi( - self, - action: FunctionType | Sequence[FunctionType], - action_kwargs: dict[str, Any] | Sequence[dict[str, Any]], - ) -> None: - - if isinstance(action, Sequence): - if len(action_kwargs) != len(action): - raise ValueError(("Sequences for actions and corresponding keyword " - "arguments must have the same length.")) - self.actions.extend(action) - self.actions_kwargs.extend(action_kwargs) - elif isinstance(action, FunctionType): - self.actions.append(action) - self.actions_kwargs.append(action_kwargs) - else: - raise TypeError(("Action must be function or sequence of functions, " - f"but is of type >>{type(action)}<<.")) - """ - def get_result_path( self, action_idx: int, @@ -253,11 +227,7 @@ class Pipeline(BasePipeline): action_idx: int, filename: str | None, ) -> None: - # target_filename = f'Pipe-{self.name}_Step-{self.curr_proc_idx}_' + filename - # target_path = self.working_dir.joinpath(target_filename) - # target_path 
= target_path.with_suffix('.pkl') target_path, _ = self.get_result_path(action_idx, filename) - # saving file locally save_pickle(obj=self._intermediate_result, path=target_path) @override @@ -270,6 +240,7 @@ class Pipeline(BasePipeline): if self.load_results[idx][0]: filename = self.load_results[idx][1] ret = self.load_step(action_idx=idx, filename=filename) + self._intermediate_result = ret logger.info( '[No Calculation] Loaded result for action >>%s<< successfully', self.action_names[idx], @@ -279,18 +250,12 @@ class Pipeline(BasePipeline): # calculation if idx == 0: args = starting_values - # ret = action(*starting_values, **action_kwargs) else: args = ret if args is not None: ret = action(*args, **action_kwargs) - # elif args is not None: - # ret = action(*args) - # elif args is None and action_kwargs: - # ret = action(**action_kwargs) else: - # ret = action() ret = action(**action_kwargs) if ret is not None and not isinstance(ret, tuple): diff --git a/src/lang_main/pipelines/predefined.py b/src/lang_main/pipelines/predefined.py index 48337cc..8a5e6d0 100644 --- a/src/lang_main/pipelines/predefined.py +++ b/src/lang_main/pipelines/predefined.py @@ -60,7 +60,7 @@ SPACY_MODEL = m_load.instantiate_model( # ** pipeline configuration # ** target feature preparation def build_base_target_feature_pipe() -> Pipeline: - pipe_target_feat = Pipeline(name='TargetFeature', working_dir=SAVE_PATH_FOLDER) + pipe_target_feat = Pipeline(name='Target_Feature', working_dir=SAVE_PATH_FOLDER) pipe_target_feat.add( load_raw_data, { @@ -185,7 +185,15 @@ def build_tk_graph_render_pipe( ) -> Pipeline: # optional dependency: late import # raises exception if necessary modules are not found - from lang_main.render import cytoscape as cyto + try: + from lang_main.render import cytoscape as cyto + except ImportError: + raise ImportError( + ( + 'Dependencies for Cytoscape interaction not found.' + 'Install package with optional dependencies.' 
+ ) + ) pipe_graph_rendering = Pipeline( name='Graph_Static-Rendering', diff --git a/src/lang_main/render/cytoscape.py b/src/lang_main/render/cytoscape.py index 1125b79..a4a918e 100644 --- a/src/lang_main/render/cytoscape.py +++ b/src/lang_main/render/cytoscape.py @@ -60,7 +60,7 @@ def verify_connection() -> None: """ try: p4c.cytoscape_ping() - except CyError as error: + except CyError as error: # pragma: no cover logger.error('[CyError] CyREST or Cytoscape version not supported.') raise error except RequestException as error: @@ -164,6 +164,7 @@ def verify_table_property( network_name: str = CYTO_BASE_NETWORK_NAME, ) -> bool: table = p4c.get_table_columns(table=table_type, network=network_name) + logger.debug('Table >>%s<< wiht columns: %s', table, table.columns) return property in table.columns @@ -174,7 +175,7 @@ def analyse_network( ) -> None: node_table = p4c.get_table_columns(table='node', network=network_name) net_analyse_possible: bool = True - if len(node_table) < 4: + if len(node_table) < 4: # pragma: no cover net_analyse_possible = False if net_analyse_possible: @@ -186,7 +187,7 @@ def analyse_network( * node_table['BetweennessCentrality'] * node_table['stress_norm'] ) - else: + else: # pragma: no cover node_table[CYTO_SELECTION_PROPERTY] = 1 p4c.load_table_data(node_table, data_key_column='name', network=network_name) @@ -231,7 +232,7 @@ def export_network_to_image( by default 'A4' """ logger.debug('Exporting image to file...') - if not target_folder.exists(): + if not target_folder.exists(): # pragma: no cover target_folder.mkdir(parents=True) dst_file_pth = (target_folder / filename).with_suffix(f'.{filetype.lower()}') @@ -252,13 +253,6 @@ def export_network_to_image( export_text_as_font=text_as_font, page_size=pdf_export_page_size, ) - # TODO remove if Cytoscape >= 3.10.* is running in container - # p4c.export_image( - # filename=filename, - # type=filetype, - # network=network_name, - # overwrite_file=True, - # ) logger.debug('Exported image to 
sandbox.') logger.debug('Transferring image from sandbox to target destination...') sandbox_filename = f'{filename}.{filetype.lower()}' @@ -328,6 +322,7 @@ def apply_style_to_network( """ logger.debug('Applying style to network...') styles_avail = cast(list[str], p4c.get_visual_style_names()) + logger.debug('Available styles: %s', styles_avail) if style_name not in styles_avail: if not pth_to_stylesheet.exists(): # existence for standard path verified at import, but not for other @@ -348,12 +343,6 @@ def apply_style_to_network( p4c.set_visual_style(style_name, network=network_name) # node size mapping, only if needed property is available - # TODO check removal - # size_prop_available = verify_table_property( - # property=node_size_property, - # network_name=network_name, - # ) - # if size_prop_available: scheme = p4c.scheme_c_number_continuous( start_value=min_node_size, end_value=max_node_size ) @@ -365,13 +354,6 @@ def apply_style_to_network( default_number=min_node_size, ) p4c.set_node_size_mapping(**node_size_map) - # TODO removal - # else: - # node_table = p4c.get_table_columns(table='node', network=network_name) - # nodes_SUID = node_table['SUID'].to_list() - # p4c.set_node_size_bypass(nodes_SUID, new_sizes=min_node_size, network=network_name) - # p4c.set_visual_style(style_name, network=network_name) - # time.sleep(1) # if not waited image export could be without applied style fit_content(network_name=network_name) logger.debug('Style application to network successful.') @@ -402,7 +384,7 @@ def get_subgraph_node_selection( node_table = p4c.get_table_columns(table='node', network=network_name) node_table = node_table.sort_values(by=CYTO_SELECTION_PROPERTY, ascending=False) p4c.load_table_data(node_table, data_key_column='name', network=network_name) - node_table_choice = node_table.iloc[:num_subgraphs, :] + node_table_choice = node_table.iloc[:num_subgraphs] logger.debug('Selection of nodes for subgraph generation successful.') return 
node_table_choice['SUID'].to_list() diff --git a/src/lang_main/render/cytoscape_monkeypatch.py b/src/lang_main/render/cytoscape_monkeypatch.py index 84cb173..d6c96bd 100644 --- a/src/lang_main/render/cytoscape_monkeypatch.py +++ b/src/lang_main/render/cytoscape_monkeypatch.py @@ -9,8 +9,8 @@ re_parenthesis_1 = re.compile(r'[(]+') re_parenthesis_2 = re.compile(r'[)]+') -@cy_log -def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405 +@cy_log # pragma: no cover +def select_edges_connecting_selected_nodes(network=None, base_url=DEFAULT_BASE_URL): # noqa: F405 # pragma: no cover """Select edges in a Cytoscape Network connecting the selected nodes, including self loops connecting single nodes. Any edges selected beforehand are deselected before any new edges are selected diff --git a/src/lang_main/search.py b/src/lang_main/search.py new file mode 100644 index 0000000..7554817 --- /dev/null +++ b/src/lang_main/search.py @@ -0,0 +1,110 @@ +from pathlib import Path + + +def search_cwd( + glob_pattern: str, +) -> Path | None: + """Searches the current working directory and looks for files + matching the glob pattern. + Returns the first match encountered. + + Parameters + ---------- + glob_pattern : str, optional + pattern to look for, first match will be returned + + Returns + ------- + Path | None + Path if corresponding object was found, None otherwise + """ + path_found: Path | None = None + res = tuple(Path.cwd().glob(glob_pattern)) + if res: + path_found = res[0] + + return path_found + + +def search_iterative( + starting_path: Path, + glob_pattern: str, + stop_folder_name: str | None = None, +) -> Path | None: + """Iteratively searches the parent directories of the starting path + and look for files matching the glob pattern. The starting path is not + searched, only its parents. Therefore the starting path can also point + to a file. The folder in which it is placed in will be searched. 
+ Returns the first match encountered. + The parent of the stop folder will be searched if it exists. + + Parameters + ---------- + starting_path : Path + non-inclusive starting path + glob_pattern : str, optional + pattern to look for, first match will be returned + stop_folder_name : str, optional + name of the last folder in the directory tree to search, by default None + + Returns + ------- + Path | None + Path if corresponding object was found, None otherwise + """ + file_path: Path | None = None + stop_folder_reached: bool = False + for search_path in starting_path.parents: + res = tuple(search_path.glob(glob_pattern)) + if res: + file_path = res[0] + break + elif stop_folder_reached: + break + + if stop_folder_name is not None and search_path.name == stop_folder_name: + # library is placed inside a whole python installation for deployment + # if this folder is reached, only look up one parent above + stop_folder_reached = True + + return file_path + + +def search_base_path( + starting_path: Path, + stop_folder_name: str | None = None, +) -> Path | None: + """Iteratively searches the parent directories of the starting path + and look for folders matching the given name. If a match is encountered, + the parent path will be returned. 
+ + Example: + starting_path = path/to/start/folder + stop_folder_name = 'to' + returned path = 'path/' + + Parameters + ---------- + starting_path : Path + non-inclusive starting path + stop_folder_name : str, optional + name of the last folder in the directory tree to search, by default None + + Returns + ------- + Path | None + Path if corresponding base path was found, None otherwise + """ + stop_folder_path: Path | None = None + base_path: Path | None = None + for search_path in starting_path.parents: + if stop_folder_name is not None and search_path.name == stop_folder_name: + # library is placed inside a whole python installation for deployment + # only look up to this folder + stop_folder_path = search_path + break + + if stop_folder_path is not None: + base_path = stop_folder_path.parent + + return base_path diff --git a/test.ps1 b/test.ps1 new file mode 100644 index 0000000..4592715 --- /dev/null +++ b/test.ps1 @@ -0,0 +1 @@ +pdm run coverage run -p -m pytest -n 6 \ No newline at end of file diff --git a/tests/analysis/test_tokens.py b/tests/analysis/test_tokens.py index dc16ef2..cdf5e39 100644 --- a/tests/analysis/test_tokens.py +++ b/tests/analysis/test_tokens.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pytest from lang_main import model_loader @@ -44,14 +42,14 @@ def test_obtain_relevant_descendants(spacy_model): doc = spacy_model(SENTENCE) sent1 = tuple(doc.sents)[0] # first sentence word1 = sent1[1] # word "ging" (POS:VERB) - descendants1 = ('0912393', 'schnell', 'Wiese', 'Menschen') + descendants1 = ('ID', '0912393', 'schnell', 'Wiese', 'Menschen') rel_descs = tokens.obtain_relevant_descendants(word1) rel_descs = tuple((token.text for token in rel_descs)) assert descendants1 == rel_descs sent2 = tuple(doc.sents)[1] # first sentence word2 = sent2[1] # word "konnte" (POS:AUX) - descendants2 = ('mit', 'Probleme', 'Tragen', 'Tasche') + descendants2 = ('Probleme', 'Tragen', 'Tasche') rel_descs = tokens.obtain_relevant_descendants(word2) 
rel_descs = tuple((token.text for token in rel_descs)) assert descendants2 == rel_descs @@ -62,7 +60,7 @@ def test_add_doc_info_to_graph(spacy_model): tk_graph = graphs.TokenGraph() tokens.add_doc_info_to_graph(tk_graph, doc, weight=2) assert len(tk_graph.nodes) == 11 - assert len(tk_graph.edges) == 17 + assert len(tk_graph.edges) == 16 assert '0912393' in tk_graph.nodes diff --git a/tests/pipelines/test_base.py b/tests/pipelines/test_base.py new file mode 100644 index 0000000..51178db --- /dev/null +++ b/tests/pipelines/test_base.py @@ -0,0 +1,227 @@ +from pathlib import Path + +import pytest + +from lang_main import io +from lang_main.errors import ( + NoPerformableActionError, + OutputInPipelineContainerError, + WrongActionTypeError, +) +from lang_main.pipelines import base + +PIPELINE_NAME = 'test' + + +@pytest.fixture(scope='module') +def working_dir() -> Path: + work_dir = Path.cwd() / 'tests/work_dir' + if not work_dir.exists(): + work_dir.mkdir() + return work_dir + + +@pytest.fixture(scope='function') +def pipeline_container(working_dir) -> base.PipelineContainer: + return base.PipelineContainer(name=PIPELINE_NAME, working_dir=working_dir) + + +@pytest.fixture(scope='function') +def pipeline(working_dir) -> base.Pipeline: + return base.Pipeline(name=PIPELINE_NAME, working_dir=working_dir) + + +def test_empty_pipeline_container(pipeline_container, working_dir): + container = pipeline_container + assert container.name == PIPELINE_NAME + assert container.working_dir == working_dir + assert len(container.actions) == 0 + assert len(container.action_names) == 0 + assert len(container.action_skip) == 0 + assert container.curr_proc_idx == 1 + + with pytest.raises(NoPerformableActionError): + container.prep_run() + + assert container.post_run() is None + + +@pytest.mark.parametrize('skip', [True, False]) +def test_pipeline_container_valid(pipeline_container, skip): + test_string = 'test' + + def valid_action(): # pragma: no cover + nonlocal test_string + 
test_string += '_2' + + pipeline_container.add(valid_action, skip=skip) + assert len(pipeline_container.actions) == 1 + assert len(pipeline_container.action_names) == 1 + assert len(pipeline_container.action_skip) == 1 + + ret = pipeline_container.run() + assert pipeline_container.curr_proc_idx == 2 + assert ret is None + if skip: + assert test_string == 'test' + else: + assert test_string == 'test_2' + + pipeline_container.prep_run() + assert pipeline_container.curr_proc_idx == 1 + + +def test_pipeline_container_invalid_action(pipeline_container): + test_string = 'test' + + def invalid_action(): + nonlocal test_string + test_string += '_2' + new = 'ret' + return new + + with pytest.raises(WrongActionTypeError): + pipeline_container.add(test_string, skip=False) + + pipeline_container.add(invalid_action, skip=False) + with pytest.raises(OutputInPipelineContainerError): + pipeline_container.run() + + +def test_empty_pipeline(pipeline, working_dir): + pipe = pipeline + assert pipe.name == PIPELINE_NAME + assert pipe.working_dir == working_dir + assert len(pipe.actions) == 0 + assert len(pipe.action_names) == 0 + assert len(pipe.actions_kwargs) == 0 + assert len(pipe.save_results) == 0 + assert len(pipe.load_results) == 0 + assert pipe.curr_proc_idx == 1 + assert pipe._intermediate_result is None + + with pytest.raises(NoPerformableActionError): + pipe.prep_run() + + assert pipe.post_run() is None + + +@pytest.mark.parametrize('alter_content', [True, False]) +def test_pipeline_valid(pipeline, alter_content): + pipe = pipeline + test_string = 'test' + + # action preparation + def valid_action(string, add_content=False): + if add_content: + string += '_2' + return string + + pipe.add(valid_action, {'add_content': alter_content}) + assert len(pipe.actions) == 1 + assert len(pipe.action_names) == 1 + assert len(pipe.actions_kwargs) == 1 + assert len(pipe.save_results) == 1 + assert len(pipe.load_results) == 1 + assert pipe.save_results[0] == (False, None) + assert 
pipe.load_results[0] == (False, None) + # filenames and saving/loading + target_filename = f'Pipe-{pipe.name}_Step-{pipe.curr_proc_idx}_valid_action' + target_pth = (pipe.working_dir / target_filename).with_suffix('.pkl') + ret_pth, action_name = pipe.get_result_path(0, filename=None) + assert ret_pth == target_pth + assert action_name == 'valid_action' + filename = 'test' + ret_pth, action_name = pipe.get_result_path(0, filename=filename) + target_pth = (pipe.working_dir / filename).with_suffix('.pkl') + assert ret_pth == target_pth + assert action_name == 'valid_action' + # load non-existing files + with pytest.raises(FileNotFoundError): + pipe.load_step(0, 'non_existing') + + # running + ret = pipe.run(starting_values=(test_string,)) + assert isinstance(ret, tuple) + assert pipe._intermediate_result == ret + assert pipe.curr_proc_idx == 2 + assert ret is not None + if alter_content: + assert ret[0] == 'test_2' + else: + assert ret[0] == 'test' + + pipe.prep_run() + assert pipe.curr_proc_idx == 1 + + # load existing files + loaded_res = pipe.load_step(0, None) + assert loaded_res is not None + assert isinstance(loaded_res, tuple) + assert loaded_res[0] == ret[0] + + +def test_pipeline_valid_action_load(pipeline, working_dir): + pipe = pipeline + test_string = 'test' + + # action preparation + def valid_action(string, add_content=False): + if add_content: + string += '_2' + return string + + pipe.add(valid_action, {'add_content': False}, load_result=True) + assert len(pipe.actions) == 1 + assert len(pipe.action_names) == 1 + assert len(pipe.actions_kwargs) == 1 + assert len(pipe.save_results) == 1 + assert len(pipe.load_results) == 1 + assert pipe.save_results[0] == (False, None) + assert pipe.load_results[0] == (True, None) + + ret = pipe.run(starting_values=(test_string,)) + assert isinstance(ret, tuple) + assert pipe._intermediate_result == ret + assert pipe.curr_proc_idx == 2 + assert ret is not None + + # load non-tuple result + filename = 'non_tuple.pkl' + 
save_pth = working_dir / filename + io.save_pickle(test_string, save_pth) + with pytest.raises(TypeError): + pipe.load_step(0, filename) + + +def test_pipeline_multiple_actions(pipeline): + pipe = pipeline + test_string = 'test' + + # action preparation + def valid_action(string, add_content=True): + if add_content: + string += '_2' + return string + + def valid_action_2(string, add_content=True): + if add_content: + string += '_3' + return string + + pipe.add(valid_action, {'add_content': True}) + pipe.add(valid_action_2) + assert len(pipe.actions) == 2 + assert len(pipe.action_names) == 2 + assert len(pipe.actions_kwargs) == 2 + assert len(pipe.save_results) == 2 + assert len(pipe.load_results) == 2 + assert pipe.save_results[1] == (False, None) + assert pipe.load_results[1] == (False, None) + + ret = pipe.run(starting_values=(test_string,)) + assert isinstance(ret, tuple) + assert pipe._intermediate_result == ret + assert pipe.curr_proc_idx == 3 + assert ret is not None + assert ret[0] == 'test_2_3' diff --git a/tests/pipelines/test_predefined.py b/tests/pipelines/test_predefined.py new file mode 100644 index 0000000..b00c024 --- /dev/null +++ b/tests/pipelines/test_predefined.py @@ -0,0 +1,52 @@ +import pytest + +from lang_main.pipelines import predefined as pre +from lang_main.types import EntryPoints + + +def test_build_base_target_feature_pipe(): + pipe = pre.build_base_target_feature_pipe() + assert pipe.name == 'Target_Feature' + assert len(pipe.actions) == 5 + + +def test_build_merge_duplicates_pipe(): + pipe = pre.build_merge_duplicates_pipe() + assert pipe.name == 'Merge_Duplicates' + assert len(pipe.actions) == 2 + + +def test_build_tk_graph_pipe(): + pipe = pre.build_tk_graph_pipe() + assert pipe.name == 'Token_Analysis' + assert len(pipe.actions) == 1 + + +def test_build_tk_graph_post_pipe(): + pipe = pre.build_tk_graph_post_pipe() + assert pipe.name == 'Graph_Postprocessing' + assert len(pipe.actions) == 3 + + +def 
test_build_tk_graph_rescaling_pipe(): + pipe = pre.build_tk_graph_rescaling_pipe( + save_result=False, exit_point=EntryPoints.TK_GRAPH_ANALYSIS_RESCALED + ) + assert pipe.name == 'Graph_Rescaling' + assert len(pipe.actions) == 2 + + +@pytest.mark.parametrize('with_subgraphs', [True, False]) +def test_build_tk_graph_render_pipe(with_subgraphs): + pipe = pre.build_tk_graph_render_pipe(with_subgraphs=with_subgraphs) + assert pipe.name == 'Graph_Static-Rendering' + if with_subgraphs: + assert len(pipe.actions) == 6 + else: + assert len(pipe.actions) == 4 + + +def test_build_timeline_pipe(): + pipe = pre.build_timeline_pipe() + assert pipe.name == 'Timeline_Analysis' + assert len(pipe.actions) == 6 diff --git a/tests/render/__init__.py b/tests/render/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/render/test_cytoscape.py b/tests/render/test_cytoscape.py new file mode 100644 index 0000000..cb16ce7 --- /dev/null +++ b/tests/render/test_cytoscape.py @@ -0,0 +1,227 @@ +"""tests for Cytoscape API requests, needs running Cytoscape server; +Tests assume that no Cytoscape instance is running. +The validation of the correct behaviour can only be done with a running instance, +especially for layout and formatting tasks. A static test suite is not helpful in +this case. 
+""" + +import py4cytoscape as p4c +import pytest +from py4cytoscape.exceptions import CyError +from requests.exceptions import RequestException + +from lang_main.constants import CYTO_BASE_NETWORK_NAME, CYTO_SELECTION_PROPERTY +from lang_main.errors import GraphRenderError +from lang_main.render import cytoscape as cyto + +_cyto_available: bool = True +try: + p4c.cytoscape_ping() +except RequestException: + _cyto_available = False + + +@pytest.fixture(scope='module') +def avail() -> bool: + return _cyto_available + + +@pytest.mark.cyto +def test_verify_connection(avail): + if avail: + cyto.verify_connection() + else: + with pytest.raises(RequestException): + cyto.verify_connection() + + +def test_verify_graph_render_size(data_tk_graph_built): + cyto.verify_graph_render_size( + data_tk_graph_built, max_node_count=None, max_edge_count=None + ) + + with pytest.raises(GraphRenderError): + cyto.verify_graph_render_size( + data_tk_graph_built, max_node_count=0, max_edge_count=None + ) + + with pytest.raises(GraphRenderError): + cyto.verify_graph_render_size( + data_tk_graph_built, max_node_count=None, max_edge_count=0 + ) + + +@pytest.mark.cyto +def test_change_default_layout(avail): + if avail: + cyto.change_default_layout() + else: + with pytest.raises(RequestException): + cyto.change_default_layout() + + +@pytest.mark.cyto +def test_import_to_cytoscape(avail, data_tk_graph_built): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + else: + with pytest.raises(RequestException): + cyto.import_to_cytoscape(data_tk_graph_built) + + +@pytest.mark.cyto +def test_verify_table_property(avail, data_tk_graph_built): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + contained = cyto.verify_table_property(property='TEST', table_type='node') + assert not contained + contained = cyto.verify_table_property(property='name', table_type='node') + assert contained + contained = cyto.verify_table_property(property='degree_weighted', table_type='node') + assert 
contained + else: + with pytest.raises(RequestException): + contained = cyto.verify_table_property(property='TEST', table_type='node') + + +@pytest.mark.cyto +def test_analyse_network(avail, data_tk_graph_built): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + cyto.analyse_network() + contained = cyto.verify_table_property(property='name', table_type='node') + assert contained + contained = cyto.verify_table_property( + property=CYTO_SELECTION_PROPERTY, table_type='node' + ) + assert contained + else: + with pytest.raises(RequestException): + cyto.analyse_network(data_tk_graph_built) + + +@pytest.mark.cyto +def test_reset_current_network_to_base(avail): + if avail: + cyto.reset_current_network_to_base() + else: + with pytest.raises(RequestException): + cyto.reset_current_network_to_base() + + +@pytest.mark.cyto +def test_fit_content(avail, data_tk_graph_built): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + cyto.fit_content() + else: + with pytest.raises(RequestException): + cyto.fit_content() + + +@pytest.mark.cyto +def test_export_network_to_image(avail, tmp_path, data_tk_graph_built): + filename = 'test_export' + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + cyto.export_network_to_image(filename=filename, target_folder=tmp_path) + target_file = tmp_path / f'{filename}.svg' + assert target_file.exists() + assert target_file.name == f'{filename}.svg' + else: + with pytest.raises(RequestException): + cyto.export_network_to_image(filename=filename, target_folder=tmp_path) + + +@pytest.mark.cyto +def test_layout_network(avail, data_tk_graph_built): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + cyto.layout_network() + else: + with pytest.raises(RequestException): + cyto.layout_network() + + +@pytest.mark.cyto +def test_apply_style_to_network(avail, data_tk_graph_built, tmp_path): + if avail: + layout_not_existing = 'testing' + pth_not_existing = tmp_path / 'test.xml' + with pytest.raises(FileNotFoundError): + 
cyto.apply_style_to_network( + style_name=layout_not_existing, + pth_to_stylesheet=pth_not_existing, + ) + + cyto.import_to_cytoscape(data_tk_graph_built) + # not existing: so transfer necessary, + # but fails nevertheless because style is imported + # using the name provided by this style configuration + with pytest.raises(CyError): + cyto.apply_style_to_network(style_name=layout_not_existing) + cyto.apply_style_to_network() + else: + with pytest.raises(RequestException): + cyto.apply_style_to_network() + + +@pytest.mark.cyto +def test_get_subgraph_node_selection(avail, data_tk_graph_built): + num_subgraphs = 2 + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + cyto.analyse_network() + suids = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs) + assert len(suids) > 0 + else: + with pytest.raises(RequestException): + cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs) + + +@pytest.mark.cyto +def test_select_neighbours_of_node(avail, data_tk_graph_built): + num_subgraphs = 2 + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + suids = cyto.get_subgraph_node_selection(num_subgraphs=num_subgraphs) + assert len(suids) > 0 + cyto.select_neighbours_of_node(suids[0], neighbour_iter_depth=2) + else: + with pytest.raises(RequestException): + cyto.select_neighbours_of_node(123, neighbour_iter_depth=2) + + +@pytest.mark.cyto +def test_make_subnetwork(avail, data_tk_graph_built, tmp_path): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + suids = cyto.get_subgraph_node_selection(num_subgraphs=2) + assert len(suids) > 0 + cyto.select_neighbours_of_node(suids[0], neighbour_iter_depth=2) + cyto.make_subnetwork(0, target_folder=tmp_path, export_image=True) + subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1' + networks = p4c.get_network_list() + assert len(networks) > 1 + file = (tmp_path / subnetwork_name).with_suffix('.svg') + assert file.exists() + else: + with pytest.raises(RequestException): + cyto.make_subnetwork(0, 
target_folder=tmp_path, export_image=True) + + +@pytest.mark.cyto +def test_build_subnetworks(avail, data_tk_graph_built, tmp_path): + if avail: + cyto.import_to_cytoscape(data_tk_graph_built) + suids = cyto.get_subgraph_node_selection(num_subgraphs=1) + assert len(suids) > 0 + cyto.build_subnetworks(suids, export_image=True, target_folder=tmp_path) + subnetwork_name = CYTO_BASE_NETWORK_NAME + '_sub_1' + networks = p4c.get_network_list() + assert len(networks) > 1 + file = (tmp_path / subnetwork_name).with_suffix('.svg') + assert file.exists() + else: + with pytest.raises(RequestException): + cyto.build_subnetworks([123], export_image=True, target_folder=tmp_path) diff --git a/tests/test_config.py b/tests/test_config.py index 6fe9c8c..a5c4db7 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,64 @@ -from lang_main import config, pkg_dir +import sys +from importlib import reload +from pathlib import Path +from unittest.mock import patch + +from lang_main import config + + +def test_p4c_dependency(): + assert config._has_py4cyto + with patch.dict(sys.modules, {'py4cytoscape': None}): + reload(sys.modules['lang_main.config']) + assert not config._has_py4cyto def test_load_config(): - toml_path = pkg_dir / 'lang_main_config.toml' + toml_path = config.PKG_DIR / 'lang_main_config.toml' loaded_cfg = config.load_toml_config(toml_path) + assert loaded_cfg['info']['pkg'] == 'lang_main_internal' + + +def test_get_config_path(): + pkg_dir = config.PKG_DIR + filename = config.CONFIG_FILENAME + cyto_stylesheet_name = config.CYTO_STYLESHEET_FILENAME + + cfg_pth_internal = (pkg_dir / filename).resolve() + cyto_cfg_pth = (pkg_dir / cyto_stylesheet_name).resolve() + + cfg_internal, cyto_internal = config.get_config_paths( + root_folder=pkg_dir, + cfg_name=filename, + cyto_stylesheet_name=cyto_stylesheet_name, + ) + assert cfg_internal == cfg_pth_internal + assert cyto_internal == cyto_cfg_pth + + +def test_load_cfg(monkeypatch, tmp_path): + 
monkeypatch.setattr(Path, 'cwd', lambda: tmp_path) + pkg_dir = config.PKG_DIR + filename = config.CONFIG_FILENAME + stop_folder = config.STOP_FOLDER + + cfg_pth_internal = (pkg_dir / filename).resolve() + ref_config = config.load_toml_config(cfg_pth_internal) + + assert ref_config['info']['pkg'] == 'lang_main_internal' + loaded_cfg = config.load_cfg( + starting_path=pkg_dir, + glob_pattern=filename, + stop_folder_name=stop_folder, + cfg_path_internal=cfg_pth_internal, + prefer_internal_config=True, + ) + assert loaded_cfg['info']['pkg'] == 'lang_main_internal' + loaded_cfg = config.load_cfg( + starting_path=pkg_dir, + glob_pattern=filename, + stop_folder_name=stop_folder, + cfg_path_internal=cfg_pth_internal, + prefer_internal_config=False, + ) assert loaded_cfg['info']['pkg'] == 'lang_main' diff --git a/tests/test_io.py b/tests/test_io.py index 6930cda..63cecc5 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -15,6 +15,9 @@ def test_create_saving_folder(tmp_path, overwrite): io.create_saving_folder(target_dir, overwrite_existing=overwrite) assert target_dir.exists() assert target_dir.is_dir() + io.create_saving_folder(str(target_dir), overwrite_existing=overwrite) + assert target_dir.exists() + assert target_dir.is_dir() def test_save_load(tmp_path): diff --git a/tests/test_model_loader.py b/tests/test_model_loader.py index ec46ad9..f734409 100644 --- a/tests/test_model_loader.py +++ b/tests/test_model_loader.py @@ -11,6 +11,7 @@ from lang_main.constants import ( STFRDeviceTypes, STFRModelTypes, ) +from lang_main.errors import LanguageModelNotFoundError from lang_main.types import LanguageModels @@ -62,10 +63,7 @@ def test_load_sentence_transformer( ], ) @pytest.mark.mload -def test_load_sentence_transformer_onnx( - model_name, - similarity_func, -) -> None: +def test_load_sentence_transformer_onnx(model_name, similarity_func) -> None: model = model_loader.load_sentence_transformer( model_name=model_name, similarity_func=similarity_func, @@ -86,15 +84,19 @@ 
def test_load_sentence_transformer_onnx( ], ) @pytest.mark.mload -def test_load_spacy_model( - model_name, -): +def test_load_spacy_model(model_name): model = model_loader.load_spacy( model_name=model_name, ) assert isinstance(model, Language) +def test_load_spacy_model_fail(): + model_name = 'not_existing' + with pytest.raises(LanguageModelNotFoundError): + model = model_loader.load_spacy(model_name) + + @pytest.mark.mload def test_instantiate_spacy_model(): model = model_loader.instantiate_model( diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 0000000..1c127ac --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,64 @@ +from pathlib import Path + +import pytest + +from lang_main import search + +FILE_SEARCH = 'test.txt' + + +@pytest.fixture(scope='module') +def base_folder(tmp_path_factory) -> Path: + folder_structure = 'path/to/base/folder/' + pth = tmp_path_factory.mktemp('search') + pth = pth / folder_structure + pth.mkdir(parents=True, exist_ok=True) + + return pth + + +@pytest.fixture(scope='module') +def target_file_pth(base_folder) -> Path: + # place in folder 'path' of TMP path + target_folder = base_folder.parents[2] + target_file = target_folder / FILE_SEARCH + with open(target_file, 'w') as file: + file.write('TEST') + + return target_file + + +def test_search_base_path(base_folder): + stop_folder = '123' # should not exist + found = search.search_base_path(base_folder, stop_folder_name=stop_folder) + assert found is None + stop_folder = 'to' + found = search.search_base_path(base_folder, stop_folder_name=stop_folder) + assert found is not None + assert found.name == 'path' + + +@pytest.mark.parametrize('stop_folder_name', ['to', 'base', None]) +def test_search_iterative(base_folder, target_file_pth, stop_folder_name): + # target in parent of 'to': 'path' + ret = search.search_iterative(base_folder, FILE_SEARCH, stop_folder_name) + if stop_folder_name == 'to' or stop_folder_name is None: + assert ret is not None + 
assert ret.name == FILE_SEARCH + assert ret == target_file_pth + elif stop_folder_name == 'base': + assert ret is None + + +def test_search_cwd(monkeypatch, base_folder, target_file_pth): + monkeypatch.setattr(Path, 'cwd', lambda: base_folder) + assert Path.cwd() == base_folder + ret = search.search_cwd(FILE_SEARCH) + assert ret is None + + target_folder = target_file_pth.parent + monkeypatch.setattr(Path, 'cwd', lambda: target_folder) + assert Path.cwd() == target_folder + ret = search.search_cwd(FILE_SEARCH) + assert ret is not None + assert ret == target_file_pth diff --git a/tests/work_dir/Pipe-test_Step-1_valid_action.pkl b/tests/work_dir/Pipe-test_Step-1_valid_action.pkl new file mode 100644 index 0000000..9de24f9 Binary files /dev/null and b/tests/work_dir/Pipe-test_Step-1_valid_action.pkl differ diff --git a/tests/work_dir/Pipe-test_Step-2_valid_action_2.pkl b/tests/work_dir/Pipe-test_Step-2_valid_action_2.pkl new file mode 100644 index 0000000..900f352 Binary files /dev/null and b/tests/work_dir/Pipe-test_Step-2_valid_action_2.pkl differ diff --git a/tests/work_dir/non_tuple.pkl b/tests/work_dir/non_tuple.pkl new file mode 100644 index 0000000..c70f21b Binary files /dev/null and b/tests/work_dir/non_tuple.pkl differ