From 4072b970126f510f5c1184fe95bfd658728419c5 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 17 Apr 2025 11:55:01 +0200 Subject: [PATCH] add env setup for runtime to enable multiprocessing parameter search by joblib, closes #23 --- pdm.lock | 4 +- pyproject.toml | 6 +-- src/delta_barth/__init__.py | 25 ++++++----- src/delta_barth/_env.py | 33 +++++++++++++++ src/delta_barth/analysis/forecast.py | 3 +- tests/analysis/test_forecast.py | 1 + tests/test_env.py | 49 ++++++++++++++++++++++ tests/test_pipelines.py | 1 + tests/test_session.py | 62 +++++++++++++++++++++++++++- 9 files changed, 163 insertions(+), 21 deletions(-) create mode 100644 src/delta_barth/_env.py create mode 100644 tests/test_env.py diff --git a/pdm.lock b/pdm.lock index eb2b1d9..06b8431 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:545c39ef89d18d28a7bca4b08c93e6fb900c42612089300b867a4e0955acd6ab" +content_hash = "sha256:c3fd178d5c4736852fff59e2e4c5e3565b0fb80bf29ec5979e1e9c78d452ee1f" [[metadata.targets]] requires_python = ">=3.11" @@ -1623,7 +1623,7 @@ name = "psutil" version = "7.0.0" requires_python = ">=3.6" summary = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
-groups = ["nb"] +groups = ["default", "nb"] files = [ {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, diff --git a/pyproject.toml b/pyproject.toml index d84c2de..dcf5caf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ [project] name = "delta-barth" -version = "0.5.8dev11" +version = "0.5.8" description = "workflows and pipelines for the Python-based Plugin of Delta Barth's ERP system" authors = [ {name = "Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["scikit-learn>=1.6.1", "pandas>=2.2.3", "xgboost>=2.1.4", "joblib>=1.4.2", "typing-extensions>=4.12.2", "requests>=2.32.3", "pydantic>=2.10.6", "dopt-basics>=0.1.3", "SQLAlchemy>=2.0.39"] +dependencies = ["scikit-learn>=1.6.1", "pandas>=2.2.3", "xgboost>=2.1.4", "joblib>=1.4.2", "typing-extensions>=4.12.2", "requests>=2.32.3", "pydantic>=2.10.6", "dopt-basics>=0.1.3", "SQLAlchemy>=2.0.39", "psutil>=7.0.0"] requires-python = ">=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"} @@ -74,7 +74,7 @@ directory = "reports/coverage" [tool.bumpversion] -current_version = "0.5.8dev11" +current_version = "0.5.8" parse = """(?x) (?P<major>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\. 
diff --git a/src/delta_barth/__init__.py b/src/delta_barth/__init__.py index 570282f..dc5e795 100644 --- a/src/delta_barth/__init__.py +++ b/src/delta_barth/__init__.py @@ -1,23 +1,22 @@ -import multiprocessing -import os -import sys -from pathlib import Path +# import multiprocessing +# import os +# import sys +# from pathlib import Path # os.environ["JOBLIB_DISABLE_SHARED_MEMORY"] = "1" -interpreter = r"A:\Arbeitsaufgaben\Delta-Barth\cs-wrapper\dopt.DeltaBarth\python\python.exe" -intp_pth = Path(interpreter).resolve() +# interpreter = r"A:\Arbeitsaufgaben\Delta-Barth\cs-wrapper\dopt.DeltaBarth\python\python.exe" +# intp_pth = Path(interpreter).resolve() -assert intp_pth.exists(), f"interpreter path seems not to exist: {intp_pth}" +# assert intp_pth.exists(), f"interpreter path seems not to exist: {intp_pth}" # multiprocessing.set_executable(str(intp_pth)) # setattr(sys, "frozen", True) # !! causes termination # sys.executable = str(intp_pth) -setattr(sys, "executable", str(intp_pth)) -setattr(sys, "_base_executable", str(intp_pth)) +# setattr(sys, "executable", str(intp_pth)) +# setattr(sys, "_base_executable", str(intp_pth)) # multiprocessing.set_start_method("spawn", force=True) - -target = Path(r"A:\Arbeitsaufgaben\Delta-Barth\cs-wrapper\dopt.DeltaBarth") -file = target / "executed.txt" -file.touch() +# target = Path(r"A:\Arbeitsaufgaben\Delta-Barth\cs-wrapper\dopt.DeltaBarth") +# file = target / "executed.txt" +# file.touch() diff --git a/src/delta_barth/_env.py b/src/delta_barth/_env.py new file mode 100644 index 0000000..8aae0e2 --- /dev/null +++ b/src/delta_barth/_env.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import sys +from pathlib import Path +from typing import Final + +from dopt_basics import io + +PY_RUNTIME_FOLDER: Final[str] = "python" + + +def prepare_env( + lib_path: Path, +) -> Path | None: + pyrt_folder = io.search_folder_path( + starting_path=lib_path, + stop_folder_name=PY_RUNTIME_FOLDER, + return_inclusive=True, + ) + if 
pyrt_folder is None: + return None + + pth_interpreter = pyrt_folder / "python.exe" + if not pth_interpreter.exists(): + raise FileNotFoundError( + f"dopt-delta-barth seems to be deployed in a standalone runtime, " + f"but the interpreter was not found under: {pth_interpreter}" + ) + + setattr(sys, "executable", str(pth_interpreter)) + setattr(sys, "_base_executable", str(pth_interpreter)) + + return pyrt_folder diff --git a/src/delta_barth/analysis/forecast.py b/src/delta_barth/analysis/forecast.py index 9e42dcd..0415ad7 100644 --- a/src/delta_barth/analysis/forecast.py +++ b/src/delta_barth/analysis/forecast.py @@ -38,6 +38,7 @@ from delta_barth.constants import ( DEFAULT_DB_ERR_CODE, DUMMY_DATA_PATH, FEATURES_SALES_PROGNOSIS, + MAX_NUM_WORKERS, SALES_MIN_NUM_DATAPOINTS, ) from delta_barth.errors import STATUS_HANDLER, wrap_result @@ -302,7 +303,7 @@ def _process_sales( params, scoring="neg_mean_absolute_error", cv=kfold, - n_jobs=-1, + n_jobs=MAX_NUM_WORKERS, n_iter=100, verbose=0, ) diff --git a/tests/analysis/test_forecast.py b/tests/analysis/test_forecast.py index 54b11fb..1488b01 100644 --- a/tests/analysis/test_forecast.py +++ b/tests/analysis/test_forecast.py @@ -430,6 +430,7 @@ def test_export_on_fail(): assert res.status.description == status.description +@patch("delta_barth.session.CFG_HOT_RELOAD", False) def test_pipeline_sales_forecast_SuccessDbWrite(exmpl_api_sales_prognosis_resp, session): with ( patch( diff --git a/tests/test_env.py b/tests/test_env.py new file mode 100644 index 0000000..b10fa20 --- /dev/null +++ b/tests/test_env.py @@ -0,0 +1,49 @@ +import importlib +import sys +from unittest.mock import patch + +import pytest + +import delta_barth.constants +from delta_barth import _env + + +@patch("delta_barth._env.PY_RUNTIME_FOLDER", "test123456") +def test_prepare_env_NoRuntimeFolder(tmp_path): + ret = _env.prepare_env(tmp_path) + assert ret is None + + +@patch("delta_barth._env.PY_RUNTIME_FOLDER", "base") +def 
test_prepare_env_FailNoInterpreter(tmp_path_factory): + mocked_lib_pth = tmp_path_factory.mktemp("path") / "to/base/folder/lib/" + mocked_lib_pth.mkdir(parents=True, exist_ok=True) + with pytest.raises(FileNotFoundError): + _ = _env.prepare_env(mocked_lib_pth) + + +@patch("delta_barth._env.PY_RUNTIME_FOLDER", "base") +def test_prepare_env_Success(tmp_path_factory): + mocked_lib_pth = tmp_path_factory.mktemp("path") / "to/base/folder/lib/" + mocked_lib_pth.mkdir(parents=True, exist_ok=True) + rt_path = mocked_lib_pth.parents[1] + mocked_interpreter = rt_path / "python.exe" + mocked_interpreter.touch() + assert mocked_interpreter.exists() + ret = _env.prepare_env(mocked_lib_pth) + assert ret == rt_path + # sys attributes + executable = getattr(sys, "executable") + assert executable == str(mocked_interpreter) + base_executable = getattr(sys, "_base_executable") + assert base_executable == str(mocked_interpreter) + + class MockPath: + def __init__(self, *args, **kwargs): + self.parent = mocked_lib_pth + + with patch("pathlib.Path", MockPath): + (mocked_lib_pth / "_dummy_data").mkdir(exist_ok=True) + importlib.reload(delta_barth.constants) + assert delta_barth.constants.DEPLOYMENT_STATUS + assert delta_barth.constants.RUNTIME_PATH == rt_path diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 0751b5d..fb62f39 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -45,6 +45,7 @@ def test_write_performance_metrics_FailStartingTime(session): ) +@patch("delta_barth.session.CFG_HOT_RELOAD", False) def test_sales_prognosis_pipeline(exmpl_api_sales_prognosis_resp, session, monkeypatch): with ( patch( diff --git a/tests/test_session.py b/tests/test_session.py index 0199353..0b27402 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -1,15 +1,18 @@ +import tomllib from pathlib import Path from unittest.mock import patch import pytest +import tomli_w +import delta_barth.config import delta_barth.session from delta_barth import logging 
from delta_barth.constants import ( DEFAULT_API_ERR_CODE, HTTP_BASE_CONTENT_HEADERS, - LOG_FILENAME, ) +from delta_barth.logging import LOG_FILENAME def test_validate_path_Success(): @@ -62,7 +65,7 @@ def test_session_setup_db_management(tmp_path): assert db_path.exists() -def test_session_setup_config(tmp_path, pth_cfg): +def test_session_setup_config(tmp_path): str_path = str(tmp_path) foldername: str = "cfg_test" target_cfg_dir = tmp_path / foldername @@ -80,6 +83,61 @@ def test_session_setup_config(tmp_path, pth_cfg): assert session.cfg.forecast.threshold_month_data_points == 28 +@patch("delta_barth.session.CFG_HOT_RELOAD", False) +def test_session_reload_config_NoHotReload(tmp_path): + str_path = str(tmp_path) + foldername: str = "cfg_test" + target_cfg_dir = tmp_path / foldername + session = delta_barth.session.Session(HTTP_BASE_CONTENT_HEADERS, cfg_folder=foldername) + session.set_data_path(str_path) + cfg_path = session.cfg_path + assert cfg_path.parent.exists() + assert cfg_path.parent == target_cfg_dir + assert not cfg_path.exists() + session.setup() + assert cfg_path.exists() + parsed_cfg = session.cfg + assert isinstance(parsed_cfg, delta_barth.config.Config) + # modify config and reload + with open(cfg_path, "rb") as file: + cfg_data = tomllib.load(file) + cfg_data["forecast"]["threshold_month_data_points"] = 30 + with open(cfg_path, "wb") as file: + tomli_w.dump(cfg_data, file) + + assert session.cfg.forecast.threshold_month_data_points == 28 + + session.reload_cfg() + reload_cfg = session.cfg + assert isinstance(reload_cfg, delta_barth.config.Config) + assert reload_cfg.forecast.threshold_month_data_points == 30 + + +@patch("delta_barth.session.CFG_HOT_RELOAD", True) +def test_session_reload_config_HotReload(tmp_path): + str_path = str(tmp_path) + foldername: str = "cfg_test" + target_cfg_dir = tmp_path / foldername + session = delta_barth.session.Session(HTTP_BASE_CONTENT_HEADERS, cfg_folder=foldername) + session.set_data_path(str_path) + cfg_path = 
session.cfg_path + assert cfg_path.parent.exists() + assert cfg_path.parent == target_cfg_dir + assert not cfg_path.exists() + session.setup() + assert cfg_path.exists() + parsed_cfg = session.cfg + assert isinstance(parsed_cfg, delta_barth.config.Config) + # modify config and reload + with open(cfg_path, "rb") as file: + cfg_data = tomllib.load(file) + cfg_data["forecast"]["threshold_month_data_points"] = 30 + with open(cfg_path, "wb") as file: + tomli_w.dump(cfg_data, file) + + assert session.cfg.forecast.threshold_month_data_points == 30 + + @patch("delta_barth.logging.ENABLE_LOGGING", True) @patch("delta_barth.logging.LOGGING_TO_FILE", True) @patch("delta_barth.logging.LOGGING_TO_STDERR", True)