From a9c6e4a260445296f38198717d6317ad7a33bfa2 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 27 Feb 2026 16:19:52 +0100 Subject: [PATCH] prepare multiprocessing --- pdm.lock | 4 +-- pyproject.toml | 2 +- src/KSG_anomaly_detection/_profile.py | 2 +- src/KSG_anomaly_detection/config_for_test.py | 3 +- src/KSG_anomaly_detection/delegator.py | 36 ++++++++++++++++++++ src/KSG_anomaly_detection/preparation.py | 11 +++--- 6 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 src/KSG_anomaly_detection/delegator.py diff --git a/pdm.lock b/pdm.lock index 03b8e85..48a8942 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "lint", "nb", "open-cv", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:dd33ca3d0a561a8f2634539cc333c0456a89a60141b902cb78408bf546abbc85" +content_hash = "sha256:3c47abf04bea7dfd195f350d89b59416b8492aaaf54257badfb8b4814b20e996" [[metadata.targets]] requires_python = ">=3.11,<3.15" @@ -1862,7 +1862,7 @@ name = "psutil" version = "7.2.2" requires_python = ">=3.6" summary = "Cross-platform lib for process and system monitoring." -groups = ["nb"] +groups = ["default", "nb"] files = [ {file = "psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b"}, {file = "psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea"}, diff --git a/pyproject.toml b/pyproject.toml index c8794d5..170fbd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ {name = "Susanne Franke", email = "s.franke@d-opt.de"}, {name = "Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["PySide6>=6.10.2", "numpy>=2.4.2", "pillow>=12.1.1", "pyvips[binary]>=3.1.1"] +dependencies = ["PySide6>=6.10.2", "numpy>=2.4.2", "pillow>=12.1.1", "pyvips[binary]>=3.1.1", "psutil>=7.2.2"] requires-python = "<3.15,>=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"} diff --git a/src/KSG_anomaly_detection/_profile.py b/src/KSG_anomaly_detection/_profile.py index 9a80999..6253f55 100644 --- a/src/KSG_anomaly_detection/_profile.py +++ b/src/KSG_anomaly_detection/_profile.py @@ -6,7 +6,7 @@ from KSG_anomaly_detection.monitor import monitor_folder_simple profiler = cProfile.Profile() -PROFILE = True +PROFILE = False USE_NEW_IMPL = True ONLY_PREPARE = False diff --git a/src/KSG_anomaly_detection/config_for_test.py b/src/KSG_anomaly_detection/config_for_test.py index 5b591d3..cacc114 100644 --- a/src/KSG_anomaly_detection/config_for_test.py +++ b/src/KSG_anomaly_detection/config_for_test.py @@ -5,5 +5,6 @@ PATH = r"B:\projects\KSG\Ordnerstruktur" # Pfad zu den einzelnen Päckchen, die untersucht werden sollen FOLDER_LIST = [ - r"B:\projects\KSG\Ordnerstruktur\Verifizierdaten_1\20260225\614706_helles Entek\614706_helles Entek[3136761]_1" + r"B:\projects\KSG\Ordnerstruktur\Verifizierdaten_1\20260225\614706_helles Entek\614706_helles Entek[3136761]_1", + r"B:\projects\KSG\Ordnerstruktur\Verifizierdaten_1\20260225\614706_helles Entek\614706_helles Entek[3136761]_2", ] diff --git a/src/KSG_anomaly_detection/delegator.py b/src/KSG_anomaly_detection/delegator.py new file mode 100644 index 0000000..4f913f1 --- /dev/null +++ b/src/KSG_anomaly_detection/delegator.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import multiprocessing as mp +from collections.abc import Iterable, Sequence +from typing import Any, TypeVar + +import psutil + +T = TypeVar("T") + + +class MPPool: + def __init__(self) -> None: + self.num_workers = psutil.cpu_count(logical=False) or 4 + self.pool = mp.Pool(processes=self.num_workers) + + def chunk_data( + self, + data: list[T], + chunk_size: int | None = None, + ) -> Sequence[Sequence[T]]: + if chunk_size is None: + chunk_size = max(1, len(data) // self.num_workers) + chunks = [data[i : i + chunk_size] for i in range(0, len(data), chunk_size)] + + if len(chunks) > self.num_workers: + open_chunk = chunks[-1] + for idx, entry in enumerate(open_chunk): + chunks[idx].append(entry) + del chunks[-1] + + return chunks + + def stop(self) -> None: + self.pool.close() + self.pool.join() diff --git a/src/KSG_anomaly_detection/preparation.py b/src/KSG_anomaly_detection/preparation.py index 58947ce..4f2b35a 100644 --- a/src/KSG_anomaly_detection/preparation.py +++ b/src/KSG_anomaly_detection/preparation.py @@ -15,6 +15,7 @@ from KSG_anomaly_detection import config Image.MAX_IMAGE_PIXELS = None COLOUR_ASSIGNMENT = {"R": [255, 0, 0], "G": [0, 255, 0], "B": [0, 0, 0]} +RE_CHANNEL_MAPPING = re.compile(r"R_NG(\d+)_(\d+)\.jpg$") class Preparation: @@ -305,7 +306,6 @@ class Preparation: def create_rgb_images_and_patches_new(self): # in the folders of interest, we iterate over all images and search for the three that belong together # (because in advance we do not know how many there are) - pattern = re.compile(r"R_NG(\d+)_(\d+)\.jpg$") # create folder name in our temp folder "Backup" and store it # therefore, first extract the name of the current folder from the whole path @@ -328,12 +328,16 @@ class Preparation: # ?? Hier gewinnen wir wieder alle Verzeichnisse oberhalb der Paketebene, d.h. # ?? unabhängig vom Päckchen # Annahme: Wir wollen tatsächlich nur auf Päckchenebene arbeiten + # Sollten i.d.R. vier Ordner sein, 2 Kameras je 1x Vorder-/Rückseite checkimg_folders = tuple( p for p in self.original_data_path.rglob("checkimg") if p.is_dir() ) # print(f">>> {checkimg_folders=}") - # sys.exit(0) + images = tuple(self.original_data_path.rglob("checkimg/R_NG*_*.jpg")) + print(f">>> {len(images)=}") + + sys.exit(0) # iterate through all 'checkimg' folders recursively for checkimg_folder in checkimg_folders: @@ -355,10 +359,9 @@ class Preparation: for file_path in checkimg_folder.glob("R_NG*_*.jpg"): # find match according to pattern defined at the very beginning - match = pattern.match(file_path.name) + match = RE_CHANNEL_MAPPING.match(file_path.name) if not match: continue - num1, num2 = match.groups() # find all three images belonging together