prepare multiprocessing

This commit is contained in:
Florian Förster 2026-02-27 16:19:52 +01:00
parent b505bd7076
commit a9c6e4a260
6 changed files with 49 additions and 9 deletions

4
pdm.lock generated
View File

@ -5,7 +5,7 @@
groups = ["default", "dev", "lint", "nb", "open-cv", "tests"]
strategy = ["inherit_metadata"]
lock_version = "4.5.0"
content_hash = "sha256:dd33ca3d0a561a8f2634539cc333c0456a89a60141b902cb78408bf546abbc85"
content_hash = "sha256:3c47abf04bea7dfd195f350d89b59416b8492aaaf54257badfb8b4814b20e996"
[[metadata.targets]]
requires_python = ">=3.11,<3.15"
@ -1862,7 +1862,7 @@ name = "psutil"
version = "7.2.2"
requires_python = ">=3.6"
summary = "Cross-platform lib for process and system monitoring."
groups = ["nb"]
groups = ["default", "nb"]
files = [
{file = "psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b"},
{file = "psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea"},

View File

@ -6,7 +6,7 @@ authors = [
{name = "Susanne Franke", email = "s.franke@d-opt.de"},
{name = "Florian Förster", email = "f.foerster@d-opt.com"},
]
dependencies = ["PySide6>=6.10.2", "numpy>=2.4.2", "pillow>=12.1.1", "pyvips[binary]>=3.1.1"]
dependencies = ["PySide6>=6.10.2", "numpy>=2.4.2", "pillow>=12.1.1", "pyvips[binary]>=3.1.1", "psutil>=7.2.2"]
requires-python = "<3.15,>=3.11"
readme = "README.md"
license = {text = "LicenseRef-Proprietary"}

View File

@ -6,7 +6,7 @@ from KSG_anomaly_detection.monitor import monitor_folder_simple
profiler = cProfile.Profile()
PROFILE = True
PROFILE = False
USE_NEW_IMPL = True
ONLY_PREPARE = False

View File

@ -5,5 +5,6 @@ PATH = r"B:\projects\KSG\Ordnerstruktur"
# Pfad zu den einzelnen Päckchen, die untersucht werden sollen
FOLDER_LIST = [
r"B:\projects\KSG\Ordnerstruktur\Verifizierdaten_1\20260225\614706_helles Entek\614706_helles Entek[3136761]_1"
r"B:\projects\KSG\Ordnerstruktur\Verifizierdaten_1\20260225\614706_helles Entek\614706_helles Entek[3136761]_1",
r"B:\projects\KSG\Ordnerstruktur\Verifizierdaten_1\20260225\614706_helles Entek\614706_helles Entek[3136761]_2",
]

View File

@ -0,0 +1,36 @@
from __future__ import annotations
import multiprocessing as mp
from collections.abc import Iterable, Sequence
from typing import Any, TypeVar
import psutil
T = TypeVar("T")
class MPPool:
    """Thin wrapper around a ``multiprocessing.Pool`` plus a work-chunking helper.

    The pool is created eagerly in ``__init__`` and must be released with
    :meth:`stop`; alternatively the instance can be used as a context manager,
    which calls :meth:`stop` on exit.
    """

    def __init__(self) -> None:
        """Create a pool with one worker per physical core."""
        # Fall back to 4 workers when psutil reports no (falsy) core count.
        self.num_workers = psutil.cpu_count(logical=False) or 4
        self.pool = mp.Pool(processes=self.num_workers)

    def __enter__(self) -> MPPool:
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Shut the pool down when the ``with`` block is left."""
        self.stop()

    def chunk_data(
        self,
        data: list[T],
        chunk_size: int | None = None,
    ) -> Sequence[Sequence[T]]:
        """Split *data* into at most ``self.num_workers`` chunks.

        Args:
            data: Items to distribute. The input itself is not modified.
            chunk_size: Number of items per chunk before surplus items are
                redistributed. Defaults to ``len(data) // num_workers``
                (at least 1).

        Returns:
            A list of lists. If slicing yields more chunks than workers, only
            the first ``num_workers`` chunks are kept and every surplus item
            is appended round-robin to them, so no item is lost and the
            number of chunks never exceeds ``num_workers``.

        Raises:
            ValueError: If an explicit *chunk_size* is smaller than 1.
        """
        if chunk_size is None:
            chunk_size = max(1, len(data) // self.num_workers)
        elif chunk_size < 1:
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

        chunks = [data[i : i + chunk_size] for i in range(0, len(data), chunk_size)]

        if len(chunks) > self.num_workers:
            # There may be several surplus chunks (e.g. 11 items, 4 workers ->
            # 6 slices of size 2), so fold *all* of them back, not only the last.
            surplus = chunks[self.num_workers :]
            chunks = chunks[: self.num_workers]
            leftovers = (entry for chunk in surplus for entry in chunk)
            for idx, entry in enumerate(leftovers):
                # Wrap around so that more leftovers than chunks cannot
                # run past the end of the list.
                chunks[idx % self.num_workers].append(entry)

        return chunks

    def stop(self) -> None:
        """Stop accepting new work and wait for the worker processes to exit."""
        self.pool.close()
        self.pool.join()

View File

@ -15,6 +15,7 @@ from KSG_anomaly_detection import config
Image.MAX_IMAGE_PIXELS = None
COLOUR_ASSIGNMENT = {"R": [255, 0, 0], "G": [0, 255, 0], "B": [0, 0, 0]}
RE_CHANNEL_MAPPING = re.compile(r"R_NG(\d+)_(\d+)\.jpg$")
class Preparation:
@ -305,7 +306,6 @@ class Preparation:
def create_rgb_images_and_patches_new(self):
# in the folders of interest, we iterate over all images and search for the three that belong together
# (because in advance we do not know how many there are)
pattern = re.compile(r"R_NG(\d+)_(\d+)\.jpg$")
# create folder name in our temp folder "Backup" and store it
# therefore, first extract the name of the current folder from the whole path
@ -328,12 +328,16 @@ class Preparation:
# ?? Hier gewinnen wir wieder alle Verzeichnisse oberhalb der Paketebene, d.h.
# ?? unabhängig vom Päckchen
# Annahme: Wir wollen tatsächlich nur auf Päckchenebene arbeiten
# Sollten i.d.R. vier Ordner sein, 2 Kameras je 1x Vorder-/Rückseite
checkimg_folders = tuple(
p for p in self.original_data_path.rglob("checkimg") if p.is_dir()
)
# print(f">>> {checkimg_folders=}")
# sys.exit(0)
images = tuple(self.original_data_path.rglob("checkimg/R_NG*_*.jpg"))
print(f">>> {len(images)=}")
sys.exit(0)
# iterate through all 'checkimg' folders recursively
for checkimg_folder in checkimg_folders:
@ -355,10 +359,9 @@ class Preparation:
for file_path in checkimg_folder.glob("R_NG*_*.jpg"):
# find match according to pattern defined at the very beginning
match = pattern.match(file_path.name)
match = RE_CHANNEL_MAPPING.match(file_path.name)
if not match:
continue
num1, num2 = match.groups()
# find all three images belonging together