From 9743d41dfd9ffd1862b60df2fe1707d7dd1fd8e0 Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 3 Jun 2026 14:42:27 +0200 Subject: [PATCH 01/48] add deps --- pdm.lock | 217 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 2 files changed, 216 insertions(+), 3 deletions(-) diff --git a/pdm.lock b/pdm.lock index ecca6c1..f15bd02 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:3a107981dc4305f031f87c89e3a57a6bb823954d397a52d074fef1c72ac639d0" +content_hash = "sha256:33241b6273d0130d424b01e4bde45eba106100f2a47b12e3c053ba1ecd1557ae" [[metadata.targets]] requires_python = ">=3.11" @@ -820,6 +820,85 @@ files = [ {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, ] +[[package]] +name = "greenlet" +version = "3.5.1" +requires_python = ">=3.10" +summary = "Lightweight in-process concurrent programming" +groups = ["default"] +files = [ + {file = "greenlet-3.5.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:73f78f9b9f0a5c06e5c946ba1e8e36f5114923b6be109ee618c54f079c3ea14f"}, + {file = "greenlet-3.5.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0cbed8bb44e23c5b199f888f4e4ce096b45ad9f25ff74a7ad0213875e936bb2"}, + {file = "greenlet-3.5.1-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a203a8bd0acb0701653d3bbb26e404854a68674139ed5cbb778830f42b09bb33"}, + {file = "greenlet-3.5.1-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6ebeb75c81211f5c702576cf81f315e77e23cfdb2c7c6fcb9dd143e6de35c360"}, + {file = "greenlet-3.5.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a271fcd66c74615cda6a964fda3f304267a12e50a084472218a39bb0376f563"}, + {file = "greenlet-3.5.1-cp311-cp311-manylinux_2_39_riscv64.whl", hash = 
"sha256:017a544f0385d441e88714160d089d6900ef46c9eff9d99b6715a5ef2d127747"}, + {file = "greenlet-3.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ded7b068c7c31c1a8657d4fd42d886b3e051ae29f88b80c5ff9d502257b0f071"}, + {file = "greenlet-3.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d0932b81d72f552ded9d810d00021b64d89f2195a91ce115b893f943b7a4ab3c"}, + {file = "greenlet-3.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:88e300d136eac057b2397aa1cfd7328b4c87c7eb66a09c7bc6a1292234db474e"}, + {file = "greenlet-3.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:cc6ab7e555c8a112ad3a76e368e86e12a2754bcae1652a5602e133ec7b635523"}, + {file = "greenlet-3.5.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:fa4f98af3a528f0c3fd592a26df7f376f93329c8f4d987f6bb979057af8bf5e2"}, + {file = "greenlet-3.5.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffea73584b216150eab159b6d12348fb253e68757974de1e2c40d8a318ac89ed"}, + {file = "greenlet-3.5.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1072b4f9edcc1e192d9283a66a3e68d6b84c561de33a83d7858beb9ba1effe10"}, + {file = "greenlet-3.5.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:89101bfd5011e069be974903cb3a4e4523845e4ece2d62dcd8d358933c0ef249"}, + {file = "greenlet-3.5.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:add5217d68b31130f0beca584d7fef4878327d2e31642b66618a14eef312b63b"}, + {file = "greenlet-3.5.1-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:e6cd99ea59dd5d89f0c956606571d79bfe6f68c9eb7f4a4083a41a7f1587edee"}, + {file = "greenlet-3.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5ea42a752d47a145eae922b605cd1634665ac3d5ec1e72402d5048e8d60d207"}, + {file = "greenlet-3.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5551170cf4f5ff5623e9af81323751979fee2c731e2287b61f73cd27257b823"}, + {file = "greenlet-3.5.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:3c8bb982ad117d29478ef8f5533e97df21f1e2befd17a299257b0c96d1371c0b"}, + {file = "greenlet-3.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:80eb4b04dadc4e67df3fae179a32c4706a3f495bc7f22fc8a81115d5f5512188"}, + {file = "greenlet-3.5.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:51518ff74664078fc51bffcc6fc529b0df5ae58da192691cee765d45ce944a2b"}, + {file = "greenlet-3.5.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ffdb3c0bb002c99cd8f298957e046c3dbf6006b5b7cdf11a4e19194624a0a0a"}, + {file = "greenlet-3.5.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7715a5a2c3378ba602c3a440558261e13a820bb53a82693aacd7b7f6d964e283"}, + {file = "greenlet-3.5.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d40a890035c0058cadbdc4af7569800fd28a0e527a0fdbb7b5f9418f176846ce"}, + {file = "greenlet-3.5.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc71ff466927a201b08305acac451ebe1aedfcea002f62f1f2f2ac2ac1e6a135"}, + {file = "greenlet-3.5.1-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:67821bb03e4e98664490edb787ff6af501194c29bbee0f5c1dfdcf1dc3d9d436"}, + {file = "greenlet-3.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cd443683db272ebaaca03af98c0b063ab30db70ea8a31a1559f35e3f7b744ccd"}, + {file = "greenlet-3.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:089fff7a6ce8d9316d1f65ebc00273a56be258c1725b32b94de90a3a979557e1"}, + {file = "greenlet-3.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:110a1ca7b49b014b097f6078272c3f4ed31af45b254de5228b79adba879f6af9"}, + {file = "greenlet-3.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:f16ba1efc0715b680a18b8123d90dad887c6112ae3555b4b5c32c149540c6b4e"}, + {file = "greenlet-3.5.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8ab31c9de8651a2facdd5c5bb0011f2380dd1a7af78ce2adf4b56095294fc07"}, + {file = 
"greenlet-3.5.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e300185139abc337ade480c327183adf42a875ac7181bfe66d7d4efea31fbea"}, + {file = "greenlet-3.5.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7ffdb990dcaa0234cf9845aead5df2e3c3a8b6507d409274dd87e0d5ab05ffc2"}, + {file = "greenlet-3.5.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c09df69dc1712d131332054a858a3e5cca400967fa3a672e2324fbb0971448c"}, + {file = "greenlet-3.5.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f82b3597e9d83b63408affed0b48fd0f54935edac4302237b9a837be0dae33c"}, + {file = "greenlet-3.5.1-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:a4764e0bfc6a4d114c865b32520805c16a990ef5f286a514413b05d5ecd6a23d"}, + {file = "greenlet-3.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c0141e37414c10164e702b8fb1473304221ad98f71600850c6ef7ff4880feba0"}, + {file = "greenlet-3.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:50ae25a67bea74ea41fb14b960bc532df73eb713417b2d61892dced82fe8d3bc"}, + {file = "greenlet-3.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:8a17c42330e261299766b75ac1ea32caa437a9453c8f65d16a13140db378ecd3"}, + {file = "greenlet-3.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:7b5f5fae05b8ac6d176a61b60c394a8cbdc2b5b91b81793066e68745cf165e54"}, + {file = "greenlet-3.5.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:ea8da1e900d758d078810d4255d8c6aa572181896a31ec79d779eb79c3adc9ad"}, + {file = "greenlet-3.5.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a19570c52a21420dcbc94e661994bc325c0b5b11304540fed514586da5dc8f2e"}, + {file = "greenlet-3.5.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3d955c89b75eeca4723d7cc14135f393cd47c32e2a6cb4a8e4c6e760a26b0986"}, + {file = "greenlet-3.5.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:ea37d5a157eb9493820d3792ac4ece28619a394391d2b9f2f78057d396ff0f0f"}, + {file = "greenlet-3.5.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2daaaebd1a5aa88c49045b6baf9310b3263796bd88db713edf37cf53e7bb4e"}, + {file = "greenlet-3.5.1-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:8d8a23250ea3ec7b36de8fa4b541e9e2db3ee82915cc060ab0631609ad8b28de"}, + {file = "greenlet-3.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bfbd69cc349e43bf3a8ae1c85548ff0718efc887615c2db16c3833d7b0b072d"}, + {file = "greenlet-3.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4378720dd888136c27215a0214d32a4d37c3852765d45bc37aad0623423cfd78"}, + {file = "greenlet-3.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:45718441607f9325d948db98cbc691276059316d0358c188c246da4e1d4d23d2"}, + {file = "greenlet-3.5.1-cp315-cp315-macosx_11_0_universal2.whl", hash = "sha256:2baee5ca02031757ffe8cc3d69f0cc0aec7065ce362622da74f32d3bcab1c541"}, + {file = "greenlet-3.5.1-cp315-cp315-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b1ec3274918a81d3ea778b9e75b56b72b33f300edb6cf7f3a7fe1dae56683de"}, + {file = "greenlet-3.5.1-cp315-cp315-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:111e2390ffffc47d5840b01711dd7fac07d4c09283d0283e7f3264b14e284c64"}, + {file = "greenlet-3.5.1-cp315-cp315-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:10a9a1c0bfbc93d41156ffcb90c75fbc05544054faf15dcc1fdf9765f8b607f0"}, + {file = "greenlet-3.5.1-cp315-cp315-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e630136e905fe5ff43e86945ae41220b6d1470956a39220e708110ac48d01ea5"}, + {file = "greenlet-3.5.1-cp315-cp315-manylinux_2_39_riscv64.whl", hash = "sha256:ef08c1567c78074b22d1a200183d52d04a14df447bf70bcbb6a3507a48e776fc"}, + {file = "greenlet-3.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:975eac34b44a7077ca4d421348455b94f0f518246a7f14bc6d2fdcfe5b584368"}, + {file = 
"greenlet-3.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:9ab3c3a0b2ae6198e67c898dad5215a49f9ae0d0081b3c3ec59f333e39eeca26"}, + {file = "greenlet-3.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:cbfc69be86e10dcfef5b1e6269d1d6926552aa89ee39e1de3353360c1b6989ab"}, + {file = "greenlet-3.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:92fd6d44ac5e5a887c8a5dc4a8ba0ba908527c31c12f78c6bc7dcfe8aab279f6"}, + {file = "greenlet-3.5.1-cp315-cp315t-macosx_11_0_universal2.whl", hash = "sha256:a6fdf2433a5441ef9a95464f7c3e674775da1c8c1177fff311cee1acad4626ed"}, + {file = "greenlet-3.5.1-cp315-cp315t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7546556f0d649f99f6a361098a55f761181bb2ea12ff150bb16d26092ad88244"}, + {file = "greenlet-3.5.1-cp315-cp315t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d5ee3ea898009fa898f85f9982255d35278c477bebe185beca249cab42d4526c"}, + {file = "greenlet-3.5.1-cp315-cp315t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a57b0d05a0448eed231d59c0ceb287dde984551e54cbc51ac2d4865712838e9c"}, + {file = "greenlet-3.5.1-cp315-cp315t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5c81f74d204d3edd136ebfd50dce53acbb776995d721a0fe801626cfc93b8cd"}, + {file = "greenlet-3.5.1-cp315-cp315t-manylinux_2_39_riscv64.whl", hash = "sha256:b0703c2cef53e01baec47f7a3868009913ad71ec678bbecb42a6f40895e4ce62"}, + {file = "greenlet-3.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:2c18ef16bf6d4dd410e4dd52996888ea1497be26892fe5bbc73580aba4287b8e"}, + {file = "greenlet-3.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:17d86354f0ae6b61bf9be5148d0dd34e06c3cb7c602c671f79f29ac3b150e659"}, + {file = "greenlet-3.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:e7516cf6ae6b8a582c2770a0caed47b8a48373ed732c33d69a72913ae6ac923e"}, + {file = "greenlet-3.5.1-cp315-cp315t-win_arm64.whl", hash = "sha256:5028648bf2253ec4745add746129d3904121fa7fe871a76bed23c5720573ce0a"}, + {file = 
"greenlet-3.5.1.tar.gz", hash = "sha256:5a56aeb7d5d9cc4b3a735efb5095bd4b4f6f0e4f93e5ca876d0e2315137b7829"}, +] + [[package]] name = "h11" version = "0.16.0" @@ -1658,6 +1737,38 @@ files = [ {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, ] +[[package]] +name = "polars" +version = "1.41.2" +requires_python = ">=3.10" +summary = "Blazingly fast DataFrame library" +groups = ["default"] +dependencies = [ + "polars-runtime-32==1.41.2", +] +files = [ + {file = "polars-1.41.2-py3-none-any.whl", hash = "sha256:23ce9a2910b6e3e8d4258770bf44aa17170958df7af6e85feedf4458a04d8d29"}, + {file = "polars-1.41.2.tar.gz", hash = "sha256:256d6731162371b77f3f29a55eacb8c0fc740ddb1a293a01d2ef5b5393c5c708"}, +] + +[[package]] +name = "polars-runtime-32" +version = "1.41.2" +requires_python = ">=3.10" +summary = "Blazingly fast DataFrame library" +groups = ["default"] +files = [ + {file = "polars_runtime_32-1.41.2-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:95a08346dac337357cdb825c8076df7d36da54c4caa59a5cb41d0a30691c5edd"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:dedfaeec2c7f995298da7319dd9431d662e5dd1d0ec51b1459df4a0234ceff52"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18eea22c5cc34e27f8a60950458ad81e6a9ea75e89363ca1367e14e7e7f781fc"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2630540dfdfb0f36f9b04a07c7c2e3f50bf2ad384113263c1c812007ee9141e0"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:20e969e08f9b137e233c04cc04de73d9795f89eb77d34854e40a025965a43763"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e7016a3deb641b64a31447abbbee0f34bd020a6a9ae34ee6b743837def15e2a4"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-win_amd64.whl", hash = 
"sha256:1e5e5377c315e0dcafdfb2a31adc546abbaeb3f9cb1864e6536523d2af473265"}, + {file = "polars_runtime_32-1.41.2-cp310-abi3-win_arm64.whl", hash = "sha256:843d96f69d18eca53429c1198e58891db7f18111f83b9c419bb45ad9d73eaed5"}, + {file = "polars_runtime_32-1.41.2.tar.gz", hash = "sha256:7af09ec1ab053da2c9669e8d15f809a4083a29be05db57111688b8051062af56"}, +] + [[package]] name = "prometheus-client" version = "0.25.0" @@ -2457,6 +2568,108 @@ files = [ {file = "soupsieve-2.8.4.tar.gz", hash = "sha256:e121fd02e975c695e4e9e8774a5ee35d74714b59307868dcc5319ad2d9e3328e"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.50" +requires_python = ">=3.7" +summary = "Database Abstraction Library" +groups = ["default"] +dependencies = [ + "greenlet>=1; platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\"", + "importlib-metadata; python_version < \"3.8\"", + "typing-extensions>=4.6.0", +] +files = [ + {file = "sqlalchemy-2.0.50-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1aa6e403663a9c43c8fef7ce4bdb4cf48bcd8d352e91deda2a99f963270bd508"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51b637a84f9fa35ae1f9017e786cb142974a25305085e1b378b3647a67f65ad3"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2dab927761d9108550f0cf8e66ff21af56f907a0ce0a689793db615e2b55f62c"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:545eae198d37bcf837a10ede3684e2af32458d6f35c597c35c2de7502dc38fc4"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fec460e18cdbb4c7773531122ce9a27e96c6ca17af3933941d94da475ad2c86"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-win32.whl", hash = 
"sha256:e6e814658818fd165e749e3d8490ef16cc7f379a118c37ada8b0589ffbaaac22"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-win_amd64.whl", hash = "sha256:1c5f858fe79c9f5d8fda065c06186356acb7f8df3cd52dbd5ee3f200e4b144f5"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23ae23d8b9d344d30d0a92f06d45825024a5790f1c1dd4cf452636a50d3e58cb"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47b71b933e7b4ebad407c8fdfd70d2c4f08b78b3238bb30eebdd6eb32ca51b89"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:110fdac56ace278949f00de805edacbd6141e382d992f9ba28238b3a0827a600"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5e4ac70e9e757f6b3e87c0491ff034442ecd8dfd36d041a50564c322dafc0e"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:724f3dcbe53dd0151e3cb5e7ec4ba4c620bede579caacd16275dc35ce06e8615"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-win32.whl", hash = "sha256:1208050441471d003b7c8cb4054fb084f185cf35ac3f0ea270803865bca9939a"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-win_amd64.whl", hash = "sha256:9d1af51558029a156a70986b7df88f042b3d158d7c8d8fb5072912d4b32d89c7"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:06a9210bdc5f4298cff0781087e2ff45683922252dacc452846373a58761f093"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b53784972ade4f8174b9aa661f31a06f8a936d2cfdd602913ff3c6dd40ae873"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31648fa14460537e768a7303b078e4344d208e0d23e06867c1f376a227ed82db"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:03f4323c980ad0e918cc9e5369b015f759f4e534db5bbaf4dc36832c10d05064"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2b9dcc43afef8ac157cd92fce96985d6b8b0cfbd3df4d666f66b4d55a75d202f"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-win32.whl", hash = "sha256:60922d6599065ddca2c6f376b9aa2f41a6b85a271725e0909490bbc50b1998a5"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-win_amd64.whl", hash = "sha256:287086e67275a212c4582d166a6fb03a65ccc5551d80866270ce0dd9f34eccd3"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c966932507a4d7d0a37314927dbfcd89720e3f37d2a1e3352e7ae7939fa8e8a0"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:faffef4bcc20a1892e65e155293d99d60855bbbc79250ab712819cfd56a8e6bb"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c206aec519a2e7bd08abbfb33436e325fd22c632d9c21a9047e376ce241646e"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bef4ac756363227ef6402a75fee025a4bc690f92328e825868939b3b3a446a6d"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:96fbee6b19c19cd1556c8bf9419447cf2ec149ffcab7ab64348c23e54ef8547f"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-win32.whl", hash = "sha256:8f00e3eb43ba30eb1b238ee03a8a62309486d1321eda3328bb611e0340033ad8"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-win_amd64.whl", hash = "sha256:15708c613cd5005b7dffe1f66ee6a63ee8f5e46799f71c70ebad74178c676a39"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3699dac4be410e97049a1658e9480da9cde956594aa0f3aebc60b88f21c5ba70"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f96233858e3df43932ac11589e22520da6e8aeb624b03fedfeebb0e8ea213086"}, + {file = 
"sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4e70c46fad30c3bcc6a4708bc0130a3173e11a5b25f0ea4a9d8911b450f1f52"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1918a3cf564d16d95bca7301005f41ab2ad50b07cd3b9da50d3ed986db148d6a"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b00098cdbdbd38c7be3d568b0c9c3122b8c0ec62b911b57cd5e6e0254d60a76d"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-win32.whl", hash = "sha256:1fbd55a969d7ac44a98e3dec75016074f809fa08f871585ace58dde110d1bf3e"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-win_amd64.whl", hash = "sha256:c5c3cdb753a9004183e1ccb634b41611654c989e61bc68617ce878e46d6f1e51"}, + {file = "sqlalchemy-2.0.50-py3-none-any.whl", hash = "sha256:92064363517a3ff8212b5a93b8c62876579d8dfd1ca5b561335f30152d884fa9"}, + {file = "sqlalchemy-2.0.50.tar.gz", hash = "sha256:af5607d11ef90fd6a5c0549fe0045dce1663d427426bcfb506dcb5346a85a3b9"}, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.50" +extras = ["asyncio"] +requires_python = ">=3.7" +summary = "Database Abstraction Library" +groups = ["default"] +dependencies = [ + "greenlet>=1", + "sqlalchemy==2.0.50", +] +files = [ + {file = "sqlalchemy-2.0.50-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1aa6e403663a9c43c8fef7ce4bdb4cf48bcd8d352e91deda2a99f963270bd508"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51b637a84f9fa35ae1f9017e786cb142974a25305085e1b378b3647a67f65ad3"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2dab927761d9108550f0cf8e66ff21af56f907a0ce0a689793db615e2b55f62c"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:545eae198d37bcf837a10ede3684e2af32458d6f35c597c35c2de7502dc38fc4"}, + {file = 
"sqlalchemy-2.0.50-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fec460e18cdbb4c7773531122ce9a27e96c6ca17af3933941d94da475ad2c86"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-win32.whl", hash = "sha256:e6e814658818fd165e749e3d8490ef16cc7f379a118c37ada8b0589ffbaaac22"}, + {file = "sqlalchemy-2.0.50-cp311-cp311-win_amd64.whl", hash = "sha256:1c5f858fe79c9f5d8fda065c06186356acb7f8df3cd52dbd5ee3f200e4b144f5"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23ae23d8b9d344d30d0a92f06d45825024a5790f1c1dd4cf452636a50d3e58cb"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47b71b933e7b4ebad407c8fdfd70d2c4f08b78b3238bb30eebdd6eb32ca51b89"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:110fdac56ace278949f00de805edacbd6141e382d992f9ba28238b3a0827a600"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5e4ac70e9e757f6b3e87c0491ff034442ecd8dfd36d041a50564c322dafc0e"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:724f3dcbe53dd0151e3cb5e7ec4ba4c620bede579caacd16275dc35ce06e8615"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-win32.whl", hash = "sha256:1208050441471d003b7c8cb4054fb084f185cf35ac3f0ea270803865bca9939a"}, + {file = "sqlalchemy-2.0.50-cp312-cp312-win_amd64.whl", hash = "sha256:9d1af51558029a156a70986b7df88f042b3d158d7c8d8fb5072912d4b32d89c7"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:06a9210bdc5f4298cff0781087e2ff45683922252dacc452846373a58761f093"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b53784972ade4f8174b9aa661f31a06f8a936d2cfdd602913ff3c6dd40ae873"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash 
= "sha256:31648fa14460537e768a7303b078e4344d208e0d23e06867c1f376a227ed82db"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:03f4323c980ad0e918cc9e5369b015f759f4e534db5bbaf4dc36832c10d05064"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2b9dcc43afef8ac157cd92fce96985d6b8b0cfbd3df4d666f66b4d55a75d202f"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-win32.whl", hash = "sha256:60922d6599065ddca2c6f376b9aa2f41a6b85a271725e0909490bbc50b1998a5"}, + {file = "sqlalchemy-2.0.50-cp313-cp313-win_amd64.whl", hash = "sha256:287086e67275a212c4582d166a6fb03a65ccc5551d80866270ce0dd9f34eccd3"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c966932507a4d7d0a37314927dbfcd89720e3f37d2a1e3352e7ae7939fa8e8a0"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:faffef4bcc20a1892e65e155293d99d60855bbbc79250ab712819cfd56a8e6bb"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c206aec519a2e7bd08abbfb33436e325fd22c632d9c21a9047e376ce241646e"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bef4ac756363227ef6402a75fee025a4bc690f92328e825868939b3b3a446a6d"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:96fbee6b19c19cd1556c8bf9419447cf2ec149ffcab7ab64348c23e54ef8547f"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-win32.whl", hash = "sha256:8f00e3eb43ba30eb1b238ee03a8a62309486d1321eda3328bb611e0340033ad8"}, + {file = "sqlalchemy-2.0.50-cp314-cp314-win_amd64.whl", hash = "sha256:15708c613cd5005b7dffe1f66ee6a63ee8f5e46799f71c70ebad74178c676a39"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3699dac4be410e97049a1658e9480da9cde956594aa0f3aebc60b88f21c5ba70"}, + {file = 
"sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f96233858e3df43932ac11589e22520da6e8aeb624b03fedfeebb0e8ea213086"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4e70c46fad30c3bcc6a4708bc0130a3173e11a5b25f0ea4a9d8911b450f1f52"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1918a3cf564d16d95bca7301005f41ab2ad50b07cd3b9da50d3ed986db148d6a"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b00098cdbdbd38c7be3d568b0c9c3122b8c0ec62b911b57cd5e6e0254d60a76d"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-win32.whl", hash = "sha256:1fbd55a969d7ac44a98e3dec75016074f809fa08f871585ace58dde110d1bf3e"}, + {file = "sqlalchemy-2.0.50-cp314-cp314t-win_amd64.whl", hash = "sha256:c5c3cdb753a9004183e1ccb634b41611654c989e61bc68617ce878e46d6f1e51"}, + {file = "sqlalchemy-2.0.50-py3-none-any.whl", hash = "sha256:92064363517a3ff8212b5a93b8c62876579d8dfd1ca5b561335f30152d884fa9"}, + {file = "sqlalchemy-2.0.50.tar.gz", hash = "sha256:af5607d11ef90fd6a5c0549fe0045dce1663d427426bcfb506dcb5346a85a3b9"}, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -2548,7 +2761,7 @@ name = "typing-extensions" version = "4.15.0" requires_python = ">=3.9" summary = "Backported and Experimental Type Hints for Python 3.9+" -groups = ["dev", "nb"] +groups = ["default", "dev", "nb"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, diff --git a/pyproject.toml b/pyproject.toml index 7c88be7..e9a9d6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "analysis of production state messages obtained from customers" authors = [ {name = 
"d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = [] +dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50"] requires-python = ">=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"} -- 2.34.1 From 8c6e36e43dcfeeb0aa2e025e86302c6f2ce8e860 Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 3 Jun 2026 14:42:33 +0200 Subject: [PATCH 02/48] add prototyping --- .gitignore | 2 +- prototypes/01_first-look_20260603.py | 228 +++++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 prototypes/01_first-look_20260603.py diff --git a/.gitignore b/.gitignore index 7ebd259..44bff78 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ # own -prototypes/ +# prototypes/ data/ reports/ *.code-workspace diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01_first-look_20260603.py new file mode 100644 index 0000000..4d53dc8 --- /dev/null +++ b/prototypes/01_first-look_20260603.py @@ -0,0 +1,228 @@ +# %% +from pathlib import Path + +import polars as pl + +# %% +PROJECT_BASE = Path(__file__).parents[1] +DATA = PROJECT_BASE / "data" +assert DATA.exists() +# %% +data_t1 = DATA / "PSM/20260507" +assert data_t1.exists() +# %% +data_t1_jobs = data_t1 / "MIS-Auträge_22.csv" +assert data_t1_jobs.exists() +data_t1_PSM = data_t1 / "Produktionsstandsmeldungen.csv" +assert data_t1_PSM.exists() +# %% +# // MIS-Aufträge +pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") + +# %% +# // PSM +schema_PSM: dict[str, type[pl.DataType]] = { + "VK Auftrag": pl.UInt32, + "Artikelbez.": pl.String, + "Auftragsmenge": pl.UInt32, + "Kunde": pl.String, + "PA": pl.UInt64, + "PA Pos": pl.UInt32, + "PSM gemeldet am": pl.Datetime, + "Konfektionär": pl.String, + "Artikelnr.": pl.String, + "LT Kunde bestätigt": pl.Date, + "Export Ist": pl.Date, + "1.bestät. 
Import Konfektionär": pl.Date, + "Import Ist": pl.Date, + "Ablief.(Import Ist+Transport)": pl.Date, + "Wareneingang am": pl.Date, + "Wareneingang geprüft": pl.String, + "Täglicher Ausstoss": pl.Int64, + "Zuschnitt am": pl.Date, + "Teile in Zuschnitt": pl.UInt64, + "Teile im Nähband": pl.UInt64, + "Fertigware aus Nähband": pl.UInt64, + "Teile kontrolliert": pl.UInt64, + "Teile verpackt in Karton": pl.UInt64, + "Anzahl Bänder": pl.UInt16, + "Anzahl Näher": pl.UInt16, + "Arbeitsstunden pro Näher": pl.UInt8, + "Anzahl Arbeitstage pro Woche": pl.UInt8, + "Blockauftrag": pl.String, +} + +# psm = pl.read_csv(data_t1_PSM, encoding="windows-1252", separator=";") +psm = pl.read_csv( + data_t1_PSM, + encoding="windows-1252", + separator=";", + schema_overrides=schema_PSM, + null_values=["01.01.1111 00:00:00"], +) +# %% +psm.filter(pl.col("Konfektionär").str.contains("MEMTEKS")) +# %% + +# %% +psm.estimated_size("mb") + +# %% +regex_pattern = r"^[\s\-#+/$]+$" +psm = psm.with_columns( + pl.when(pl.col(pl.String).str.contains(regex_pattern)) + .then(None) + .otherwise(pl.col(pl.String)) + .name.keep() +) +psm.filter((pl.col.PA == 17191) & (pl.col("PA Pos") == 10)) + +# %% +psm.estimated_size("mb") + +# %% +psm.head() +# %% +psm.filter(pl.any_horizontal(pl.col("VK Auftrag").is_null())) + +# %% +psm.filter(pl.col("Wareneingang am") == "01.01.1111 00:00:00").group_by( + pl.col.Konfektionär +).agg(pl.len()) + +# %% +dupl_filter = psm.select([pl.col.PA, pl.col("PA Pos")]).is_duplicated() +# %% +psm.group_by(["PA", "PA Pos"]).agg(pl.col("PA").n_unique().alias("unique")).sort( + "unique", descending=True +) +# %% +most_occurrences = ( + psm.group_by(["PA", "PA Pos", "Konfektionär"]) + .agg(pl.len().alias("count")) + .sort("count", descending=True) +) +most_occurrences +# %% +most_occurrences.filter(~pl.col("Konfektionär").str.contains("May Tekstil Camcesme")) +# %% +psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort( + "PSM gemeldet am", descending=False +) + + +# %% 
+psm.filter((pl.col.PA == 17085) & (pl.col("PA Pos") == 10)).sort( + "PSM gemeldet am", descending=False +) +# %% +tmp = psm.filter((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)).sort( + "PSM gemeldet am", descending=False +) +tmp +# %% +# // simulate time series +series: list[pl.DataFrame] = [] + +for i in range(tmp.height): + series.append(tmp[: (i + 1)]) + +assert len(series) == tmp.height + +for idx, entry in enumerate(series, start=1): + assert idx == entry.height +# %% +series[1] +# %% +tmp.columns +# %% +tmp = psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort( + "PSM gemeldet am", descending=False +) +# %% +# // plausibility check +# ** production quantities +plausi_features_all = [ + "Teile in Zuschnitt", + "Teile im Nähband", + "Fertigware aus Nähband", + "Teile kontrolliert", + "Teile verpackt in Karton", +] +plausi_features_endpoint_only = [ + "Teile in Zuschnitt", + "Fertigware aus Nähband", + "Teile kontrolliert", + "Teile verpackt in Karton", +] +plausi_features = plausi_features_all +# plausi_features = plausi_features_endpoint_only +# %% +IDX = None +if IDX is None: + tmp_1 = tmp.select(plausi_features_all) +else: + tmp_1 = tmp[IDX].select(plausi_features_all) +print(tmp_1) +# %% +# ** empty: default state +tmp_1 = tmp_1.with_columns( + pl.all_horizontal(pl.col("*").is_null() | (pl.col("*") == 0)).alias("is_empty") +) +# %% +# tmp_1 = tmp_1.transpose() +# %% +# tmp_1.shift(1) + +# %% +conditions = [ + pl.col(plausi_features[i]) >= pl.col(plausi_features[i + 1]) + for i in range(len(plausi_features) - 1) +] + +# 4. 
Filter anwenden +# pl.all_horizontal stellt sicher, dass die Bedingung für JEDES Paar in der Zeile stimmt +df_markiert = tmp_1.with_columns( + pl.when(pl.all_horizontal(conditions) | pl.col("is_empty")) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Produktionsstückzahlen_valide") +) +print(df_markiert) + +# df_valide = tmp_1.filter(pl.all_horizontal(conditions)) +# df_invalide = tmp_1.filter( +# ~pl.all_horizontal(conditions) +# ) # Das Tilde-Zeichen ~ bedeutet "NOT" + +# print("--- Valide Zeilen ---") +# print(df_valide) + +# print("\n--- Invalide Zeilen ---") +# print(df_invalide) + + +# %% +# 1. Testdaten erstellen (Zeile 0-2 sind valide, Zeile 3 ist dein invalides Beispiel) +df = pl.DataFrame({"EP-1": [0, 100, 100, 0], "EP-2": [0, 0, 100, 100], "EP-3": [0, 0, 0, 0]}) + +# 2. Liste der Erfassungspunkte in der richtigen (konsekutiven) Reihenfolge +ep_spalten = ["EP-1", "EP-2", "EP-3"] + +# 3. Dynamisch die Bedingungen für alle Paare erstellen +# Wir prüfen für jedes Paar: Ist der vorherige Punkt (i) >= dem nächsten Punkt (i+1)? +bedingungen = [ + pl.col(ep_spalten[i]) >= pl.col(ep_spalten[i + 1]) for i in range(len(ep_spalten) - 1) +] + +# 4. 
Filter anwenden +# pl.all_horizontal stellt sicher, dass die Bedingung für JEDES Paar in der Zeile stimmt +df_valide = df.filter(pl.all_horizontal(bedingungen)) +df_invalide = df.filter(~pl.all_horizontal(bedingungen)) # Das Tilde-Zeichen ~ bedeutet "NOT" + +print("--- Valide Zeilen ---") +print(df_valide) + +print("\n--- Invalide Zeilen ---") +print(df_invalide) +# %% -- 2.34.1 From ae4d684d4fb6068c82c30898ddc5dfae10d6fb1b Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 3 Jun 2026 16:07:20 +0200 Subject: [PATCH 03/48] basic steps for concept of architecture --- prototypes/01_first-look_20260603.py | 85 +++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 14 deletions(-) diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01_first-look_20260603.py index 4d53dc8..7b7a9c0 100644 --- a/prototypes/01_first-look_20260603.py +++ b/prototypes/01_first-look_20260603.py @@ -1,4 +1,5 @@ # %% +import enum from pathlib import Path import polars as pl @@ -19,8 +20,15 @@ assert data_t1_PSM.exists() # // MIS-Aufträge pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") + # %% # // PSM +class QualityPsm(enum.StrEnum): + FEHLEND = enum.auto() + UNPLAUSIBEL = enum.auto() + PLAUSIBEL = enum.auto() + + schema_PSM: dict[str, type[pl.DataType]] = { "VK Auftrag": pl.UInt32, "Artikelbez.": pl.String, @@ -134,8 +142,6 @@ for idx, entry in enumerate(series, start=1): # %% series[1] # %% -tmp.columns -# %% tmp = psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort( "PSM gemeldet am", descending=False ) @@ -156,7 +162,7 @@ plausi_features_endpoint_only = [ "Teile verpackt in Karton", ] plausi_features = plausi_features_all -# plausi_features = plausi_features_endpoint_only +plausi_features = plausi_features_endpoint_only # %% IDX = None if IDX is None: @@ -169,36 +175,40 @@ print(tmp_1) tmp_1 = tmp_1.with_columns( pl.all_horizontal(pl.col("*").is_null() | (pl.col("*") == 0)).alias("is_empty") ) -# %% -# tmp_1 = tmp_1.transpose() -# %% -# 
tmp_1.shift(1) -# %% conditions = [ pl.col(plausi_features[i]) >= pl.col(plausi_features[i + 1]) for i in range(len(plausi_features) - 1) ] -# 4. Filter anwenden -# pl.all_horizontal stellt sicher, dass die Bedingung für JEDES Paar in der Zeile stimmt -df_markiert = tmp_1.with_columns( +df_marked = tmp_1.with_columns( pl.when(pl.all_horizontal(conditions) | pl.col("is_empty")) .then(pl.lit(True)) .otherwise(pl.lit(False)) .alias("Produktionsstückzahlen_valide") ) -print(df_markiert) +# print(df_marked) + +# %% +df_score = df_marked.with_columns( + pl.when(pl.col("is_empty")) + .then(pl.lit(QualityPsm.FEHLEND)) + .when(pl.col("Produktionsstückzahlen_valide")) + .then(pl.lit(QualityPsm.PLAUSIBEL)) + .otherwise(pl.lit(QualityPsm.UNPLAUSIBEL)) + .alias("Qualität Produktionsfortschritt") +) +print(df_score) # df_valide = tmp_1.filter(pl.all_horizontal(conditions)) # df_invalide = tmp_1.filter( # ~pl.all_horizontal(conditions) # ) # Das Tilde-Zeichen ~ bedeutet "NOT" -# print("--- Valide Zeilen ---") +# print("--- valid rows ---") # print(df_valide) -# print("\n--- Invalide Zeilen ---") +# print("\n--- invalid rows ---") # print(df_invalide) @@ -226,3 +236,50 @@ print(df_valide) print("\n--- Invalide Zeilen ---") print(df_invalide) # %% +# // principle of aggregated data in Polars +# map the database structure to a Polars dataframe and just insert or update the +# corresponding entries of the defined database table +# We use an upsert strategy, keep local copies of the data and merge them with new entries. +# This ensures that we always have a clean and complete history. + +# 1. Testdaten: Auftrag 1 ist valide, Auftrag 2 enthält dein invalides Beispiel +df = pl.DataFrame( + { + "auftrag_id": [1, 2], + "EP-1": [[0, 100, 100, 100], [0, 0, 100, 100]], + "EP-2": [[0, 0, 100, 100], [0, 100, 100, 100]], # Auftrag 2 kippt hier bei Index 1! 
+ "EP-3": [[0, 0, 0, 100], [0, 0, 0, 100]], + } +) +df.head() + +# %% +ep_spalten = ["EP-1", "EP-2", "EP-3"] + +# --- SCHRITT 1: Die Listen synchron entfalten (Explode) --- +# Polars macht aus den Listen temporär wieder "flache" Zeilen unter Beibehaltung der auftrag_id +df_flach = df.select(["auftrag_id"] + ep_spalten).explode(ep_spalten) +df_flach +# %% + +# --- SCHRITT 2: Unsere bekannte Paar-Logik anwenden --- +bedingungen = [ + pl.col(ep_spalten[i]) >= pl.col(ep_spalten[i + 1]) for i in range(len(ep_spalten) - 1) +] + +# Wir prüfen für jede Zeile (jeden Zeitpunkt), ob das Schema stimmt +df_flach = df_flach.with_columns(pl.all_horizontal(bedingungen).alias("zeitpunkt_valide")) +df_flach +# %% +# --- SCHRITT 3: Zurück auf Auftragsebene aggregieren --- +# Ein Auftrag ist nur dann komplett valide, wenn JEDER EINZELNE Zeitpunkt valide war (.all()) +df_status = df_flach.group_by("auftrag_id").agg( + pl.col("zeitpunkt_valide").all().alias("ist_valide") +) + + +# --- SCHRITT 4: Das Ergebnis an deinen Original-Dataframe hängen --- +df_final = df.join(df_status, on="auftrag_id", how="left") + +print(df_final) +# %% -- 2.34.1 From f95eab58439986adc161318144561019726dae28 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 4 Jun 2026 15:45:01 +0200 Subject: [PATCH 04/48] base routine to transform, calculate and aggregate data --- prototypes/01_first-look_20260603.py | 343 +++++++++++++++++++++++---- 1 file changed, 298 insertions(+), 45 deletions(-) diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01_first-look_20260603.py index 7b7a9c0..de4be19 100644 --- a/prototypes/01_first-look_20260603.py +++ b/prototypes/01_first-look_20260603.py @@ -1,4 +1,5 @@ # %% +import datetime import enum from pathlib import Path @@ -29,6 +30,7 @@ class QualityPsm(enum.StrEnum): PLAUSIBEL = enum.auto() +# %% schema_PSM: dict[str, type[pl.DataType]] = { "VK Auftrag": pl.UInt32, "Artikelbez.": pl.String, @@ -76,6 +78,7 @@ psm.filter(pl.col("Konfektionär").str.contains("MEMTEKS")) 
psm.estimated_size("mb") # %% +# // preprocessing I regex_pattern = r"^[\s\-#+/$]+$" psm = psm.with_columns( pl.when(pl.col(pl.String).str.contains(regex_pattern)) @@ -94,12 +97,12 @@ psm.head() psm.filter(pl.any_horizontal(pl.col("VK Auftrag").is_null())) # %% -psm.filter(pl.col("Wareneingang am") == "01.01.1111 00:00:00").group_by( - pl.col.Konfektionär -).agg(pl.len()) +# psm.filter(pl.col("Wareneingang am") == "01.01.1111 00:00:00").group_by( +# pl.col.Konfektionär +# ).agg(pl.len()) # %% -dupl_filter = psm.select([pl.col.PA, pl.col("PA Pos")]).is_duplicated() +psm.select([pl.col.PA, pl.col("PA Pos")]).is_duplicated().sum() # %% psm.group_by(["PA", "PA Pos"]).agg(pl.col("PA").n_unique().alias("unique")).sort( "unique", descending=True @@ -113,6 +116,9 @@ most_occurrences = ( most_occurrences # %% most_occurrences.filter(~pl.col("Konfektionär").str.contains("May Tekstil Camcesme")) +# %% +psm.columns + # %% psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort( "PSM gemeldet am", descending=False @@ -130,6 +136,7 @@ tmp = psm.filter((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)).sort( tmp # %% # // simulate time series +# this is a sequence how data would be provided: first one entry, and then more additional entries series: list[pl.DataFrame] = [] for i in range(tmp.height): @@ -145,6 +152,7 @@ series[1] tmp = psm.filter((pl.col.PA == 16003) & (pl.col("PA Pos") == 10)).sort( "PSM gemeldet am", descending=False ) +tmp # %% # // plausibility check # ** production quantities @@ -162,7 +170,7 @@ plausi_features_endpoint_only = [ "Teile verpackt in Karton", ] plausi_features = plausi_features_all -plausi_features = plausi_features_endpoint_only +# plausi_features = plausi_features_endpoint_only # %% IDX = None if IDX is None: @@ -187,61 +195,306 @@ df_marked = tmp_1.with_columns( .otherwise(pl.lit(False)) .alias("Produktionsstückzahlen_valide") ) -# print(df_marked) -# %% +PSM_SCORES: dict[QualityPsm, int] = { + QualityPsm.FEHLEND: 1, + 
QualityPsm.UNPLAUSIBEL: 0, + QualityPsm.PLAUSIBEL: 2, +} + df_score = df_marked.with_columns( pl.when(pl.col("is_empty")) - .then(pl.lit(QualityPsm.FEHLEND)) + .then(pl.lit(PSM_SCORES[QualityPsm.FEHLEND])) .when(pl.col("Produktionsstückzahlen_valide")) - .then(pl.lit(QualityPsm.PLAUSIBEL)) - .otherwise(pl.lit(QualityPsm.UNPLAUSIBEL)) + .then(pl.lit(PSM_SCORES[QualityPsm.PLAUSIBEL])) + .otherwise(pl.lit(PSM_SCORES[QualityPsm.UNPLAUSIBEL])) .alias("Qualität Produktionsfortschritt") ) print(df_score) -# df_valide = tmp_1.filter(pl.all_horizontal(conditions)) -# df_invalide = tmp_1.filter( -# ~pl.all_horizontal(conditions) -# ) # Das Tilde-Zeichen ~ bedeutet "NOT" - -# print("--- valid rows ---") -# print(df_valide) - -# print("\n--- invalid rows ---") -# print(df_invalide) - - -# %% -# 1. Testdaten erstellen (Zeile 0-2 sind valide, Zeile 3 ist dein invalides Beispiel) -df = pl.DataFrame({"EP-1": [0, 100, 100, 0], "EP-2": [0, 0, 100, 100], "EP-3": [0, 0, 0, 0]}) - -# 2. Liste der Erfassungspunkte in der richtigen (konsekutiven) Reihenfolge -ep_spalten = ["EP-1", "EP-2", "EP-3"] - -# 3. Dynamisch die Bedingungen für alle Paare erstellen -# Wir prüfen für jedes Paar: Ist der vorherige Punkt (i) >= dem nächsten Punkt (i+1)? -bedingungen = [ - pl.col(ep_spalten[i]) >= pl.col(ep_spalten[i + 1]) for i in range(len(ep_spalten) - 1) -] - -# 4. Filter anwenden -# pl.all_horizontal stellt sicher, dass die Bedingung für JEDES Paar in der Zeile stimmt -df_valide = df.filter(pl.all_horizontal(bedingungen)) -df_invalide = df.filter(~pl.all_horizontal(bedingungen)) # Das Tilde-Zeichen ~ bedeutet "NOT" - -print("--- Valide Zeilen ---") -print(df_valide) - -print("\n--- Invalide Zeilen ---") -print(df_invalide) # %% # // principle of aggregated data in Polars # map the database structure to a Polars dataframe and just insert or update the # corresponding entries of the defined database table # We use an upsert strategy, keep local copies of the data and merge them with new entries. 
# This ensures that we always have a clean and complete history. +# %% +tmp = series[2] + +# ** production quants plausibility or quality check +renaming_scheme: dict[str, str] = { + "PA Pos": "PA_Pos", + "PSM gemeldet am": "Meldezeitpunkt_Historie", + "Import Ist": "Import-Ist_Historie", + "1.bestät. Import Konfektionär": "Bestaetigter-Import_Historie", + "Zuschnitt am": "Prod-Start_Historie", + "Teile in Zuschnitt": "Prod-EP10_Historie", + "Teile im Nähband": "Prod-EP20_Historie", + "Fertigware aus Nähband": "Prod-EP30_Historie", + "Teile kontrolliert": "Prod-EP40_Historie", + "Teile verpackt in Karton": "Prod-EP50_Historie", +} + +KEYS = ["PA", "PA_Pos"] + +tmp = tmp.rename(renaming_scheme) +tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False) + + +plausi_features_all = [ + "Prod-EP10_Historie", + "Prod-EP20_Historie", + "Prod-EP30_Historie", + "Prod-EP40_Historie", + "Prod-EP50_Historie", +] +PLAUSI_FEATURES = plausi_features_all + + +tmp = tmp.with_columns( + pl.all_horizontal( + pl.col(PLAUSI_FEATURES).is_null() | (pl.col(PLAUSI_FEATURES) == 0) + ).alias("is_empty") +) + +conditions = [ + pl.col(PLAUSI_FEATURES[i]) >= pl.col(PLAUSI_FEATURES[i + 1]) + for i in range(len(PLAUSI_FEATURES) - 1) +] + +tmp = tmp.with_columns( + pl.when(pl.all_horizontal(conditions) | pl.col("is_empty")) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Prod-Qty_is_valid") +).with_columns( + pl.when(pl.col("is_empty")) + .then(pl.lit(PSM_SCORES[QualityPsm.FEHLEND])) + .when(pl.col("Prod-Qty_is_valid")) + .then(pl.lit(PSM_SCORES[QualityPsm.PLAUSIBEL])) + .otherwise(pl.lit(PSM_SCORES[QualityPsm.UNPLAUSIBEL])) + .alias("Prod-Qualitaet_Historie") +) +# aggregate hint for "Prod-Qualitaet_Durchschnitt": use "drop_nulls" "last" +# aggregate "Prod-Qualitaet_Historie" and use "mean" +# need additional "alias" on "Prod-Qualitaet_Historie" + +# tmp = ( +# tmp.with_row_index("row_nr") +# .with_columns( +# pl.when(pl.col("row_nr") == 1) # Index 1 ist die zweite Zeile +# 
.then(None) +# .otherwise(pl.col("1.bestät. Import Konfektionär")) +# .alias("1.bestät. Import Konfektionär") +# ) +# .drop("row_nr") +# ) +# tmp +current_date = datetime.datetime.now().date() +print(f"{current_date=}") +tmp = tmp.with_columns( + pl.coalesce(["Bestaetigter-Import_Historie", "Import-Ist_Historie"]).alias( + "Liefertermin_Soll" + ) +) +# aggregate hint for "Liefertermin_Soll": use "drop_nulls" "first" +# first filled field for "Liefertermin Soll" is the relevant target date +# should be first confirmed date, but if this field is not filled we use the first +# filled import by the supplier + +# now check if set import date is before current date --> becomes actual value +tmp = tmp.with_columns( + pl.when(pl.col("Import-Ist_Historie") < current_date) + .then(pl.col("Import-Ist_Historie")) + .otherwise(None) + .alias("Liefertermin_Ist") +) +# aggregate hint for "Liefertermin_Ist": use "drop_nulls" "last" +# keep last because that is the latest value set by the supplier +# if all values are NULL then NULL is returned (no actual date available) + +# aggregate hint for "Prod-Start" +# aggregate "Prod-Start_Historie" and use "drop_nulls" "first" +# first entry should be treated as the truth value, changing later does not make sense +# need additional "alias" on "Prod-Start_Historie" + +# duration since last report in days +tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( + ( + pl.col("Meldezeitpunkt_Historie") + - pl.col("Meldezeitpunkt_Historie").shift(1).over(KEYS) + ) + .dt.total_days() + .alias("Tage_zu_letzter_PSM_Historie") +) +# aggregate hint for "Tage_zu_letzter_PSM_Durchschnitt" +# aggregate "Tage_zu_letzter_PSM_Historie" and use "mean" (NULL is ignored automatically) +# need additional "alias" on "Tage_zu_letzter_PSM_Historie" + +# aggregate hint for "Import-Ist_letzter_Wert" +# aggregate "Import-Ist_Historie" and use "drop_nulls" "last" +# need additional "alias" on "Import-Ist_Historie" + +tmp = tmp.sort(KEYS + 
["Meldezeitpunkt_Historie"], descending=False).with_columns( + # Prüfen: Ist das aktuelle Datum ungleich dem vorherigen Datum derselben Position? + (pl.col("Import-Ist_Historie") != pl.col("Import-Ist_Historie").shift(1).over(KEYS)) + .fill_null(False) # Der allererste Eintrag hat keinen Vorgänger -> Ist keine Änderung + .alias("Import-Ist_geaendert") +) +# aggregate hint for "Import-Ist_geaendert" +# aggregate "Import-Ist_geaendert" and use "last" + +# aggregate hint for "Import-Ist_Anzahl_Aenderungen" +# aggregate "Import-Ist_geaendert" and use "sum" +# need additional "alias" on "Import-Ist_geaendert" + + +# whole aggregates see DB schema +tmp = ( + tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False) + .group_by(KEYS + ["Konfektionär"]) + .agg( + pl.col("Meldezeitpunkt_Historie"), + pl.col("Liefertermin_Soll").drop_nulls().first(), + pl.col("Bestaetigter-Import_Historie"), + pl.col("Liefertermin_Ist").drop_nulls().last(), + pl.col("Import-Ist_Historie"), + pl.col("Import-Ist_Historie").drop_nulls().last().alias("Import-Ist_letzter_Wert"), + pl.col("Import-Ist_geaendert").last(), + pl.col("Import-Ist_geaendert").sum().alias("Import-Ist_Anzahl_Aenderungen"), + pl.col("Tage_zu_letzter_PSM_Historie"), + pl.col("Tage_zu_letzter_PSM_Historie") + .mean() + .alias("Tage_zu_letzter_PSM_Durchschnitt"), + pl.col("Prod-EP10_Historie"), + pl.col("Prod-EP20_Historie"), + pl.col("Prod-EP30_Historie"), + pl.col("Prod-EP40_Historie"), + pl.col("Prod-EP50_Historie"), + pl.col("Prod-Qualitaet_Historie"), + pl.col("Prod-Qualitaet_Historie").mean().alias("Prod-Qualitaet_Durchschnitt"), + pl.col("Prod-Start_Historie"), + pl.col("Prod-Start_Historie").drop_nulls().first().alias("Prod-Start"), + ) +) + +tmp +# %% +# ** order specific aggregates +LOWER_BOUND_DATE_DEVIATION = 0 +UPPER_BOUND_DATE_DEVIATION = 0 + +tmp = tmp.with_columns( + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) + & (pl.col("Liefertermin_Soll").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") 
- pl.col("Liefertermin_Soll")).dt.total_days()) + .otherwise(None) + .alias("Terminabweichung_Anzahl_Tage") +).with_columns( + pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminunterschreitung"), + pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminüberschreitung"), + pl.when((pl.col("Liefertermin_Ist").is_not_null()) & (pl.col("Prod-Start").is_not_null())) + .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) + .otherwise(None) + .alias("Durchlaufzeit_Anzahl_Tage"), +) +tmp +# %% +tmp_1 = tmp.select("Meldezeitpunkt_Historie") +tmp_1 = tmp_1.with_columns( + Meldezeitpunkt_datum=pl.col("Meldezeitpunkt_Historie").dt.date(), +) +tmp_1 + +# %% +tmp_1 = tmp.with_columns( + # Aktuelles Datum minus verschobenes Datum (isoliert je Auftrag) + ( + pl.col("Meldezeitpunkt_Historie") + - pl.col("Meldezeitpunkt_Historie").shift(1).over(["PA", "PA_Pos"]) + ) + .dt.total_days() # Macht aus der Zeitspanne (Duration) eine nackte Ganzzahl (Tage) + .alias("Tage_zu_letzter_PSM") +) +tmp_1 +# %% +tmp_1.with_columns( + delta=( + pl.col("Meldezeitpunkt_datum").shift( + -1, fill_value=pl.col("Meldezeitpunkt_datum").last() + ) + - pl.col("Meldezeitpunkt_datum") + ) +) + +# %% + + +######################################## +# %% +# 1. Das ist der alte Zustand aus der SQLite-DB (aufgelöst als Dataframe) +# Angenommen, das Quellsystem hatte beim letzten Mal noch die alten Daten (10:00 Uhr) +df_db = pl.DataFrame( + { + "auftrag_id": [1], + "zeitstempel": [["10:00", "11:00"]], + "EP-1": [[0, 100]], + "EP-2": [[0, 0]], + } +) +df_db +# %% +# 2. Der neue Input (Das Quellsystem hat den 10:00 Uhr Eintrag plötzlich "vergessen"!) 
+df_input_neu = pl.DataFrame( + { + "auftrag_id": [1, 1], + "zeitstempel": ["11:00", "12:00"], # 10:00 fehlt, 11:00 ist redundant, 12:00 ist neu + "EP-1": [100, 100], + "EP-2": [0, 100], + } +) +df_input_neu + +# %% +# --- SCHRITT 1: Die Datenbank-Listen "flach" machen --- +# Wir entfalten die alten Listen, sodass jede Zeile wieder ein einzelnes Ereignis ist +df_db_flach = df_db.explode(["zeitstempel", "EP-1", "EP-2"]) +df_db_flach +# %% +# --- SCHRITT 2: Alles in einen Topf werfen --- +# Wir kleben die alten DB-Daten und die neuen Input-Daten einfach untereinander +df_kombiniert = pl.concat([df_db_flach, df_input_neu]) +df_kombiniert + +# %% +# --- SCHRITT 3: Duplikate entfernen (Die Magie) --- +# Wir behalten nur die einzigartigen Kombinationen aus Auftrag und Zeit. +# Durch keep="last" überschreibt ein eventuell korrigierter neuer Wert den alten. +df_dedupliziert = df_kombiniert.unique(subset=["auftrag_id", "zeitstempel"], keep="last") +df_dedupliziert +# %% +# --- SCHRITT 4: Wieder zu sauberen Listen zusammenbauen --- +# Jetzt aggregieren wir die sauberen Daten wieder zu unserer Datenbank-Sicht +df_final_db = ( + df_dedupliziert.sort("zeitstempel") # Wichtig, damit die Chronologie in der Liste stimmt! + .group_by("auftrag_id") + .agg(pl.col("zeitstempel"), pl.col("EP-1"), pl.col("EP-2")) +) + +print(df_final_db) + + +################################################################################### +# %% # 1. 
Testdaten: Auftrag 1 ist valide, Auftrag 2 enthält dein invalides Beispiel df = pl.DataFrame( { -- 2.34.1 From 356e0f263f123907f105c7c648328fb0d247b3c0 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 4 Jun 2026 15:45:13 +0200 Subject: [PATCH 05/48] prepare lib environment --- .gitignore | 1 + deployment/.env | 3 +++ pdm.lock | 21 +++++++++++++++++---- pyproject.toml | 2 +- src/wattanalyse/README.md | 6 ++++++ src/wattanalyse/constants.py | 23 +++++++++++++++++++++++ src/wattanalyse/logging.py | 30 ++++++++++++++++++++++++++++++ 7 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 deployment/.env create mode 100644 src/wattanalyse/README.md create mode 100644 src/wattanalyse/constants.py create mode 100644 src/wattanalyse/logging.py diff --git a/.gitignore b/.gitignore index 44bff78..5f2bf0a 100644 --- a/.gitignore +++ b/.gitignore @@ -135,6 +135,7 @@ celerybeat.pid # Environments .env +!deployment/.env .venv env/ venv/ diff --git a/deployment/.env b/deployment/.env new file mode 100644 index 0000000..9c741ea --- /dev/null +++ b/deployment/.env @@ -0,0 +1,3 @@ +DOPT_STOP_FOLDER_NAME=python +DOPT_INTERNAL_DB=data/wattana.db +DOPT_PATH_LOGGING=data/logs \ No newline at end of file diff --git a/pdm.lock b/pdm.lock index f15bd02..efcf931 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:33241b6273d0130d424b01e4bde45eba106100f2a47b12e3c053ba1ecd1557ae" +content_hash = "sha256:19e06fc0367f0208bc7ff972401f8297331c48c0a153b393b4d92e595eabc852" [[metadata.targets]] requires_python = ">=3.11" @@ -762,6 +762,20 @@ files = [ {file = "distlib-0.4.1.tar.gz", hash = "sha256:c3804d0d2d4b5fcd44036eb860cb6660485fcdf5c2aba53dc324d805837ea65b"}, ] +[[package]] +name = "dopt-basics" +version = "0.2.6" +requires_python = ">=3.11" +summary = "basic cross-project tools for Python-based d-opt projects" +groups = ["default"] +dependencies = [ + 
"tzdata>=2025.1", +] +files = [ + {file = "dopt_basics-0.2.6-py3-none-any.whl", hash = "sha256:f0818e2f83e91fb7d398bcabfc6c420159757d7d093b20574b88a3abc24e3eab"}, + {file = "dopt_basics-0.2.6.tar.gz", hash = "sha256:0e90d0d7a711e0dee9f898574683442644d3145ac8905d38ea23775f62aa5d2b"}, +] + [[package]] name = "execnet" version = "2.1.2" @@ -2083,7 +2097,7 @@ name = "python-dotenv" version = "1.2.2" requires_python = ">=3.10" summary = "Read key-value pairs from a .env file and set them as environment variables" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a"}, {file = "python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3"}, @@ -2786,8 +2800,7 @@ name = "tzdata" version = "2026.2" requires_python = ">=2" summary = "Provider of IANA time zone data" -groups = ["nb"] -marker = "python_version >= \"3.9\"" +groups = ["default", "nb"] files = [ {file = "tzdata-2026.2-py2.py3-none-any.whl", hash = "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7"}, {file = "tzdata-2026.2.tar.gz", hash = "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10"}, diff --git a/pyproject.toml b/pyproject.toml index e9a9d6f..4927bdf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "analysis of production state messages obtained from customers" authors = [ {name = "d-opt GmbH, resp. 
Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50"] +dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6"] requires-python = ">=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"} diff --git a/src/wattanalyse/README.md b/src/wattanalyse/README.md new file mode 100644 index 0000000..6b70cdb --- /dev/null +++ b/src/wattanalyse/README.md @@ -0,0 +1,6 @@ +# List of environment variables + +- DOPT_DEVELOPMENT: flag which signals that the current environment is in development mode +- DOPT_STOP_FOLDER_NAME: stop folder to find base path +- DOPT_INTERNAL_DB: path to CRM database, relative to base path +- DOPT_PATH_LOGGING: path to logging folder, relative to base path diff --git a/src/wattanalyse/constants.py b/src/wattanalyse/constants.py new file mode 100644 index 0000000..661c9a0 --- /dev/null +++ b/src/wattanalyse/constants.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import os +from pathlib import Path +from typing import Final + +from dopt_basics import io as io_ + +# PROJECT_ROOT = Path(__file__).resolve().parents[2] +LIB_PATH: Final[Path] = Path(__file__).resolve().parent + + +BASE_PATH = io_.search_folder_path( + LIB_PATH, stop_folder_name=os.getenv("DOPT_STOP_FOLDER_NAME", "python") +) +assert BASE_PATH + + +class Config: + DEVELOPMENT_STATE: bool = bool(os.getenv("DOPT_DEVELOPMENT", None)) + DB_PATH_INTERNAL: Path = BASE_PATH / os.getenv("DOPT_INTERNAL_DB", "not_existing") + PATH_LOGGING: Path = BASE_PATH / os.getenv("DOPT_PATH_LOGGING", "data/d-opt.log") + LOG_FILENAME: str = "dopt.log" diff --git a/src/wattanalyse/logging.py b/src/wattanalyse/logging.py new file mode 100644 index 0000000..befd4d5 --- /dev/null +++ b/src/wattanalyse/logging.py @@ -0,0 +1,30 @@ +import logging + +from dopt_basics.logging import BASE_LOGGER, LoggingConfig, setup_logging + +from wattanalyse.constants import Config + 
+enable_stderr: bool = False +enable_file: bool = True + +if Config.DEVELOPMENT_STATE: + enable_stderr = True + enable_file = False + +if not Config.PATH_LOGGING.exists(): + Config.PATH_LOGGING.mkdir() + +LOGGING_CFG: LoggingConfig = LoggingConfig( + enable_stderr=enable_stderr, + enable_file=enable_file, + logging_dir=Config.PATH_LOGGING, + log_filename=Config.LOG_FILENAME, + file_max_bytes=10_485_760, + file_backup_count=2, +) + +setup_logging(LOGGING_CFG) +logger_base = BASE_LOGGER.getChild("wattana") + +logger_database = logger_base.getChild("database") +logger_database.setLevel(logging.DEBUG) -- 2.34.1 From 692b8951c8e676f9b15b848c12514a5b7a05296b Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 4 Jun 2026 15:46:36 +0200 Subject: [PATCH 06/48] fix description --- src/wattanalyse/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wattanalyse/README.md b/src/wattanalyse/README.md index 6b70cdb..bf18895 100644 --- a/src/wattanalyse/README.md +++ b/src/wattanalyse/README.md @@ -2,5 +2,5 @@ - DOPT_DEVELOPMENT: flag which signals that the current environment is in development mode - DOPT_STOP_FOLDER_NAME: stop folder to find base path -- DOPT_INTERNAL_DB: path to CRM database, relative to base path +- DOPT_INTERNAL_DB: path to internal database where results for further processing are saved, relative to base path - DOPT_PATH_LOGGING: path to logging folder, relative to base path -- 2.34.1 From 949fd46058edb7359a6a3f9fe5a6b618b5502af6 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 4 Jun 2026 15:49:23 +0200 Subject: [PATCH 07/48] env loading --- src/wattanalyse/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/wattanalyse/__init__.py b/src/wattanalyse/__init__.py index e69de29..fa1a5b0 100644 --- a/src/wattanalyse/__init__.py +++ b/src/wattanalyse/__init__.py @@ -0,0 +1,12 @@ +import os +import sys + +import dotenv + +if sys.stdout is None: + sys.stdout = open(os.devnull, "w", encoding="utf-8") + +if 
sys.stderr is None: + sys.stderr = open(os.devnull, "w", encoding="utf-8") + +dotenv.load_dotenv() -- 2.34.1 From 8532c605fa880f6e144d9039926bb93b3f11d712 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 4 Jun 2026 16:42:21 +0200 Subject: [PATCH 08/48] prepare database interaction --- prototypes/01_first-look_20260603.py | 35 +++++++++++++++++++++ prototypes/02_save_to_db.py | 27 ++++++++++++++++ src/wattanalyse/db.py | 46 ++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 prototypes/02_save_to_db.py create mode 100644 src/wattanalyse/db.py diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01_first-look_20260603.py index de4be19..b7591cb 100644 --- a/prototypes/01_first-look_20260603.py +++ b/prototypes/01_first-look_20260603.py @@ -1,9 +1,14 @@ # %% import datetime import enum +import json from pathlib import Path +from typing import Any import polars as pl +import sqlalchemy as sql + +from wattanalyse import db # %% PROJECT_BASE = Path(__file__).parents[1] @@ -408,6 +413,36 @@ tmp = tmp.with_columns( .alias("Durchlaufzeit_Anzahl_Tage"), ) tmp + + +# %% +# // dump to database + + +def _parse_to_json(value: Any) -> str: + if isinstance(value, (datetime.date, datetime.datetime)): + return value.isoformat() + else: + raise TypeError + + +parsed_lists = tmp.with_columns( + pl.col(pl.List) + .map_elements( + lambda x: json.dumps(x.to_list(), default=_parse_to_json) if x is not None else None, + return_dtype=pl.String, + ) + .name.keep() +) +parsed_lists + + +# %% +parsed_lists["Import-Ist_Historie"].item(0) + +# TODO make UPSERT with staging + +######################################################## # %% tmp_1 = tmp.select("Meldezeitpunkt_Historie") tmp_1 = tmp_1.with_columns( diff --git a/prototypes/02_save_to_db.py b/prototypes/02_save_to_db.py new file mode 100644 index 0000000..ba8c6ff --- /dev/null +++ b/prototypes/02_save_to_db.py @@ -0,0 +1,27 @@ +# %% +import datetime +import json +from typing import Any + +# %% +dt 
= datetime.datetime.now() +date = dt.date() + +# %% +val = [dt, date] +json.dumps(val) + + +# %% +def _parse_to_json(value: Any) -> str: + if isinstance(value, datetime.date): + return value.isoformat() + elif isinstance(value, datetime.datetime): + return value.isoformat() + else: + raise TypeError + + +# %% +json.dumps(val, default=_parse_to_json) +# %% diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py new file mode 100644 index 0000000..aa27640 --- /dev/null +++ b/src/wattanalyse/db.py @@ -0,0 +1,46 @@ +import sqlalchemy as sql +from sqlalchemy import Column, Table + +from wattanalyse import constants + +assert constants.Config.DB_PATH_INTERNAL.parent.exists(), ( + "database parent folder does not exists" +) + +ENGINE = sql.create_engine(f"sqlite:///{constants.Config.DB_PATH_INTERNAL}") + +MD_INTERNAL = sql.MetaData() + + +intern_prod_order_t: Table = Table( + "Produktionsauftrag-Einzelsicht", + MD_INTERNAL, + Column("PA", sql.Integer, primary_key=True), + Column("PA_Pos", sql.Integer, primary_key=True), + Column("Konfektionär", sql.Text, nullable=False), + Column("Meldezeitpunkt_Historie", sql.Text, nullable=False), + Column("Liefertermin_Soll", sql.Date, nullable=False), + Column("Bestaetigter-Import_Historie", sql.Text, nullable=False), + Column("Liefertermin_Ist", sql.Date, nullable=True), + Column("Import-Ist_Historie", sql.Text, nullable=False), + Column("Import-Ist_letzter_Wert", sql.Date, nullable=True), + Column("Import-Ist_geaendert", sql.Boolean, nullable=False), + Column("Import-Ist_Anzahl_Aenderungen", sql.Integer, nullable=False), + Column("Tage_zu_letzter_PSM_Historie", sql.Text, nullable=False), + Column("Tage_zu_letzter_PSM_Durchschnitt", sql.Float, nullable=True), + Column("Prod-EP10_Historie", sql.Text, nullable=False), + Column("Prod-EP20_Historie", sql.Text, nullable=False), + Column("Prod-EP30_Historie", sql.Text, nullable=False), + Column("Prod-EP40_Historie", sql.Text, nullable=False), + Column("Prod-EP50_Historie", sql.Text, 
nullable=False), + Column("Prod-Qualitaet_Historie", sql.Text, nullable=False), + Column("Prod-Qualitaet_Durchschnitt", sql.Float, nullable=False), + Column("Prod-Start_Historie", sql.Text, nullable=False), + Column("Prod-Start", sql.Date, nullable=True), + Column("Terminabweichung_Anzahl_Tage", sql.Integer, nullable=True), + Column("Terminunterschreitung", sql.Boolean, nullable=True), + Column("Terminüberschreitung", sql.Boolean, nullable=True), + Column("Durchlaufzeit_Anzahl_Tage", sql.Float, nullable=True), +) + +MD_INTERNAL.create_all(ENGINE) -- 2.34.1 From c99e354ed87f7c89e362534977f4625a810ec081 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 5 Jun 2026 12:01:36 +0200 Subject: [PATCH 09/48] database interaction with saving and loading --- pdm.lock | 129 +++++++++++++++- prototypes/01_first-look_20260603.py | 220 ++++++++++++++++++--------- pyproject.toml | 2 +- 3 files changed, 275 insertions(+), 76 deletions(-) diff --git a/pdm.lock b/pdm.lock index efcf931..cc8b8e4 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,11 +5,72 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:19e06fc0367f0208bc7ff972401f8297331c48c0a153b393b4d92e595eabc852" +content_hash = "sha256:8f138c1407dc86bdf19aa5a6ce42cb158c9b9963fbb8cf7f4c85f453799f5a10" [[metadata.targets]] requires_python = ">=3.11" +[[package]] +name = "adbc-driver-manager" +version = "1.11.0" +requires_python = ">=3.10" +summary = "A generic entrypoint for ADBC drivers." 
+groups = ["default"] +dependencies = [ + "typing-extensions", +] +files = [ + {file = "adbc_driver_manager-1.11.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:3eb5d6dd94d14e9f1abd340b0bc04bde6d16d692f598ada5ceef3186c6a90eaf"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07469c219d79645a6b2f3df0b8c176c0abbaf7d2b20725e15531735972f65db1"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8863a841ac362c26217e9ed69d1d1eb7add881c452382676c3fd4f19b562186c"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4641430ca41c1b570083aeb7771766fa51d963ac5a4bb11b208b51b96ed7f58"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:c6efa733bf219582bf0f9402f7a8034b113555b1edf178e4743caa69a736ddc5"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:08d3008cd6fee3d27b6265864b134902baacf00cd441dc750fb738615290004f"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:08f0a6e8030676b7fda5ffe095c33a819a15114541089b8d0fa8281d2dee2079"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb33beabe3a697a54ffcc9593b94705688f33b64741a17f7bdd37690f85a0ecf"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dba5306b90932e8af5e4a71756eec2f717f5fe283b1ad7cc7fb094fe4ef3f0f9"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5e9962e6e737e1c028cacb38c08141a8730f5c90cd397537413012ece901cc5"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:300b07f4c1113b113e18dddcb9d96dd8b84f09fa35f8e4e3e8a2f112f291142c"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:f577be7c4730a43bae08f88105317d7e1d519d02a94aaa98da694358084a4735"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c980f81730752cdb98881357c238e87110e1810e4a69c7627c2211bd576b6230"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cbc93830500a2f0db7b32501a4f88678fac14b9a9921d94d919439a5b65099e6"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c27cff12cdf074d9052bf8c4775ed1904053189a70497fa7b5746f0dbe326d8"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-macosx_10_15_x86_64.whl", hash = "sha256:d8fdeb10ea464dce88feffe23f35cc37a44ac6bad4e90e793416a3c60afb354f"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cc565ed5d9f8c7974bbaff60c30c8330dae5a903592618a303291db4227b3d54"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9523ca4e8943aa7b43958762bc9d1cb0b5355cd84855359a91c54a4bae9a75df"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:54dc142fc8065e13c6347fb3f2acb48430e3cab6863f27276a2b53594cc055b5"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6fcd6fe4f82f8f2fc83948ed2b0b549d0831253d449f5734603cc03850e4f47"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4b4293fc88d0683b6ea9fe1b7d7498c5ae9b4f53a93369c760cfa753a22039c0"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a2d6d1971ce104e41e3969afee8d5782ebcb06bf496606aa4eed2005fbead43"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24ef0e33bab3b0480e85d954f88664b578ea045efdc644681c5a487982818e5f"}, + {file = 
"adbc_driver_manager-1.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:830efd3f212a6360ad66c09fd95171a26a1006a51c893f72238dfb50e0f35e13"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b5e97d4cb3f5a798e18c802dd1f3d1bf7b77d763cdc707ac295907bf223d1ae8"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2e4e155cae12667aa383750d879e177ada3ab0c351f8306d96e33fbe6949f6f4"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfb736661f95eb8fc185a4b9951b2e61734633c7448e8d3d937e93ef1d9e5c08"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e87a6f2b70baf21d3c52b280a17e2e8516197a4670b9a080a07dd255f2ab6e9d"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b853e613c6c8afbe7a3fcea0098c88b935a4d1e1b046813aed1fe7363c7b8fc7"}, + {file = "adbc_driver_manager-1.11.0.tar.gz", hash = "sha256:c64aaabeb5810109ab3d2961008f1b014e9f2d87b3df4416c2a080a40237af50"}, +] + +[[package]] +name = "adbc-driver-sqlite" +version = "1.11.0" +requires_python = ">=3.10" +summary = "An ADBC driver for working with SQLite." 
+groups = ["default"] +dependencies = [ + "adbc-driver-manager", + "importlib-resources>=1.3", +] +files = [ + {file = "adbc_driver_sqlite-1.11.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:d227ab10a56b0b5f106d9f85f3f8bce8b75c2b34a28ad962b71e8a3a0b6dc0ed"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:98fd35e14c85e44eeffae1ef9a56466169719ad7bd15e314c2ff88c342e50d9d"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c28401c31d775d5506ed1188b73de9f7ed1a292927157f2171c7dca67f6cb9e"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:2bcab0cfe9380c1691cf995430f8b0b56bf8b9875d8fd9d69a5aecf2b72159e6"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-win_amd64.whl", hash = "sha256:e41246c5bf929bb5d768227606eb10add420171134ae6ba7928136376f5842fd"}, + {file = "adbc_driver_sqlite-1.11.0.tar.gz", hash = "sha256:a4c6b4962610f7cd67cd754c42dd74e18a2c11fabeec9488c5501d73ae62dc62"}, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -978,6 +1039,20 @@ files = [ {file = "idna-3.18.tar.gz", hash = "sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848"}, ] +[[package]] +name = "importlib-resources" +version = "7.1.0" +requires_python = ">=3.10" +summary = "Read resources from Python packages" +groups = ["default"] +dependencies = [ + "zipp>=3.1.0; python_version < \"3.10\"", +] +files = [ + {file = "importlib_resources-7.1.0-py3-none-any.whl", hash = "sha256:1bd7b48b4088eddb2cd16382150bb515af0bd2c70128194392725f82ad2c96a1"}, + {file = "importlib_resources-7.1.0.tar.gz", hash = "sha256:0722d4c6212489c530f2a145a34c0a7a3b4721bc96a15fada5930e2a0b760708"}, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -1859,6 +1934,58 @@ files = [ {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, ] +[[package]] +name = "pyarrow" +version = "24.0.0" +requires_python = 
">=3.10" +summary = "Python library for Apache Arrow" +groups = ["default"] +files = [ + {file = "pyarrow-24.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b0e131f880cda8d04e076cee175a46fc0e8bc8b65c99c6c09dff6669335fde74"}, + {file = "pyarrow-24.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:1b2fe7f9a5566401a0ef2571f197eb92358925c1f0c8dba305d6e43ea0871bb3"}, + {file = "pyarrow-24.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0b3537c00fb8d384f15ac1e79b6eb6db04a16514c8c1d22e59a9b95c8ba42868"}, + {file = "pyarrow-24.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:14e31a3c9e35f1ab6356c6378f6f72830e6d2d5f1791df3774a7b097d18a6a1e"}, + {file = "pyarrow-24.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d9a514e73bc42711e6a35aaccf3587c520024fe0a25d830a1a8a27c15f4f57"}, + {file = "pyarrow-24.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b196eb3f931862af3fa84c2a253514d859c08e0d8fe020e07be12e75a5a9780c"}, + {file = "pyarrow-24.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:35405aecb474e683fb36af650618fd5340ee5471fc65a21b36076a18bbc6c981"}, + {file = "pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810"}, + {file = "pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a"}, + {file = "pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66"}, + {file = "pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb"}, + {file = "pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e"}, + {file = "pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6"}, + 
{file = "pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826"}, + {file = "pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba"}, + {file = "pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68"}, + {file = "pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2"}, + {file = "pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0"}, + {file = "pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495"}, + {file = "pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f"}, + {file = "pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91"}, + {file = "pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275"}, + {file = "pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b"}, + {file = "pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42"}, + {file = "pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b"}, + {file = "pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37"}, + {file = "pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca"}, + {file = "pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d"}, + {file = "pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838"}, + {file = "pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b"}, + {file = "pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795"}, + {file = "pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26"}, + {file = "pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde"}, + {file = "pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76"}, + {file = "pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e"}, + {file = "pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05"}, + {file = "pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a"}, + {file = "pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072"}, + {file = "pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931"}, + {file = "pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699"}, + {file = 
"pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136"}, + {file = "pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19"}, + {file = "pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83"}, +] + [[package]] name = "pycparser" version = "3.0" diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01_first-look_20260603.py index b7591cb..afa61a5 100644 --- a/prototypes/01_first-look_20260603.py +++ b/prototypes/01_first-look_20260603.py @@ -1,6 +1,7 @@ # %% import datetime import enum +import importlib import json from pathlib import Path from typing import Any @@ -10,12 +11,14 @@ import sqlalchemy as sql from wattanalyse import db +importlib.reload(db) + # %% PROJECT_BASE = Path(__file__).parents[1] -DATA = PROJECT_BASE / "data" -assert DATA.exists() +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() # %% -data_t1 = DATA / "PSM/20260507" +data_t1 = DATA_PTH / "PSM/20260507" assert data_t1.exists() # %% data_t1_jobs = data_t1 / "MIS-Auträge_22.csv" @@ -24,7 +27,7 @@ data_t1_PSM = data_t1 / "Produktionsstandsmeldungen.csv" assert data_t1_PSM.exists() # %% # // MIS-Aufträge -pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") +# pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") # %% @@ -35,46 +38,26 @@ class QualityPsm(enum.StrEnum): PLAUSIBEL = enum.auto() -# %% -schema_PSM: dict[str, type[pl.DataType]] = { - "VK Auftrag": pl.UInt32, - "Artikelbez.": pl.String, - "Auftragsmenge": pl.UInt32, - "Kunde": pl.String, - "PA": pl.UInt64, - "PA Pos": pl.UInt32, - "PSM gemeldet am": pl.Datetime, - "Konfektionär": pl.String, - "Artikelnr.": pl.String, - "LT Kunde bestätigt": pl.Date, - "Export Ist": pl.Date, - "1.bestät. 
Import Konfektionär": pl.Date, - "Import Ist": pl.Date, - "Ablief.(Import Ist+Transport)": pl.Date, - "Wareneingang am": pl.Date, - "Wareneingang geprüft": pl.String, - "Täglicher Ausstoss": pl.Int64, - "Zuschnitt am": pl.Date, - "Teile in Zuschnitt": pl.UInt64, - "Teile im Nähband": pl.UInt64, - "Fertigware aus Nähband": pl.UInt64, - "Teile kontrolliert": pl.UInt64, - "Teile verpackt in Karton": pl.UInt64, - "Anzahl Bänder": pl.UInt16, - "Anzahl Näher": pl.UInt16, - "Arbeitsstunden pro Näher": pl.UInt8, - "Anzahl Arbeitstage pro Woche": pl.UInt8, - "Blockauftrag": pl.String, +PSM_SCORES: dict[QualityPsm, int] = { + QualityPsm.FEHLEND: 1, + QualityPsm.UNPLAUSIBEL: 0, + QualityPsm.PLAUSIBEL: 2, } -# psm = pl.read_csv(data_t1_PSM, encoding="windows-1252", separator=";") +# %% psm = pl.read_csv( data_t1_PSM, encoding="windows-1252", separator=";", - schema_overrides=schema_PSM, + schema_overrides=db.extern_prod_order_t_schema, null_values=["01.01.1111 00:00:00"], ) + +# %% +# // save data as raw +target = DATA_PTH / "PSM_20260507.arrow" +psm.write_ipc(target) + # %% psm.filter(pl.col("Konfektionär").str.contains("MEMTEKS")) # %% @@ -201,11 +184,6 @@ df_marked = tmp_1.with_columns( .alias("Produktionsstückzahlen_valide") ) -PSM_SCORES: dict[QualityPsm, int] = { - QualityPsm.FEHLEND: 1, - QualityPsm.UNPLAUSIBEL: 0, - QualityPsm.PLAUSIBEL: 2, -} df_score = df_marked.with_columns( pl.when(pl.col("is_empty")) @@ -241,10 +219,10 @@ renaming_scheme: dict[str, str] = { "Teile verpackt in Karton": "Prod-EP50_Historie", } -KEYS = ["PA", "PA_Pos"] +PRIM_KEYS = ["PA", "PA_Pos"] tmp = tmp.rename(renaming_scheme) -tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False) +tmp = tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) plausi_features_all = [ @@ -325,10 +303,10 @@ tmp = tmp.with_columns( # need additional "alias" on "Prod-Start_Historie" # duration since last report in days -tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], 
descending=False).with_columns( +tmp = tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( ( pl.col("Meldezeitpunkt_Historie") - - pl.col("Meldezeitpunkt_Historie").shift(1).over(KEYS) + - pl.col("Meldezeitpunkt_Historie").shift(1).over(PRIM_KEYS) ) .dt.total_days() .alias("Tage_zu_letzter_PSM_Historie") @@ -341,9 +319,9 @@ tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_column # aggregate "Import-Ist_Historie" and use "drop_nulls" "last" # need additional "alias" on "Import-Ist_Historie" -tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( +tmp = tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( # Prüfen: Ist das aktuelle Datum ungleich dem vorherigen Datum derselben Position? - (pl.col("Import-Ist_Historie") != pl.col("Import-Ist_Historie").shift(1).over(KEYS)) + (pl.col("Import-Ist_Historie") != pl.col("Import-Ist_Historie").shift(1).over(PRIM_KEYS)) .fill_null(False) # Der allererste Eintrag hat keinen Vorgänger -> Ist keine Änderung .alias("Import-Ist_geaendert") ) @@ -357,8 +335,8 @@ tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_column # whole aggregates see DB schema tmp = ( - tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False) - .group_by(KEYS + ["Konfektionär"]) + tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) + .group_by(PRIM_KEYS + ["Konfektionär"]) .agg( pl.col("Meldezeitpunkt_Historie"), pl.col("Liefertermin_Soll").drop_nulls().first(), @@ -390,27 +368,41 @@ tmp LOWER_BOUND_DATE_DEVIATION = 0 UPPER_BOUND_DATE_DEVIATION = 0 -tmp = tmp.with_columns( - pl.when( - (pl.col("Liefertermin_Ist").is_not_null()) - & (pl.col("Liefertermin_Soll").is_not_null()) +tmp = ( + tmp.with_columns( + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) + & (pl.col("Liefertermin_Soll").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) + 
.otherwise(None) + .alias("Terminabweichung_Anzahl_Tage") + ) + .with_columns( + pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminunterschreitung"), + pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminüberschreitung"), + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) & (pl.col("Prod-Start").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) + .otherwise(None) + .alias("Durchlaufzeit_Anzahl_Tage"), + ) + .with_columns( + pl.when( + (pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null()) + & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0) + ) + .then(None) + .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) + .alias("Durchlaufzeit_Anzahl_Tage") ) - .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) - .otherwise(None) - .alias("Terminabweichung_Anzahl_Tage") -).with_columns( - pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) - .then(pl.lit(True)) - .otherwise(pl.lit(False)) - .alias("Terminunterschreitung"), - pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) - .then(pl.lit(True)) - .otherwise(pl.lit(False)) - .alias("Terminüberschreitung"), - pl.when((pl.col("Liefertermin_Ist").is_not_null()) & (pl.col("Prod-Start").is_not_null())) - .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) - .otherwise(None) - .alias("Durchlaufzeit_Anzahl_Tage"), ) tmp @@ -419,28 +411,105 @@ tmp # // dump to database -def _parse_to_json(value: Any) -> str: +def _json_default( + value: Any, +) -> str: if isinstance(value, (datetime.date, datetime.datetime)): return value.isoformat() else: raise TypeError -parsed_lists = tmp.with_columns( +def _parse_to_json( + x: pl.Series | None, +) -> str | None: + if x is None: + return None + + return 
json.dumps(x.to_list(), default=_json_default) + + +staging_data = tmp.with_columns( pl.col(pl.List) .map_elements( - lambda x: json.dumps(x.to_list(), default=_parse_to_json) if x is not None else None, + _parse_to_json, return_dtype=pl.String, ) .name.keep() ) -parsed_lists +staging_data # %% -parsed_lists["Import-Ist_Historie"].item(0) +rows_inserted = staging_data.write_database( + "Produktionsauftrag-Einzelsicht_Staging", + connection=db.DB_URI, + engine="adbc", + if_table_exists="replace", +) +assert rows_inserted == staging_data.height +# %% # TODO make UPSERT with staging +all_columns = staging_data.columns +update_columns = [col for col in all_columns if col not in PRIM_KEYS] + +sql_column_list_str = ", ".join([f'"{c}"' for c in all_columns]) +sql_pk_list_str = ", ".join([f'"{c}"' for c in PRIM_KEYS]) +sql_update_rules_str = ", ".join([f'"{c}" = EXCLUDED."{c}"' for c in update_columns]) + +upsert_sql = f""" +INSERT INTO "Produktionsauftrag-Einzelsicht" ({sql_column_list_str}) +SELECT {sql_column_list_str} FROM "Produktionsauftrag-Einzelsicht_Staging" WHERE 1=1 +ON CONFLICT({sql_pk_list_str}) DO UPDATE SET + {sql_update_rules_str}; +""" + +# %% +with db.ENGINE_INTERNAL.begin() as conn: + res = conn.execute(sql.text(upsert_sql)) + conn.execute(sql.text('DROP TABLE IF EXISTS "Produktionsauftrag-Einzelsicht_Staging";')) + +# %% +# ** test if loaded correctly +stmt = sql.select(db.intern_prod_order_t) + +with db.ENGINE_INTERNAL.connect() as conn: + ret = conn.execute(stmt) + +ret.fetchall() + +# %% +# // database loading + +df = pl.read_database_uri( + 'SELECT * FROM "Produktionsauftrag-Einzelsicht"', + uri=db.DB_URI, + engine="adbc", + schema_overrides=db.intern_prod_order_t_schema, +) + +list_cols_to_type: dict[str, type[pl.DataType]] = { + "Meldezeitpunkt_Historie": pl.Datetime, + "Bestaetigter-Import_Historie": pl.Date, + "Import-Ist_Historie": pl.Date, + "Tage_zu_letzter_PSM_Historie": pl.Int64, + "Prod-EP10_Historie": pl.UInt64, + "Prod-EP20_Historie": 
pl.UInt64, + "Prod-EP30_Historie": pl.UInt64, + "Prod-EP40_Historie": pl.UInt64, + "Prod-EP50_Historie": pl.UInt64, + "Prod-Qualitaet_Historie": pl.Int32, + "Prod-Start_Historie": pl.Date, +} + +list_col_parse_conds = { + col: pl.col(col).str.json_decode(pl.List(list_type)) + for col, list_type in list_cols_to_type.items() +} + +df.with_columns(**list_col_parse_conds) + ######################################################## # %% @@ -450,6 +519,9 @@ tmp_1 = tmp_1.with_columns( ) tmp_1 +# %% + + # %% tmp_1 = tmp.with_columns( # Aktuelles Datum minus verschobenes Datum (isoliert je Auftrag) diff --git a/pyproject.toml b/pyproject.toml index 4927bdf..9978a19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "analysis of production state messages obtained from customers" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6"] +dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6", "adbc-driver-sqlite>=1.11.0", "pyarrow>=24.0.0"] requires-python = ">=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"} -- 2.34.1 From 9c8b4ea48c550ca4a5d9a260657d08c4fe7f8c82 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 5 Jun 2026 12:01:53 +0200 Subject: [PATCH 10/48] refactor and prepare pipeline --- prototypes/02_integrate_wokflow.py | 168 ++++++++++++ prototypes/02_save_to_db.py | 27 -- prototypes/external_code.py | 411 +++++++++++++++++++++++++++++ src/wattanalyse/db.py | 69 ++++- 4 files changed, 646 insertions(+), 29 deletions(-) create mode 100644 prototypes/02_integrate_wokflow.py delete mode 100644 prototypes/02_save_to_db.py create mode 100644 prototypes/external_code.py diff --git a/prototypes/02_integrate_wokflow.py b/prototypes/02_integrate_wokflow.py new file mode 100644 index 0000000..bc2892c --- /dev/null +++ 
b/prototypes/02_integrate_wokflow.py @@ -0,0 +1,168 @@ +# %% +import datetime +import importlib +from pathlib import Path + +import external_code +import polars as pl +import sqlalchemy as sql + +from wattanalyse import db + +importlib.reload(db) +importlib.reload(external_code) + +# %% +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() + +# %% +# // load data +target = DATA_PTH / "PSM_20260507.arrow" +data_raw = pl.read_ipc(target) + + +# %% +# // preprocessing I +res = external_code.preprocess_psm(data_raw) + +# %% +res.filtered +# %% +data = data_raw.rename(external_code.RENAMING_SCHEME) +REGEX_PATTERN = r"^[\s\-#+/$]+$" +data = data.with_columns( + pl.when(pl.col(pl.String).str.contains(REGEX_PATTERN)) + .then(None) + .otherwise(pl.col(pl.String)) + .name.keep() +) +data = data.with_columns(pl.col("Konfektionär").str.strip_chars(" \n\t")) +print(f"Size of dataset before cleansing: {data.height}") +filtered_data = pl.DataFrame(schema=data.schema) +# %% +# data.filter(pl.col.Meldezeitpunkt_Historie.is_null()) +# %% +# any NULL values in critical columns +NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") +conds = [pl.col(col).is_null() for col in NOT_NULL_COLS] +filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) +data = data.filter(~pl.any_horizontal(*conds)) + +# implausible dates +# dates not allowed to be in the future +current_datetime = datetime.datetime.now() +current_date = current_datetime.date() +NOT_IN_FUTURE_COLS_DATETIME = ("Meldezeitpunkt_Historie",) +NOT_IN_FUTURE_COLS_DATE = ("Wareneingang am", "Prod-Start_Historie") +conds = [ + (pl.col(col) > current_datetime).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATETIME +] + +conds.extend( + [(pl.col(col) > current_date).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATE] +) +filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) +data = data.filter(~pl.any_horizontal(*conds)) + +# 
too much in the future or the past +NUMBER_YEARS_UPPER_BOUND_DATES = 4 +# dates +future_limit = current_date + datetime.timedelta(days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES)) +past_limit = datetime.date(1990, 1, 1) +cond = (pl.col(pl.Date) > future_limit).fill_null(False) | ( + pl.col(pl.Date) < past_limit +).fill_null(False) +filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) +data = data.filter(~pl.any_horizontal(cond)) +# datetime +future_limit = current_datetime + datetime.timedelta( + days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES) +) +past_limit = datetime.datetime(1990, 1, 1) +cond = (pl.col(pl.Datetime) > future_limit).fill_null(False) | ( + pl.col(pl.Datetime) < past_limit +).fill_null(False) +filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) +data = data.filter(~pl.any_horizontal(cond)) + +print(f"Size of dataset after cleansing: {data.height}") +print(f"Filtered data: {filtered_data}") +# %% +test = pl.DataFrame( + { + "t1": [0, 1, 3], + "t2": [1, None, 3], + "t3": [3, 8, None], + } +) +test + + +# %% +columns = ["t1", "t2", "t3"] +conds = [pl.col(col).is_null() for col in columns] +test.filter(pl.any_horizontal(*conds)) + +# %% +most_occurrences = ( + data.group_by(["PA", "PA Pos", "Konfektionär"]) + .agg(pl.len().alias("count")) + .sort("count", descending=True) +) +most_occurrences +# %% +most_occurrences.filter(~pl.col("Konfektionär").str.contains("May Tekstil Camcesme")) +# %% +# data = data.filter( +# ((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)) +# | ((pl.col.PA == 16856) & (pl.col("PA Pos") == 10)) +# ).sort("PSM gemeldet am", descending=False) +data = data.filter((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)).sort( + "PSM gemeldet am", descending=False +) +data.select(pl.col.PA.unique()) +# %% +# // simulate time series +# this is a sequence how data would be provided: first one entry, and then more additional entries +series: list[pl.DataFrame] = [] + +for i in range(data.height): + 
series.append(data[: (i + 1)]) + +assert len(series) == data.height + +for idx, entry in enumerate(series, start=1): + assert idx == entry.height + +# %% +# 1. cleanup obtained new data +# ~~2. load data from internal database~~ +# ~~3. integrate with with new data (whole snapshot)~~ +# 2. process on order level +# 3. save results to internal database +# 4. post-process results +# 5. write to external database + +# // (1) cleanup obtained new data +# load data from internal database +# integrate with with new data (whole snapshot) + + +# // (2) processing order level +tmp = series[3] +tmp +# %% +df = external_code.process_order_level(tmp) +df + +# %% +# // (3) save results to internal database +external_code.dump_order_level_to_internal_database_wipe(df) +# %% +# now load data from database +df = external_code.load_order_level_from_internal_database() +df + +# %% diff --git a/prototypes/02_save_to_db.py b/prototypes/02_save_to_db.py deleted file mode 100644 index ba8c6ff..0000000 --- a/prototypes/02_save_to_db.py +++ /dev/null @@ -1,27 +0,0 @@ -# %% -import datetime -import json -from typing import Any - -# %% -dt = datetime.datetime.now() -date = dt.date() - -# %% -val = [dt, date] -json.dumps(val) - - -# %% -def _parse_to_json(value: Any) -> str: - if isinstance(value, datetime.date): - return value.isoformat() - elif isinstance(value, datetime.datetime): - return value.isoformat() - else: - raise TypeError - - -# %% -json.dumps(val, default=_parse_to_json) -# %% diff --git a/prototypes/external_code.py b/prototypes/external_code.py new file mode 100644 index 0000000..5c32996 --- /dev/null +++ b/prototypes/external_code.py @@ -0,0 +1,411 @@ +import dataclasses as dc +import datetime +import enum +import json +from typing import Any, Final + +import polars as pl +import sqlalchemy as sql + +from wattanalyse import db + +# 1. cleanup obtained new data +# ~~2. load data from internal database~~ +# ~~3. integrate with with new data (whole snapshot)~~ +# 2. 
process on order level +# 3. save results to internal database +# 4. post-process results +# 5. write to external database + + +@dc.dataclass(slots=True, eq=False) +class PreProcessResult: + data: pl.DataFrame + filtered: pl.DataFrame + + +class QualityPsm(enum.StrEnum): + FEHLEND = enum.auto() + UNPLAUSIBEL = enum.auto() + PLAUSIBEL = enum.auto() + + +PSM_SCORES: dict[QualityPsm, int] = { + QualityPsm.FEHLEND: 1, + QualityPsm.UNPLAUSIBEL: 0, + QualityPsm.PLAUSIBEL: 2, +} + +RENAMING_SCHEME: dict[str, str] = { + "PA Pos": "PA_Pos", + "PSM gemeldet am": "Meldezeitpunkt_Historie", + "Import Ist": "Import-Ist_Historie", + "1.bestät. Import Konfektionär": "Bestaetigter-Import_Historie", + "Zuschnitt am": "Prod-Start_Historie", + "Teile in Zuschnitt": "Prod-EP10_Historie", + "Teile im Nähband": "Prod-EP20_Historie", + "Fertigware aus Nähband": "Prod-EP30_Historie", + "Teile kontrolliert": "Prod-EP40_Historie", + "Teile verpackt in Karton": "Prod-EP50_Historie", +} + +PRIM_KEYS: Final[list[str]] = ["PA", "PA_Pos"] + +LOWER_BOUND_DATE_DEVIATION: Final[int] = 0 +UPPER_BOUND_DATE_DEVIATION: Final[int] = 0 +NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4 + + +# // (1) preprocess +def preprocess_psm( + data: pl.DataFrame, +) -> PreProcessResult: + data = data.rename(RENAMING_SCHEME) + REGEX_PATTERN = r"^[\s\-#+/$]+$" + data = data.with_columns( + pl.when(pl.col(pl.String).str.contains(REGEX_PATTERN)) + .then(None) + .otherwise(pl.col(pl.String)) + .name.keep() + ) + data = data.with_columns(pl.col("Konfektionär").str.strip_chars(" \n\t")) + filtered_data = pl.DataFrame(schema=data.schema) + + # any NULL values in critical columns + NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") + conds = [pl.col(col).is_null() for col in NOT_NULL_COLS] + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) + data = data.filter(~pl.any_horizontal(*conds)) + + # implausible dates + # dates not allowed to be in the future + current_datetime = 
datetime.datetime.now() + current_date = current_datetime.date() + NOT_IN_FUTURE_COLS_DATETIME = ("Meldezeitpunkt_Historie",) + NOT_IN_FUTURE_COLS_DATE = ("Wareneingang am", "Prod-Start_Historie") + conds = [ + (pl.col(col) > current_datetime).fill_null(False) + for col in NOT_IN_FUTURE_COLS_DATETIME + ] + conds.extend( + [(pl.col(col) > current_date).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATE] + ) + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) + data = data.filter(~pl.any_horizontal(*conds)) + + # too much in the future or the past + # dates + future_limit = current_date + datetime.timedelta( + days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES) + ) + past_limit = datetime.date(1990, 1, 1) + cond = (pl.col(pl.Date) > future_limit).fill_null(False) | ( + pl.col(pl.Date) < past_limit + ).fill_null(False) + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) + data = data.filter(~pl.any_horizontal(cond)) + # datetimes + future_limit = current_datetime + datetime.timedelta( + days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES) + ) + past_limit = datetime.datetime(1990, 1, 1) + cond = (pl.col(pl.Datetime) > future_limit).fill_null(False) | ( + pl.col(pl.Datetime) < past_limit + ).fill_null(False) + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) + data = data.filter(~pl.any_horizontal(cond)) + + return PreProcessResult(data=data, filtered=filtered_data) + + +# // (2) process on order level +def process_order_level(data: pl.DataFrame) -> pl.DataFrame: + # ** renaming + # data = data.rename(RENAMING_SCHEME) # TODO delete, done in pre-processing + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) + + # ** plausibility check of order quantities + PLAUSI_FEATURES: list[str] = [ + "Prod-EP10_Historie", + "Prod-EP20_Historie", + "Prod-EP30_Historie", + "Prod-EP40_Historie", + "Prod-EP50_Historie", + ] + data = data.with_columns( + pl.all_horizontal( + 
pl.col(PLAUSI_FEATURES).is_null() | (pl.col(PLAUSI_FEATURES) == 0) + ).alias("is_empty") + ) + conditions = [ + pl.col(PLAUSI_FEATURES[i]) >= pl.col(PLAUSI_FEATURES[i + 1]) + for i in range(len(PLAUSI_FEATURES) - 1) + ] + data = data.with_columns( + pl.when(pl.all_horizontal(conditions) | pl.col("is_empty")) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Prod-Qty_is_valid") + ).with_columns( + pl.when(pl.col("is_empty")) + .then(pl.lit(PSM_SCORES[QualityPsm.FEHLEND])) + .when(pl.col("Prod-Qty_is_valid")) + .then(pl.lit(PSM_SCORES[QualityPsm.PLAUSIBEL])) + .otherwise(pl.lit(PSM_SCORES[QualityPsm.UNPLAUSIBEL])) + .alias("Prod-Qualitaet_Historie") + ) + # aggregate hint for "Prod-Qualitaet_Durchschnitt": use "drop_nulls" "last" + # aggregate "Prod-Qualitaet_Historie" and use "mean" + # need additional "alias" on "Prod-Qualitaet_Historie" + + # ** planned or target delivery date + current_date = datetime.datetime.now().date() + print(f"{current_date=}") + data = data.with_columns( + pl.coalesce(["Bestaetigter-Import_Historie", "Import-Ist_Historie"]).alias( + "Liefertermin_Soll" + ) + ) + # aggregate hint for "Liefertermin_Soll": use "drop_nulls" "first" + # first filled field for "Liefertermin Soll" is the relevant target date + # should be first confirmed date, but if this field is not filled we use the first + # filled import by the supplier + + # ** actual delivery date + # logic of Wattana: set date is before current date --> becomes actual value + data = data.with_columns( + pl.when(pl.col("Import-Ist_Historie") < current_date) + .then(pl.col("Import-Ist_Historie")) + .otherwise(None) + .alias("Liefertermin_Ist") + ) + # aggregate hint for "Liefertermin_Ist": use "drop_nulls" "last" + # keep last because that is the latest value set by the supplier + # if all values are NULL then NULL is returned (no actual date available) + + # ** duration since last report in days + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], 
descending=False).with_columns( + ( + pl.col("Meldezeitpunkt_Historie") + - pl.col("Meldezeitpunkt_Historie").shift(1).over(PRIM_KEYS) + ) + .dt.total_days() + .alias("Tage_zu_letzter_PSM_Historie") + ) + # aggregate hint for "Tage_zu_letzter_PSM_Durchschnitt" + # aggregate "Tage_zu_letzter_PSM_Historie" and use "mean" (NULL is ignored automatically) + # need additional "alias" on "Tage_zu_letzter_PSM_Historie" + + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( + # Prüfen: Ist das aktuelle Datum ungleich dem vorherigen Datum derselben Position? + ( + pl.col("Import-Ist_Historie") + != pl.col("Import-Ist_Historie").shift(1).over(PRIM_KEYS) + ) + .fill_null(False) # Der allererste Eintrag hat keinen Vorgänger -> Ist keine Änderung + .alias("Import-Ist_geaendert") + ) + # aggregate hint for "Import-Ist_geaendert" + # aggregate "Import-Ist_geaendert" and use "last" + + # aggregate hint for "Import-Ist_letzter_Wert" + # aggregate "Import-Ist_Historie" and use "drop_nulls" "last" + # need additional "alias" on "Import-Ist_Historie" + + # aggregate hint for "Import-Ist_Anzahl_Aenderungen" + # aggregate "Import-Ist_geaendert" and use "sum" + # need additional "alias" on "Import-Ist_geaendert" + + # aggregate hint for "Prod-Start" + # aggregate "Prod-Start_Historie" and use "drop_nulls" "first" + # first entry should be treated as the truth value, changing later does not make sense + # need additional "alias" on "Prod-Start_Historie" + + # whole aggregates see DB schema + data = ( + data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) + .group_by(PRIM_KEYS + ["Konfektionär"]) + .agg( + pl.col("Meldezeitpunkt_Historie"), + pl.col("Liefertermin_Soll").drop_nulls().first(), + pl.col("Bestaetigter-Import_Historie"), + pl.col("Liefertermin_Ist").drop_nulls().last(), + pl.col("Import-Ist_Historie"), + pl.col("Import-Ist_Historie") + .drop_nulls() + .last() + .alias("Import-Ist_letzter_Wert"), + 
pl.col("Import-Ist_geaendert").last(), + pl.col("Import-Ist_geaendert").sum().alias("Import-Ist_Anzahl_Aenderungen"), + pl.col("Tage_zu_letzter_PSM_Historie"), + pl.col("Tage_zu_letzter_PSM_Historie") + .mean() + .alias("Tage_zu_letzter_PSM_Durchschnitt"), + pl.col("Prod-EP10_Historie"), + pl.col("Prod-EP20_Historie"), + pl.col("Prod-EP30_Historie"), + pl.col("Prod-EP40_Historie"), + pl.col("Prod-EP50_Historie"), + pl.col("Prod-Qualitaet_Historie"), + pl.col("Prod-Qualitaet_Historie").mean().alias("Prod-Qualitaet_Durchschnitt"), + pl.col("Prod-Start_Historie"), + pl.col("Prod-Start_Historie").drop_nulls().first().alias("Prod-Start"), + ) + ) + # ** order specific aggregates + data = ( + data.with_columns( + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) + & (pl.col("Liefertermin_Soll").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) + .otherwise(None) + .alias("Terminabweichung_Anzahl_Tage") + ) + .with_columns( + pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminunterschreitung"), + pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminüberschreitung"), + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) + & (pl.col("Prod-Start").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) + .otherwise(None) + .alias("Durchlaufzeit_Anzahl_Tage"), + ) + .with_columns( + pl.when( + (pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null()) + & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0) + ) + .then(None) + .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) + .alias("Durchlaufzeit_Anzahl_Tage") + ) + ) + + return data + + +# // (3) dump order level to internal database +def _json_default( + value: Any, +) -> str: + if isinstance(value, (datetime.date, datetime.datetime)): + return value.isoformat() + 
raise TypeError + + +def _parse_to_json( + x: pl.Series | None, +) -> str | None: + if x is None: + return None + + return json.dumps(x.to_list(), default=_json_default) + + +def dump_order_level_to_internal_database_staging( + data: pl.DataFrame, +) -> None: + + staging_data = data.with_columns( + pl.col(pl.List) + .map_elements( + _parse_to_json, + return_dtype=pl.String, + ) + .name.keep() + ) + rows_inserted = staging_data.write_database( + "Produktionsauftrag-Einzelsicht_Staging", + connection=db.DB_URI, + engine="adbc", + if_table_exists="replace", + ) + if rows_inserted != staging_data.height: + raise RuntimeError("Number of inserted rows and length of staging data do not match.") + + all_columns = staging_data.columns + update_columns = [col for col in all_columns if col not in PRIM_KEYS] + + sql_column_list_str = ", ".join([f'"{c}"' for c in all_columns]) + sql_pk_list_str = ", ".join([f'"{c}"' for c in PRIM_KEYS]) + sql_update_rules_str = ", ".join([f'"{c}" = EXCLUDED."{c}"' for c in update_columns]) + + upsert_sql = f""" + INSERT INTO "Produktionsauftrag-Einzelsicht" ({sql_column_list_str}) + SELECT {sql_column_list_str} FROM "Produktionsauftrag-Einzelsicht_Staging" WHERE 1=1 + ON CONFLICT({sql_pk_list_str}) DO UPDATE SET + {sql_update_rules_str}; + """ + + with db.ENGINE_INTERNAL.begin() as conn: + conn.execute(sql.text(upsert_sql)) + conn.execute( + sql.text('DROP TABLE IF EXISTS "Produktionsauftrag-Einzelsicht_Staging";') + ) + + +def dump_order_level_to_internal_database_wipe( + data: pl.DataFrame, +) -> None: + + staging_data = data.with_columns( + pl.col(pl.List) + .map_elements( + _parse_to_json, + return_dtype=pl.String, + ) + .name.keep() + ) + # empty table + with db.ENGINE_INTERNAL.begin() as conn: + conn.execute(sql.text('DELETE FROM "Produktionsauftrag-Einzelsicht";')) + + rows_inserted = staging_data.write_database( + "Produktionsauftrag-Einzelsicht", + connection=db.DB_URI, + engine="adbc", + if_table_exists="append", + ) + if 
rows_inserted != staging_data.height: + raise RuntimeError("Number of inserted rows and length of staging data do not match.") + + +# ** load order level data from internal database +def load_order_level_from_internal_database() -> pl.DataFrame: + data = pl.read_database_uri( + 'SELECT * FROM "Produktionsauftrag-Einzelsicht"', + uri=db.DB_URI, + engine="adbc", + schema_overrides=db.intern_prod_order_t_schema, + ) + + list_cols_to_type: dict[str, type[pl.DataType]] = { + "Meldezeitpunkt_Historie": pl.Datetime, + "Bestaetigter-Import_Historie": pl.Date, + "Import-Ist_Historie": pl.Date, + "Tage_zu_letzter_PSM_Historie": pl.Int64, + "Prod-EP10_Historie": pl.UInt64, + "Prod-EP20_Historie": pl.UInt64, + "Prod-EP30_Historie": pl.UInt64, + "Prod-EP40_Historie": pl.UInt64, + "Prod-EP50_Historie": pl.UInt64, + "Prod-Qualitaet_Historie": pl.Int32, + "Prod-Start_Historie": pl.Date, + } + + list_col_parse_conds = { + col: pl.col(col).str.json_decode(pl.List(list_type)) + for col, list_type in list_cols_to_type.items() + } + + return data.with_columns(**list_col_parse_conds) diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py index aa27640..ddb0215 100644 --- a/src/wattanalyse/db.py +++ b/src/wattanalyse/db.py @@ -1,3 +1,6 @@ +from typing import Final + +import polars as pl import sqlalchemy as sql from sqlalchemy import Column, Table @@ -7,7 +10,8 @@ assert constants.Config.DB_PATH_INTERNAL.parent.exists(), ( "database parent folder does not exists" ) -ENGINE = sql.create_engine(f"sqlite:///{constants.Config.DB_PATH_INTERNAL}") +DB_URI: Final[str] = f"sqlite:///{constants.Config.DB_PATH_INTERNAL}" +ENGINE_INTERNAL: Final[sql.Engine] = sql.create_engine(DB_URI) MD_INTERNAL = sql.MetaData() @@ -43,4 +47,65 @@ intern_prod_order_t: Table = Table( Column("Durchlaufzeit_Anzahl_Tage", sql.Float, nullable=True), ) -MD_INTERNAL.create_all(ENGINE) +intern_prod_order_t_schema: dict[str, type[pl.DataType]] = { + "PA": pl.UInt64, + "PA_Pos": pl.UInt32, + "Konfektionär": pl.String, + 
"Meldezeitpunkt_Historie": pl.String, + "Liefertermin_Soll": pl.Date, + "Bestaetigter-Import_Historie": pl.String, + "Liefertermin_Ist": pl.Date, + "Import-Ist_Historie": pl.String, + "Import-Ist_letzter_Wert": pl.Date, + "Import-Ist_geaendert": pl.Boolean, + "Import-Ist_Anzahl_Aenderungen": pl.UInt32, + "Tage_zu_letzter_PSM_Historie": pl.String, + "Tage_zu_letzter_PSM_Durchschnitt": pl.Float64, + "Prod-EP10_Historie": pl.String, + "Prod-EP20_Historie": pl.String, + "Prod-EP30_Historie": pl.String, + "Prod-EP40_Historie": pl.String, + "Prod-EP50_Historie": pl.String, + "Prod-Qualitaet_Historie": pl.String, + "Prod-Qualitaet_Durchschnitt": pl.Float64, + "Prod-Start_Historie": pl.String, + "Prod-Start": pl.Date, + "Terminabweichung_Anzahl_Tage": pl.Int64, + "Terminunterschreitung": pl.Boolean, + "Terminüberschreitung": pl.Boolean, + "Durchlaufzeit_Anzahl_Tage": pl.Int64, +} + + +MD_INTERNAL.create_all(ENGINE_INTERNAL) + +extern_prod_order_t_schema: dict[str, type[pl.DataType]] = { + "VK Auftrag": pl.UInt32, + "Artikelbez.": pl.String, + "Auftragsmenge": pl.UInt32, + "Kunde": pl.String, + "PA": pl.UInt64, + "PA Pos": pl.UInt32, + "PSM gemeldet am": pl.Datetime, + "Konfektionär": pl.String, + "Artikelnr.": pl.String, + "LT Kunde bestätigt": pl.Date, + "Export Ist": pl.Date, + "1.bestät. 
Import Konfektionär": pl.Date, + "Import Ist": pl.Date, + "Ablief.(Import Ist+Transport)": pl.Date, + "Wareneingang am": pl.Date, + "Wareneingang geprüft": pl.String, + "Täglicher Ausstoss": pl.Int64, + "Zuschnitt am": pl.Date, + "Teile in Zuschnitt": pl.UInt64, + "Teile im Nähband": pl.UInt64, + "Fertigware aus Nähband": pl.UInt64, + "Teile kontrolliert": pl.UInt64, + "Teile verpackt in Karton": pl.UInt64, + "Anzahl Bänder": pl.UInt16, + "Anzahl Näher": pl.UInt16, + "Arbeitsstunden pro Näher": pl.UInt8, + "Anzahl Arbeitstage pro Woche": pl.UInt8, + "Blockauftrag": pl.String, +} -- 2.34.1 From 53df924bcb56f34e2e38642633d71b9fab82ec3a Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 5 Jun 2026 17:35:01 +0200 Subject: [PATCH 11/48] successful saving/loading of production order aggregate table --- prototypes/02_integrate_wokflow.py | 198 ++++++++++++++--------------- prototypes/external_code.py | 107 +++++++++++----- src/wattanalyse/db.py | 4 +- 3 files changed, 175 insertions(+), 134 deletions(-) diff --git a/prototypes/02_integrate_wokflow.py b/prototypes/02_integrate_wokflow.py index bc2892c..038e812 100644 --- a/prototypes/02_integrate_wokflow.py +++ b/prototypes/02_integrate_wokflow.py @@ -11,7 +11,6 @@ from wattanalyse import db importlib.reload(db) importlib.reload(external_code) - # %% PROJECT_BASE = Path(__file__).parents[1] DATA_PTH = PROJECT_BASE / "data" @@ -20,121 +19,44 @@ assert DATA_PTH.exists() # %% # // load data target = DATA_PTH / "PSM_20260507.arrow" -data_raw = pl.read_ipc(target) - - +data_raw = pl.scan_ipc(target) # %% # // preprocessing I -res = external_code.preprocess_psm(data_raw) +# res = external_code.preprocess_psm(data_raw) +# data = res.data -# %% -res.filtered -# %% -data = data_raw.rename(external_code.RENAMING_SCHEME) -REGEX_PATTERN = r"^[\s\-#+/$]+$" -data = data.with_columns( - pl.when(pl.col(pl.String).str.contains(REGEX_PATTERN)) - .then(None) - .otherwise(pl.col(pl.String)) - .name.keep() -) -data = 
data.with_columns(pl.col("Konfektionär").str.strip_chars(" \n\t")) -print(f"Size of dataset before cleansing: {data.height}") -filtered_data = pl.DataFrame(schema=data.schema) -# %% -# data.filter(pl.col.Meldezeitpunkt_Historie.is_null()) -# %% -# any NULL values in critical columns -NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") -conds = [pl.col(col).is_null() for col in NOT_NULL_COLS] -filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) -data = data.filter(~pl.any_horizontal(*conds)) - -# implausible dates -# dates not allowed to be in the future -current_datetime = datetime.datetime.now() -current_date = current_datetime.date() -NOT_IN_FUTURE_COLS_DATETIME = ("Meldezeitpunkt_Historie",) -NOT_IN_FUTURE_COLS_DATE = ("Wareneingang am", "Prod-Start_Historie") -conds = [ - (pl.col(col) > current_datetime).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATETIME -] - -conds.extend( - [(pl.col(col) > current_date).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATE] -) -filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) -data = data.filter(~pl.any_horizontal(*conds)) - -# too much in the future or the past -NUMBER_YEARS_UPPER_BOUND_DATES = 4 -# dates -future_limit = current_date + datetime.timedelta(days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES)) -past_limit = datetime.date(1990, 1, 1) -cond = (pl.col(pl.Date) > future_limit).fill_null(False) | ( - pl.col(pl.Date) < past_limit -).fill_null(False) -filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) -data = data.filter(~pl.any_horizontal(cond)) -# datetime -future_limit = current_datetime + datetime.timedelta( - days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES) -) -past_limit = datetime.datetime(1990, 1, 1) -cond = (pl.col(pl.Datetime) > future_limit).fill_null(False) | ( - pl.col(pl.Datetime) < past_limit -).fill_null(False) -filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) -data = 
data.filter(~pl.any_horizontal(cond)) - -print(f"Size of dataset after cleansing: {data.height}") -print(f"Filtered data: {filtered_data}") -# %% -test = pl.DataFrame( - { - "t1": [0, 1, 3], - "t2": [1, None, 3], - "t3": [3, 8, None], - } -) -test - - -# %% -columns = ["t1", "t2", "t3"] -conds = [pl.col(col).is_null() for col in columns] -test.filter(pl.any_horizontal(*conds)) # %% most_occurrences = ( - data.group_by(["PA", "PA Pos", "Konfektionär"]) + data.group_by(["PA", "PA_Pos", "Konfektionär"]) .agg(pl.len().alias("count")) .sort("count", descending=True) ) -most_occurrences -# %% -most_occurrences.filter(~pl.col("Konfektionär").str.contains("May Tekstil Camcesme")) +print(most_occurrences.collect()) +most_occurrences.filter( + ~pl.col("Konfektionär").str.contains("May Tekstil Camcesme") +).collect() # %% # data = data.filter( # ((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)) # | ((pl.col.PA == 16856) & (pl.col("PA Pos") == 10)) # ).sort("PSM gemeldet am", descending=False) -data = data.filter((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)).sort( - "PSM gemeldet am", descending=False -) -data.select(pl.col.PA.unique()) +# data = data.filter((pl.col.PA == 15372) & (pl.col("PA Pos") == 10)).sort( +# "PSM gemeldet am", descending=False +# ) +# data.select(pl.col.PA.unique()) # %% # // simulate time series # this is a sequence how data would be provided: first one entry, and then more additional entries -series: list[pl.DataFrame] = [] +# series: list[pl.DataFrame] = [] -for i in range(data.height): - series.append(data[: (i + 1)]) +# for i in range(data.height): +# series.append(data[: (i + 1)]) -assert len(series) == data.height +# assert len(series) == data.height -for idx, entry in enumerate(series, start=1): - assert idx == entry.height +# for idx, entry in enumerate(series, start=1): +# assert idx == entry.height # %% # 1. 
cleanup obtained new data @@ -148,14 +70,17 @@ for idx, entry in enumerate(series, start=1): # // (1) cleanup obtained new data # load data from internal database # integrate with with new data (whole snapshot) +res = external_code.preprocess_psm(data_raw) +data = res.data +print(f"Data:\n{data.collect()}\n\n---\n\nFiltered:\n{res.filtered}") -# // (2) processing order level -tmp = series[3] -tmp # %% -df = external_code.process_order_level(tmp) -df +# // (2) processing order level +df = external_code.process_order_level(data) + + +# TODO What is if "Konfektionär" is NULL? # %% # // (3) save results to internal database @@ -165,4 +90,75 @@ external_code.dump_order_level_to_internal_database_wipe(df) df = external_code.load_order_level_from_internal_database() df +############################################# +# %% +# handle "Liefertermin_Soll" nulls +df.filter(pl.col("Liefertermin_Soll").is_null()).collect() +# %% +df.head().collect() + +# %% +data_raw.filter(pl.col.PA == 18759) + +# %% +data_raw.filter(pl.col.PA == 16626).collect() +# %% +data_raw.filter(pl.all().is_duplicated()) +# %% +test = data_raw.collect() + +# %% +all_cols = test.columns +test = test.with_row_index("tmp_idx") + +# %% +all_uni = test.unique(subset=all_cols, keep="first") +# %% +sub_uni = test.unique(subset=["PA", "PA Pos", "PSM gemeldet am"], keep="first") +# %% +all_uni.join(sub_uni, on="tmp_idx", how="anti") +# %% +all_uni.height +# %% +sub_uni.height +# %% +tmp = test.filter(pl.col.PA == 17055).sort("PSM gemeldet am") +# %% +tmp.height +# %% +tmp_11 = tmp.unique(subset=["PA", "PA Pos", "PSM gemeldet am"], keep="first") +# %% +tmp_12 = tmp.unique(subset=all_cols, keep="first") +# %% +# tmp.select(all_cols).is_duplicated() +# %% +tmp.filter(tmp.is_unique()) +# %% +tmp_12.join(tmp_11, on="tmp_idx", how="anti") +# %% +test.filter( + (pl.col.PA == 17055) + & (pl.col("PSM gemeldet am") == datetime.datetime(2024, 10, 29, 10, 27)) +) +# after fix should be the entry with the most information 
(least null count) +# %% +t1 = data_raw.collect() +t1.head() +# %% +t1 = t1.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count")) + +t1.head() +# %% +check = data.collect() +check.filter( + (pl.col.PA == 17055) + & (pl.col("Meldezeitpunkt_Historie") == datetime.datetime(2024, 10, 29, 10, 27)) +) + +# %% +res.filtered.filter( + (pl.col.PA == 17055) + & (pl.col("Meldezeitpunkt_Historie") == datetime.datetime(2024, 10, 29, 10, 27)) +) + # %% diff --git a/prototypes/external_code.py b/prototypes/external_code.py index 5c32996..43187e1 100644 --- a/prototypes/external_code.py +++ b/prototypes/external_code.py @@ -20,7 +20,7 @@ from wattanalyse import db @dc.dataclass(slots=True, eq=False) class PreProcessResult: - data: pl.DataFrame + data: pl.LazyFrame filtered: pl.DataFrame @@ -58,7 +58,7 @@ NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4 # // (1) preprocess def preprocess_psm( - data: pl.DataFrame, + data: pl.LazyFrame, ) -> PreProcessResult: data = data.rename(RENAMING_SCHEME) REGEX_PATTERN = r"^[\s\-#+/$]+$" @@ -69,7 +69,23 @@ def preprocess_psm( .name.keep() ) data = data.with_columns(pl.col("Konfektionär").str.strip_chars(" \n\t")) - filtered_data = pl.DataFrame(schema=data.schema) + filtered_data = pl.LazyFrame(schema=data.collect_schema()) + + # drop duplicates + # use null count as information measure, least amount of nulls should be contained + base_columns = data.columns + data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count")) + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False) + filtered_data = pl.concat( + [ + filtered_data, + data.filter( + ~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct() + ).select(base_columns), + ] + ) + data = data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()) + data = data.drop("null_count") # any NULL values in critical columns NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") @@ 
-115,11 +131,13 @@ def preprocess_psm( filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) data = data.filter(~pl.any_horizontal(cond)) - return PreProcessResult(data=data, filtered=filtered_data) + return PreProcessResult(data=data, filtered=filtered_data.collect()) # // (2) process on order level -def process_order_level(data: pl.DataFrame) -> pl.DataFrame: +def process_order_level( + data: pl.LazyFrame, +) -> pl.LazyFrame: # ** renaming # data = data.rename(RENAMING_SCHEME) # TODO delete, done in pre-processing data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) @@ -255,42 +273,67 @@ def process_order_level(data: pl.DataFrame) -> pl.DataFrame: # ** order specific aggregates data = ( data.with_columns( - pl.when( - (pl.col("Liefertermin_Ist").is_not_null()) - & (pl.col("Liefertermin_Soll").is_not_null()) - ) - .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) - .otherwise(None) + (pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")) + .dt.total_days() .alias("Terminabweichung_Anzahl_Tage") ) .with_columns( - pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) - .then(pl.lit(True)) - .otherwise(pl.lit(False)) - .alias("Terminunterschreitung"), - pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) - .then(pl.lit(True)) - .otherwise(pl.lit(False)) - .alias("Terminüberschreitung"), - pl.when( - (pl.col("Liefertermin_Ist").is_not_null()) - & (pl.col("Prod-Start").is_not_null()) - ) - .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) - .otherwise(None) + (pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION).alias( + "Terminunterschreitung" + ), + (pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION).alias( + "Terminüberschreitung" + ), + (pl.col("Liefertermin_Ist") - pl.col("Prod-Start")) + .dt.total_days() .alias("Durchlaufzeit_Anzahl_Tage"), ) .with_columns( - pl.when( - 
(pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null()) - & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0) - ) + pl.when(pl.col("Durchlaufzeit_Anzahl_Tage") < 0) .then(None) .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) .alias("Durchlaufzeit_Anzahl_Tage") ) ) + # data = ( + # data.with_columns( + # pl.when( + # (pl.col("Liefertermin_Ist").is_not_null()) + # & (pl.col("Liefertermin_Soll").is_not_null()) + # ) + # .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) + # .otherwise(None) + # .alias("Terminabweichung_Anzahl_Tage") + # ) + # .with_columns( + # pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) + # .then(pl.lit(True)) + # .otherwise(pl.lit(False)) + # .alias("Terminunterschreitung"), + # pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) + # .then(pl.lit(True)) + # .otherwise(pl.lit(False)) + # .alias("Terminüberschreitung"), + # pl.when( + # (pl.col("Liefertermin_Ist").is_not_null()) + # & (pl.col("Prod-Start").is_not_null()) + # ) + # .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) + # .otherwise(None) + # .alias("Durchlaufzeit_Anzahl_Tage"), + # ) + # .with_columns( + # pl.when( + # (pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null()) + # & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0) + # ) + # .then(None) + # .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) + # .alias("Durchlaufzeit_Anzahl_Tage") + # ) + # ) + return data @@ -313,7 +356,7 @@ def _parse_to_json( def dump_order_level_to_internal_database_staging( - data: pl.DataFrame, + data: pl.LazyFrame, ) -> None: staging_data = data.with_columns( @@ -324,6 +367,7 @@ def dump_order_level_to_internal_database_staging( ) .name.keep() ) + staging_data = staging_data.collect() rows_inserted = staging_data.write_database( "Produktionsauftrag-Einzelsicht_Staging", connection=db.DB_URI, @@ -355,7 +399,7 @@ def dump_order_level_to_internal_database_staging( def dump_order_level_to_internal_database_wipe( - data: 
pl.DataFrame, + data: pl.LazyFrame, ) -> None: staging_data = data.with_columns( @@ -370,6 +414,7 @@ def dump_order_level_to_internal_database_wipe( with db.ENGINE_INTERNAL.begin() as conn: conn.execute(sql.text('DELETE FROM "Produktionsauftrag-Einzelsicht";')) + staging_data = staging_data.collect() rows_inserted = staging_data.write_database( "Produktionsauftrag-Einzelsicht", connection=db.DB_URI, diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py index ddb0215..e2b7fd3 100644 --- a/src/wattanalyse/db.py +++ b/src/wattanalyse/db.py @@ -21,9 +21,9 @@ intern_prod_order_t: Table = Table( MD_INTERNAL, Column("PA", sql.Integer, primary_key=True), Column("PA_Pos", sql.Integer, primary_key=True), - Column("Konfektionär", sql.Text, nullable=False), + Column("Konfektionär", sql.Text, nullable=True), Column("Meldezeitpunkt_Historie", sql.Text, nullable=False), - Column("Liefertermin_Soll", sql.Date, nullable=False), + Column("Liefertermin_Soll", sql.Date, nullable=True), Column("Bestaetigter-Import_Historie", sql.Text, nullable=False), Column("Liefertermin_Ist", sql.Date, nullable=True), Column("Import-Ist_Historie", sql.Text, nullable=False), -- 2.34.1 From 56b88adac36c0369a37e9339a46b54a9e176b9f5 Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 07:44:07 +0200 Subject: [PATCH 12/48] add OracleDB lib --- pdm.lock | 114 +++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 2 +- 2 files changed, 112 insertions(+), 4 deletions(-) diff --git a/pdm.lock b/pdm.lock index cc8b8e4..ead0185 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:8f138c1407dc86bdf19aa5a6ce42cb158c9b9963fbb8cf7f4c85f453799f5a10" +content_hash = "sha256:22683d7fc93e640ef1e22b402539ab0bd4a326d6b35aa301423b71ca3488e08c" [[metadata.targets]] requires_python = ">=3.11" @@ -332,7 +332,7 @@ name = "cffi" version = "2.0.0" requires_python = ">=3.9" 
summary = "Foreign Function Interface for Python calling C code." -groups = ["nb"] +groups = ["default", "nb"] dependencies = [ "pycparser; implementation_name != \"PyPy\"", ] @@ -748,6 +748,68 @@ files = [ {file = "coverage-7.14.1.tar.gz", hash = "sha256:30c08f7d90415aa98b3c990385dea2939b0da55f38515e5b369b83655f8523be"}, ] +[[package]] +name = "cryptography" +version = "48.0.0" +requires_python = "!=3.9.0,!=3.9.1,>=3.9" +summary = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +groups = ["default"] +dependencies = [ + "cffi>=2.0.0; platform_python_implementation != \"PyPy\"", + "typing-extensions>=4.13.2; python_full_version < \"3.11\"", +] +files = [ + {file = "cryptography-48.0.0-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602"}, + {file = 
"cryptography-48.0.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c"}, + {file = "cryptography-48.0.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5"}, + {file = "cryptography-48.0.0-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321"}, + {file = "cryptography-48.0.0-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74"}, + {file = "cryptography-48.0.0-cp311-abi3-win32.whl", hash = "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4"}, + {file = "cryptography-48.0.0-cp311-abi3-win_amd64.whl", hash = "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7"}, + {file = "cryptography-48.0.0-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = 
"sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336"}, + {file = "cryptography-48.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057"}, + {file = "cryptography-48.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae"}, + {file = "cryptography-48.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c"}, + {file = "cryptography-48.0.0-cp314-cp314t-win32.whl", hash = "sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f"}, + {file = "cryptography-48.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12"}, + {file = "cryptography-48.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832"}, + {file = 
"cryptography-48.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a"}, + {file = "cryptography-48.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239"}, + {file = "cryptography-48.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c"}, + {file = "cryptography-48.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4"}, + {file = "cryptography-48.0.0-cp39-abi3-win32.whl", hash = "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd"}, + {file = "cryptography-48.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8"}, + {file = "cryptography-48.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:84cf79f0dc8b36ac5da873481716e87aef31fcfa0444f9e1d8b4b2cece142855"}, + {file = "cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:fdfef35d751d510fcef5252703621574364fec16418c4a1e5e1055248401054b"}, + {file = "cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:0890f502ddf7d9c6426129c3f49f5c0a39278ed7cd6322c8755ffca6ee675a13"}, + {file = "cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:ecde28a596bead48b0cfd2a1b4416c3d43074c2d785e3a398d7ec1fc4d0f7fbb"}, + {file = 
"cryptography-48.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:4defde8685ae324a9eb9d818717e93b4638ef67070ac9bc15b8ca85f63048355"}, + {file = "cryptography-48.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:db63bf618e5dea46c07de12e900fe1cdd2541e6dc9dbae772a70b7d4d4765f6a"}, + {file = "cryptography-48.0.0.tar.gz", hash = "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920"}, +] + [[package]] name = "debugpy" version = "1.8.21" @@ -1727,6 +1789,52 @@ files = [ {file = "nox-2026.4.10.tar.gz", hash = "sha256:2d0af5374f3f37a295428c927d1b04a8182aa01762897d172446dda2f1ce9692"}, ] +[[package]] +name = "oracledb" +version = "4.0.1" +requires_python = ">=3.9" +summary = "Python interface to Oracle Database" +groups = ["default"] +dependencies = [ + "cryptography>=3.2.1", + "typing-extensions>=4.14.0", +] +files = [ + {file = "oracledb-4.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:86a06d0afb3bb3a24bace0e72fb9abca2093efe0fa3457c65c13ba4eb5000b0b"}, + {file = "oracledb-4.0.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:416b324cd7715073cf5f3d577330387ffd59741463995c25bdc2d82b3e80b88e"}, + {file = "oracledb-4.0.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce6319ee01dcbb4d74f0e2a5794c6a566f339958ecac9830c67c7070521620e2"}, + {file = "oracledb-4.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:873fcca53306e2b3b445a7d657cddc19e415a7aa7e392c473dfd1a3ae3970989"}, + {file = "oracledb-4.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b09eec35681d72c9476e6d715b89bb775724a31e7363df6beba7470494ea8040"}, + {file = "oracledb-4.0.1-cp311-cp311-win32.whl", hash = "sha256:08e84a6af1b6e5921dba088dd9fc0738927206eafe5ce9763c34195f87556849"}, + {file = "oracledb-4.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:3b5ef1676a27b7e0a7ec55be27fd8f6d28d1601f5e8dfdae78705909f25b7c0a"}, + {file = 
"oracledb-4.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:90586b3c7729b9cf3d40df902e81257f01e15e3408d8b6b9dbf91e939b64f72c"}, + {file = "oracledb-4.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c05a01d6ad610a88c2aa1a43b1dc0a8485f5fbd4374d2b36908859d4205de192"}, + {file = "oracledb-4.0.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf61e42b9ef723dbdd0b23032b695e872009ed7341003df59d9a97cd960df977"}, + {file = "oracledb-4.0.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2d394453f669858bec942ff0da18b6ebade296ece823d582ad2b464ed5c6c90"}, + {file = "oracledb-4.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d7cd278d59780e22e0a7451d208460756d779dc62b55bdbd95652f9640fbf8c3"}, + {file = "oracledb-4.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b73820521eccd290506af94e1ffb9a8a5941b4018e3861df9b040652a7cef123"}, + {file = "oracledb-4.0.1-cp312-cp312-win32.whl", hash = "sha256:8fcad6d9628923281bf21e48a391ac2f87ec6950dc63381d8fea470e3128aef0"}, + {file = "oracledb-4.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:523b3356cde9d588ba250cefafdfc34869233d65c179f805ea6e4d3d6b209a7f"}, + {file = "oracledb-4.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:10204432f0eea8707a79c75bdccb84071e43fd19c658cb3b34d1746b12c6e7fe"}, + {file = "oracledb-4.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:443b2f03461e873ccd73dff3d8541fcf974c05e13e296a6687ffbb0c4a72c0a1"}, + {file = "oracledb-4.0.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae894ca2705929eb0ac228329336fd03388ad6e3b54002be6f5d4400a8feaf52"}, + {file = "oracledb-4.0.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4b42725337f80d433a3bd2928c08667e5b89da9ce05cf9ae3a4189c4fc4805ea"}, + {file = "oracledb-4.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:8e13ff1e6f28fdb863180d23fa94cb42c619c29d2981e24992431e51b97caa54"}, + {file = "oracledb-4.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e36581bb10e719d928dad12018c2d42606db2c34f49d6665b06f701f049255f0"}, + {file = "oracledb-4.0.1-cp313-cp313-win32.whl", hash = "sha256:86ac65cbc8d29626b1d9d203f9151566c26a78e55bdfc030c06169ae8017f458"}, + {file = "oracledb-4.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:a029dcee759bca56a8c95e952040c3d3f57e5ec05965355293b21930a66967fb"}, + {file = "oracledb-4.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:20a10f903c8da59e9689a98bd68012f78fa19bed950ad9f19cd8f5b8b97e73a0"}, + {file = "oracledb-4.0.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:cb7727f93ff962ab826bc3d0bca4b0e5bf45ecb7c525551c70c9e094f0f27027"}, + {file = "oracledb-4.0.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:032ca4f558b05f03fa1bef1b04e59ec350ae0b22e6d85c47f4ac62ae98315823"}, + {file = "oracledb-4.0.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7156ef112a901967b3ee89b6c582bafc5a3082c47ca566de1a79e9ac3b48da32"}, + {file = "oracledb-4.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8159c5bd8f25b0ca0ce30f21e7a732a2bdfb4adb81b9c8ea1ca75339d8ec8398"}, + {file = "oracledb-4.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e4926e699a42c526137724960fa4303ecb0b542186b11d3705ac84414a896508"}, + {file = "oracledb-4.0.1-cp314-cp314-win32.whl", hash = "sha256:b05bfadbfe462c39cc97258a973972f5bbbc9f8e2e9a4c2e0efcb1ec86b91088"}, + {file = "oracledb-4.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:0ece951553c106a0896c8e1690bcdf69d472761fa65fec9b8152cbce13ab8b81"}, + {file = "oracledb-4.0.1-cp314-cp314-win_arm64.whl", hash = "sha256:0d3c6ed987df64b914ece0722692419fe494d07f15bb4d7715adeada4f914c3a"}, + {file = "oracledb-4.0.1.tar.gz", hash = "sha256:34bbea44423ed8b24093aa859ca7ee9b6e76ea490f9acdc5f6ff01aa1083e343"}, +] + 
[[package]] name = "overrides" version = "7.7.0" @@ -1991,7 +2099,7 @@ name = "pycparser" version = "3.0" requires_python = ">=3.10" summary = "C parser in Python" -groups = ["nb"] +groups = ["default", "nb"] marker = "implementation_name != \"PyPy\"" files = [ {file = "pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992"}, diff --git a/pyproject.toml b/pyproject.toml index 9978a19..957c081 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "analysis of production state messages obtained from customers" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6", "adbc-driver-sqlite>=1.11.0", "pyarrow>=24.0.0"] +dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6", "adbc-driver-sqlite>=1.11.0", "pyarrow>=24.0.0", "oracledb>=4.0.1"] requires-python = ">=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"} -- 2.34.1 From c0cb16a8935c2ed28fee02687e06909998ecea05 Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 07:48:09 +0200 Subject: [PATCH 13/48] drop instead of select --- prototypes/external_code.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prototypes/external_code.py b/prototypes/external_code.py index 43187e1..7e7d201 100644 --- a/prototypes/external_code.py +++ b/prototypes/external_code.py @@ -73,7 +73,6 @@ def preprocess_psm( # drop duplicates # use null count as information measure, least amount of nulls should be contained - base_columns = data.columns data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count")) data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False) filtered_data = pl.concat( @@ -81,7 +80,7 @@ def preprocess_psm( filtered_data, data.filter( 
~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct() - ).select(base_columns), + ).drop("null_count"), ] ) data = data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()) -- 2.34.1 From 0ac2689b68f925fdc19217ea511de1e75f9eabf6 Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 09:18:09 +0200 Subject: [PATCH 14/48] aggregates for production orders --- ...e_wokflow.py => 02-1_integrate_wokflow.py} | 0 prototypes/02-2_aggregates.py | 114 ++++++++++++++++++ 2 files changed, 114 insertions(+) rename prototypes/{02_integrate_wokflow.py => 02-1_integrate_wokflow.py} (100%) create mode 100644 prototypes/02-2_aggregates.py diff --git a/prototypes/02_integrate_wokflow.py b/prototypes/02-1_integrate_wokflow.py similarity index 100% rename from prototypes/02_integrate_wokflow.py rename to prototypes/02-1_integrate_wokflow.py diff --git a/prototypes/02-2_aggregates.py b/prototypes/02-2_aggregates.py new file mode 100644 index 0000000..d419ef5 --- /dev/null +++ b/prototypes/02-2_aggregates.py @@ -0,0 +1,114 @@ +# %% +import datetime +import importlib +from pathlib import Path + +import external_code +import polars as pl +import sqlalchemy as sql + +from wattanalyse import db + +importlib.reload(db) +importlib.reload(external_code) +# %% +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() + +# %% +# // load data +target = DATA_PTH / "PSM_20260507.arrow" +data_raw = pl.scan_ipc(target) + +# %% +# 0. read data (from customer's database) +# 1. cleanup obtained new data +# ~~2. load data from internal database~~ +# ~~3. integrate with with new data (whole snapshot)~~ +# 2. process on order level +# 3. save results to internal database +# 4. post-process results +# 5. 
write to external database + +# // (1) cleanup obtained new data +# load data from internal database +# integrate with with new data (whole snapshot) +res = external_code.preprocess_psm(data_raw) +data = res.data + +print(f"Data:\n{data.collect()}\n\n---\n\nFiltered:\n{res.filtered}") + +# %% +# // (2) processing order level +df = external_code.process_order_level(data) + + +# ?? What is if "Konfektionär" is NULL? +# If this is NULL, then the aggregates for "Konfektionär" will not work. Instead, they are +# calculated for all NULL entries which might incorporate different production orders which +# belong to different "Konfektionär". Thus, these values will be calculated, but should not be +# considered. + +# %% +# // (3) save results to internal database +external_code.dump_order_level_to_internal_database_wipe(df) +# %% +# now load data from database +df = external_code.load_order_level_from_internal_database() +df +# %% +tmp = df.clone() + +# two ways to define the aggregate for date deviations: just use < 0 or use Boolean flag +# defined by the user-specified boundaries +USE_BOUNDARIES = False +filter_date_deviation_early: pl.Expr +filter_date_deviation_late: pl.Expr +if USE_BOUNDARIES: + filter_date_deviation_early = pl.col("Terminunterschreitung") + filter_date_deviation_late = pl.col("Terminüberschreitung") +else: + filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 + filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 + + +tmp.select( + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_early) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("Mittlere_Tage_Unterschreitung"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_late) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("Mittlere_Tage_Ueberschreitung"), + pl.col("Terminabweichung_Anzahl_Tage") + .std(ddof=1) + 
.alias("Standardabweichung_Lieferterminabweichung"), + pl.col("Import-Ist_Anzahl_Aenderungen") + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("Mittlere_Anzahl_Anpassungen_Liefertermin"), + pl.col("Tage_zu_letzter_PSM_Historie") + .list.explode() + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("Mittlere_Abstaende_PSM"), + pl.col("Durchlaufzeit_Anzahl_Tage") + .mean() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("Mittlere_Durchlaufzeit"), +) + +# %% -- 2.34.1 From 6e76807298d491f25775ba7cd870b4496cbed506 Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 15:40:48 +0200 Subject: [PATCH 15/48] add user-specified config --- config/wattana.toml | 6 ++++++ deployment/.env | 3 ++- src/wattanalyse/README.md | 1 + src/wattanalyse/constants.py | 10 ++++++++++ src/wattanalyse/types.py | 17 +++++++++++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 config/wattana.toml create mode 100644 src/wattanalyse/types.py diff --git a/config/wattana.toml b/config/wattana.toml new file mode 100644 index 0000000..2219c64 --- /dev/null +++ b/config/wattana.toml @@ -0,0 +1,6 @@ +[Datenbank] +NUTZER = "WATTANA" +PASSWORT = "MyWattanaPassword123" +HOST = "localhost" +PORT = 1521 +SERVICE_NAME = "FREEPDB1" \ No newline at end of file diff --git a/deployment/.env b/deployment/.env index 9c741ea..e426e48 100644 --- a/deployment/.env +++ b/deployment/.env @@ -1,3 +1,4 @@ DOPT_STOP_FOLDER_NAME=python DOPT_INTERNAL_DB=data/wattana.db -DOPT_PATH_LOGGING=data/logs \ No newline at end of file +DOPT_PATH_LOGGING=data/logs +DOPT_PATH_CONFIG=config/wattana.toml \ No newline at end of file diff --git a/src/wattanalyse/README.md b/src/wattanalyse/README.md index bf18895..e5ce939 100644 --- a/src/wattanalyse/README.md +++ b/src/wattanalyse/README.md @@ -4,3 +4,4 @@ - DOPT_STOP_FOLDER_NAME: stop folder to find base path - DOPT_INTERNAL_DB: path to internal database where results for 
further processing are saved, relative to base path - DOPT_PATH_LOGGING: path to logging folder, relative to base path +- DOPT_PATH_CONFIG: path to the config file which can be changed by the user/customer diff --git a/src/wattanalyse/constants.py b/src/wattanalyse/constants.py index 661c9a0..51a8712 100644 --- a/src/wattanalyse/constants.py +++ b/src/wattanalyse/constants.py @@ -4,8 +4,11 @@ import os from pathlib import Path from typing import Final +from dopt_basics import configs from dopt_basics import io as io_ +from wattanalyse import types as t + # PROJECT_ROOT = Path(__file__).resolve().parents[2] LIB_PATH: Final[Path] = Path(__file__).resolve().parent @@ -21,3 +24,10 @@ class Config: DB_PATH_INTERNAL: Path = BASE_PATH / os.getenv("DOPT_INTERNAL_DB", "not_existing") PATH_LOGGING: Path = BASE_PATH / os.getenv("DOPT_PATH_LOGGING", "data/d-opt.log") LOG_FILENAME: str = "dopt.log" + PTH_USER_CFG: Path = BASE_PATH / os.getenv("DOPT_PATH_CONFIG", "config/wattana.toml") + + +user_cfg = configs.load_toml(Config.PTH_USER_CFG) +USER_CFG: t.UserConfig = t.UserConfig( + Datenbank=t.UserConfig_Datenbank(**user_cfg["Datenbank"]) +) diff --git a/src/wattanalyse/types.py b/src/wattanalyse/types.py new file mode 100644 index 0000000..c518040 --- /dev/null +++ b/src/wattanalyse/types.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import dataclasses as dc + + +@dc.dataclass(kw_only=True, slots=True) +class UserConfig_Datenbank: + NUTZER: str + PASSWORT: str + HOST: str + PORT: int + SERVICE_NAME: str + + +@dc.dataclass(kw_only=True, slots=True) +class UserConfig: + Datenbank: UserConfig_Datenbank -- 2.34.1 From af91e05d97e8264eb128435237d373e905c25947 Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 15:41:08 +0200 Subject: [PATCH 16/48] add aggregates for suppliers --- prototypes/02-2_aggregates.py | 177 ++++++++++++++++++++++++++++++++-- 1 file changed, 170 insertions(+), 7 deletions(-) diff --git a/prototypes/02-2_aggregates.py 
b/prototypes/02-2_aggregates.py index d419ef5..dccc4d3 100644 --- a/prototypes/02-2_aggregates.py +++ b/prototypes/02-2_aggregates.py @@ -9,6 +9,7 @@ import sqlalchemy as sql from wattanalyse import db +# %% importlib.reload(db) importlib.reload(external_code) # %% @@ -58,6 +59,7 @@ external_code.dump_order_level_to_internal_database_wipe(df) df = external_code.load_order_level_from_internal_database() df # %% +# ** aggregate production orders tmp = df.clone() # two ways to define the aggregate for date deviations: just use < 0 or use Boolean flag @@ -73,42 +75,203 @@ else: filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 -tmp.select( +tmp = tmp.select( pl.col("Terminabweichung_Anzahl_Tage") .filter(filter_date_deviation_early) .mean() .abs() .round(mode="half_away_from_zero") .cast(pl.Int64) - .alias("Mittlere_Tage_Unterschreitung"), + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG"), pl.col("Terminabweichung_Anzahl_Tage") .filter(filter_date_deviation_late) .mean() .abs() .round(mode="half_away_from_zero") .cast(pl.Int64) - .alias("Mittlere_Tage_Ueberschreitung"), + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG"), pl.col("Terminabweichung_Anzahl_Tage") .std(ddof=1) - .alias("Standardabweichung_Lieferterminabweichung"), + .alias("STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG"), pl.col("Import-Ist_Anzahl_Aenderungen") .mean() .abs() .round(mode="half_away_from_zero") .cast(pl.Int64) - .alias("Mittlere_Anzahl_Anpassungen_Liefertermin"), + .alias("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN"), pl.col("Tage_zu_letzter_PSM_Historie") .list.explode() .mean() .abs() .round(mode="half_away_from_zero") .cast(pl.Int64) - .alias("Mittlere_Abstaende_PSM"), + .alias("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN"), pl.col("Durchlaufzeit_Anzahl_Tage") .mean() .round(mode="half_away_from_zero") .cast(pl.Int64) - .alias("Mittlere_Durchlaufzeit"), + .alias("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE"), +) +tmp +# %% +# to DB transform (mock Oracle 
database) +cols_sorted = ["ID", "AKTUALISIERT_AM"] + [c for c in tmp.columns] +tmp = ( + tmp.with_columns( + pl.lit(1).alias("ID"), + pl.lit(datetime.datetime.now()).alias("AKTUALISIERT_AM"), + ) + .select( + pl.col(pl.Boolean).cast(pl.Int8), + pl.all().exclude(pl.Boolean), + ) + .select(cols_sorted) +) +tmp + +# %% + +# return sql_delete, sql_insert + +print(f"SQL DELETE: {sql_delete}\nSQL Insert: {sql_insert}") +# %% +prepared_oracle_pth = DATA_PTH / "db/oracle_prepare_KPI_PRODUKTIONSAUFTRAEGE.arrow" +tmp.write_ipc(prepared_oracle_pth) +# %% +# ** aggregate supplier +tmp = df.clone() + +USE_BOUNDARIES = False +filter_date_deviation_early: pl.Expr +filter_date_deviation_late: pl.Expr +if USE_BOUNDARIES: + filter_date_deviation_early = pl.col("Terminunterschreitung") + filter_date_deviation_late = pl.col("Terminüberschreitung") +else: + filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 + filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 + +tmp = ( + tmp.group_by("Konfektionär") + .agg( + ( + ( + ~(filter_date_deviation_early | filter_date_deviation_late) + & (pl.col("Import-Ist_Anzahl_Aenderungen") == 0) + ).mean() + * 100 + ) + .round(4, mode="half_away_from_zero") + .alias("QUOTE_ERSTBESTAETIGUNG"), + ((~(filter_date_deviation_early | filter_date_deviation_late)).mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("PROZENT_LIEFERTREUE"), + (filter_date_deviation_early.mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG"), + (filter_date_deviation_late.mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_early) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_late) + 
.mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .std(ddof=1) + .alias("STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG"), + pl.col("Import-Ist_Anzahl_Aenderungen") + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN"), + pl.col("Tage_zu_letzter_PSM_Historie") + .list.explode() + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN"), + pl.col("Durchlaufzeit_Anzahl_Tage") + .mean() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE"), + pl.col("Prod-Qualitaet_Historie") + .list.explode() + .mean() + .round(4, mode="half_away_from_zero") + .alias("MITTLERER_QUALITAETSSCORE_PSM"), + ) + .sort("Konfektionär") +) +tmp + +# %% +tmp = df.clone() +tmp.filter(pl.col.Konfektionär == "BS Make Ltd").filter( + ~(filter_date_deviation_early | filter_date_deviation_late) +).filter(pl.col("Import-Ist_Anzahl_Aenderungen") == 0) +# %% +tmp.filter(pl.col.Konfektionär == "BS Make Ltd") +# %% +tmp.head() +# %% +tmp.filter(pl.col.Konfektionär == "Siluet") + +# %% +tmp.select(pl.col.Konfektionär.str.len_chars().alias("len_char")).sort( + "len_char", descending=True ) # %% +# // whole pipeline +# ** aggregate production orders +tmp = df.clone() + +tmp = external_code.aggregate_production_orders(tmp.lazy()).collect() +print(tmp) +tmp = external_code.oracle_prepare_KPI_aggregate(tmp.lazy()).collect() +print(tmp) +prepared_oracle_pth = DATA_PTH / "db/oracle_prepare_KPI_PRODUKTIONSAUFTRAEGE.arrow" +tmp.write_ipc(prepared_oracle_pth) + +# %% +stmts = external_code.oracle_generate_sql_insert( + table_name="KPI_PRODUKTIONSAUFTRAEGE", columns=tmp.columns +) +print(f"SQL DELETE: {stmts.delete}\nSQL Insert: {stmts.insert}") + +# %% +# ** aggregate supplier +tmp = df.clone() 
+RENAME_SCHEME = {"Konfektionär": "KONFEKTIONAER"} +tmp = external_code.aggregate_suppliers(tmp.lazy()).collect() +print(tmp.head()) +tmp = external_code.oracle_prepare_KPI_aggregate( + tmp.lazy(), + rename_schema=RENAME_SCHEME, + sort_by="KONFEKTIONAER", + sort_descending=False, +).collect() +print(tmp.head()) +prepared_oracle_pth = DATA_PTH / "db/oracle_prepare_KPI_KONFEKTIONAERE.arrow" +tmp.write_ipc(prepared_oracle_pth) +# %% +stmts = external_code.oracle_generate_sql_insert( + table_name="KPI_KONFEKTIONAERE", columns=tmp.columns +) +print(f"SQL DELETE: {stmts.delete}\nSQL Insert: {stmts.insert}") +# %% +tmp +# %% -- 2.34.1 From 516712ca404a6b7aab30f2433c58b4a6ef639525 Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 15:41:25 +0200 Subject: [PATCH 17/48] define schema for supplier aggregates --- src/wattanalyse/db.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py index e2b7fd3..727265f 100644 --- a/src/wattanalyse/db.py +++ b/src/wattanalyse/db.py @@ -14,6 +14,7 @@ DB_URI: Final[str] = f"sqlite:///{constants.Config.DB_PATH_INTERNAL}" ENGINE_INTERNAL: Final[sql.Engine] = sql.create_engine(DB_URI) MD_INTERNAL = sql.MetaData() +MD_EXTERNAL = sql.MetaData() intern_prod_order_t: Table = Table( @@ -79,6 +80,9 @@ intern_prod_order_t_schema: dict[str, type[pl.DataType]] = { MD_INTERNAL.create_all(ENGINE_INTERNAL) +# // external database + +# ** read extern_prod_order_t_schema: dict[str, type[pl.DataType]] = { "VK Auftrag": pl.UInt32, "Artikelbez.": pl.String, @@ -109,3 +113,40 @@ extern_prod_order_t_schema: dict[str, type[pl.DataType]] = { "Anzahl Arbeitstage pro Woche": pl.UInt8, "Blockauftrag": pl.String, } + +# ** write +extern_results_prod_orders_t: Table = Table( + "KPI_PRODUKTIONSAUFTRAEGE", + MD_EXTERNAL, + Column("ID", sql.Integer, sql.CheckConstraint("ID = 1"), primary_key=True), + Column("AKTUALISIERT_AM", sql.DateTime, nullable=False), + 
Column("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG", sql.Integer, nullable=True), + Column("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG", sql.Integer, nullable=True), + Column( + "STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG", sql.Numeric(10, 4), nullable=True + ), + Column("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN", sql.Integer, nullable=True), + Column("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN", sql.Integer, nullable=True), + Column("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE", sql.Integer, nullable=True), +) + +extern_results_suppliers_t: Table = Table( + "KPI_KONFEKTIONAERE", + MD_EXTERNAL, + Column("ID", sql.Integer, primary_key=True), + Column("AKTUALISIERT_AM", sql.DateTime, nullable=False), + Column("KONFEKTIONAER", sql.String(200), nullable=True), + Column("QUOTE_ERSTBESTAETIGUNG", sql.Numeric(7, 4), nullable=True), + Column("PROZENT_LIEFERTREUE", sql.Numeric(7, 4), nullable=True), + Column("ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG", sql.Numeric(7, 4), nullable=True), + Column("ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG", sql.Numeric(7, 4), nullable=True), + Column("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG", sql.Integer, nullable=True), + Column("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG", sql.Integer, nullable=True), + Column( + "STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG", sql.Numeric(10, 4), nullable=True + ), + Column("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN", sql.Integer, nullable=True), + Column("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN", sql.Integer, nullable=True), + Column("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE", sql.Integer, nullable=True), + Column("MITTLERER_QUALITAETSSCORE_PSM", sql.Numeric(5, 4), nullable=True), +) -- 2.34.1 From 4ce5fce0d9eae7ce1507a3736f5b163a329ebe0f Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 15:41:59 +0200 Subject: [PATCH 18/48] add Docker OracleDB for test purposes --- docker-compose.yml | 24 ++++++++++++++++++++++++ oracle/init-scripts/01_init.sql | 32 ++++++++++++++++++++++++++++++++ 2 
files changed, 56 insertions(+) create mode 100644 docker-compose.yml create mode 100644 oracle/init-scripts/01_init.sql diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..294a6c4 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,24 @@ +# cleanup: docker compose down -v + +services: + oracle-db: + # "faststart" tag loads pre-configured DB + # starts in seconds, not minutes! + image: gvenzl/oracle-free:23-slim-faststart + container_name: oracle_dev_db + ports: + - "1521:1521" + environment: + # passwords for system admins (SYS, SYSTEM) + - ORACLE_PASSWORD=Master_Admin + # user/schema at first start + - APP_USER=WATTANA + - APP_USER_PASSWORD=MyWattanaPassword123 + volumes: + - oracle_data:/opt/oracle/oradata + # mounts local folder SQL initialisation scripts + - ./oracle/init-scripts:/container-entrypoint-startdb.d + restart: unless-stopped + +volumes: + oracle_data: \ No newline at end of file diff --git a/oracle/init-scripts/01_init.sql b/oracle/init-scripts/01_init.sql new file mode 100644 index 0000000..97739b9 --- /dev/null +++ b/oracle/init-scripts/01_init.sql @@ -0,0 +1,32 @@ +-- change to default generated pluggable database (PDB) +ALTER SESSION SET CONTAINER = FREEPDB1; + +-- create table directly in new user's schema +CREATE TABLE WATTANA.KPI_PRODUKTIONSAUFTRAEGE ( + ID NUMBER(1) PRIMARY KEY, + AKTUALISIERT_AM TIMESTAMP, + MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG NUMBER(10), + MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG NUMBER(10), + STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG NUMBER(10,4), + MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN NUMBER(10), + MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN NUMBER(10), + MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE NUMBER(10), + CONSTRAINT CHK_SINGLE_ROW CHECK (ID = 1) +); + +CREATE TABLE WATTANA.KPI_KONFEKTIONAERE ( + ID NUMBER PRIMARY KEY, + AKTUALISIERT_AM TIMESTAMP, + KONFEKTIONAER VARCHAR2(200), + QUOTE_ERSTBESTAETIGUNG NUMBER(7,4), + PROZENT_LIEFERTREUE NUMBER(7,4), + 
ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG NUMBER(7,4), + ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG NUMBER(7,4), + MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG NUMBER(10), + MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG NUMBER(10), + STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG NUMBER(10,4), + MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN NUMBER(10), + MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN NUMBER(10), + MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE NUMBER(10), + MITTLERER_QUALITAETSSCORE_PSM NUMBER(5,4) +); -- 2.34.1 From 34ba005ddef283fb7119cc359e3d2c7e0f2a990c Mon Sep 17 00:00:00 2001 From: foefl Date: Mon, 8 Jun 2026 15:42:26 +0200 Subject: [PATCH 19/48] enhanced pipeline functions --- prototypes/03-1_check_db.py | 119 +++++++++++++++++++++ prototypes/external_code.py | 204 +++++++++++++++++++++++++++++++++++- 2 files changed, 322 insertions(+), 1 deletion(-) create mode 100644 prototypes/03-1_check_db.py diff --git a/prototypes/03-1_check_db.py b/prototypes/03-1_check_db.py new file mode 100644 index 0000000..d15b9a5 --- /dev/null +++ b/prototypes/03-1_check_db.py @@ -0,0 +1,119 @@ +# %% +import importlib +from pathlib import Path + +import external_code +import oracledb +import polars as pl + +import wattanalyse +from wattanalyse import constants + +importlib.reload(wattanalyse) +importlib.reload(constants) +# %% +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() + +# %% +conn = oracledb.connect( + user=constants.USER_CFG.Datenbank.NUTZER, + password=constants.USER_CFG.Datenbank.PASSWORT, + host=constants.USER_CFG.Datenbank.HOST, + port=constants.USER_CFG.Datenbank.PORT, + service_name=constants.USER_CFG.Datenbank.SERVICE_NAME, +) + +# %% +# // KPI_PRODUKTIONSAUFTRAEGE +TABLE_NAME = "KPI_PRODUKTIONSAUFTRAEGE" +prepared_oracle_pth = DATA_PTH / f"db/oracle_prepare_{TABLE_NAME}.arrow" +assert prepared_oracle_pth.exists() +df = pl.read_ipc(prepared_oracle_pth) + +# %% +with conn.cursor() as cursor: + 
cursor.execute(f'SELECT * FROM "{TABLE_NAME}"') + data = cursor.fetchall() + columns = [desc[0] for desc in cursor.description] + +print("columns:", columns) +print("data:", data) +# %% +# ** insert +stmts = external_code.oracle_generate_sql_insert(TABLE_NAME, columns=df.columns) +print(f"SQL DELETE: {stmts.delete}\nSQL Insert: {stmts.insert}") + +with conn.cursor() as cursor: + cursor.execute(stmts.delete) + cursor.executemany(stmts.insert, df) + conn.commit() + +# %% +# ** read +stmt = f"SELECT * FROM {TABLE_NAME}" +odf = conn.fetch_df_all(statement=stmt) +loaded_df = pl.from_arrow(odf) +print(loaded_df) + +############# +# %% +# // +TABLE_NAME = "KPI_KONFEKTIONAERE" +prepared_oracle_pth = DATA_PTH / f"db/oracle_prepare_{TABLE_NAME}.arrow" +assert prepared_oracle_pth.exists() +df = pl.read_ipc(prepared_oracle_pth) + +# %% +with conn.cursor() as cursor: + cursor.execute(f'SELECT * FROM "{TABLE_NAME}"') + data = cursor.fetchall() + columns = [desc[0] for desc in cursor.description] + +print("columns:", columns) +print("data:", data) +# %% +# ** insert +stmts = external_code.oracle_generate_sql_insert(TABLE_NAME, columns=df.columns) +print(f"SQL DELETE: {stmts.delete}\nSQL Insert: {stmts.insert}") + +with conn.cursor() as cursor: + cursor.execute(stmts.delete) + cursor.executemany(stmts.insert, df) + conn.commit() + +# %% +# ** read +stmt = f"SELECT * FROM {TABLE_NAME}" +odf = conn.fetch_df_all(statement=stmt) +loaded_df = pl.from_arrow(odf) +print(loaded_df) + +# %% +df.height + +##################################### +# %% +columns = df.columns +spalten_str = ", ".join([f'"{c}"' for c in columns]) +platzhalter_str = ", ".join([f":{i}" for i in range(1, len(columns) + 1)]) + +table_name = "KPI_PRODUKTIONSAUFTRAEGE" +sql_delete = f'DELETE FROM "{table_name}"' +sql_insert = f'INSERT INTO "{table_name}" ({spalten_str}) VALUES ({platzhalter_str})' +print(f"SQL DELETE: {sql_delete}\nSQL Insert: {sql_insert}") +# %% +with conn.cursor() as cursor: + 
cursor.execute(sql_delete) + # df_oracle_bereit wird direkt als Arrow-Stream an Oracle übergeben! + cursor.executemany(sql_insert, df) + conn.commit() + +# %% +stmt = f"SELECT * FROM {table_name}" +odf = conn.fetch_df_all(statement=stmt) +pl_df = pl.from_arrow(odf) +# %% +pl_df +# %% diff --git a/prototypes/external_code.py b/prototypes/external_code.py index 7e7d201..da1f6dd 100644 --- a/prototypes/external_code.py +++ b/prototypes/external_code.py @@ -1,14 +1,19 @@ +from __future__ import annotations + import dataclasses as dc import datetime import enum import json -from typing import Any, Final +from typing import TYPE_CHECKING, Any, Final, cast import polars as pl import sqlalchemy as sql from wattanalyse import db +if TYPE_CHECKING: + from oracledb import Connection as OracleConnection + # 1. cleanup obtained new data # ~~2. load data from internal database~~ # ~~3. integrate with with new data (whole snapshot)~~ @@ -24,6 +29,12 @@ class PreProcessResult: filtered: pl.DataFrame +@dc.dataclass(slots=True, kw_only=True) +class SqlInsertStmts: + delete: str + insert: str + + class QualityPsm(enum.StrEnum): FEHLEND = enum.auto() UNPLAUSIBEL = enum.auto() @@ -453,3 +464,194 @@ def load_order_level_from_internal_database() -> pl.DataFrame: } return data.with_columns(**list_col_parse_conds) + + +# // (4) post-process results + +USE_BOUNDARIES: Final[bool] = False +filter_date_deviation_early: pl.Expr +filter_date_deviation_late: pl.Expr +if USE_BOUNDARIES: + filter_date_deviation_early = pl.col("Terminunterschreitung") + filter_date_deviation_late = pl.col("Terminüberschreitung") +else: + filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 + filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 + + +def aggregate_production_orders( + data: pl.LazyFrame, +) -> pl.LazyFrame: + data = data.select( + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_early) + .mean() + .abs() + .round(mode="half_away_from_zero") 
+ .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_late) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .std(ddof=1) + .alias("STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG"), + pl.col("Import-Ist_Anzahl_Aenderungen") + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN"), + pl.col("Tage_zu_letzter_PSM_Historie") + .list.explode() + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN"), + pl.col("Durchlaufzeit_Anzahl_Tage") + .mean() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE"), + ) + + return data + + +def aggregate_suppliers( + data: pl.LazyFrame, +) -> pl.LazyFrame: + data = data.group_by("Konfektionär").agg( + ( + ( + ~(filter_date_deviation_early | filter_date_deviation_late) + & (pl.col("Import-Ist_Anzahl_Aenderungen") == 0) + ).mean() + * 100 + ) + .round(4, mode="half_away_from_zero") + .alias("QUOTE_ERSTBESTAETIGUNG"), + ((~(filter_date_deviation_early | filter_date_deviation_late)).mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("PROZENT_LIEFERTREUE"), + (filter_date_deviation_early.mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG"), + (filter_date_deviation_late.mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_early) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_late) 
+ .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .std(ddof=1) + .alias("STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG"), + pl.col("Import-Ist_Anzahl_Aenderungen") + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN"), + pl.col("Tage_zu_letzter_PSM_Historie") + .list.explode() + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN"), + pl.col("Durchlaufzeit_Anzahl_Tage") + .mean() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE"), + pl.col("Prod-Qualitaet_Historie") + .list.explode() + .mean() + .round(4, mode="half_away_from_zero") + .alias("MITTLERER_QUALITAETSSCORE_PSM"), + ) + + return data + + +# // (5) external database + + +def oracle_prepare_KPI_aggregate( + data: pl.LazyFrame, + rename_schema: dict[str, str] | None = None, + sort_by: str = "", + sort_descending: bool = False, +) -> pl.LazyFrame: + if rename_schema is not None: + data = data.rename(rename_schema) + + cols_sorted = ["ID", "AKTUALISIERT_AM"] + [c for c in data.collect_schema().names()] + + if sort_by: + data = data.sort(sort_by, descending=sort_descending) + + data = data.with_row_index("ID", 1) + data = ( + data.with_columns( + pl.lit(datetime.datetime.now()).alias("AKTUALISIERT_AM"), + ) + .select( + pl.col(pl.Boolean).cast(pl.Int8), + pl.all().exclude(pl.Boolean), + ) + .select(cols_sorted) + ) + + return data + + +def oracle_generate_sql_insert( + table_name: str, + columns: list, +) -> SqlInsertStmts: + spalten_str = ", ".join([f'"{c}"' for c in columns]) + platzhalter_str = ", ".join([f":{i}" for i in range(1, len(columns) + 1)]) + + sql_delete = f'DELETE FROM "{table_name}"' + sql_insert = f'INSERT INTO "{table_name}" ({spalten_str}) VALUES ({platzhalter_str})' + + 
return SqlInsertStmts(delete=sql_delete, insert=sql_insert) + + +def oracle_load_table_as_polars( + conn: OracleConnection, + table_name: str, +) -> pl.LazyFrame: + stmt = f"SELECT * FROM {table_name}" + odf = conn.fetch_df_all(statement=stmt) + df = cast(pl.DataFrame, pl.from_arrow(odf)) + + return df.lazy() + + +def oracle_save_polars( + conn: OracleConnection, + stmts: SqlInsertStmts, + data: pl.DataFrame, +) -> None: + with conn.cursor() as cursor: + cursor.execute(stmts.delete) + cursor.executemany(stmts.insert, data) + conn.commit() -- 2.34.1 From 59148aaaf371728cb341b85f5f59657c7851ceae Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 10 Jun 2026 13:32:11 +0200 Subject: [PATCH 20/48] prepare external database interaction --- oracle/init-scripts/01_init.sql | 43 ++ ...0260603.py => 01-1_first-look_20260603.py} | 9 +- prototypes/01-2_cleanup_MIS-file.py | 408 ++++++++++++++++++ prototypes/03-2_fill_oracle-db.py | 111 +++++ src/wattanalyse/db.py | 16 +- 5 files changed, 583 insertions(+), 4 deletions(-) rename prototypes/{01_first-look_20260603.py => 01-1_first-look_20260603.py} (98%) create mode 100644 prototypes/01-2_cleanup_MIS-file.py create mode 100644 prototypes/03-2_fill_oracle-db.py diff --git a/oracle/init-scripts/01_init.sql b/oracle/init-scripts/01_init.sql index 97739b9..cfd421f 100644 --- a/oracle/init-scripts/01_init.sql +++ b/oracle/init-scripts/01_init.sql @@ -18,6 +18,7 @@ CREATE TABLE WATTANA.KPI_KONFEKTIONAERE ( ID NUMBER PRIMARY KEY, AKTUALISIERT_AM TIMESTAMP, KONFEKTIONAER VARCHAR2(200), + KONFEKTIONAER_ID NUMBER, QUOTE_ERSTBESTAETIGUNG NUMBER(7,4), PROZENT_LIEFERTREUE NUMBER(7,4), ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG NUMBER(7,4), @@ -30,3 +31,45 @@ CREATE TABLE WATTANA.KPI_KONFEKTIONAERE ( MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE NUMBER(10), MITTLERER_QUALITAETSSCORE_PSM NUMBER(5,4) ); + +CREATE TABLE WATTANA.EXTERN_MIS ( + "ID" NUMBER PRIMARY KEY, + "PA" NUMBER, + "PA Pos" NUMBER, + "VK Auftrag" NUMBER, + "Konfektionär" VARCHAR2(1000), + 
"Lieferantnr." NUMBER +); + +CREATE TABLE WATTANA.EXTERN_PSM ( + "ID" NUMBER PRIMARY KEY, + "VK Auftrag" NUMBER, + "Artikelbez." VARCHAR2(1000), + "Auftragsmenge" NUMBER, + "Kunde" VARCHAR2(1000), + "PA" NUMBER, + "PA Pos" NUMBER, + "PSM gemeldet am" TIMESTAMP, + "Konfektionär" VARCHAR2(1000), + "Lieferantnr." NUMBER, + "Artikelnr." VARCHAR2(300), + "LT Kunde bestätigt" DATE, + "Export Ist" DATE, + "1.bestät. Import Konfektionär" DATE, + "Import Ist" DATE, + "Ablief.(Import Ist+Transport)" DATE, + "Wareneingang am" DATE, + "Wareneingang geprüft" VARCHAR2(10), + "Täglicher Ausstoss" NUMBER, + "Zuschnitt am" DATE, + "Teile in Zuschnitt" NUMBER, + "Teile im Nähband" NUMBER, + "Fertigware aus Nähband" NUMBER, + "Teile kontrolliert" NUMBER, + "Teile verpackt in Karton" NUMBER, + "Anzahl Bänder" NUMBER, + "Anzahl Näher" NUMBER, + "Arbeitsstunden pro Näher" NUMBER, + "Anzahl Arbeitstage pro Woche" NUMBER, + "Blockauftrag" VARCHAR2(10) +); \ No newline at end of file diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01-1_first-look_20260603.py similarity index 98% rename from prototypes/01_first-look_20260603.py rename to prototypes/01-1_first-look_20260603.py index afa61a5..43c2c71 100644 --- a/prototypes/01_first-look_20260603.py +++ b/prototypes/01-1_first-look_20260603.py @@ -25,9 +25,16 @@ data_t1_jobs = data_t1 / "MIS-Auträge_22.csv" assert data_t1_jobs.exists() data_t1_PSM = data_t1 / "Produktionsstandsmeldungen.csv" assert data_t1_PSM.exists() + # %% # // MIS-Aufträge -# pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") +# schema_override = { +# "Fertigungsminuten pro Artikel": pl.String, +# } +schema_override = None +pl.read_csv( + data_t1_jobs, encoding="windows-1252", separator=";", schema_overrides=schema_override +) # %% diff --git a/prototypes/01-2_cleanup_MIS-file.py b/prototypes/01-2_cleanup_MIS-file.py new file mode 100644 index 0000000..b4d50fc --- /dev/null +++ b/prototypes/01-2_cleanup_MIS-file.py @@ -0,0 +1,408 @@ +# %% 
+import datetime +import enum +import importlib +import json +from pathlib import Path +from pprint import pprint +from typing import Any + +import polars as pl +import sqlalchemy as sql + +from wattanalyse import db + +importlib.reload(db) + +# %% +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() +# %% +data_t1 = DATA_PTH / "PSM/20260507" +assert data_t1.exists() +# %% +# data_t1_jobs = data_t1 / "MIS-Auträge_22.csv" +data_t1_jobs = data_t1 / "MIS Auträge ab 22.csv" +assert data_t1_jobs.exists() +data_t1_PSM = data_t1 / "Produktionsstandsmeldungen.csv" +assert data_t1_PSM.exists() +# %% +# // MIS-Aufträge +schema_override = { + "Fertigungsminuten pro Artikel": pl.String, + "Fertigungsminuten pro PA": pl.String, + "Auftragsmenge offen": pl.String, + "Gutschrift Menge": pl.String, + "PA Menge offen": pl.String, +} +# schema_override = None +data = pl.read_csv( + data_t1_jobs, encoding="windows-1252", separator=";", schema_overrides=schema_override +).head(-1) +# %% +faulty_cols = [ + "Fertigungsminuten pro Artikel", + "Fertigungsminuten pro PA", + "Auftragsmenge offen", + "Gutschrift Menge", + "PA Menge offen", +] +data = data.with_columns( + pl.col(faulty_cols) + .str.replace(r"\.(\d)$", r".${1}00") + .str.replace(r"\.(\d\d)$", r".${1}0") + .str.replace_all(r"\.", "") + .cast(pl.Int64) + .name.keep() +) +# %% +# cols = data.columns +# data = data.with_row_index("idx", offset=2) +# data = data.select(["idx"] + cols) +# data.head() + +# %% +data = data.select(["PA", "PA Pos", "VK Auftrag", "Konfektionär"]) +# // save data as raw +target = DATA_PTH / "MIS_20260507.arrow" +data.write_ipc(target) + +# %% +# !! 
schema not working because of faulty base data +# keep this schema for later, maybe helpful at later stages +# for prototyping just use a subset with the relevant features, especially "PA" + "PA Pos" +schema_MIS = { + "Muster neu / vor Ort": pl.String, + "PA Pos": pl.UInt32, + "Agentur": pl.String, + "Konfektionär bestätigt": pl.Int8, + "Auftragsart": pl.String, + "Geplante Fertigungsdauer (Tage)": pl.UInt64, + "Geplanter Ausstoß pro Tag": pl.UInt64, + "PSM Import Ist": pl.Date, + "Export Ist": pl.Date, + "Kundenliefertermin bestätigt": pl.Date, + "Kundenliefertermin Soll": pl.Date, + "Kundenliefertermin bestätigt (erster)": pl.Date, + "PSM Gemeldet am": pl.Date, + "Kunde Kurzbezeichnung": pl.String, + "Artikelbezeichnung": pl.String, + "Artikelnummer": pl.String, + "PA Menge Soll": pl.UInt32, + "PA": pl.UInt64, + "VK Auftrag": pl.UInt64, + "PSM Bereits in Produktion": pl.UInt32, + "Konfektionär": pl.String, + "Auftragseingang": pl.Date, + "Export geplant": pl.Date, + "VK Auftrag Pos": pl.UInt32, + "Import Ist": pl.Date, + "Importtermin bestätigt": pl.Int8, + "Kundenliefertermin geplant (Import + Transport) autom. berechnet": pl.Date, + "1. bestätigter Import Konfektionär": pl.Date, + "PSM Grund Lieferverschiebung": pl.String, + "Saison": pl.Int64, + "Auftragsstatus": pl.String, + "Kd. 
Auftrag": pl.String, + "Schweißartikel": pl.String, + "Konfektionsland": pl.String, + "Artikelfarbe": pl.String, + "Materialbedarf Kosten Soll gesamt": pl.Decimal(scale=2), + "ASD Nr.": pl.String, + "Kunde": pl.String, + "Konzern": pl.String, + "Lieferort": pl.String, + "Kommentar Lieferant KW X": pl.String, + "Kommentar BWBM KW X": pl.String, + "Auftragsmenge": pl.UInt32, + "Auftragsmenge geliefert": pl.UInt32, + "Auftragsmenge offen": pl.UInt32, + "Angebotspreis kalk pro Artikel aus KalkSchema": pl.Decimal(scale=2), + "VK Preis": pl.Decimal(scale=2), + "VK Rechnungen bezahlt am": pl.String, + "VK Rechnungen netto bezahlt": pl.Decimal(scale=2), + "VK Gutschriften Info": pl.String, + "RV Vertragslaufzeit Beginn": pl.Date, + "RV Vertragslaufzeit Ende": pl.Date, + "Auftrag angelegt am": pl.Date, + "Kundenliefertermin Ist (Letzte Lieferung)": pl.Date, + "Exportabschluss vollständig am": pl.Date, + "LP kalk pro Artikel aus Auftragspos.": pl.Decimal(scale=2), + "PA aktiv (PL auf PA umstellen)": pl.Int8, + "Vorfertigungsstufen in PA": pl.String, + "PA Status": pl.String, + "PA Art": pl.String, + "PA Typ": pl.String, + "Dach-PA": pl.String, + "MEL": pl.String, + "Nachlieferungen": pl.String, + "Ersatzlieferungen": pl.String, + "Konfektionär aus Kalkulation": pl.String, + "Konfektionsland aus Kalkulation": pl.String, + "LP kalk gesamt aus Auftragspos.": pl.Decimal(scale=2), + "LP Ist pro Artikel aus PA-Stufe Fertigung": pl.Decimal(scale=2), + "LP bestätigt": pl.Int8, + "LP Ist gesamt aus PA-Stufe Fertigung": pl.Decimal(scale=2), + "Info PPS (aus PA)": pl.String, + "PA Menge offen": pl.UInt32, + "PSM Teile in Zuschnitt": pl.UInt32, + "PSM Teile in Nähband": pl.UInt32, + "Info PPS (allgemein)": pl.String, + "MwVZ kalk pro Artikel aus Auftragpos.": pl.Decimal(scale=2), + "MwVZ kalk gesamt aus Auftragpos.": pl.Decimal(scale=2), + "MwVZ berechnet pro Artikel (Basis PA Stüli)": pl.Decimal(scale=2), + "MwVZ berechnet gesamt (Basis PA Stüli)": pl.Decimal(scale=2), + "MwVZ 
berechnet Bemerkung (Basis PA Stüli)": pl.String, + "MwVZ Ist pro Artikel": pl.Decimal(scale=2), + "MwVZ Ist gesamt": pl.Decimal(scale=2), + "Kalk/Ist-Preise aus ELO": pl.String, + "Materialkosten kalk gesamt aus KalkSchema": pl.Decimal(scale=2), + "Herstellkosten kalk pro Artikel aus KalkSchema": pl.Decimal(scale=2), + "Herstellkosten kalk gesamt aus KalkSchema": pl.Decimal(scale=2), + "Materialbedarf Kosten Soll pro Artikel": pl.Decimal(scale=2), + "Materialkosten Überlieferung pro Artikel": pl.Decimal(scale=2), + "Materialkosten Überlieferung gesamt": pl.Decimal(scale=2), + "Materialkosten Komplettlieferung pro Artikel": pl.Decimal(scale=2), + "Materialkosten Komplettlieferung gesamt": pl.Decimal(scale=2), + "Material Bestellwert pro Artikel": pl.Decimal(scale=2), + "Material Bestellwert gesamt": pl.Decimal(scale=2), + "Material bereits bezahlt": pl.Decimal(scale=2), + "Material noch nicht bezahlt - ELO Rechnung vorhanden": pl.Decimal(scale=2), + "Transportkosten Import kalk pro Artikel aus KalkSchema oder Auftragspos.": pl.Decimal( + scale=2 + ), + "Transportkosten Import kalk gesamt aus KalkSchema oder Auftragspos.": pl.Decimal( + scale=2 + ), + "Transportkosten Import Ist gesamt": pl.Decimal(scale=2), + "Transportkosten Import Ist pro Artikel": pl.Decimal(scale=2), + "Transportkosten Export kalk pro Artikel aus KalkSchema oder Auftragspos. 
": pl.Decimal( + scale=2 + ), + "Transportkosten Export kalk gesamt aus KalkSchema oder Auftragspos.": pl.Decimal( + scale=2 + ), + "Umsatz Auftragsmenge gesamt": pl.Decimal(scale=2), + "Transportkosten Export Ist gesamt": pl.Decimal(scale=2), + "Transportkosten Export Ist pro Artikel": pl.Decimal(scale=2), + "Zuschlag (Stickerei/Zutaten)": pl.Decimal(scale=2), + "LP + MwVz kalk gesamt": pl.Decimal(scale=2), + "LP + MwVz Ist gesamt": pl.Decimal(scale=2), + "Stückliste Freigabe am": pl.Date, + "Kundenmuster Freigabe am": pl.Date, + "Lieferanten ABs komplett am": pl.Date, + "Technische Unterlagen/Muster erstellt am": pl.Date, + "Etikettlayout erstellt/Freigabe Kunde am": pl.Date, + "Schnibi": pl.Date, + "VK Rechnung Kz.": pl.String, + "Kapazität pro Tag pro Näher (min)": pl.UInt16, + "Gesamtkapazität pro Tag (min)": pl.UInt16, + "Fertigungsminuten pro Artikel": pl.UInt32, + "Fertigungsminuten pro PA": pl.UInt32, + "APP Nr.": pl.String, + "VK Auftrag Los": pl.Int64, + "Umsatz Auftragsmenge offen": pl.Decimal(scale=2), + "Umsatz fakturiert": pl.Decimal(scale=2), + "Kundenliefertermin bestätigt (Monat/Jahr)": pl.Date, + "Materialkosten zu Kalk Preis aus Kalk.-Stüli": pl.Decimal(scale=2), + "Materialkosten zu EK Preis aus Kalk.-Stüli": pl.Decimal(scale=2), + "Materialkosten zu EKD Preis aus Kalk.-Stüli": pl.Decimal(scale=2), + "Materialkosten zu Staffel Preis aus Kalk.-Stüli": pl.Decimal(scale=2), + "Materialkosten zu EK Preis aus Soll-Prod.Stüli": pl.Decimal(scale=2), + "Materialkosten zu EKD Preis aus Soll-Prod.Stüli": pl.Decimal(scale=2), + "Materialkosten zu Staffel Preis aus Soll-Prod.Stüli": pl.Decimal(scale=2), + "VK Rechnung erstellt am": pl.Date, + "Nationalisierungsabgabe kalk aus KalkSchema": pl.Decimal(scale=2), + "Materialkosten kalk pro Artikel aus KalkSchema": pl.Decimal(scale=2), + "Materialkosten Artikelstamm": pl.Decimal(scale=2), + "PA Menge Ist": pl.UInt32, + "Kundenliefertermin Ist (Letzte Lieferung) (Monat/Jahr)": pl.Date, + "Import Ist 
(Monat/Jahr)": pl.Date, + "Kalkulationen (alle) zum Artikel": pl.String, + "Kalkulation zu Auftrag": pl.String, + "Material Oberstoff Überlieferung in %": pl.Float64, + "Material Zutaten Überlieferung in %": pl.Float64, + "Lohnkonfektionsbestellnr": pl.String, + "VK Auftrag Pos Text": pl.String, + "PA Vorfertigung Menge": pl.UInt32, + "Preisanpassungen zu Artikel": pl.String, + "MwVZ kalk pro Artikel aus KalkSchema": pl.Decimal(scale=2), + "PA Vorfertigung Menge offen": pl.UInt32, + "PA Vorfertigung Menge Ist": pl.UInt32, + "RV Nr.": pl.String, + "VK Rechnung erstellt am Jahr": pl.UInt16, + "VK Kalk. Angebotspreise (Staffelpreise)": pl.String, + "Gutschrift Menge": pl.UInt32, + "Ausschreibung abgegeben am": pl.Date, + "VK Blockauftrag": pl.String, + "Lieferung im Jahr": pl.UInt16, + "LP kalk pro Artikel aus KalkSchema": pl.Decimal(scale=2), + "LP kalk gesamt aus KalkSchema": pl.Decimal(scale=2), + "Artikel Produktbereich": pl.String, + "VK Blockauftrag Pos": pl.Int32, + "Preisanpassung beantragt": pl.String, + "Preisanpassung erhalten": pl.String, + "VK Preis vor Preisanpassung": pl.Decimal(scale=2), + "VK Preis nach Preisanpassung": pl.Decimal(scale=2), + "Tagesplannr": pl.Int64, + "Abweichung Gesamtkosten pro Artikel Kalk zu Ist": pl.Decimal(scale=2), + "Abweichung Materialkosten pro Artikel Kalk zu Ist": pl.Decimal(scale=2), + "Abweichung Materialkosten pro Artikel Kalk zu Ist %": pl.Float64, + "Abweichung Lohnpreis pro Artikel Kalk zu Ist": pl.Decimal(scale=2), + "Abweichung Lohnpreis pro Artikel Kalk zu Ist %": pl.Float64, + "Abweichung MwVZ pro Artikel Kalk zu Ist": pl.Decimal(scale=2), + "Abweichung MwVZ pro Artikel Kalk zu Ist %": pl.Float64, + "Abweichung Transportkosten Export pro Artikel Kalk zu Ist": pl.Decimal(scale=2), + "Abweichung Transportkosten Export pro Artikel Kalk zu Ist %": pl.Float64, + "BANFen erstellt am (Prod.Planung)": pl.Date, + "Bestellfreigabe am": pl.Date, + "Abweichung Transportkosten Import pro Artikel Kalk zu Ist": 
pl.Decimal(scale=2), + "Abweichung Transportkosten Import pro Artikel Kalk zu Ist %": pl.Float64, + "Artikelgruppierung PPS": pl.String, + "Info QS": pl.String, + "Artikelgruppierung VK": pl.String, + "Mengenart": pl.String, + "Info Z/L": pl.String, + "EK Erste Bestellung erstellt am": pl.Date, + "Info VK": pl.String, + "Info PM": pl.String, + "EK Letzte Bestellung erstellt am": pl.Date, + "EK Letzte AB Oberstoff": pl.Date, + "EK Letzte AB Zutaten": pl.Date, + "Info EK": pl.String, + "Info BuHa": pl.String, + "Info Musternäherei": pl.String, + "VK Ansprechpartner": pl.String, + "Artikel Matchcode": pl.String, + "Artikel Produktgruppe": pl.String, + "Artikel Thema": pl.String, + "PSM Wareneingang geprüft": pl.Int8, + "LV gesendet": pl.Date, + "LV bestätigt": pl.Date, + "VK Suchmerkmal": pl.String, + "PSM Wareneingang am": pl.Date, + "VK Rechnung Nr.": pl.String, + "Lieferschein Nr.": pl.String, + "QS Prüfung vor Ort erforderlich": pl.Int8, + "RV Nr. mit Artikel": pl.String, + "PSM täglicher Ausstoß": pl.UInt32, + "QS Lieferung Qualität aus ELO": pl.String, + "PSM Anzahl Bänder": pl.UInt32, + "PSM Anzahl Näher": pl.UInt32, + "PSM Arbeitsstunden pro Tag pro Näher": pl.UInt8, + "PSM Arbeitstage pro Woche": pl.UInt8, + "PSM Zuschnitt am": pl.Date, + "PSM Fertigware aus Nähband": pl.UInt32, + "PSM Teile kontrolliert": pl.UInt32, + "PSM Teile verpackt": pl.UInt32, + "PSM Hängeware": pl.Int8, + "Kundenliefertermin Verschiebungen": pl.UInt32, + "PSM Anzahl Paletten": pl.UInt32, + "PSM Anzahl Kartons": pl.UInt32, + "PSM Teile in Färberei": pl.UInt32, + "PSM Teile in Stickerei": pl.UInt32, + "Güteprüfung beantragt": pl.Int8, + "Güteprüfung erforderlich": pl.Int8, + "Güteprüfung Termin": pl.Date, + "Güteprüfung Ort": pl.String, + "Materialdatei erforderlich": pl.Int8, + "Materialdatei beantragt am": pl.Date, + "Materialdatei Freigabe am": pl.Date, + "Export bestätigt": pl.Int8, + "MEL gesendet": pl.Int8, + "Lieferart": pl.String, + "Schnittfreigabe": pl.Date, + "Schnibi 
gesendet an": pl.String, + "Etiketten Druckfreigabe": pl.Date, + "Sonderkommissionierung": pl.Int8, + "Sonderkommissionierung Kosten": pl.Decimal(scale=2), + "Artikel Gewicht in g": pl.UInt64, + "Nach-/Ersatzlieferung Transportkosten": pl.Decimal(scale=2), + "HPZ (Herstellerprüfzertifikat) eingereicht": pl.Int8, + "Stückliste Freigabe Bemerkung": pl.String, + "Schnittfreigabe Bemerkung": pl.String, + "RV Vertragsmenge gesamt (BA)": pl.UInt32, + "RV Vertragsmenge abgerufen gesamt (BA)": pl.UInt32, + "RV Vertragsmenge offen gesamt (BA)": pl.UInt32, + "RV Auftragsmenge (DA)": pl.UInt32, + "RV Auftragsmenge geliefert (DA)": pl.UInt32, + "Vertragsstrafe Belegnr. intern/extern mit Belegdatum aus ELO": pl.String, + "RV Auftragsmenge offen (DA)": pl.UInt32, + "RV Forecastmenge": pl.UInt32, + "QS Auftrag aus VK gedruckt": pl.Int8, + "TU gespeichert in Auftrag": pl.Int8, + "Mail Muster für Proforma": pl.Int8, + "Musterliste": pl.Int8, + "Vertragsstrafe Info aus ELO": pl.String, + "EP (Endprüfprotokoll)": pl.Int8, + "ZP (Zwischenprüfprotokoll)": pl.Int8, + "Lagen Laminat": pl.String, + "Workflowart": pl.String, + "Vertragsstrafe Grund aus ELO": pl.String, + "RV Sortierung": pl.String, + "EK Lieferzeit (längste) Oberstoff in Wochen aus Kalk.Stüli": pl.String, + "EK Lieferzeit (längste) Zutaten in Wochen aus Kalk.Stüli": pl.String, + "Vertragsstrafe Verzugstage aus ELO": pl.String, + "Vertragsstrafe Betrag aus ELO": pl.String, + "RV Mindestmenge (BA)": pl.String, + "QS Ansprechpartner für Konfektionär": pl.String, + "Vertragsstrafe weiterbelastet an (Weiterbelastung-Nr.) aus ELO": pl.String, + "Vertragsstrafe Weiterbelastung Betrag aus ELO": pl.Decimal(scale=2), + "Vertragsstrafe Gutschrift Nr. 
aus ELO": pl.String, + "Vertragsstrafe Gutschrift Betrag aus ELO": pl.Decimal(scale=2), + "Vertragsstrafe fiktiv Betrag LT Soll zu LT bestätigt (letzter)": pl.Decimal(scale=2), + "Vertragsstrafe fiktiv Betrag LT Soll zu LT Ist": pl.Decimal(scale=2), + "Ausschreibung Lieferzeit in Tagen (Mo-Fr)": pl.UInt64, + "Konfektionär Gesamtanzahl Näher": pl.UInt64, + "QS VP Muster erforderlich": pl.Int8, + "Abweichung LT Soll zu LT bestätigt (letzter) in Tagen (Mo-Fr)": pl.UInt64, + "Abweichung LT Soll zu LT Ist in Tagen (Mo-Fr)": pl.UInt64, + "RV Mindestmenge geliefert (BA)": pl.UInt32, + "RV Mindestmenge offen (BA)": pl.UInt32, + "RV unverbindliche Menge (BA)": pl.UInt32, + "RV unverbindliche Menge geliefert (BA)": pl.UInt32, + "RV unverbindliche Menge offen (BA)": pl.UInt32, + "RV unverbindliche Restmenge zur Maximalmenge (BA)": pl.UInt32, + "RV unverbindliche Restmenge zur Maximalmenge geliefert (BA)": pl.UInt32, + "RV unverbindliche Restmenge zur Maximalmenge offen (BA)": pl.UInt32, + "QS Lieferung Qualität Bemerkung aus ELO": pl.String, + "Abweichung 'Export Ist' zu 'Zuschnitt am' (Mo-Fr)": pl.UInt64, + "Lieferschein erstellt am": pl.Date, + "Materialdatei EK abgeschlossen": pl.String, + "Lademeter Export": pl.String, + "Palettenanzahl Import": pl.UInt64, + "Transportzeit Konfektionär zu Kunde": pl.UInt32, + "Abweichung LT bestätigt zu LT geplant": pl.UInt64, + "Lieferadresse": pl.String, +} +# %% +data.filter(pl.col("LP kalk gesamt aus KalkSchema").str.contains("6.072")).select( + "LP kalk gesamt aus KalkSchema" +) +# %% +data = data.cast(schema_MIS) +# %% +df = pl.DataFrame( + { + "auftrag_id": [1, 2, 3, 4, 5], + "defekte_zahlen": ["3.11", "1.2", "1.250.000", "500", "1.0"], + } +) + +# 2. Bereinigung durchführen +df_bereinigt = df.with_columns( + pl.col("defekte_zahlen") + # Schritt A: Wenn nur 1 Ziffer nach dem Punkt steht (z.B. .2), zwei Nullen anhängen + .str.replace(r"\.(\d)$", r".${1}00") + # Schritt B: Wenn 2 Ziffern nach dem Punkt stehen (z.B. 
.11), eine Null anhängen + .str.replace(r"\.(\d\d)$", r".${1}0") + # Schritt C: Alle Punkte entfernen (da es reine Tausendertrennzeichen sind) + .str.replace_all(r"\.", "") + # Schritt D: In eine Ganzzahl (Integer) umwandeln + .cast(pl.Int64) + .alias("saubere_zahlen") +) +# %% +df_bereinigt +# %% diff --git a/prototypes/03-2_fill_oracle-db.py b/prototypes/03-2_fill_oracle-db.py new file mode 100644 index 0000000..04a3460 --- /dev/null +++ b/prototypes/03-2_fill_oracle-db.py @@ -0,0 +1,111 @@ +# %% +import importlib +from pathlib import Path +from pprint import pprint + +import external_code +import oracledb +import polars as pl + +import wattanalyse +from wattanalyse import constants, db + +importlib.reload(wattanalyse) +importlib.reload(constants) +importlib.reload(external_code) +importlib.reload(db) +# %% +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() +# %% + +mis_data = DATA_PTH / "MIS_20260507.arrow" +psm_data = DATA_PTH / "PSM_20260507.arrow" + +assert mis_data.exists() +assert psm_data.exists() +# %% +# // prepare +data_mis = pl.read_ipc(mis_data) +select_cols = data_mis.columns +data_mis = data_mis.with_row_index("ID", offset=1) +data_mis = data_mis.select(["ID"] + select_cols) +data_mis.head() + +# %% +data_mis = ( + data_mis.with_columns( + pl.col("Konfektionär").str.replace(r" - [^-]+$", "").alias("Konfektionär_"), + pl.col("Konfektionär") + .str.extract(r" - ([^-]+)$", 1) + .cast(pl.UInt64) + .alias("Lieferantnr."), + ) + .drop("Konfektionär") + .rename({"Konfektionär_": "Konfektionär"}) + .with_columns( + pl.col("Konfektionär").str.strip_chars(" "), + ) +) +# %% +new_mis_data = DATA_PTH / "MIS_prep_20260507.arrow" +data_mis.write_ipc(new_mis_data) +#################################### +# %% +konf_ids = data_mis.select(["Konfektionär", "Lieferantnr."]).unique( + subset=["Konfektionär"], keep="first" +) +# %% +data_psm = pl.read_ipc(psm_data) +select_cols = data_psm.columns +data_psm = 
data_psm.with_row_index("ID", offset=1).with_columns( + pl.col("Konfektionär").str.strip_chars(" ") +) +data_psm.head() +# %% +select_cols = data_psm.columns +konf_idx = select_cols.index("Konfektionär") +select_cols.insert(konf_idx + 1, "Lieferantnr.") +data_psm = data_psm.join(konf_ids, on="Konfektionär", how="left").select(select_cols) +data_psm.head() +# %% +new_psm_data = DATA_PTH / "PSM_prep_20260507.arrow" +data_psm.write_ipc(new_psm_data) +# %% +# // save to database +new_mis_data = DATA_PTH / "MIS_prep_20260507.arrow" +data_mis = pl.read_ipc(new_mis_data) +new_psm_data = DATA_PTH / "PSM_prep_20260507.arrow" +data_psm = pl.read_ipc(new_psm_data) +data_psm.head() +# %% +conn = oracledb.connect( + user=constants.USER_CFG.Datenbank.NUTZER, + password=constants.USER_CFG.Datenbank.PASSWORT, + host=constants.USER_CFG.Datenbank.HOST, + port=constants.USER_CFG.Datenbank.PORT, + service_name=constants.USER_CFG.Datenbank.SERVICE_NAME, +) + +# %% +stmts = external_code.oracle_generate_sql_insert("EXTERN_MIS", data_mis.columns) +external_code.oracle_save_polars(conn, stmts, data_mis) +# %% +stmts = external_code.oracle_generate_sql_insert("EXTERN_PSM", data_psm.columns) +external_code.oracle_save_polars(conn, stmts, data_psm) +# %% +schema = db.extern_MIS_t_schema +df = external_code.oracle_load_table_as_polars(conn, "EXTERN_MIS", schema=schema).collect() +df +# %% +schema = db.extern_prod_order_t_schema +df = external_code.oracle_load_table_as_polars(conn, "EXTERN_PSM", schema=schema).collect() +df +# %% +# // alter table for aggregates of supplier +stmt = 'ALTER TABLE "KPI_KONFEKTIONAERE" ADD ("KONFEKTIONAER_ID" NUMBER);' +with conn.cursor() as cursor: + cursor.execute(stmt) + conn.commit() +# %% diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py index 727265f..9b6da40 100644 --- a/src/wattanalyse/db.py +++ b/src/wattanalyse/db.py @@ -84,7 +84,7 @@ MD_INTERNAL.create_all(ENGINE_INTERNAL) # ** read extern_prod_order_t_schema: dict[str, type[pl.DataType]] = { - 
"VK Auftrag": pl.UInt32, + "VK Auftrag": pl.UInt64, "Artikelbez.": pl.String, "Auftragsmenge": pl.UInt32, "Kunde": pl.String, @@ -92,6 +92,7 @@ extern_prod_order_t_schema: dict[str, type[pl.DataType]] = { "PA Pos": pl.UInt32, "PSM gemeldet am": pl.Datetime, "Konfektionär": pl.String, + "Lieferantnr.": pl.UInt64, "Artikelnr.": pl.String, "LT Kunde bestätigt": pl.Date, "Export Ist": pl.Date, @@ -107,13 +108,21 @@ extern_prod_order_t_schema: dict[str, type[pl.DataType]] = { "Fertigware aus Nähband": pl.UInt64, "Teile kontrolliert": pl.UInt64, "Teile verpackt in Karton": pl.UInt64, - "Anzahl Bänder": pl.UInt16, - "Anzahl Näher": pl.UInt16, + "Anzahl Bänder": pl.UInt32, + "Anzahl Näher": pl.UInt32, "Arbeitsstunden pro Näher": pl.UInt8, "Anzahl Arbeitstage pro Woche": pl.UInt8, "Blockauftrag": pl.String, } +extern_MIS_t_schema: dict[str, type[pl.DataType]] = { + "PA": pl.UInt64, + "PA Pos": pl.UInt32, + "VK Auftrag": pl.UInt64, + "Konfektionär": pl.String, + "Lieferantnr.": pl.UInt64, +} + # ** write extern_results_prod_orders_t: Table = Table( "KPI_PRODUKTIONSAUFTRAEGE", @@ -136,6 +145,7 @@ extern_results_suppliers_t: Table = Table( Column("ID", sql.Integer, primary_key=True), Column("AKTUALISIERT_AM", sql.DateTime, nullable=False), Column("KONFEKTIONAER", sql.String(200), nullable=True), + Column("KONFEKTIONAER_ID", sql.Integer, nullable=True), Column("QUOTE_ERSTBESTAETIGUNG", sql.Numeric(7, 4), nullable=True), Column("PROZENT_LIEFERTREUE", sql.Numeric(7, 4), nullable=True), Column("ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG", sql.Numeric(7, 4), nullable=True), -- 2.34.1 From b66d5a4921bb1023a21b277b80a4fbee6a7f6734 Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 10 Jun 2026 13:32:39 +0200 Subject: [PATCH 21/48] prepare enhanced pipeline --- prototypes/04-1_pipeline_with_db.py | 99 +++++++++++++++++++++++++++++ prototypes/external_code.py | 76 ++++++++++------------ 2 files changed, 133 insertions(+), 42 deletions(-) create mode 100644 
prototypes/04-1_pipeline_with_db.py diff --git a/prototypes/04-1_pipeline_with_db.py b/prototypes/04-1_pipeline_with_db.py new file mode 100644 index 0000000..9fc4cd3 --- /dev/null +++ b/prototypes/04-1_pipeline_with_db.py @@ -0,0 +1,99 @@ +# %% +import importlib +from pathlib import Path +from pprint import pprint + +import external_code +import oracledb +import polars as pl + +import wattanalyse +from wattanalyse import constants, db + +importlib.reload(wattanalyse) +importlib.reload(constants) +importlib.reload(external_code) +importlib.reload(db) + +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() +# %% +conn = oracledb.connect( + user=constants.USER_CFG.Datenbank.NUTZER, + password=constants.USER_CFG.Datenbank.PASSWORT, + host=constants.USER_CFG.Datenbank.HOST, + port=constants.USER_CFG.Datenbank.PORT, + service_name=constants.USER_CFG.Datenbank.SERVICE_NAME, +) + +##################################### +# // Get data from database +# %% +schema = db.extern_MIS_t_schema +data_mis = external_code.oracle_load_table_as_polars( + conn, + schema=schema, + table_name="EXTERN_MIS", +).collect() +data_mis +# %% +schema = db.extern_prod_order_t_schema +data_psm = external_code.oracle_load_table_as_polars( + conn, + schema=schema, + table_name="EXTERN_PSM", +).collect() +data_psm + +# %% +data_mis = data_mis.drop("ID", strict=False) +data_psm = data_psm.drop("ID", strict=False) + +# %% +data_psm.height +# %% +data_psm.join(data_mis, on=["PA", "PA Pos"], how="semi") + +# %% +# +tab_name_psm = "EXTERN_PSM" +tab_name_mis = "EXTERN_MIS" +stmt = f""" +SELECT t1.* FROM "{tab_name_psm}" t1 +WHERE EXISTS( + SELECT 1 FROM "{tab_name_mis}" t2 + WHERE t1."PA" = t2."PA" AND t1."PA Pos" = t2."PA Pos" +) +""" +# test = external_code.oracle_load_table_as_polars( +# conn, db.extern_prod_order_t_schema, "", None +# ).collect() +test = external_code.oracle_load_table_as_polars( + conn, db.extern_prod_order_t_schema, tab_name_psm, stmt 
+).collect() + +# %% +# data_psm = external_code.load_PSM_data(conn).collect() + +# %% +# // preprocess data +# TODO: add check with MIS data if the orders are relevant +tmp = data_psm.clone() +res = external_code.preprocess_psm(tmp.lazy()) +tmp = res.data +tmp = tmp.collect() +tmp +# %% +tmp = tmp.rename({"PA_Pos": "PA Pos"}) +# %% +tmp.join(data_mis, on=["PA", "PA Pos"], how="semi") + +# %% +res.filtered +# %% +tmp = data_psm.clone() +tmp = external_code.aggregate_production_orders(tmp.lazy()).collect() +print(tmp) +tmp = external_code.oracle_prepare_KPI_aggregate(tmp.lazy()).collect() +print(tmp) diff --git a/prototypes/external_code.py b/prototypes/external_code.py index da1f6dd..ec606c1 100644 --- a/prototypes/external_code.py +++ b/prototypes/external_code.py @@ -4,7 +4,8 @@ import dataclasses as dc import datetime import enum import json -from typing import TYPE_CHECKING, Any, Final, cast +import warnings +from typing import TYPE_CHECKING, Any, Final, TypeAlias, cast import polars as pl import sqlalchemy as sql @@ -13,6 +14,7 @@ from wattanalyse import db if TYPE_CHECKING: from oracledb import Connection as OracleConnection + from polars._typing import SchemaDict # 1. cleanup obtained new data # ~~2. load data from internal database~~ @@ -22,6 +24,8 @@ if TYPE_CHECKING: # 4. post-process results # 5. 
write to external database +SqlStatement: TypeAlias = str + @dc.dataclass(slots=True, eq=False) class PreProcessResult: @@ -65,6 +69,22 @@ PRIM_KEYS: Final[list[str]] = ["PA", "PA_Pos"] LOWER_BOUND_DATE_DEVIATION: Final[int] = 0 UPPER_BOUND_DATE_DEVIATION: Final[int] = 0 NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4 +TAB_NAME_PSM: Final[str] = "EXTERN_PSM" +TAB_NAME_MIS: Final[str] = "EXTERN_MIS" + + +# // (0) load data +def load_PSM_data( + conn: OracleConnection, +) -> pl.LazyFrame: + stmt = f""" + SELECT t1.* FROM "{TAB_NAME_PSM}" t1 + WHERE EXISTS( + SELECT 1 FROM "{TAB_NAME_MIS}" t2 + WHERE t1."PA" = t2."PA" AND t1."PA Pos" = t2."PA Pos" + ) + """ + return oracle_load_table_as_polars(conn, db.extern_prod_order_t_schema, None, stmt) # // (1) preprocess @@ -306,44 +326,6 @@ def process_order_level( ) ) - # data = ( - # data.with_columns( - # pl.when( - # (pl.col("Liefertermin_Ist").is_not_null()) - # & (pl.col("Liefertermin_Soll").is_not_null()) - # ) - # .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) - # .otherwise(None) - # .alias("Terminabweichung_Anzahl_Tage") - # ) - # .with_columns( - # pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) - # .then(pl.lit(True)) - # .otherwise(pl.lit(False)) - # .alias("Terminunterschreitung"), - # pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) - # .then(pl.lit(True)) - # .otherwise(pl.lit(False)) - # .alias("Terminüberschreitung"), - # pl.when( - # (pl.col("Liefertermin_Ist").is_not_null()) - # & (pl.col("Prod-Start").is_not_null()) - # ) - # .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) - # .otherwise(None) - # .alias("Durchlaufzeit_Anzahl_Tage"), - # ) - # .with_columns( - # pl.when( - # (pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null()) - # & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0) - # ) - # .then(None) - # .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) - # .alias("Durchlaufzeit_Anzahl_Tage") - # ) 
- # ) - return data @@ -637,11 +619,21 @@ def oracle_generate_sql_insert( def oracle_load_table_as_polars( conn: OracleConnection, - table_name: str, + schema: SchemaDict | None, + table_name: str | None = None, + stmt: SqlStatement | None = None, ) -> pl.LazyFrame: - stmt = f"SELECT * FROM {table_name}" + if not any((table_name, stmt)): + raise ValueError("Table name or SQL statement must be provided") + if all((table_name, stmt)): + warnings.warn( + "Table name and SQL statement provided. In this case, the statement is used." + ) + if not stmt: + stmt = f"SELECT * FROM {table_name}" + odf = conn.fetch_df_all(statement=stmt) - df = cast(pl.DataFrame, pl.from_arrow(odf)) + df = cast(pl.DataFrame, pl.from_arrow(odf, schema_overrides=schema)) return df.lazy() -- 2.34.1 From 5e15c99520685e7ac89965aa5324cbe4800a4171 Mon Sep 17 00:00:00 2001 From: foefl Date: Wed, 10 Jun 2026 16:48:03 +0200 Subject: [PATCH 22/48] construct base pipeline with "run" function --- prototypes/04-1_pipeline_with_db.py | 101 ++-- prototypes/external_code.py | 28 +- src/wattanalyse/db.py | 36 +- src/wattanalyse/logging.py | 3 + src/wattanalyse/pipeline.py | 723 ++++++++++++++++++++++++++++ src/wattanalyse/types.py | 3 + 6 files changed, 844 insertions(+), 50 deletions(-) create mode 100644 src/wattanalyse/pipeline.py diff --git a/prototypes/04-1_pipeline_with_db.py b/prototypes/04-1_pipeline_with_db.py index 9fc4cd3..f4f66a2 100644 --- a/prototypes/04-1_pipeline_with_db.py +++ b/prototypes/04-1_pipeline_with_db.py @@ -51,49 +51,74 @@ data_mis = data_mis.drop("ID", strict=False) data_psm = data_psm.drop("ID", strict=False) # %% -data_psm.height -# %% -data_psm.join(data_mis, on=["PA", "PA Pos"], how="semi") +# // (0) Load from external database +data_psm = external_code.load_PSM_data(conn) +data_psm.collect() # %% -# -tab_name_psm = "EXTERN_PSM" -tab_name_mis = "EXTERN_MIS" -stmt = f""" -SELECT t1.* FROM "{tab_name_psm}" t1 -WHERE EXISTS( - SELECT 1 FROM "{tab_name_mis}" t2 - WHERE t1."PA" = 
t2."PA" AND t1."PA Pos" = t2."PA Pos" -) -""" -# test = external_code.oracle_load_table_as_polars( -# conn, db.extern_prod_order_t_schema, "", None -# ).collect() -test = external_code.oracle_load_table_as_polars( - conn, db.extern_prod_order_t_schema, tab_name_psm, stmt -).collect() - -# %% -# data_psm = external_code.load_PSM_data(conn).collect() - -# %% -# // preprocess data -# TODO: add check with MIS data if the orders are relevant +# // (1) preprocess data tmp = data_psm.clone() res = external_code.preprocess_psm(tmp.lazy()) tmp = res.data -tmp = tmp.collect() -tmp -# %% -tmp = tmp.rename({"PA_Pos": "PA Pos"}) -# %% -tmp.join(data_mis, on=["PA", "PA Pos"], how="semi") +tmp_show = tmp.collect() +tmp_show # %% -res.filtered +# // (2) process on order level +tmp = external_code.process_order_level(tmp) +tmp.collect() +# %% +# // (3) dump to database (intermediate result) +external_code.dump_order_level_to_internal_database_wipe(tmp) +# %% +# // (4) post-process +# ** aggregation for orders +aggregate_orders = external_code.aggregate_production_orders(tmp) +print(aggregate_orders.collect()) + +# ** aggregation for suppliers +aggregate_suppliers = external_code.aggregate_suppliers(tmp) +print(aggregate_suppliers.collect()) +# %% +# // (5) save to external database +# ** orders +aggregate_orders = external_code.oracle_prepare_KPI_aggregate(aggregate_orders) +print(aggregate_orders.head().collect()) +stmts_orders = external_code.oracle_generate_sql_insert( + table_name="KPI_PRODUKTIONSAUFTRAEGE", columns=aggregate_orders.collect_schema().names() +) +print(f"SQL DELETE: {stmts_orders.delete}\nSQL Insert: {stmts_orders.insert}") + + +# ** suppliers +aggregate_suppliers = external_code.oracle_prepare_KPI_aggregate( + aggregate_suppliers, + sort_by="Konfektionaer", + sort_descending=False, +) +print(aggregate_suppliers.head().collect()) +stmts_suppliers = external_code.oracle_generate_sql_insert( + table_name="KPI_KONFEKTIONAERE", 
columns=aggregate_suppliers.collect_schema().names() +) +print(f"SQL DELETE: {stmts_suppliers.delete}\nSQL Insert: {stmts_suppliers.insert}") +# %% +# ** actual saving procedure +external_code.oracle_save_polars(conn, stmts_orders, aggregate_orders.collect()) +external_code.oracle_save_polars(conn, stmts_suppliers, aggregate_suppliers.collect()) + +# %% +print(f"Shape Aggregate Production Orders: {aggregate_orders.collect().shape}") +print(f"Shape Aggregate Suppliers: {aggregate_suppliers.collect().shape}") + +# %% +# // try loading +loaded_orders = external_code.oracle_load_table_as_polars( + conn, db.extern_results_prod_orders_t_schema, table_name="KPI_PRODUKTIONSAUFTRAEGE" +) +loaded_orders.collect() +# %% +loaded_suppliers = external_code.oracle_load_table_as_polars( + conn, db.extern_results_suppliers_t_schema, table_name="KPI_KONFEKTIONAERE" +) +loaded_suppliers.collect() # %% -tmp = data_psm.clone() -tmp = external_code.aggregate_production_orders(tmp.lazy()).collect() -print(tmp) -tmp = external_code.oracle_prepare_KPI_aggregate(tmp.lazy()).collect() -print(tmp) diff --git a/prototypes/external_code.py b/prototypes/external_code.py index ec606c1..b5d84af 100644 --- a/prototypes/external_code.py +++ b/prototypes/external_code.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Final, TypeAlias, cast import polars as pl import sqlalchemy as sql +from dopt_basics.datastructures import flatten from wattanalyse import db @@ -30,7 +31,13 @@ SqlStatement: TypeAlias = str @dc.dataclass(slots=True, eq=False) class PreProcessResult: data: pl.LazyFrame - filtered: pl.DataFrame + filtered: pl.LazyFrame + + +DROP_COLUMNS: Final[list[str]] = cast( + list[str], + list(flatten(((x.lower(), x.upper(), x.capitalize()) for x in ("id", "index", "idx")))), +) @dc.dataclass(slots=True, kw_only=True) @@ -51,7 +58,7 @@ PSM_SCORES: dict[QualityPsm, int] = { QualityPsm.PLAUSIBEL: 2, } -RENAMING_SCHEME: dict[str, str] = { +RENAMING_SCHEME_PSM: dict[str, str] = { "PA Pos": 
"PA_Pos", "PSM gemeldet am": "Meldezeitpunkt_Historie", "Import Ist": "Import-Ist_Historie", @@ -62,6 +69,8 @@ RENAMING_SCHEME: dict[str, str] = { "Fertigware aus Nähband": "Prod-EP30_Historie", "Teile kontrolliert": "Prod-EP40_Historie", "Teile verpackt in Karton": "Prod-EP50_Historie", + "Konfektionär": "Konfektionaer", + "Lieferantnr.": "Konfektionaer_ID", } PRIM_KEYS: Final[list[str]] = ["PA", "PA_Pos"] @@ -91,7 +100,8 @@ def load_PSM_data( def preprocess_psm( data: pl.LazyFrame, ) -> PreProcessResult: - data = data.rename(RENAMING_SCHEME) + data = data.rename(RENAMING_SCHEME_PSM) + data = data.drop(DROP_COLUMNS, strict=False) REGEX_PATTERN = r"^[\s\-#+/$]+$" data = data.with_columns( pl.when(pl.col(pl.String).str.contains(REGEX_PATTERN)) @@ -99,7 +109,7 @@ def preprocess_psm( .otherwise(pl.col(pl.String)) .name.keep() ) - data = data.with_columns(pl.col("Konfektionär").str.strip_chars(" \n\t")) + data = data.with_columns(pl.col("Konfektionaer").str.strip_chars(" \n\t")) filtered_data = pl.LazyFrame(schema=data.collect_schema()) # drop duplicates @@ -161,7 +171,7 @@ def preprocess_psm( filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) data = data.filter(~pl.any_horizontal(cond)) - return PreProcessResult(data=data, filtered=filtered_data.collect()) + return PreProcessResult(data=data, filtered=filtered_data) # // (2) process on order level @@ -169,7 +179,6 @@ def process_order_level( data: pl.LazyFrame, ) -> pl.LazyFrame: # ** renaming - # data = data.rename(RENAMING_SCHEME) # TODO delete, done in pre-processing data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) # ** plausibility check of order quantities @@ -272,7 +281,7 @@ def process_order_level( # whole aggregates see DB schema data = ( data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) - .group_by(PRIM_KEYS + ["Konfektionär"]) + .group_by(PRIM_KEYS + ["Konfektionaer", "Konfektionaer_ID"]) .agg( pl.col("Meldezeitpunkt_Historie"), 
pl.col("Liefertermin_Soll").drop_nulls().first(), @@ -508,7 +517,7 @@ def aggregate_production_orders( def aggregate_suppliers( data: pl.LazyFrame, ) -> pl.LazyFrame: - data = data.group_by("Konfektionär").agg( + data = data.group_by(["Konfektionaer", "Konfektionaer_ID"]).agg( ( ( ~(filter_date_deviation_early | filter_date_deviation_late) @@ -573,8 +582,6 @@ def aggregate_suppliers( # // (5) external database - - def oracle_prepare_KPI_aggregate( data: pl.LazyFrame, rename_schema: dict[str, str] | None = None, @@ -599,6 +606,7 @@ def oracle_prepare_KPI_aggregate( pl.all().exclude(pl.Boolean), ) .select(cols_sorted) + .select(pl.all().name.to_uppercase()) ) return data diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py index 9b6da40..058be1e 100644 --- a/src/wattanalyse/db.py +++ b/src/wattanalyse/db.py @@ -22,7 +22,8 @@ intern_prod_order_t: Table = Table( MD_INTERNAL, Column("PA", sql.Integer, primary_key=True), Column("PA_Pos", sql.Integer, primary_key=True), - Column("Konfektionär", sql.Text, nullable=True), + Column("Konfektionaer", sql.Text, nullable=True), + Column("Konfektionaer_ID", sql.Integer, nullable=True), Column("Meldezeitpunkt_Historie", sql.Text, nullable=False), Column("Liefertermin_Soll", sql.Date, nullable=True), Column("Bestaetigter-Import_Historie", sql.Text, nullable=False), @@ -51,7 +52,8 @@ intern_prod_order_t: Table = Table( intern_prod_order_t_schema: dict[str, type[pl.DataType]] = { "PA": pl.UInt64, "PA_Pos": pl.UInt32, - "Konfektionär": pl.String, + "Konfektionaer": pl.String, + "Konfektionaer_ID": pl.UInt64, "Meldezeitpunkt_Historie": pl.String, "Liefertermin_Soll": pl.Date, "Bestaetigter-Import_Historie": pl.String, @@ -139,6 +141,17 @@ extern_results_prod_orders_t: Table = Table( Column("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE", sql.Integer, nullable=True), ) +extern_results_prod_orders_t_schema: dict[str, type[pl.DataType]] = { + "ID": pl.UInt32, + "AKTUALISIERT_AM": pl.Datetime, + 
"MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG": pl.Int64, + "MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG": pl.Int64, + "STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG": pl.Float64, + "MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN": pl.Int64, + "MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN": pl.Int64, + "MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE": pl.Int64, +} + extern_results_suppliers_t: Table = Table( "KPI_KONFEKTIONAERE", MD_EXTERNAL, @@ -160,3 +173,22 @@ extern_results_suppliers_t: Table = Table( Column("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE", sql.Integer, nullable=True), Column("MITTLERER_QUALITAETSSCORE_PSM", sql.Numeric(5, 4), nullable=True), ) + + +extern_results_suppliers_t_schema: dict[str, type[pl.DataType]] = { + "ID": pl.UInt32, + "AKTUALISIERT_AM": pl.Datetime, + "KONFEKTIONAER": pl.String, + "KONFEKTIONAER_ID": pl.UInt64, + "QUOTE_ERSTBESTAETIGUNG": pl.Float64, + "PROZENT_LIEFERTREUE": pl.Float64, + "ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG": pl.Float64, + "ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG": pl.Float64, + "MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG": pl.Int64, + "MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG": pl.Int64, + "STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG": pl.Float64, + "MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN": pl.Int64, + "MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN": pl.Int64, + "MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE": pl.Int64, + "MITTLERER_QUALITAETSSCORE_PSM": pl.Float64, +} diff --git a/src/wattanalyse/logging.py b/src/wattanalyse/logging.py index befd4d5..65936cf 100644 --- a/src/wattanalyse/logging.py +++ b/src/wattanalyse/logging.py @@ -28,3 +28,6 @@ logger_base = BASE_LOGGER.getChild("wattana") logger_database = logger_base.getChild("database") logger_database.setLevel(logging.DEBUG) + +logger_pipeline = logger_base.getChild("pipeline") +logger_pipeline.setLevel(logging.DEBUG) diff --git a/src/wattanalyse/pipeline.py b/src/wattanalyse/pipeline.py new file mode 100644 index 0000000..8cdfa0c --- /dev/null +++ 
b/src/wattanalyse/pipeline.py @@ -0,0 +1,723 @@ +from __future__ import annotations + +import dataclasses as dc +import datetime +import enum +import json +import warnings +from typing import TYPE_CHECKING, Any, Final, cast + +import polars as pl +import sqlalchemy as sql +from dopt_basics.datastructures import flatten +from dopt_basics.result_pattern import wrap_result + +from wattanalyse import db +from wattanalyse.logging import logger_pipeline as logger +from wattanalyse.types import SqlStatement + +if TYPE_CHECKING: + from oracledb import Connection as OracleConnection + from polars._typing import SchemaDict + + +@dc.dataclass(slots=True, eq=False) +class PreProcessResult: + data: pl.LazyFrame + filtered: pl.LazyFrame + + +DROP_COLUMNS: Final[list[str]] = cast( + list[str], + list(flatten(((x.lower(), x.upper(), x.capitalize()) for x in ("id", "index", "idx")))), # type: ignore +) + + +@dc.dataclass(slots=True, kw_only=True) +class SqlInsertStmts: + delete: str + insert: str + + +class QualityPsm(enum.StrEnum): + FEHLEND = enum.auto() + UNPLAUSIBEL = enum.auto() + PLAUSIBEL = enum.auto() + + +PSM_SCORES: dict[QualityPsm, int] = { + QualityPsm.FEHLEND: 1, + QualityPsm.UNPLAUSIBEL: 0, + QualityPsm.PLAUSIBEL: 2, +} + +RENAMING_SCHEME_PSM: dict[str, str] = { + "PA Pos": "PA_Pos", + "PSM gemeldet am": "Meldezeitpunkt_Historie", + "Import Ist": "Import-Ist_Historie", + "1.bestät. 
Import Konfektionär": "Bestaetigter-Import_Historie", + "Zuschnitt am": "Prod-Start_Historie", + "Teile in Zuschnitt": "Prod-EP10_Historie", + "Teile im Nähband": "Prod-EP20_Historie", + "Fertigware aus Nähband": "Prod-EP30_Historie", + "Teile kontrolliert": "Prod-EP40_Historie", + "Teile verpackt in Karton": "Prod-EP50_Historie", + "Konfektionär": "Konfektionaer", + "Lieferantnr.": "Konfektionaer_ID", +} + +PRIM_KEYS: Final[list[str]] = ["PA", "PA_Pos"] + +LOWER_BOUND_DATE_DEVIATION: Final[int] = 0 +UPPER_BOUND_DATE_DEVIATION: Final[int] = 0 +NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4 +TAB_NAME_PSM: Final[str] = "EXTERN_PSM" +TAB_NAME_MIS: Final[str] = "EXTERN_MIS" + + +# // (0) load data +def load_PSM_data( + conn: OracleConnection, +) -> pl.LazyFrame: + stmt = f""" + SELECT t1.* FROM "{TAB_NAME_PSM}" t1 + WHERE EXISTS( + SELECT 1 FROM "{TAB_NAME_MIS}" t2 + WHERE t1."PA" = t2."PA" AND t1."PA Pos" = t2."PA Pos" + ) + """ + return oracle_load_table_as_polars(conn, db.extern_prod_order_t_schema, None, stmt) + + +# // (1) preprocess +def preprocess_psm( + data: pl.LazyFrame, +) -> PreProcessResult: + data = data.rename(RENAMING_SCHEME_PSM) + data = data.drop(DROP_COLUMNS, strict=False) + REGEX_PATTERN = r"^[\s\-#+/$]+$" + data = data.with_columns( + pl.when(pl.col(pl.String).str.contains(REGEX_PATTERN)) + .then(None) + .otherwise(pl.col(pl.String)) + .name.keep() + ) + data = data.with_columns(pl.col("Konfektionaer").str.strip_chars(" \n\t")) + filtered_data = pl.LazyFrame(schema=data.collect_schema()) + + # drop duplicates + # use null count as information measure, least amount of nulls should be contained + data = data.with_columns(pl.sum_horizontal(pl.all().is_null()).alias("null_count")) + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie", "null_count"], descending=False) + filtered_data = pl.concat( + [ + filtered_data, + data.filter( + ~pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct() + ).drop("null_count"), + ] + ) + data = 
data.filter(pl.struct(PRIM_KEYS + ["Meldezeitpunkt_Historie"]).is_first_distinct()) + data = data.drop("null_count") + + # any NULL values in critical columns + NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") + conds = [pl.col(col).is_null() for col in NOT_NULL_COLS] + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) + data = data.filter(~pl.any_horizontal(*conds)) + + # implausible dates + # dates not allowed to be in the future + current_datetime = datetime.datetime.now() + current_date = current_datetime.date() + NOT_IN_FUTURE_COLS_DATETIME = ("Meldezeitpunkt_Historie",) + NOT_IN_FUTURE_COLS_DATE = ("Wareneingang am", "Prod-Start_Historie") + conds = [ + (pl.col(col) > current_datetime).fill_null(False) + for col in NOT_IN_FUTURE_COLS_DATETIME + ] + conds.extend( + [(pl.col(col) > current_date).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATE] + ) + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) + data = data.filter(~pl.any_horizontal(*conds)) + + # too much in the future or the past + # dates + future_limit = current_date + datetime.timedelta( + days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES) + ) + past_limit = datetime.date(1990, 1, 1) + cond = (pl.col(pl.Date) > future_limit).fill_null(False) | ( + pl.col(pl.Date) < past_limit + ).fill_null(False) + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) + data = data.filter(~pl.any_horizontal(cond)) + # datetimes + future_limit = current_datetime + datetime.timedelta( + days=(365 * NUMBER_YEARS_UPPER_BOUND_DATES) + ) + past_limit = datetime.datetime(1990, 1, 1) + cond = (pl.col(pl.Datetime) > future_limit).fill_null(False) | ( + pl.col(pl.Datetime) < past_limit + ).fill_null(False) + filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(cond))]) + data = data.filter(~pl.any_horizontal(cond)) + + return PreProcessResult(data=data, filtered=filtered_data) + + +# // (2) process on order 
level +def process_order_level( + data: pl.LazyFrame, +) -> pl.LazyFrame: + # ** renaming + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) + + # ** plausibility check of order quantities + PLAUSI_FEATURES: list[str] = [ + "Prod-EP10_Historie", + "Prod-EP20_Historie", + "Prod-EP30_Historie", + "Prod-EP40_Historie", + "Prod-EP50_Historie", + ] + data = data.with_columns( + pl.all_horizontal( + pl.col(PLAUSI_FEATURES).is_null() | (pl.col(PLAUSI_FEATURES) == 0) + ).alias("is_empty") + ) + conditions = [ + pl.col(PLAUSI_FEATURES[i]) >= pl.col(PLAUSI_FEATURES[i + 1]) + for i in range(len(PLAUSI_FEATURES) - 1) + ] + data = data.with_columns( + pl.when(pl.all_horizontal(conditions) | pl.col("is_empty")) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Prod-Qty_is_valid") + ).with_columns( + pl.when(pl.col("is_empty")) + .then(pl.lit(PSM_SCORES[QualityPsm.FEHLEND])) + .when(pl.col("Prod-Qty_is_valid")) + .then(pl.lit(PSM_SCORES[QualityPsm.PLAUSIBEL])) + .otherwise(pl.lit(PSM_SCORES[QualityPsm.UNPLAUSIBEL])) + .alias("Prod-Qualitaet_Historie") + ) + # aggregate hint for "Prod-Qualitaet_Durchschnitt": use "drop_nulls" "last" + # aggregate "Prod-Qualitaet_Historie" and use "mean" + # need additional "alias" on "Prod-Qualitaet_Historie" + + # ** planned or target delivery date + current_date = datetime.datetime.now().date() + print(f"{current_date=}") + data = data.with_columns( + pl.coalesce(["Bestaetigter-Import_Historie", "Import-Ist_Historie"]).alias( + "Liefertermin_Soll" + ) + ) + # aggregate hint for "Liefertermin_Soll": use "drop_nulls" "first" + # first filled field for "Liefertermin Soll" is the relevant target date + # should be first confirmed date, but if this field is not filled we use the first + # filled import by the supplier + + # ** actual delivery date + # logic of Wattana: set date is before current date --> becomes actual value + data = data.with_columns( + pl.when(pl.col("Import-Ist_Historie") < current_date) + 
.then(pl.col("Import-Ist_Historie")) + .otherwise(None) + .alias("Liefertermin_Ist") + ) + # aggregate hint for "Liefertermin_Ist": use "drop_nulls" "last" + # keep last because that is the latest value set by the supplier + # if all values are NULL then NULL is returned (no actual date available) + + # ** duration since last report in days + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( + ( + pl.col("Meldezeitpunkt_Historie") + - pl.col("Meldezeitpunkt_Historie").shift(1).over(PRIM_KEYS) + ) + .dt.total_days() + .alias("Tage_zu_letzter_PSM_Historie") + ) + # aggregate hint for "Tage_zu_letzter_PSM_Durchschnitt" + # aggregate "Tage_zu_letzter_PSM_Historie" and use "mean" (NULL is ignored automatically) + # need additional "alias" on "Tage_zu_letzter_PSM_Historie" + + data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( + # Prüfen: Ist das aktuelle Datum ungleich dem vorherigen Datum derselben Position? + ( + pl.col("Import-Ist_Historie") + != pl.col("Import-Ist_Historie").shift(1).over(PRIM_KEYS) + ) + .fill_null(False) # Der allererste Eintrag hat keinen Vorgänger -> Ist keine Änderung + .alias("Import-Ist_geaendert") + ) + # aggregate hint for "Import-Ist_geaendert" + # aggregate "Import-Ist_geaendert" and use "last" + + # aggregate hint for "Import-Ist_letzter_Wert" + # aggregate "Import-Ist_Historie" and use "drop_nulls" "last" + # need additional "alias" on "Import-Ist_Historie" + + # aggregate hint for "Import-Ist_Anzahl_Aenderungen" + # aggregate "Import-Ist_geaendert" and use "sum" + # need additional "alias" on "Import-Ist_geaendert" + + # aggregate hint for "Prod-Start" + # aggregate "Prod-Start_Historie" and use "drop_nulls" "first" + # first entry should be treated as the truth value, changing later does not make sense + # need additional "alias" on "Prod-Start_Historie" + + # whole aggregates see DB schema + data = ( + data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], 
descending=False) + .group_by(PRIM_KEYS + ["Konfektionaer", "Konfektionaer_ID"]) + .agg( + pl.col("Meldezeitpunkt_Historie"), + pl.col("Liefertermin_Soll").drop_nulls().first(), + pl.col("Bestaetigter-Import_Historie"), + pl.col("Liefertermin_Ist").drop_nulls().last(), + pl.col("Import-Ist_Historie"), + pl.col("Import-Ist_Historie") + .drop_nulls() + .last() + .alias("Import-Ist_letzter_Wert"), + pl.col("Import-Ist_geaendert").last(), + pl.col("Import-Ist_geaendert").sum().alias("Import-Ist_Anzahl_Aenderungen"), + pl.col("Tage_zu_letzter_PSM_Historie"), + pl.col("Tage_zu_letzter_PSM_Historie") + .mean() + .alias("Tage_zu_letzter_PSM_Durchschnitt"), + pl.col("Prod-EP10_Historie"), + pl.col("Prod-EP20_Historie"), + pl.col("Prod-EP30_Historie"), + pl.col("Prod-EP40_Historie"), + pl.col("Prod-EP50_Historie"), + pl.col("Prod-Qualitaet_Historie"), + pl.col("Prod-Qualitaet_Historie").mean().alias("Prod-Qualitaet_Durchschnitt"), + pl.col("Prod-Start_Historie"), + pl.col("Prod-Start_Historie").drop_nulls().first().alias("Prod-Start"), + ) + ) + # ** order specific aggregates + data = ( + data.with_columns( + (pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")) + .dt.total_days() + .alias("Terminabweichung_Anzahl_Tage") + ) + .with_columns( + (pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION).alias( + "Terminunterschreitung" + ), + (pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION).alias( + "Terminüberschreitung" + ), + (pl.col("Liefertermin_Ist") - pl.col("Prod-Start")) + .dt.total_days() + .alias("Durchlaufzeit_Anzahl_Tage"), + ) + .with_columns( + pl.when(pl.col("Durchlaufzeit_Anzahl_Tage") < 0) + .then(None) + .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) + .alias("Durchlaufzeit_Anzahl_Tage") + ) + ) + + return data + + +# // (3) dump order level to internal database +def _json_default( + value: Any, +) -> str: + if isinstance(value, (datetime.date, datetime.datetime)): + return value.isoformat() + raise TypeError + + +def 
_parse_to_json( + x: pl.Series | None, +) -> str | None: + if x is None: + return None + + return json.dumps(x.to_list(), default=_json_default) + + +def dump_order_level_to_internal_database_staging( + data: pl.LazyFrame, +) -> None: + + staging_data = data.with_columns( + pl.col(pl.List) + .map_elements( + _parse_to_json, + return_dtype=pl.String, + ) + .name.keep() + ) + staging_data = staging_data.collect() + rows_inserted = staging_data.write_database( + "Produktionsauftrag-Einzelsicht_Staging", + connection=db.DB_URI, + engine="adbc", + if_table_exists="replace", + ) + if rows_inserted != staging_data.height: + raise RuntimeError("Number of inserted rows and length of staging data do not match.") + + all_columns = staging_data.columns + update_columns = [col for col in all_columns if col not in PRIM_KEYS] + + sql_column_list_str = ", ".join([f'"{c}"' for c in all_columns]) + sql_pk_list_str = ", ".join([f'"{c}"' for c in PRIM_KEYS]) + sql_update_rules_str = ", ".join([f'"{c}" = EXCLUDED."{c}"' for c in update_columns]) + + upsert_sql = f""" + INSERT INTO "Produktionsauftrag-Einzelsicht" ({sql_column_list_str}) + SELECT {sql_column_list_str} FROM "Produktionsauftrag-Einzelsicht_Staging" WHERE 1=1 + ON CONFLICT({sql_pk_list_str}) DO UPDATE SET + {sql_update_rules_str}; + """ + + with db.ENGINE_INTERNAL.begin() as conn: + conn.execute(sql.text(upsert_sql)) + conn.execute( + sql.text('DROP TABLE IF EXISTS "Produktionsauftrag-Einzelsicht_Staging";') + ) + + +def dump_order_level_to_internal_database_wipe( + data: pl.LazyFrame, +) -> None: + + staging_data = data.with_columns( + pl.col(pl.List) + .map_elements( + _parse_to_json, + return_dtype=pl.String, + ) + .name.keep() + ) + # empty table + with db.ENGINE_INTERNAL.begin() as conn: + conn.execute(sql.text('DELETE FROM "Produktionsauftrag-Einzelsicht";')) + + staging_data = staging_data.collect() + rows_inserted = staging_data.write_database( + "Produktionsauftrag-Einzelsicht", + connection=db.DB_URI, + 
engine="adbc", + if_table_exists="append", + ) + if rows_inserted != staging_data.height: + raise RuntimeError("Number of inserted rows and length of staging data do not match.") + + +# ** load order level data from internal database +def load_order_level_from_internal_database() -> pl.DataFrame: + data = pl.read_database_uri( + 'SELECT * FROM "Produktionsauftrag-Einzelsicht"', + uri=db.DB_URI, + engine="adbc", + schema_overrides=db.intern_prod_order_t_schema, + ) + + list_cols_to_type: dict[str, type[pl.DataType]] = { + "Meldezeitpunkt_Historie": pl.Datetime, + "Bestaetigter-Import_Historie": pl.Date, + "Import-Ist_Historie": pl.Date, + "Tage_zu_letzter_PSM_Historie": pl.Int64, + "Prod-EP10_Historie": pl.UInt64, + "Prod-EP20_Historie": pl.UInt64, + "Prod-EP30_Historie": pl.UInt64, + "Prod-EP40_Historie": pl.UInt64, + "Prod-EP50_Historie": pl.UInt64, + "Prod-Qualitaet_Historie": pl.Int32, + "Prod-Start_Historie": pl.Date, + } + + list_col_parse_conds = { + col: pl.col(col).str.json_decode(pl.List(list_type)) + for col, list_type in list_cols_to_type.items() + } + + return data.with_columns(**list_col_parse_conds) + + +# // (4) post-process results + +USE_BOUNDARIES: Final[bool] = False +filter_date_deviation_early: pl.Expr +filter_date_deviation_late: pl.Expr +if USE_BOUNDARIES: + filter_date_deviation_early = pl.col("Terminunterschreitung") + filter_date_deviation_late = pl.col("Terminüberschreitung") +else: + filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 + filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 + + +def aggregate_production_orders( + data: pl.LazyFrame, +) -> pl.LazyFrame: + data = data.select( + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_early) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_late) + .mean() + .abs() + 
.round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .std(ddof=1) + .alias("STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG"), + pl.col("Import-Ist_Anzahl_Aenderungen") + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN"), + pl.col("Tage_zu_letzter_PSM_Historie") + .list.explode() + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN"), + pl.col("Durchlaufzeit_Anzahl_Tage") + .mean() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE"), + ) + + return data + + +def aggregate_suppliers( + data: pl.LazyFrame, +) -> pl.LazyFrame: + data = data.group_by(["Konfektionaer", "Konfektionaer_ID"]).agg( + ( + ( + ~(filter_date_deviation_early | filter_date_deviation_late) + & (pl.col("Import-Ist_Anzahl_Aenderungen") == 0) + ).mean() + * 100 + ) + .round(4, mode="half_away_from_zero") + .alias("QUOTE_ERSTBESTAETIGUNG"), + ((~(filter_date_deviation_early | filter_date_deviation_late)).mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("PROZENT_LIEFERTREUE"), + (filter_date_deviation_early.mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG"), + (filter_date_deviation_late.mean() * 100) + .round(4, mode="half_away_from_zero") + .alias("ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_early) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG"), + pl.col("Terminabweichung_Anzahl_Tage") + .filter(filter_date_deviation_late) + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG"), + 
pl.col("Terminabweichung_Anzahl_Tage") + .std(ddof=1) + .alias("STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG"), + pl.col("Import-Ist_Anzahl_Aenderungen") + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN"), + pl.col("Tage_zu_letzter_PSM_Historie") + .list.explode() + .mean() + .abs() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN"), + pl.col("Durchlaufzeit_Anzahl_Tage") + .mean() + .round(mode="half_away_from_zero") + .cast(pl.Int64) + .alias("MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE"), + pl.col("Prod-Qualitaet_Historie") + .list.explode() + .mean() + .round(4, mode="half_away_from_zero") + .alias("MITTLERER_QUALITAETSSCORE_PSM"), + ) + + return data + + +# // (5) external database +def oracle_prepare_KPI_aggregate( + data: pl.LazyFrame, + rename_schema: dict[str, str] | None = None, + sort_by: str = "", + sort_descending: bool = False, +) -> pl.LazyFrame: + if rename_schema is not None: + data = data.rename(rename_schema) + + cols_sorted = ["ID", "AKTUALISIERT_AM"] + [c for c in data.collect_schema().names()] + + if sort_by: + data = data.sort(sort_by, descending=sort_descending) + + data = data.with_row_index("ID", 1) + data = ( + data.with_columns( + pl.lit(datetime.datetime.now()).alias("AKTUALISIERT_AM"), + ) + .select( + pl.col(pl.Boolean).cast(pl.Int8), + pl.all().exclude(pl.Boolean), + ) + .select(cols_sorted) + .select(pl.all().name.to_uppercase()) + ) + + return data + + +def oracle_generate_sql_insert( + table_name: str, + columns: list, +) -> SqlInsertStmts: + spalten_str = ", ".join([f'"{c}"' for c in columns]) + platzhalter_str = ", ".join([f":{i}" for i in range(1, len(columns) + 1)]) + + sql_delete = f'DELETE FROM "{table_name}"' + sql_insert = f'INSERT INTO "{table_name}" ({spalten_str}) VALUES ({platzhalter_str})' + + return SqlInsertStmts(delete=sql_delete, insert=sql_insert) + + +def oracle_load_table_as_polars( + conn: 
OracleConnection, + schema: SchemaDict | None, + table_name: str | None = None, + stmt: SqlStatement | None = None, +) -> pl.LazyFrame: + if not any((table_name, stmt)): + raise ValueError("Table name or SQL statement must be provided") + if all((table_name, stmt)): + warnings.warn( + "Table name and SQL statement provided. In this case, the statement is used." + ) + if not stmt: + stmt = f"SELECT * FROM {table_name}" + + odf = conn.fetch_df_all(statement=stmt) + df = cast(pl.DataFrame, pl.from_arrow(odf, schema_overrides=schema)) + + return df.lazy() + + +def oracle_save_polars( + conn: OracleConnection, + stmts: SqlInsertStmts, + data: pl.DataFrame, +) -> None: + with conn.cursor() as cursor: + cursor.execute(stmts.delete) + cursor.executemany(stmts.insert, data) + conn.commit() + + +# TODO wrap this in a metadata tracking call +@wrap_result(code_on_error=1, logger=logger) +def run( + conn: OracleConnection, +) -> None: + # // (0) Load from external database + logger.info("Load data from database >load_PSM_data<...") + data = load_PSM_data(conn) + logger.info("Successfully loaded data from database") + + # // (1) preprocess data + logger.info("Preprocess data (cleansing) >preprocess_psm<...") + res = preprocess_psm(data) + data = res.data + logger.info("Successfully preprocessed data") + + # // (2) process on order level + logger.info("Process data on order level >process_order_level<...") + data = process_order_level(data) + logger.info("Successfully processed data on order level") + + # // (3) dump to database (intermediate result) + logger.info("Save order level data in internal database...") + dump_order_level_to_internal_database_wipe(data) + logger.info("Successfully saved order level data in internal DB") + + # // (4) post-process + # ** aggregation for orders + logger.info("Aggregate data with KPI calculation...") + logger.info("...production orders...") + orders_aggregated = aggregate_production_orders(data) + # ** aggregation for suppliers + 
logger.info("...suppliers...") + suppliers_aggregated = aggregate_suppliers(data) + logger.info("Successfully aggregated and calculated KPIs") + + # // (5) save to external database + logger.info("Prepare saving data to external database...") + logger.info("Prepare production order KPI table for Oracle export...") + orders_aggregated = oracle_prepare_KPI_aggregate(orders_aggregated) + stmts_orders = oracle_generate_sql_insert( + table_name="KPI_PRODUKTIONSAUFTRAEGE", + columns=orders_aggregated.collect_schema().names(), + ) + logger.info( + "SQL Statemens:\n--- DELETE: %s\n---INSERT: %s", + stmts_orders.delete, + stmts_orders.insert, + ) + + # ** suppliers + logger.info("Prepare supplier KPI table for Oracle export...") + suppliers_aggregated = oracle_prepare_KPI_aggregate( + suppliers_aggregated, + sort_by="Konfektionaer", + sort_descending=False, + ) + stmts_suppliers = oracle_generate_sql_insert( + table_name="KPI_KONFEKTIONAERE", columns=suppliers_aggregated.collect_schema().names() + ) + logger.info( + "SQL Statemens:\n--- DELETE: %s\n---INSERT: %s", + stmts_suppliers.delete, + stmts_suppliers.insert, + ) + + # ** actual saving procedure + logger.info("Saving data to external database...") + oracle_save_polars(conn, stmts_orders, orders_aggregated.collect()) + oracle_save_polars(conn, stmts_suppliers, suppliers_aggregated.collect()) + logger.info("Successfully saved KPI tables to external database") diff --git a/src/wattanalyse/types.py b/src/wattanalyse/types.py index c518040..e61d0c0 100644 --- a/src/wattanalyse/types.py +++ b/src/wattanalyse/types.py @@ -1,6 +1,9 @@ from __future__ import annotations import dataclasses as dc +from typing import TypeAlias + +SqlStatement: TypeAlias = str @dc.dataclass(kw_only=True, slots=True) -- 2.34.1 From 2a6777becc6eda5e5001cb86018b6dfd2e47a5aa Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 09:18:09 +0200 Subject: [PATCH 23/48] wrapped pipeline --- src/wattanalyse/constants.py | 4 + 
src/wattanalyse/external_interface.py | 29 +++++ src/wattanalyse/{pipeline.py => pipelines.py} | 101 +++++++++++++----- 3 files changed, 106 insertions(+), 28 deletions(-) create mode 100644 src/wattanalyse/external_interface.py rename src/wattanalyse/{pipeline.py => pipelines.py} (90%) diff --git a/src/wattanalyse/constants.py b/src/wattanalyse/constants.py index 51a8712..19561f5 100644 --- a/src/wattanalyse/constants.py +++ b/src/wattanalyse/constants.py @@ -4,6 +4,7 @@ import os from pathlib import Path from typing import Final +import oracledb from dopt_basics import configs from dopt_basics import io as io_ @@ -31,3 +32,6 @@ user_cfg = configs.load_toml(Config.PTH_USER_CFG) USER_CFG: t.UserConfig = t.UserConfig( Datenbank=t.UserConfig_Datenbank(**user_cfg["Datenbank"]) ) + +oracledb.defaults.arraysize = 1000 +oracledb.defaults.prefetchrows = 1000 diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py new file mode 100644 index 0000000..9f533c8 --- /dev/null +++ b/src/wattanalyse/external_interface.py @@ -0,0 +1,29 @@ +import oracledb +from dopt_basics.result_pattern import STATUS_HANDLER + +from wattanalyse import pipelines +from wattanalyse.constants import USER_CFG +from wattanalyse.logging import logger_pipeline + +ORACLE_CONN = oracledb.connect( + user=USER_CFG.Datenbank.NUTZER, + password=USER_CFG.Datenbank.PASSWORT, + host=USER_CFG.Datenbank.HOST, + port=USER_CFG.Datenbank.PORT, + service_name=USER_CFG.Datenbank.SERVICE_NAME, +) + + +def pipeline_KPI_calculation() -> None: + logger_pipeline.info("Start pipeline >KPI_calculation<") + res = pipelines.KPI_calculation(ORACLE_CONN) + + if res.status != STATUS_HANDLER.SUCCESS: + logger_pipeline.error( + "An error occurred during the procedure --- Status:\n%s", + res.status, + stack_info=True, + ) + return + + logger_pipeline.info("Pipeline >KPI_calculation< ended successfully") diff --git a/src/wattanalyse/pipeline.py b/src/wattanalyse/pipelines.py similarity index 90% 
rename from src/wattanalyse/pipeline.py rename to src/wattanalyse/pipelines.py index 8cdfa0c..edc32ec 100644 --- a/src/wattanalyse/pipeline.py +++ b/src/wattanalyse/pipelines.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any, Final, cast import polars as pl import sqlalchemy as sql from dopt_basics.datastructures import flatten -from dopt_basics.result_pattern import wrap_result +from dopt_basics.result_pattern import STATUS_HANDLER, Status, wrap_result from wattanalyse import db from wattanalyse.logging import logger_pipeline as logger @@ -75,7 +75,8 @@ TAB_NAME_PSM: Final[str] = "EXTERN_PSM" TAB_NAME_MIS: Final[str] = "EXTERN_MIS" -# // (0) load data +# // (10) load data +@wrap_result(code_on_error=10) def load_PSM_data( conn: OracleConnection, ) -> pl.LazyFrame: @@ -89,7 +90,8 @@ def load_PSM_data( return oracle_load_table_as_polars(conn, db.extern_prod_order_t_schema, None, stmt) -# // (1) preprocess +# // (20) preprocess +@wrap_result(code_on_error=20) def preprocess_psm( data: pl.LazyFrame, ) -> PreProcessResult: @@ -167,7 +169,8 @@ def preprocess_psm( return PreProcessResult(data=data, filtered=filtered_data) -# // (2) process on order level +# // (30) process on order level +@wrap_result(code_on_error=30) def process_order_level( data: pl.LazyFrame, ) -> pl.LazyFrame: @@ -331,7 +334,7 @@ def process_order_level( return data -# // (3) dump order level to internal database +# // (40) dump order level to internal database def _json_default( value: Any, ) -> str: @@ -349,6 +352,7 @@ def _parse_to_json( return json.dumps(x.to_list(), default=_json_default) +@wrap_result(code_on_error=41) def dump_order_level_to_internal_database_staging( data: pl.LazyFrame, ) -> None: @@ -392,6 +396,7 @@ def dump_order_level_to_internal_database_staging( ) +@wrap_result(code_on_error=40) def dump_order_level_to_internal_database_wipe( data: pl.LazyFrame, ) -> None: @@ -420,6 +425,7 @@ def dump_order_level_to_internal_database_wipe( # ** load order level data from 
internal database +@wrap_result(code_on_error=49) def load_order_level_from_internal_database() -> pl.DataFrame: data = pl.read_database_uri( 'SELECT * FROM "Produktionsauftrag-Einzelsicht"', @@ -450,7 +456,7 @@ def load_order_level_from_internal_database() -> pl.DataFrame: return data.with_columns(**list_col_parse_conds) -# // (4) post-process results +# // (50) post-process results USE_BOUNDARIES: Final[bool] = False filter_date_deviation_early: pl.Expr @@ -463,6 +469,7 @@ else: filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 +@wrap_result(code_on_error=51) def aggregate_production_orders( data: pl.LazyFrame, ) -> pl.LazyFrame: @@ -507,6 +514,7 @@ def aggregate_production_orders( return data +@wrap_result(code_on_error=52) def aggregate_suppliers( data: pl.LazyFrame, ) -> pl.LazyFrame: @@ -574,7 +582,8 @@ def aggregate_suppliers( return data -# // (5) external database +# // (60) external database +@wrap_result(code_on_error=60) def oracle_prepare_KPI_aggregate( data: pl.LazyFrame, rename_schema: dict[str, str] | None = None, @@ -605,6 +614,7 @@ def oracle_prepare_KPI_aggregate( return data +@wrap_result(code_on_error=61) def oracle_generate_sql_insert( table_name: str, columns: list, @@ -639,6 +649,7 @@ def oracle_load_table_as_polars( return df.lazy() +@wrap_result(code_on_error=62) def oracle_save_polars( conn: OracleConnection, stmts: SqlInsertStmts, @@ -652,48 +663,70 @@ def oracle_save_polars( # TODO wrap this in a metadata tracking call @wrap_result(code_on_error=1, logger=logger) -def run( +def KPI_calculation( conn: OracleConnection, -) -> None: - # // (0) Load from external database +) -> Status: + # // (10) Load from external database logger.info("Load data from database >load_PSM_data<...") - data = load_PSM_data(conn) + res = load_PSM_data(conn) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status logger.info("Successfully loaded data from database") - # // (1) preprocess data + # // (20) preprocess data 
logger.info("Preprocess data (cleansing) >preprocess_psm<...") - res = preprocess_psm(data) - data = res.data + res = preprocess_psm(res.unwrap()) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + # data = res.result.data logger.info("Successfully preprocessed data") - # // (2) process on order level + # // (30) process on order level logger.info("Process data on order level >process_order_level<...") - data = process_order_level(data) + res = process_order_level(res.unwrap().data) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + data = res.unwrap() logger.info("Successfully processed data on order level") - # // (3) dump to database (intermediate result) + # // (40) dump to database (intermediate result) logger.info("Save order level data in internal database...") - dump_order_level_to_internal_database_wipe(data) + res = dump_order_level_to_internal_database_wipe(data) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status logger.info("Successfully saved order level data in internal DB") - # // (4) post-process + # // (50) post-process # ** aggregation for orders logger.info("Aggregate data with KPI calculation...") logger.info("...production orders...") - orders_aggregated = aggregate_production_orders(data) + res = aggregate_production_orders(data) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + orders_aggregated = res.unwrap() # ** aggregation for suppliers logger.info("...suppliers...") - suppliers_aggregated = aggregate_suppliers(data) + res = aggregate_suppliers(data) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + suppliers_aggregated = res.unwrap() logger.info("Successfully aggregated and calculated KPIs") - # // (5) save to external database + # // (60) save to external database logger.info("Prepare saving data to external database...") logger.info("Prepare production order KPI table for Oracle export...") - orders_aggregated = oracle_prepare_KPI_aggregate(orders_aggregated) - 
stmts_orders = oracle_generate_sql_insert( + res = oracle_prepare_KPI_aggregate(orders_aggregated) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + orders_aggregated = res.unwrap() + # TODO add table names as variables + res = oracle_generate_sql_insert( table_name="KPI_PRODUKTIONSAUFTRAEGE", columns=orders_aggregated.collect_schema().names(), ) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + stmts_orders = res.unwrap() logger.info( "SQL Statemens:\n--- DELETE: %s\n---INSERT: %s", stmts_orders.delete, @@ -702,14 +735,20 @@ def run( # ** suppliers logger.info("Prepare supplier KPI table for Oracle export...") - suppliers_aggregated = oracle_prepare_KPI_aggregate( + res = oracle_prepare_KPI_aggregate( suppliers_aggregated, sort_by="Konfektionaer", sort_descending=False, ) - stmts_suppliers = oracle_generate_sql_insert( + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + suppliers_aggregated = res.unwrap() + res = oracle_generate_sql_insert( table_name="KPI_KONFEKTIONAERE", columns=suppliers_aggregated.collect_schema().names() ) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + stmts_suppliers = res.unwrap() logger.info( "SQL Statemens:\n--- DELETE: %s\n---INSERT: %s", stmts_suppliers.delete, @@ -718,6 +757,12 @@ def run( # ** actual saving procedure logger.info("Saving data to external database...") - oracle_save_polars(conn, stmts_orders, orders_aggregated.collect()) - oracle_save_polars(conn, stmts_suppliers, suppliers_aggregated.collect()) + res = oracle_save_polars(conn, stmts_orders, orders_aggregated.collect()) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status + res = oracle_save_polars(conn, stmts_suppliers, suppliers_aggregated.collect()) + if res.status != STATUS_HANDLER.SUCCESS: + return res.status logger.info("Successfully saved KPI tables to external database") + + return STATUS_HANDLER.SUCCESS -- 2.34.1 From 7f864bc76b50decf9971bfad4ea9be73d098bf1e Mon Sep 17 00:00:00 2001 From: 
foefl Date: Thu, 11 Jun 2026 09:46:14 +0200 Subject: [PATCH 24/48] refactoring and enhanced user configuration --- config/wattana.toml | 21 +++++++++--- prototypes/external_code.py | 49 ++++++++-------------------- src/wattanalyse/constants.py | 15 +++++++-- src/wattanalyse/pipelines.py | 62 ++++++++++++++++-------------------- src/wattanalyse/types.py | 30 ++++++++++++++--- 5 files changed, 94 insertions(+), 83 deletions(-) diff --git a/config/wattana.toml b/config/wattana.toml index 2219c64..d81e01b 100644 --- a/config/wattana.toml +++ b/config/wattana.toml @@ -1,6 +1,17 @@ [Datenbank] -NUTZER = "WATTANA" -PASSWORT = "MyWattanaPassword123" -HOST = "localhost" -PORT = 1521 -SERVICE_NAME = "FREEPDB1" \ No newline at end of file +Nutzer = "WATTANA" +Passwort = "MyWattanaPassword123" +Host = "localhost" +Port = 1521 +Service_Name = "FREEPDB1" +Tabellenname_Produktionsstandmeldung = "EXTERN_PSM" # Datenbanktabelle mit den Produktionsstandmeldungen +Tabellenname_MIS_Auftraege = "EXTERN_MIS" # Datenbanktabelle mit den MIS-Aufträgen + +[Datenpipelines_PSM] +Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig = 4 # prüft bei der Vorverarbeitung, ob Datumsangaben über diesen Horizont hinaus vorliegen; diese werden entfernt +Terminabweichung_untere_Schranke = 0 # Anzahl an Tagen +Terminabweichung_obere_Schranke = 0 # Anzahl an Tagen +Nutze_Schranken_Terminabweichung_KPI_Berechnung = true # bei "false" wird 0 als Grenze (oben + unten) angenommen +Score_Qualitaet_Produktionsmengen_fehlend = 1 # Score, wenn durch den Konfektionär die Produktionsmengen gar nicht gepflegt werden +Score_Qualitaet_Produktionsmengen_unplausibel = 0 # Score, wenn durch den Konfektionär die Produktionsmengen nicht plausibel gepflegt werden +Score_Qualitaet_Produktionsmengen_plausibel = 2 # Score, wenn durch den Konfektionär die Produktionsmengen sauber gepflegt werden diff --git a/prototypes/external_code.py b/prototypes/external_code.py index b5d84af..0a69cf2 100644 --- a/prototypes/external_code.py 
+++ b/prototypes/external_code.py @@ -2,7 +2,6 @@ from __future__ import annotations import dataclasses as dc import datetime -import enum import json import warnings from typing import TYPE_CHECKING, Any, Final, TypeAlias, cast @@ -12,21 +11,13 @@ import sqlalchemy as sql from dopt_basics.datastructures import flatten from wattanalyse import db +from wattanalyse.constants import QualityPsm +from wattanalyse.types import SqlInsertStmts, SqlStatement if TYPE_CHECKING: from oracledb import Connection as OracleConnection from polars._typing import SchemaDict -# 1. cleanup obtained new data -# ~~2. load data from internal database~~ -# ~~3. integrate with with new data (whole snapshot)~~ -# 2. process on order level -# 3. save results to internal database -# 4. post-process results -# 5. write to external database - -SqlStatement: TypeAlias = str - @dc.dataclass(slots=True, eq=False) class PreProcessResult: @@ -36,22 +27,10 @@ class PreProcessResult: DROP_COLUMNS: Final[list[str]] = cast( list[str], - list(flatten(((x.lower(), x.upper(), x.capitalize()) for x in ("id", "index", "idx")))), + list(flatten(((x.lower(), x.upper(), x.capitalize()) for x in ("id", "index", "idx")))), # type: ignore ) -@dc.dataclass(slots=True, kw_only=True) -class SqlInsertStmts: - delete: str - insert: str - - -class QualityPsm(enum.StrEnum): - FEHLEND = enum.auto() - UNPLAUSIBEL = enum.auto() - PLAUSIBEL = enum.auto() - - PSM_SCORES: dict[QualityPsm, int] = { QualityPsm.FEHLEND: 1, QualityPsm.UNPLAUSIBEL: 0, @@ -81,6 +60,16 @@ NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4 TAB_NAME_PSM: Final[str] = "EXTERN_PSM" TAB_NAME_MIS: Final[str] = "EXTERN_MIS" +USE_BOUNDARIES: Final[bool] = False +filter_date_deviation_early: pl.Expr +filter_date_deviation_late: pl.Expr +if USE_BOUNDARIES: + filter_date_deviation_early = pl.col("Terminunterschreitung") + filter_date_deviation_late = pl.col("Terminüberschreitung") +else: + filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 + 
filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 + # // (0) load data def load_PSM_data( @@ -458,18 +447,6 @@ def load_order_level_from_internal_database() -> pl.DataFrame: # // (4) post-process results - -USE_BOUNDARIES: Final[bool] = False -filter_date_deviation_early: pl.Expr -filter_date_deviation_late: pl.Expr -if USE_BOUNDARIES: - filter_date_deviation_early = pl.col("Terminunterschreitung") - filter_date_deviation_late = pl.col("Terminüberschreitung") -else: - filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 - filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 - - def aggregate_production_orders( data: pl.LazyFrame, ) -> pl.LazyFrame: diff --git a/src/wattanalyse/constants.py b/src/wattanalyse/constants.py index 19561f5..fc7ecaf 100644 --- a/src/wattanalyse/constants.py +++ b/src/wattanalyse/constants.py @@ -1,5 +1,6 @@ from __future__ import annotations +import enum import os from pathlib import Path from typing import Final @@ -10,14 +11,13 @@ from dopt_basics import io as io_ from wattanalyse import types as t -# PROJECT_ROOT = Path(__file__).resolve().parents[2] LIB_PATH: Final[Path] = Path(__file__).resolve().parent BASE_PATH = io_.search_folder_path( LIB_PATH, stop_folder_name=os.getenv("DOPT_STOP_FOLDER_NAME", "python") ) -assert BASE_PATH +assert BASE_PATH, "base path not found" class Config: @@ -30,8 +30,17 @@ class Config: user_cfg = configs.load_toml(Config.PTH_USER_CFG) USER_CFG: t.UserConfig = t.UserConfig( - Datenbank=t.UserConfig_Datenbank(**user_cfg["Datenbank"]) + Datenbank=t.UserConfig_Datenbank(**user_cfg["Datenbank"]), + Datenpipelines_PSM=t.UserConfig_Pipelines_PSM(**user_cfg["Datenpipelines_PSM"]), ) +# ** DB interaction oracledb.defaults.arraysize = 1000 oracledb.defaults.prefetchrows = 1000 + + +# ** pipelines +class QualityPsm(enum.StrEnum): + FEHLEND = enum.auto() + UNPLAUSIBEL = enum.auto() + PLAUSIBEL = enum.auto() diff --git a/src/wattanalyse/pipelines.py 
b/src/wattanalyse/pipelines.py index edc32ec..5b76d07 100644 --- a/src/wattanalyse/pipelines.py +++ b/src/wattanalyse/pipelines.py @@ -2,7 +2,6 @@ from __future__ import annotations import dataclasses as dc import datetime -import enum import json import warnings from typing import TYPE_CHECKING, Any, Final, cast @@ -13,8 +12,9 @@ from dopt_basics.datastructures import flatten from dopt_basics.result_pattern import STATUS_HANDLER, Status, wrap_result from wattanalyse import db +from wattanalyse.constants import USER_CFG, QualityPsm from wattanalyse.logging import logger_pipeline as logger -from wattanalyse.types import SqlStatement +from wattanalyse.types import SqlInsertStmts, SqlStatement if TYPE_CHECKING: from oracledb import Connection as OracleConnection @@ -33,22 +33,10 @@ DROP_COLUMNS: Final[list[str]] = cast( ) -@dc.dataclass(slots=True, kw_only=True) -class SqlInsertStmts: - delete: str - insert: str - - -class QualityPsm(enum.StrEnum): - FEHLEND = enum.auto() - UNPLAUSIBEL = enum.auto() - PLAUSIBEL = enum.auto() - - PSM_SCORES: dict[QualityPsm, int] = { - QualityPsm.FEHLEND: 1, - QualityPsm.UNPLAUSIBEL: 0, - QualityPsm.PLAUSIBEL: 2, + QualityPsm.FEHLEND: USER_CFG.Datenpipelines_PSM.Score_Qualitaet_Produktionsmengen_fehlend, + QualityPsm.UNPLAUSIBEL: USER_CFG.Datenpipelines_PSM.Score_Qualitaet_Produktionsmengen_unplausibel, + QualityPsm.PLAUSIBEL: USER_CFG.Datenpipelines_PSM.Score_Qualitaet_Produktionsmengen_plausibel, } RENAMING_SCHEME_PSM: dict[str, str] = { @@ -68,11 +56,29 @@ RENAMING_SCHEME_PSM: dict[str, str] = { PRIM_KEYS: Final[list[str]] = ["PA", "PA_Pos"] -LOWER_BOUND_DATE_DEVIATION: Final[int] = 0 -UPPER_BOUND_DATE_DEVIATION: Final[int] = 0 -NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = 4 -TAB_NAME_PSM: Final[str] = "EXTERN_PSM" -TAB_NAME_MIS: Final[str] = "EXTERN_MIS" +LOWER_BOUND_DATE_DEVIATION: Final[int] = ( + USER_CFG.Datenpipelines_PSM.Terminabweichung_untere_Schranke +) +UPPER_BOUND_DATE_DEVIATION: Final[int] = ( + 
USER_CFG.Datenpipelines_PSM.Terminabweichung_obere_Schranke +) +NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = ( + USER_CFG.Datenpipelines_PSM.Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig +) +TAB_NAME_PSM: Final[str] = USER_CFG.Datenbank.Tabellenname_Produktionsstandmeldung +TAB_NAME_MIS: Final[str] = USER_CFG.Datenbank.Tabellenname_MIS_Auftraege + +USE_BOUNDARIES: Final[bool] = ( + USER_CFG.Datenpipelines_PSM.Nutze_Schranken_Terminabweichung_KPI_Berechnung +) +filter_date_deviation_early: pl.Expr +filter_date_deviation_late: pl.Expr +if USE_BOUNDARIES: + filter_date_deviation_early = pl.col("Terminunterschreitung") + filter_date_deviation_late = pl.col("Terminüberschreitung") +else: + filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 + filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 # // (10) load data @@ -457,18 +463,6 @@ def load_order_level_from_internal_database() -> pl.DataFrame: # // (50) post-process results - -USE_BOUNDARIES: Final[bool] = False -filter_date_deviation_early: pl.Expr -filter_date_deviation_late: pl.Expr -if USE_BOUNDARIES: - filter_date_deviation_early = pl.col("Terminunterschreitung") - filter_date_deviation_late = pl.col("Terminüberschreitung") -else: - filter_date_deviation_early = pl.col("Terminabweichung_Anzahl_Tage") < 0 - filter_date_deviation_late = pl.col("Terminabweichung_Anzahl_Tage") > 0 - - @wrap_result(code_on_error=51) def aggregate_production_orders( data: pl.LazyFrame, diff --git a/src/wattanalyse/types.py b/src/wattanalyse/types.py index e61d0c0..bb5318a 100644 --- a/src/wattanalyse/types.py +++ b/src/wattanalyse/types.py @@ -8,13 +8,33 @@ SqlStatement: TypeAlias = str @dc.dataclass(kw_only=True, slots=True) class UserConfig_Datenbank: - NUTZER: str - PASSWORT: str - HOST: str - PORT: int - SERVICE_NAME: str + Nutzer: str + Passwort: str + Host: str + Port: int + Service_Name: str + Tabellenname_Produktionsstandmeldung: str + Tabellenname_MIS_Auftraege: str + + 
+@dc.dataclass(kw_only=True, slots=True) +class UserConfig_Pipelines_PSM: + Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig: int + Terminabweichung_untere_Schranke: int + Terminabweichung_obere_Schranke: int + Nutze_Schranken_Terminabweichung_KPI_Berechnung: bool + Score_Qualitaet_Produktionsmengen_fehlend: int + Score_Qualitaet_Produktionsmengen_unplausibel: int + Score_Qualitaet_Produktionsmengen_plausibel: int @dc.dataclass(kw_only=True, slots=True) class UserConfig: Datenbank: UserConfig_Datenbank + Datenpipelines_PSM: UserConfig_Pipelines_PSM + + +@dc.dataclass(slots=True, kw_only=True) +class SqlInsertStmts: + delete: str + insert: str -- 2.34.1 From 83b2610ee81e269f6324bd4622c770751a73eb34 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 10:43:30 +0200 Subject: [PATCH 25/48] add metadata tracking --- prototypes/05-1_metadata.py | 49 ++++++++++++++++++++++ src/wattanalyse/db.py | 57 +++++++++++++++++++++++++- src/wattanalyse/external_interface.py | 59 ++++++++++++++++++++++----- src/wattanalyse/pipelines.py | 8 ++++ 4 files changed, 161 insertions(+), 12 deletions(-) create mode 100644 prototypes/05-1_metadata.py diff --git a/prototypes/05-1_metadata.py b/prototypes/05-1_metadata.py new file mode 100644 index 0000000..dab0712 --- /dev/null +++ b/prototypes/05-1_metadata.py @@ -0,0 +1,49 @@ +# %% +import dataclasses as dc +import importlib +import time +from pathlib import Path + +import polars as pl +import sqlalchemy as sql +from dopt_basics import datetime as dopt_dt + +import wattanalyse +from wattanalyse import constants, db, pipelines + +importlib.reload(wattanalyse) +importlib.reload(constants) +importlib.reload(db) + +PROJECT_BASE = Path(__file__).parents[1] +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() + +# %% +start = dopt_dt.current_time_tz() +t1 = time.perf_counter_ns() + +time.sleep(1.5) + +t2 = time.perf_counter_ns() +dur_sek = (t2 - t1) / 1e9 +dur = dopt_dt.timedelta_from_val(dur_sek, 
dopt_dt.TimeUnitsTimedelta.SECONDS) + +stop = start + dur + +print(f"Started: {start}\nDuration: {dur} sek\nEnded: {stop}") +# %% +metadata = db.InternMetadataInsertEntry( + pipeline_name="test", + gestartet_um=start, + beendet_um=stop, + dauer_sek=dur_sek, + status_code=0, +) + +# %% +res = pipelines.write_metadata(metadata) + +# %% +res.status +# %% diff --git a/src/wattanalyse/db.py b/src/wattanalyse/db.py index 058be1e..405cd00 100644 --- a/src/wattanalyse/db.py +++ b/src/wattanalyse/db.py @@ -1,8 +1,10 @@ +import dataclasses as dc +import datetime from typing import Final import polars as pl import sqlalchemy as sql -from sqlalchemy import Column, Table +from sqlalchemy import Column, Table, TypeDecorator from wattanalyse import constants @@ -10,12 +12,65 @@ assert constants.Config.DB_PATH_INTERNAL.parent.exists(), ( "database parent folder does not exists" ) + +class UTCDateTime(TypeDecorator): + """Safely coerces naive datetimes from SQLite into timezone-aware UTC.""" + + impl = sql.DateTime + cache_ok = True + + def process_bind_param(self, value, dialect): + """Runs when saving to the database.""" + if value is not None: + if value.tzinfo is None: + value = value.replace(tzinfo=datetime.timezone.utc) + else: + value = value.astimezone(datetime.timezone.utc) + return value + + def process_result_value(self, value, dialect): + """Runs when fetching from the database.""" + if value is not None and value.tzinfo is None: + value = value.replace(tzinfo=datetime.timezone.utc) + return value + + DB_URI: Final[str] = f"sqlite:///{constants.Config.DB_PATH_INTERNAL}" ENGINE_INTERNAL: Final[sql.Engine] = sql.create_engine(DB_URI) MD_INTERNAL = sql.MetaData() MD_EXTERNAL = sql.MetaData() +# // internal database +intern_metadata_t: Table = Table( + "Metadaten", + MD_INTERNAL, + Column("ID", sql.Integer, primary_key=True, autoincrement=True), + Column("pipeline_name", sql.String, nullable=False), + Column("gestartet_um", UTCDateTime, nullable=False), + 
Column("beendet_um", UTCDateTime, nullable=False), + Column("dauer_sek", sql.Float, nullable=False), + Column("status_code", sql.Integer, nullable=False), +) + +intern_metadata_t_schema: dict[str, type[pl.DataType] | pl.DataType] = { + "ID": pl.UInt64, + "pipeline_name": pl.String, + "gestartet_um": pl.Datetime(time_zone=datetime.timezone.utc), + "beendet_um": pl.Datetime(time_zone=datetime.timezone.utc), + "dauer_sek": pl.Float64, + "status_code": pl.Int16, +} + + +@dc.dataclass(slots=True, kw_only=True) +class InternMetadataInsertEntry: + pipeline_name: str + gestartet_um: datetime.datetime + beendet_um: datetime.datetime + dauer_sek: float + status_code: int + intern_prod_order_t: Table = Table( "Produktionsauftrag-Einzelsicht", diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py index 9f533c8..3474a66 100644 --- a/src/wattanalyse/external_interface.py +++ b/src/wattanalyse/external_interface.py @@ -1,29 +1,66 @@ +import time + +import dopt_basics.datetime as dopt_dt import oracledb from dopt_basics.result_pattern import STATUS_HANDLER -from wattanalyse import pipelines +from wattanalyse import db, pipelines from wattanalyse.constants import USER_CFG -from wattanalyse.logging import logger_pipeline +from wattanalyse.logging import logger_database, logger_pipeline ORACLE_CONN = oracledb.connect( - user=USER_CFG.Datenbank.NUTZER, - password=USER_CFG.Datenbank.PASSWORT, - host=USER_CFG.Datenbank.HOST, - port=USER_CFG.Datenbank.PORT, - service_name=USER_CFG.Datenbank.SERVICE_NAME, + user=USER_CFG.Datenbank.Nutzer, + password=USER_CFG.Datenbank.Passwort, + host=USER_CFG.Datenbank.Host, + port=USER_CFG.Datenbank.Port, + service_name=USER_CFG.Datenbank.Service_Name, ) def pipeline_KPI_calculation() -> None: logger_pipeline.info("Start pipeline >KPI_calculation<") - res = pipelines.KPI_calculation(ORACLE_CONN) + + start = dopt_dt.current_time_tz() + t1 = time.perf_counter_ns() + + res_pipe = pipelines.KPI_calculation(ORACLE_CONN) + 
+ t2 = time.perf_counter_ns() + + if res_pipe.status != STATUS_HANDLER.SUCCESS: + logger_pipeline.error( + ( + "[PIPELINE: KPI Calculation] An error occurred during the " + "procedure --- Status:\n%s" + ), + res_pipe.status, + stack_info=True, + ) + + dur_sek = (t2 - t1) / 1e9 + dur = dopt_dt.timedelta_from_val(dur_sek, dopt_dt.TimeUnitsTimedelta.SECONDS) + stop = start + dur + metadata = db.InternMetadataInsertEntry( + pipeline_name="test", + gestartet_um=start, + beendet_um=stop, + dauer_sek=dur_sek, + status_code=res_pipe.status.code, + ) + res = pipelines.write_metadata(metadata) if res.status != STATUS_HANDLER.SUCCESS: - logger_pipeline.error( - "An error occurred during the procedure --- Status:\n%s", + logger_database.error( + ( + "[INTERNAL DB] An error occurred while writing the metadata to the internal " + "database --- Status:\n%s" + ), res.status, stack_info=True, ) - return logger_pipeline.info("Pipeline >KPI_calculation< ended successfully") + + +if __name__ == "__main__": + pipeline_KPI_calculation() diff --git a/src/wattanalyse/pipelines.py b/src/wattanalyse/pipelines.py index 5b76d07..7bc25e0 100644 --- a/src/wattanalyse/pipelines.py +++ b/src/wattanalyse/pipelines.py @@ -760,3 +760,11 @@ def KPI_calculation( logger.info("Successfully saved KPI tables to external database") return STATUS_HANDLER.SUCCESS + + +@wrap_result(code_on_error=200) +def write_metadata(metadata: db.InternMetadataInsertEntry) -> None: + stmt = sql.insert(db.intern_metadata_t) + + with db.ENGINE_INTERNAL.begin() as conn: + conn.execute(stmt, dc.asdict(metadata)) -- 2.34.1 From e1048bb78cba4ffbb89034db479e0d70b70f0383 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 11:45:31 +0200 Subject: [PATCH 26/48] additional config parameters --- config/wattana.toml | 6 ++++-- src/wattanalyse/types.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/config/wattana.toml b/config/wattana.toml index d81e01b..73536d6 100644 --- a/config/wattana.toml +++ 
b/config/wattana.toml @@ -4,8 +4,10 @@ Passwort = "MyWattanaPassword123" Host = "localhost" Port = 1521 Service_Name = "FREEPDB1" -Tabellenname_Produktionsstandmeldung = "EXTERN_PSM" # Datenbanktabelle mit den Produktionsstandmeldungen -Tabellenname_MIS_Auftraege = "EXTERN_MIS" # Datenbanktabelle mit den MIS-Aufträgen +Tabellenname_Produktionsstandmeldung = "EXTERN_PSM" # Datenbanktabelle/View zum Import der Produktionsstandmeldungen +Tabellenname_MIS_Auftraege = "EXTERN_MIS" # Datenbanktabelle/View mit den MIS-Aufträgen (Import) +Tabellenname_KPI_Auftraege = "KPI_PRODUKTIONSAUFTRAEGE" # Datenbanktabelle zum Export der KPIs über alle Produktionsaufträge +Tabellenname_KPI_Konfektionaere = "KPI_KONFEKTIONAERE" # Datenbanktabelle zum Export der KPIs über alle Konfektionäre [Datenpipelines_PSM] Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig = 4 # prüft bei der Vorverarbeitung, ob Datumsangaben über diesen Horizont hinaus vorliegen; diese werden entfernt diff --git a/src/wattanalyse/types.py b/src/wattanalyse/types.py index bb5318a..3a881a0 100644 --- a/src/wattanalyse/types.py +++ b/src/wattanalyse/types.py @@ -15,6 +15,8 @@ class UserConfig_Datenbank: Service_Name: str Tabellenname_Produktionsstandmeldung: str Tabellenname_MIS_Auftraege: str + Tabellenname_KPI_Auftraege: str + Tabellenname_KPI_Konfektionaere: str @dc.dataclass(kw_only=True, slots=True) -- 2.34.1 From 7c354703174121402d682a33303a19f305804c32 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 11:45:48 +0200 Subject: [PATCH 27/48] enhanced error handling and metadata handling --- prototypes/05-1_metadata.py | 12 ++++ src/wattanalyse/external_interface.py | 81 ++++++++++++++++++++------- src/wattanalyse/pipelines.py | 48 ++++++++++++---- 3 files changed, 109 insertions(+), 32 deletions(-) diff --git a/prototypes/05-1_metadata.py b/prototypes/05-1_metadata.py index dab0712..5af2301 100644 --- a/prototypes/05-1_metadata.py +++ b/prototypes/05-1_metadata.py @@ -47,3 +47,15 @@ res = 
pipelines.write_metadata(metadata) # %% res.status # %% +res = pipelines.load_metadata_from_internal_database() +df = res.unwrap() +# %% +df +# %% +res = pipelines.delete_metadata_from_internal_database() +res.unwrap() +res = pipelines.load_metadata_from_internal_database() +df = res.unwrap() +# %% +df +# %% diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py index 3474a66..b3513eb 100644 --- a/src/wattanalyse/external_interface.py +++ b/src/wattanalyse/external_interface.py @@ -1,24 +1,41 @@ +import sys import time +from typing import Final import dopt_basics.datetime as dopt_dt import oracledb from dopt_basics.result_pattern import STATUS_HANDLER +from oracledb.exceptions import OperationalError from wattanalyse import db, pipelines from wattanalyse.constants import USER_CFG -from wattanalyse.logging import logger_database, logger_pipeline +from wattanalyse.logging import logger_base, logger_database, logger_pipeline -ORACLE_CONN = oracledb.connect( - user=USER_CFG.Datenbank.Nutzer, - password=USER_CFG.Datenbank.Passwort, - host=USER_CFG.Datenbank.Host, - port=USER_CFG.Datenbank.Port, - service_name=USER_CFG.Datenbank.Service_Name, -) +try: + ORACLE_CONN = oracledb.connect( + user=USER_CFG.Datenbank.Nutzer, + password=USER_CFG.Datenbank.Passwort, + host=USER_CFG.Datenbank.Host, + port=USER_CFG.Datenbank.Port, + service_name=USER_CFG.Datenbank.Service_Name, + ) +except OperationalError as err: + logger_base.error( + ( + "[Oracle Database] Could not establish connection. Check if the database " + "online, fully functional and reachable. 
Check the configuration parameters.\n" + ">>> Exception:\n%s" + ), + err, + stack_info=True, + ) + sys.exit(1) -def pipeline_KPI_calculation() -> None: - logger_pipeline.info("Start pipeline >KPI_calculation<") +def pipeline_KPI_calculation() -> int: + return_code: int = 0 + PIPELINE_NAME: Final[str] = "KPI_calculation" + logger_pipeline.info("Start pipeline >%s<", PIPELINE_NAME) start = dopt_dt.current_time_tz() t1 = time.perf_counter_ns() @@ -29,38 +46,62 @@ def pipeline_KPI_calculation() -> None: if res_pipe.status != STATUS_HANDLER.SUCCESS: logger_pipeline.error( - ( - "[PIPELINE: KPI Calculation] An error occurred during the " - "procedure --- Status:\n%s" - ), + ("[PIPELINE: %s] An error occurred during the procedure --- Status:\n%s"), + PIPELINE_NAME, res_pipe.status, stack_info=True, ) + return_code = 1 + logger_database.info("Prepare collected metadata...") dur_sek = (t2 - t1) / 1e9 dur = dopt_dt.timedelta_from_val(dur_sek, dopt_dt.TimeUnitsTimedelta.SECONDS) stop = start + dur metadata = db.InternMetadataInsertEntry( - pipeline_name="test", + pipeline_name=PIPELINE_NAME, gestartet_um=start, beendet_um=stop, dauer_sek=dur_sek, status_code=res_pipe.status.code, ) - res = pipelines.write_metadata(metadata) + res_metadata = pipelines.write_metadata(metadata) - if res.status != STATUS_HANDLER.SUCCESS: + if res_metadata.status != STATUS_HANDLER.SUCCESS: logger_database.error( ( "[INTERNAL DB] An error occurred while writing the metadata to the internal " "database --- Status:\n%s" ), - res.status, + res_metadata.status, stack_info=True, ) + return_code = 1 + return return_code - logger_pipeline.info("Pipeline >KPI_calculation< ended successfully") + logger_database.info("Successfully saved metadata to database") + logger_pipeline.info("Pipeline >%s< ended successfully", PIPELINE_NAME) + logger_pipeline.info( + "Pipeline >%s<: execution duration was %.4f seconds", + PIPELINE_NAME, + metadata.dauer_sek, + ) + + return return_code if __name__ == "__main__": - 
pipeline_KPI_calculation() + try: + code = pipeline_KPI_calculation() + sys.exit(code) + except Exception as err: + logger_base.error( + ( + "[BASE ERROR] An unexpected and unwrapped error occurred during the " + "execution of the pipeline function.\n>>> Exception:\n%s" + ), + err, + stack_info=True, + ) + sys.exit(1) + finally: + ORACLE_CONN.close() diff --git a/src/wattanalyse/pipelines.py b/src/wattanalyse/pipelines.py index 7bc25e0..5b32aba 100644 --- a/src/wattanalyse/pipelines.py +++ b/src/wattanalyse/pipelines.py @@ -4,6 +4,7 @@ import dataclasses as dc import datetime import json import warnings +from pprint import pformat from typing import TYPE_CHECKING, Any, Final, cast import polars as pl @@ -13,6 +14,7 @@ from dopt_basics.result_pattern import STATUS_HANDLER, Status, wrap_result from wattanalyse import db from wattanalyse.constants import USER_CFG, QualityPsm +from wattanalyse.logging import logger_database from wattanalyse.logging import logger_pipeline as logger from wattanalyse.types import SqlInsertStmts, SqlStatement @@ -65,8 +67,10 @@ UPPER_BOUND_DATE_DEVIATION: Final[int] = ( NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = ( USER_CFG.Datenpipelines_PSM.Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig ) -TAB_NAME_PSM: Final[str] = USER_CFG.Datenbank.Tabellenname_Produktionsstandmeldung -TAB_NAME_MIS: Final[str] = USER_CFG.Datenbank.Tabellenname_MIS_Auftraege +TAB_NAME_IMPORT_PSM: Final[str] = USER_CFG.Datenbank.Tabellenname_Produktionsstandmeldung +TAB_NAME_IMPORT_MIS: Final[str] = USER_CFG.Datenbank.Tabellenname_MIS_Auftraege +TAB_NAME_EXPORT_ORDERS: Final[str] = USER_CFG.Datenbank.Tabellenname_KPI_Auftraege +TAB_NAME_EXPORT_SUPPLIERS: Final[str] = USER_CFG.Datenbank.Tabellenname_KPI_Konfektionaere USE_BOUNDARIES: Final[bool] = ( USER_CFG.Datenpipelines_PSM.Nutze_Schranken_Terminabweichung_KPI_Berechnung @@ -87,9 +91,9 @@ def load_PSM_data( conn: OracleConnection, ) -> pl.LazyFrame: stmt = f""" - SELECT t1.* FROM "{TAB_NAME_PSM}" t1 + SELECT t1.* 
FROM "{TAB_NAME_IMPORT_PSM}" t1 WHERE EXISTS( - SELECT 1 FROM "{TAB_NAME_MIS}" t2 + SELECT 1 FROM "{TAB_NAME_IMPORT_MIS}" t2 WHERE t1."PA" = t2."PA" AND t1."PA Pos" = t2."PA Pos" ) """ @@ -655,7 +659,6 @@ def oracle_save_polars( conn.commit() -# TODO wrap this in a metadata tracking call @wrap_result(code_on_error=1, logger=logger) def KPI_calculation( conn: OracleConnection, @@ -713,16 +716,15 @@ def KPI_calculation( if res.status != STATUS_HANDLER.SUCCESS: return res.status orders_aggregated = res.unwrap() - # TODO add table names as variables res = oracle_generate_sql_insert( - table_name="KPI_PRODUKTIONSAUFTRAEGE", + table_name=TAB_NAME_EXPORT_ORDERS, columns=orders_aggregated.collect_schema().names(), ) if res.status != STATUS_HANDLER.SUCCESS: return res.status stmts_orders = res.unwrap() logger.info( - "SQL Statemens:\n--- DELETE: %s\n---INSERT: %s", + "SQL Statemens:\n--- DELETE: %s\n--- INSERT: %s", stmts_orders.delete, stmts_orders.insert, ) @@ -738,13 +740,14 @@ def KPI_calculation( return res.status suppliers_aggregated = res.unwrap() res = oracle_generate_sql_insert( - table_name="KPI_KONFEKTIONAERE", columns=suppliers_aggregated.collect_schema().names() + table_name=TAB_NAME_EXPORT_SUPPLIERS, + columns=suppliers_aggregated.collect_schema().names(), ) if res.status != STATUS_HANDLER.SUCCESS: return res.status stmts_suppliers = res.unwrap() logger.info( - "SQL Statemens:\n--- DELETE: %s\n---INSERT: %s", + "SQL Statemens:\n--- DELETE: %s\n--- INSERT: %s", stmts_suppliers.delete, stmts_suppliers.insert, ) @@ -763,8 +766,29 @@ def KPI_calculation( @wrap_result(code_on_error=200) -def write_metadata(metadata: db.InternMetadataInsertEntry) -> None: +def write_metadata( + metadata: db.InternMetadataInsertEntry, +) -> None: stmt = sql.insert(db.intern_metadata_t) + metadata_insert = dc.asdict(metadata) + logger_database.info( + "Trying to save the following metadata to the internal database:\n%s", + pformat(metadata_insert), + ) with db.ENGINE_INTERNAL.begin() 
as conn: - conn.execute(stmt, dc.asdict(metadata)) + conn.execute(stmt, metadata_insert) + + +@wrap_result(code_on_error=201) +def load_metadata_from_internal_database() -> pl.DataFrame: + with db.ENGINE_INTERNAL.connect() as conn: + res = conn.execute(sql.select(db.intern_metadata_t)) + + return pl.DataFrame(res.fetchall()) + + +@wrap_result(code_on_error=202) +def delete_metadata_from_internal_database() -> None: + with db.ENGINE_INTERNAL.begin() as conn: + conn.execute(sql.delete(db.intern_metadata_t)) -- 2.34.1 From 132219144a2c0407ac15f9c1a8ff6d05a7dae836 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 11:57:35 +0200 Subject: [PATCH 28/48] internalised connection to database and added main routine --- src/wattanalyse/external_interface.py | 63 ++++++++++++++++----------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py index b3513eb..fa397e6 100644 --- a/src/wattanalyse/external_interface.py +++ b/src/wattanalyse/external_interface.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import sys import time -from typing import Final +from typing import TYPE_CHECKING, Final import dopt_basics.datetime as dopt_dt import oracledb @@ -11,28 +13,13 @@ from wattanalyse import db, pipelines from wattanalyse.constants import USER_CFG from wattanalyse.logging import logger_base, logger_database, logger_pipeline -try: - ORACLE_CONN = oracledb.connect( - user=USER_CFG.Datenbank.Nutzer, - password=USER_CFG.Datenbank.Passwort, - host=USER_CFG.Datenbank.Host, - port=USER_CFG.Datenbank.Port, - service_name=USER_CFG.Datenbank.Service_Name, - ) -except OperationalError as err: - logger_base.error( - ( - "[Oracle Database] Could not establish connection. Check if the database " - "online, fully functional and reachable. 
Check the configuration parameters.\n" - ">>> Exception:\n%s" - ), - err, - stack_info=True, - ) - sys.exit(1) +if TYPE_CHECKING: + from wattanalyse.pipelines import OracleConnection -def pipeline_KPI_calculation() -> int: +def pipeline_KPI_calculation( + conn: OracleConnection, +) -> int: return_code: int = 0 PIPELINE_NAME: Final[str] = "KPI_calculation" logger_pipeline.info("Start pipeline >%s<", PIPELINE_NAME) @@ -40,7 +27,7 @@ def pipeline_KPI_calculation() -> int: start = dopt_dt.current_time_tz() t1 = time.perf_counter_ns() - res_pipe = pipelines.KPI_calculation(ORACLE_CONN) + res_pipe = pipelines.KPI_calculation(conn) t2 = time.perf_counter_ns() @@ -89,14 +76,34 @@ def pipeline_KPI_calculation() -> int: return return_code -if __name__ == "__main__": +def main() -> None: try: - code = pipeline_KPI_calculation() + ORACLE_CONN = oracledb.connect( + user=USER_CFG.Datenbank.Nutzer, + password=USER_CFG.Datenbank.Passwort, + host=USER_CFG.Datenbank.Host, + port=USER_CFG.Datenbank.Port, + service_name=USER_CFG.Datenbank.Service_Name, + ) + except OperationalError as err: + logger_base.critical( + ( + "[Oracle Database] Could not establish connection. Check if the database " + "is online, fully functional and reachable. 
Check the configuration " + "parameters.\n>>> Exception:\n%s" + ), + err, + stack_info=True, + ) + sys.exit(1) + + try: + code = pipeline_KPI_calculation(ORACLE_CONN) sys.exit(code) except Exception as err: - logger_base.error( + logger_base.critical( ( - "[BASE ERROR] An unexpected and unwrapped error occurred during the " + "[BASE] An unexpected and unwrapped error occurred during the " "execution of the pipeline function.\n>>> Exception:\n%s" ), err, @@ -105,3 +112,7 @@ if __name__ == "__main__": sys.exit(1) finally: ORACLE_CONN.close() + + +if __name__ == "__main__": + main() -- 2.34.1 From 9dd462ba7f4c859c8b52678cad4d5624fe671a5d Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 12:26:14 +0200 Subject: [PATCH 29/48] disable oracledb option --- src/wattanalyse/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wattanalyse/constants.py b/src/wattanalyse/constants.py index fc7ecaf..dd2ddf1 100644 --- a/src/wattanalyse/constants.py +++ b/src/wattanalyse/constants.py @@ -35,8 +35,8 @@ USER_CFG: t.UserConfig = t.UserConfig( ) # ** DB interaction -oracledb.defaults.arraysize = 1000 -oracledb.defaults.prefetchrows = 1000 +# oracledb.defaults.prefetchrows = 1_000 +oracledb.defaults.arraysize = 5_000 # ** pipelines -- 2.34.1 From 1c51abd12c2b7b23d9d6dfdc5a78035047e992ba Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 12:26:37 +0200 Subject: [PATCH 30/48] add tool for memory profiling to dev dependencies --- pdm.lock | 18 ++++++++++++++++-- pyproject.toml | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pdm.lock b/pdm.lock index ead0185..3a3a479 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:22683d7fc93e640ef1e22b402539ab0bd4a326d6b35aa301423b71ca3488e08c" +content_hash = "sha256:91c3c20f659c5b217ff159d20fad50370aa3dd2033f9608b1e198fa3ae95c3d6" 
[[metadata.targets]] requires_python = ">=3.11" @@ -1666,6 +1666,20 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "memory-profiler" +version = "0.61.0" +requires_python = ">=3.5" +summary = "A module for monitoring memory usage of a python program" +groups = ["dev"] +dependencies = [ + "psutil", +] +files = [ + {file = "memory_profiler-0.61.0-py3-none-any.whl", hash = "sha256:400348e61031e3942ad4d4109d18753b2fb08c2f6fb8290671c5513a34182d84"}, + {file = "memory_profiler-0.61.0.tar.gz", hash = "sha256:4e5b73d7864a1d1292fb76a03e82a3e78ef934d06828a698d9dada76da2067b0"}, +] + [[package]] name = "mistune" version = "3.2.1" @@ -1996,7 +2010,7 @@ name = "psutil" version = "7.2.2" requires_python = ">=3.6" summary = "Cross-platform lib for process and system monitoring." -groups = ["nb"] +groups = ["dev", "nb"] files = [ {file = "psutil-7.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2edccc433cbfa046b980b0df0171cd25bcaeb3a68fe9022db0979e7aa74a826b"}, {file = "psutil-7.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78c8603dcd9a04c7364f1a3e670cea95d51ee865e4efb3556a3a63adef958ea"}, diff --git a/pyproject.toml b/pyproject.toml index 957c081..92f1c3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,6 +143,7 @@ dev = [ "pdoc3>=0.11.5", "bump-my-version>=1.1.1", "nox>=2025.2.9", + "memory-profiler>=0.61.0", ] nb = [ "jupyterlab>=4.3.5", -- 2.34.1 From 4c7bb92596547dfb49163fd8d1fac0da0276da78 Mon Sep 17 00:00:00 2001 From: foefl Date: Thu, 11 Jun 2026 13:51:53 +0200 Subject: [PATCH 31/48] bump version (dev) --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 92f1c3e..419b353 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "wattanalyse" -version = "0.1.0" +version = "0.1.1dev1" description = "analysis of production state messages obtained from 
customers" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, @@ -71,7 +71,7 @@ directory = "reports/coverage" [tool.bumpversion] -current_version = "0.1.0" +current_version = "0.1.1dev1" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. -- 2.34.1 From 922eb8993c1e460f9db99aacd5a79bfe22eb541a Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 08:16:58 +0200 Subject: [PATCH 32/48] remove unnecessary print statement --- src/wattanalyse/pipelines.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wattanalyse/pipelines.py b/src/wattanalyse/pipelines.py index 5b32aba..e4bcf0b 100644 --- a/src/wattanalyse/pipelines.py +++ b/src/wattanalyse/pipelines.py @@ -223,7 +223,6 @@ def process_order_level( # ** planned or target delivery date current_date = datetime.datetime.now().date() - print(f"{current_date=}") data = data.with_columns( pl.coalesce(["Bestaetigter-Import_Historie", "Import-Ist_Historie"]).alias( "Liefertermin_Soll" -- 2.34.1 From 07bcb8de10b1f08559ecb033f22c43b8205f8a83 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 08:55:27 +0200 Subject: [PATCH 33/48] fix loading of `.env` files --- src/wattanalyse/__init__.py | 7 ++++++- src/wattanalyse/constants.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/wattanalyse/__init__.py b/src/wattanalyse/__init__.py index fa1a5b0..ea8075d 100644 --- a/src/wattanalyse/__init__.py +++ b/src/wattanalyse/__init__.py @@ -1,5 +1,6 @@ import os import sys +from pathlib import Path import dotenv @@ -9,4 +10,8 @@ if sys.stdout is None: if sys.stderr is None: sys.stderr = open(os.devnull, "w", encoding="utf-8") -dotenv.load_dotenv() +deploy_env_pth = Path(sys.executable).parent / ".env" +if deploy_env_pth.exists(): + dotenv.load_dotenv(dotenv_path=deploy_env_pth) +else: + dotenv.load_dotenv() diff --git a/src/wattanalyse/constants.py b/src/wattanalyse/constants.py index dd2ddf1..87c99f4 100644 --- a/src/wattanalyse/constants.py +++ 
b/src/wattanalyse/constants.py @@ -2,6 +2,7 @@ from __future__ import annotations import enum import os +import uuid from pathlib import Path from typing import Final @@ -15,7 +16,10 @@ LIB_PATH: Final[Path] = Path(__file__).resolve().parent BASE_PATH = io_.search_folder_path( - LIB_PATH, stop_folder_name=os.getenv("DOPT_STOP_FOLDER_NAME", "python") + LIB_PATH, + stop_folder_name=os.getenv( + "DOPT_STOP_FOLDER_NAME", str(uuid.uuid4()) + ), # random default to provoke early failures ) assert BASE_PATH, "base path not found" @@ -28,6 +32,7 @@ class Config: PTH_USER_CFG: Path = BASE_PATH / os.getenv("DOPT_PATH_CONFIG", "config/wattana.toml") +assert Config.PTH_USER_CFG.exists(), "user config not found" user_cfg = configs.load_toml(Config.PTH_USER_CFG) USER_CFG: t.UserConfig = t.UserConfig( Datenbank=t.UserConfig_Datenbank(**user_cfg["Datenbank"]), -- 2.34.1 From 551e5de085a4153ad39edb778d7304ae8e8ce593 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 08:56:18 +0200 Subject: [PATCH 34/48] first version of build script --- pyproject.toml | 4 +- scripts/build.ps1 | 201 +++++++++++++++++++++++++++++++++++++++++- scripts/build_pdm.ps1 | 1 + 3 files changed, 203 insertions(+), 3 deletions(-) create mode 100644 scripts/build_pdm.ps1 diff --git a/pyproject.toml b/pyproject.toml index 419b353..eff43ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "wattanalyse" -version = "0.1.1dev1" +version = "0.1.1dev5" description = "analysis of production state messages obtained from customers" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, @@ -71,7 +71,7 @@ directory = "reports/coverage" [tool.bumpversion] -current_version = "0.1.1dev1" +current_version = "0.1.1dev5" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. 
diff --git a/scripts/build.ps1 b/scripts/build.ps1 index edb0a24..85a6557 100644 --- a/scripts/build.ps1 +++ b/scripts/build.ps1 @@ -1 +1,200 @@ -pdm build -d build/ \ No newline at end of file +[CmdletBinding()] +Param( + [switch]$NoPackaging +) + +$DEPLOYMENT_PATH = 'B:\deployments\Wattana' +$ENV_PATH = 'B:\deployments\Wattana\dopt_wattana_data-analytics' +$PY_PATH = Join-Path -Path $ENV_PATH -ChildPath 'python' +$SRC_PATH = (Get-Location).Path + +function create_folder { + param ( + [string]$base_path, + [string]$folder_name + ) + $target_path = Join-Path -Path $base_path -ChildPath $folder_name + + if (-not (Test-Path -Path $target_path)){ + Write-Output "[PWSH] Folder >$folder_name< not existing. Create..." + New-Item -Path $target_path -ItemType Directory + } + else { + Write-Output "Folder >$folder_name< already exists." + } + +} + +Write-Output "Build Pipeline for d-opt Wattana (wattanalyse) project" + +Write-Output "Delete existing artifacts..." +$pattern = "dopt_wattana_data-analytics_v*" +Get-ChildItem -Path $DEPLOYMENT_PATH -Filter $pattern | Remove-Item -Recurse -Force +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors in the deletion procedure" + Exit +} +Write-Output "Deleted outstanding artifacts successfully" + + +Write-Output "Create folders..." +create_folder -base_path $ENV_PATH -folder_name 'data' +create_folder -base_path $ENV_PATH -folder_name 'data/logs' +create_folder -base_path $ENV_PATH -folder_name 'config' + + +# $data_folder = Join-Path -Path $ENV_PATH -ChildPath 'data' + +# if (-not (Test-Path -Path $data_folder)){ +# Write-Output "[PWSH] Data path not existing. Create..." +# New-Item -Path $data_folder -ItemType Directory +# } +# else { +# Write-Output "Data path already exists." +# } + + +Write-Output "Building package..." +.\scripts\publish.ps1 +if ($? 
-eq $false){ + Write-Output "[PWSH] Exiting script because there were build errors" + Exit +} +Write-Output "Built package successfully" + +# TODO documentation +# Write-Output "Generate docs..." +# .\scripts\cvt_docs.ps1 +# if ($? -eq $false){ +# Write-Output "[PWSH] Exiting script because there errors while generating the doc files" +# Exit +# } +# Write-Output "Generated doc files successfully" +# Write-Output "Copying doc files..." +# $docs_src_path = Join-Path -Path $SRC_PATH -ChildPath 'docs\*.pdf' +# Copy-Item -Path $docs_src_path -Destination $ENV_PATH -Force +# if ($? -eq $false){ +# Write-Output "[PWSH] Exiting script because there were errors while copying the doc files" +# Exit +# } +# Write-Output "Copied doc files successfully" + + +Write-Output "Go into env directory..." +Set-Location $ENV_PATH +Write-Output "Install package into environment..." +pycage venv add -p -i http://localhost:8001/simple/ wattanalyse +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors while installing the package into the environment" + Exit +} +Write-Output "Successfully installed package" + +# copy database file +# Write-Output "Copying database files..." +# $copy_file = Join-Path -Path $SRC_PATH -ChildPath 'data/db/wce_grunderfassung.db' +# $dest_file = Join-Path -Path $ENV_PATH -ChildPath 'data' +# Copy-Item -Path $copy_file -Destination $dest_file -Force +# if ($? -eq $false){ +# Write-Output "[PWSH] Exiting script because there were errors while copying database 'Grunderfassung'" +# Exit +# } +# $copy_file = Join-Path -Path $SRC_PATH -ChildPath 'data/db/wce_crm.db' +# Copy-Item -Path $copy_file -Destination $dest_file -Force +# if ($? -eq $false){ +# Write-Output "[PWSH] Exiting script because there were errors while copying database 'CRM'" +# Exit +# } +# Write-Output "Copied database files successfully" + +# copy .env file +Write-Output "Copying ENV file..." 
+$env_file = Join-Path -Path $SRC_PATH -ChildPath 'deployment/.env' +$env_dest_path = Join-Path -Path $PY_PATH -ChildPath '.env' +Copy-Item -Path $env_file -Destination $env_dest_path -Force +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors while copying ENV file" + Exit +} +Write-Output "Copied ENV file successfully" + +# copy config file +Write-Output "Copying ENV file..." +$env_file = Join-Path -Path $SRC_PATH -ChildPath 'config/wattana.toml' +$env_dest_path = Join-Path -Path $ENV_PATH -ChildPath 'config' +Copy-Item -Path $env_file -Destination $env_dest_path -Force +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors while copying config file" + Exit +} +Write-Output "Copied config file successfully" + +# TODO check needed? +# copy startup script files +# Write-Output "Copying startup scripts..." +# $copy_file = Join-Path -Path $SRC_PATH -ChildPath 'scripts/start.bat' +# Copy-Item -Path $copy_file -Destination $ENV_PATH -Force +# if ($? -eq $false){ +# Write-Output "[PWSH] Exiting script because there were errors while copying startup scripts" +# Exit +# } +# Write-Output "Copied startup scripts successfully" + + +# env preparation +Write-Output "Preparing environment with cleanup and pre-compilation..." +pycage clean dist-info +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors while deleting the distribution info files" + Exit +} +pycage compile -q -d -f +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors during the pre-compilation process" + Exit +} +Write-Output "Successfully prepared environment with cleanup and pre-compilation" + +if ($NoPackaging) { + Write-Output "[PWSH] No packaging selected. Exit..." + Set-Location $SRC_PATH + Exit +} + + +Write-Output "Get version string..." 
+$pyproject = Get-Content $SRC_PATH\pyproject.toml -Raw + +if ($pyproject -match '\[project\].*?\n[\s\w\n=""-_{}]*\[[\w-]*\]') { + $projectBlock = $matches[0] + if ($projectBlock -match 'version\s*=\s*"([^"]+)"') { + $version = $matches[1] + Write-Output "The version string is: $version" + } + else { + Write-Output "[PWSH] Exiting script because the version string was not found" + Exit + } +} +else { + Write-Output "[PWSH] Exiting script because the version string was not found" + Exit +} + +Write-Output "Packaging whole standalone environment in a ZIP file" +$dest_path = Join-Path -Path $DEPLOYMENT_PATH -ChildPath "dopt_nafka_wce-crm_v$version.zip" +$compress = @{ + Path = "$ENV_PATH\**" + CompressionLevel = "Optimal" + DestinationPath = $dest_path +} +Compress-Archive @compress -Force + +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors during the archive compression operation" + Exit +} +Write-Output "Successfully compressed archive. Saved under: >$dest_path<" + +Write-Output "Go back to source directory..." 
+Set-Location $SRC_PATH diff --git a/scripts/build_pdm.ps1 b/scripts/build_pdm.ps1 new file mode 100644 index 0000000..edb0a24 --- /dev/null +++ b/scripts/build_pdm.ps1 @@ -0,0 +1 @@ +pdm build -d build/ \ No newline at end of file -- 2.34.1 From d852c0c7e606affb4a0e2cb28daaf9d1670d6769 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 11:09:12 +0200 Subject: [PATCH 35/48] fix placement of constants --- src/wattanalyse/pipelines.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/wattanalyse/pipelines.py b/src/wattanalyse/pipelines.py index e4bcf0b..8b6c5d3 100644 --- a/src/wattanalyse/pipelines.py +++ b/src/wattanalyse/pipelines.py @@ -58,6 +58,18 @@ RENAMING_SCHEME_PSM: dict[str, str] = { PRIM_KEYS: Final[list[str]] = ["PA", "PA_Pos"] +NOT_NULL_COLS: Final[tuple[str, ...]] = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") +NOT_IN_FUTURE_COLS_DATETIME: Final[tuple[str, ...]] = ("Meldezeitpunkt_Historie",) +NOT_IN_FUTURE_COLS_DATE: Final[tuple[str, ...]] = ("Wareneingang am", "Prod-Start_Historie") + +PLAUSI_FEATURES: Final[list[str]] = [ + "Prod-EP10_Historie", + "Prod-EP20_Historie", + "Prod-EP30_Historie", + "Prod-EP40_Historie", + "Prod-EP50_Historie", +] + LOWER_BOUND_DATE_DEVIATION: Final[int] = ( USER_CFG.Datenpipelines_PSM.Terminabweichung_untere_Schranke ) @@ -133,7 +145,6 @@ def preprocess_psm( data = data.drop("null_count") # any NULL values in critical columns - NOT_NULL_COLS = ("PA", "PA_Pos", "Meldezeitpunkt_Historie") conds = [pl.col(col).is_null() for col in NOT_NULL_COLS] filtered_data = pl.concat([filtered_data, data.filter(pl.any_horizontal(*conds))]) data = data.filter(~pl.any_horizontal(*conds)) @@ -142,8 +153,6 @@ def preprocess_psm( # dates not allowed to be in the future current_datetime = datetime.datetime.now() current_date = current_datetime.date() - NOT_IN_FUTURE_COLS_DATETIME = ("Meldezeitpunkt_Historie",) - NOT_IN_FUTURE_COLS_DATE = ("Wareneingang am", "Prod-Start_Historie") conds = [ 
(pl.col(col) > current_datetime).fill_null(False) for col in NOT_IN_FUTURE_COLS_DATETIME @@ -188,13 +197,6 @@ def process_order_level( data = data.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) # ** plausibility check of order quantities - PLAUSI_FEATURES: list[str] = [ - "Prod-EP10_Historie", - "Prod-EP20_Historie", - "Prod-EP30_Historie", - "Prod-EP40_Historie", - "Prod-EP50_Historie", - ] data = data.with_columns( pl.all_horizontal( pl.col(PLAUSI_FEATURES).is_null() | (pl.col(PLAUSI_FEATURES) == 0) -- 2.34.1 From 53a2d2681c159a6503ab0cbd8b4ca352d3112cea Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 11:12:39 +0200 Subject: [PATCH 36/48] update README as docs --- README.md | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ecc1f6c..75148f2 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,125 @@ -# Wattanalyse +# Dokumentation zum Modul für Datenanalyse "wattanalyse" -*description added later* +*erstellt von: d-opt-GmbH, Oberneumarker Str. 59, 08496 Neumark* + +*erstellt für: WATTANA GmbH, Gewerbering 19, 09337 Hohenstein-Ernstthal* + +*Datum der letzten Änderung: 12.06.2026* + +## Zweck der Anwendung + +Die Anwendung erlaubt die Auswertung gesammelter Produktionsstandmeldungen, also von Rückmeldungen zum aktuellen Auftragsstatus der Lieferenten von Wattana, auch Konfektionäre genannt. Hierfür wurden gemeinsam verschiedene Kennzahlen erarbeitet, die über die Aufträge bzw. die Konfektionäre ermittelt werden können. Diese Kennzahlen sollen in nachfolgenden Schritten für die Erarbeitung eines Ampelsystems herangezogen werden, um die Kritikalität von Produktionsaufträgen visuell zu verdeutlichen und Maßnahmen zur fristgerechten Lieferung einleiten zu können. 
+ +## Funktionsprinzip + +### Allgemein + +Die Anwendung besteht aus einer isolierten Python-Umgebung mit allen erforderlichen Abhängigkeiten sowie zusätzlichen Ordnern, in denen Daten sowie Konfiguration abliegen. Den Kern bildet eine Datenverarbeitungs-Pipeline, die über einen Modulaufruf angestoßen wird. Um die Pipeline zu starten, befindet sich im Wurzelverzeichnis ein Startup-PowerShell-Skript mit dem Namen "``startup.ps1``". + +Die Verarbeitungs-Pipeline ist aktuell **zustandslos**, was bedeutet, dass sie über das Startup-Skript angestoßen wird und nach erfolgtem Durchlauf automatisch endet. Es wird kein Prozess gestartet, der manuell wieder beendet werden müsste. Ebenso existieren keine externen Trigger, um die Pipeline zu starten. Dies geschieht stets über den Modulaufruf über das Skript. + +### Datenfluss + +#### Wattana-Datenbank + +Die Anwendung muss die zu verarbeitenden Daten aus Wattanas Datenbank abrufen und auch dort hineinschreiben. Hierbei handelt es sich um eine *Oracle-Datenbank*. Die Anwendung nutzt zur Kommunikation den offiziell von Oracle bereitgestellten und gewarteten Datenbanktreiber. Durch Wattana muss ein Datenbank-Nutzer eingerichtet werden, mit dem die erforderlichen Daten abgerufen und auch wieder zurückgeschrieben werden. *Die Konfiguration der Datenbankverbindung und des Nutzers erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration → IT")**.* + +#### Import (von Wattana) + +Die Anwendung benötigt Zugriff auf eine Tabelle oder View mit den Produktionsstandmeldungen sowie die Auftragsübersicht aus dem internen ERP-System *"MIS"*, wie sie bereits durch Wattana als CSV-Auszüge bereitgestellt wurden. *Die Konfiguration der Namen der Views und Tabellen erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration")**.* + +#### Export (zu Wattana) + +Der Export zu Wattana geschieht nach aktuellem Projektstand über zwei Tabellen, in welche die Ergebnisse zurückgeschrieben werden. 
Diese weisen folgende Schemata auf: + +```sql +CREATE TABLE WATTANA.KPI_PRODUKTIONSAUFTRAEGE ( +    ID NUMBER(1) PRIMARY KEY, +    AKTUALISIERT_AM TIMESTAMP, +    MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG NUMBER(10), +    MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG NUMBER(10), +    STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG NUMBER(10,4), +    MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN NUMBER(10), +    MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN NUMBER(10), +    MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE NUMBER(10), +    CONSTRAINT CHK_SINGLE_ROW CHECK (ID = 1) +); + +CREATE TABLE WATTANA.KPI_KONFEKTIONAERE ( +    ID NUMBER PRIMARY KEY, +    AKTUALISIERT_AM TIMESTAMP, +    KONFEKTIONAER VARCHAR2(200), +    KONFEKTIONAER_ID NUMBER, +    QUOTE_ERSTBESTAETIGUNG NUMBER(7,4), +    PROZENT_LIEFERTREUE NUMBER(7,4), +    ANTEIL_PROZENT_LIEFERTERMINUNTERSCHREITUNG NUMBER(7,4), +    ANTEIL_PROZENT_LIEFERTERMINUEBERSCHREITUNG NUMBER(7,4), +    MITTLERE_ANZAHL_TAGE_LIEFERTERMINUNTERSCHREITUNG NUMBER(10), +    MITTLERE_ANZAHL_TAGE_LIEFERTERMINUEBERSCHREITUNG NUMBER(10), +    STANDARDABWEICHUNG_TAGE_LIEFERTERMINABWEICHUNG NUMBER(10,4), +    MITTLERE_ANZAHL_ANPASSUNGEN_LIEFERTERMIN NUMBER(10), +    MITTLERE_ABSTAENDE_ZWISCHEN_MELDUNGEN NUMBER(10), +    MITTLERE_DURCHLAUFZEIT_ANZAHL_TAGE NUMBER(10), +    MITTLERER_QUALITAETSSCORE_PSM NUMBER(5,4) +); +``` + +Die Tabelle "KPI_PRODUKTIONSAUFTRAEGE" enthält das Aggregat der Kennzahlen über alle Aufträge, die in den Produktionsstandmeldungen enthalten sind. Diese besteht demzufolge immer nur aus **einem Eintrag, welcher überschrieben wird.** + +Die Tabelle "KPI_KONFEKTIONAERE" enthält die Kennzahlen über alle Konfektionäre. Sie enthält demzufolge immer so viele Einträge, wie unterschiedliche Konfektionäre in den Produktionsstandmeldungen enthalten sind. 
**Diese Tabelle wird ebenfalls überschrieben.** + +Die hier festgelegten Tabellennamen sind nur beispielhaft für den Nutzer "WATTANA" erstellt worden und müssen nicht den tatsächlichen entsprechen. *Die Konfiguration der Namen dieser beiden Tabellen erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration")**.* + +## Konfiguration + +Die Konfiguration erfolgt über eine TOML-Datei, die über das Applikationsverzeichnis unter ``"config → wattana.toml"`` zu finden ist. Diese enthält zwei Tabellen oder "Überschriften": + +- ``Datenbank``: relevant für den folgenden Abschnitt "IT" +- ``Datenpipelines_PSM``: relevant für den Abschnitt "Auswertung" + +### IT + +*relevanter Abschnitt der Konfiguration:* ``"Datenbank"`` + +In diesem Teil der Konfiguration werden alle IT-seitigen Einstellungen vorgenommen. Das betrifft gegenwärtig die Konfiguration von: + +- Datenbanknutzer über: + - ``Nutzer`` + - ``Passwort`` +- Datenbankverbindung über: + - ``Host`` + - ``Port`` + - ``Service_Name`` +- Import der Daten über: + - ``Tabellenname_Produktionsstandmeldung``: Name der Tabelle/View, wo die Produktionsstandmeldungen zu finden sind + - ``Tabellenname_MIS_Auftraege``: Name der Tabelle/View, wo die Liste der Produktionsaufträge zu finden ist +- Export der Daten über: + - ``Tabellenname_KPI_Auftraege``: Name der Tabelle, wo die Kennzahlen aus Auftragssicht gespeichert werden können + - ``Tabellenname_KPI_Konfektionaere``: Name der Tabelle, wo die Kennzahlen aus Konfektionärssicht gespeichert werden können + +### Auswertung + +*relevanter Abschnitt der Konfiguration:* ``"Datenpipelines_PSM"`` + +In diesem Teil der Konfiguration werden alle Einstellungen vorgenommen, die das Verhalten bei den Auswertungen beeinflussen. Das betrifft gegenwärtig die Konfiguration von: + +- Vorverarbeitung über: + - ``Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig``: Dieser Parameter bestimmt, ab wann ein Datum in der Zukunft unzulässig ist. 
Ausgehend vom aktuellen Datum wird der dort als Ganzzahl angegebene Wert als Anzahl der Jahre in die Zukunft interpretiert. Alle Datumswerte darüber, werden als unzulässig markiert und in der Auswertung nicht berücksichtigt. *Aktuell wird dieser Filter für folgende Merkmale angewandt: **Meldezeitpunkt, Wareneingang am, Zuschnitt am**.* +- Auswertung der Terminabweichungen über: + - ``Terminabweichung_untere_Schranke``: Schranke in Anzahl an Tagen, ab wann ein Auftrag als verfrüht gilt + - ``Terminabweichung_obere_Schranke``: Schranke in Anzahl an Tagen, ab wann ein Auftrag als verspätet gilt + - ``Nutze_Schranken_Terminabweichung_KPI_Berechnung``: kann ``true`` (aktiviert) oder ``false`` (deaktiviert) sein. Dieser Parameter gibt an, ob der KPI-Berechnung die benutzerdefinierten Schranken (siehe beide Anstriche zuvor) genutzt werden sollen oder nicht. Ist diese Option deaktiviert, wird eine Abweichung ``< 0`` als verfrüht und eine ``> 0`` als verspätet gewertet. +- Bewertung der Datenqualität für zurückgemeldeten Produktionsvolumen. Entgegen der ursprünglichen Excel-Tabelle wurde nun zur Qualitätsbewertung der gemeldeten Stückzahlen ein Scoring-System eingeführt, da es die Vergleichbarkeit verbessert. Es gibt drei Kategorien, für die ein nutzerdefinierter Score festgelegt werden kann. **Der Maximalwert eines Scores sollte kleiner als 10 sein.** Die Definition geschieht über: + - ``Score_Qualitaet_Produktionsmengen_fehlend``: Dieser Score wird vergeben, wenn alle Einträge für die Stückzahlen als "0" oder gar nicht zurückgemeldet wurden. + - ``Score_Qualitaet_Produktionsmengen_unplausibel``: Dieser Score wird vergeben, wenn die Einträge für die Stückzahlen nicht plausibel sind. Plausible Einträge sind so definiert, dass die zurückgemeldeten Stückzahlen gemäß dem Fortschritt in der Fertigung anwachsen müssen. Die Stückzahlen eines vorgelagerter Prozessschritts dürfen nicht kleiner als die eines nachgelagerten sein. 
+ - ``Score_Qualitaet_Produktionsmengen_plausibel``: Dieser Score wird vergeben, wenn die Einträge für die Stückzahlen plausibel sind. + +## Systemanforderungen + +### CPU + +Die Anwendung stellt keine besonderen Anforderungen an das ausführende System. Es werden gängige x86-CPU-Generationen unterstützt. Die im Hintergrund verwendeten Bibliotheken benötigen zum Teil moderne CPU-Befehlssätze, um die maximale Geschwindigkeit zu erreichen. Häufig haben diese aber auch eine Rückfalloption auf ältere Instruktionen, wodurch die Ausführungsgeschwindigkeit minimal sinken kann. Für neuere Prozessoren ab 2018 sollte das jedoch keine Rolle spielen. + +### RAM + +Die RAM-Auslastung hängt primär von der Größe der zu verarbeitenden Datenbestände ab. Bei internen Tests mit den bereitgestellten Daten wurde für den gesamten Prozess eine maximale RAM-Belegung von **180 MB** gemessen. Je mehr Produktionsstandmeldungen vorhanden sind, desto größer wird der Speicherbedarf. Auf eine inkrementelle Verarbeitungslogik wurde zunächst verzichtet, da die Größe der zu verarbeitenden Datenbestände unkritisch sein sollte. -- 2.34.1 From c80f665db969ca63419b7eb5b71eb7d585aef034 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 11:40:27 +0200 Subject: [PATCH 37/48] update README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 75148f2..40265d5 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,10 @@ Die Anwendung besteht aus einer isolierten Python-Umgebung mit allen erforderlic Die Verarbeitungs-Pipeline ist aktuell **zustandslos**, was bedeutet, dass sie über das Startup-Skript angestoßen wird und nach erfolgtem Durchlauf automatisch endet. Es wird kein Prozess gestartet, der manuell wieder beendet werden müsste. Ebenso existieren keine externen Trigger, um die Pipeline zu starten. Dies geschieht stets über den Modulaufruf über das Skript. 
+### Systemausgaben und Debugging + +Die Anwendung legt unter dem Pfad ``"data → logs"`` Log-Dateien an. Damit können Fehler identifiziert und die Anwendung debuggt werden. Standardmäßig ist die Log-Ausgabe für ``STDOUT`` oder ``STDERR`` deaktiviert. Bei der Ausführung des Skripts sind deshalb im Terminal keine Ausgaben ersichtlich. Sollte das aktuelle Verhalten nicht gewünscht sein, so kann dies in Abstimmung mit d-opt angepasst werden. + ### Datenfluss #### Wattana-Datenbank -- 2.34.1 From 1aaa590a0afdec6acc59ce3ff23dfca29368f332 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 11:41:09 +0200 Subject: [PATCH 38/48] add startup script --- deployment/startup.ps1 | 1 + 1 file changed, 1 insertion(+) create mode 100644 deployment/startup.ps1 diff --git a/deployment/startup.ps1 b/deployment/startup.ps1 new file mode 100644 index 0000000..d81116a --- /dev/null +++ b/deployment/startup.ps1 @@ -0,0 +1 @@ +.\python\python.exe -m wattanalyse.external_interface -- 2.34.1 From c18f03255b98b3575c670173307e6e937d9d26c7 Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 11:41:30 +0200 Subject: [PATCH 39/48] add version history and doc building script --- docs/Versionshistorie.md | 5 +++++ scripts/cvt_docs.ps1 | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 docs/Versionshistorie.md create mode 100644 scripts/cvt_docs.ps1 diff --git a/docs/Versionshistorie.md b/docs/Versionshistorie.md new file mode 100644 index 0000000..745dabf --- /dev/null +++ b/docs/Versionshistorie.md @@ -0,0 +1,5 @@ +# Versionshistorie (Changelog) + +## XX.06.2026 (Version: v0.1.1dev) (Tag: ExtTest-2026XXXX) + +- initiale Version für erste Feedback-Schleife diff --git a/scripts/cvt_docs.ps1 b/scripts/cvt_docs.ps1 new file mode 100644 index 0000000..20f2f27 --- /dev/null +++ b/scripts/cvt_docs.ps1 @@ -0,0 +1,3 @@ +# convert README Markdown file to PDF as a manual +pandoc .\README.md -o .\docs\01_Kurzanleitung.pdf -V geometry:"a4paper, margin=2.5cm" #-V 
header-includes="\usepackage[none]{hyphenat}" +pandoc .\docs\Versionshistorie.md -o .\docs\02_Versionshistorie.pdf -V geometry:"a4paper, margin=2.5cm" -V header-includes="\usepackage[none]{hyphenat}" -- 2.34.1 From 03fb2a62cdb6f744e5af4f3cf7e97a39ccea897e Mon Sep 17 00:00:00 2001 From: foefl Date: Fri, 12 Jun 2026 11:41:37 +0200 Subject: [PATCH 40/48] update build pipeline --- scripts/build.ps1 | 65 ++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/scripts/build.ps1 b/scripts/build.ps1 index 85a6557..1e3c6f9 100644 --- a/scripts/build.ps1 +++ b/scripts/build.ps1 @@ -11,18 +11,24 @@ $SRC_PATH = (Get-Location).Path function create_folder { param ( [string]$base_path, - [string]$folder_name + [string]$folder_name, + [switch]$recreate ) $target_path = Join-Path -Path $base_path -ChildPath $folder_name - if (-not (Test-Path -Path $target_path)){ + $target_path_exists = Test-Path -Path $target_path + if (-not $target_path_exists){ Write-Output "[PWSH] Folder >$folder_name< not existing. Create..." New-Item -Path $target_path -ItemType Directory } + elseif ($target_path_exists -and $recreate){ + Write-Output "[PWSH] Folder >$folder_name< exists, but should be recreated..." + Remove-Item -Path $target_path -Recurse -Force + New-Item -Path $target_path -ItemType Directory + } else { Write-Output "Folder >$folder_name< already exists." } - } Write-Output "Build Pipeline for d-opt Wattana (wattanalyse) project" @@ -38,7 +44,7 @@ Write-Output "Deleted outstanding artifacts successfully" Write-Output "Create folders..." -create_folder -base_path $ENV_PATH -folder_name 'data' +create_folder -base_path $ENV_PATH -folder_name 'data' -recreate create_folder -base_path $ENV_PATH -folder_name 'data/logs' create_folder -base_path $ENV_PATH -folder_name 'config' @@ -62,22 +68,22 @@ if ($? -eq $false){ } Write-Output "Built package successfully" -# TODO documentation -# Write-Output "Generate docs..." 
-# .\scripts\cvt_docs.ps1 -# if ($? -eq $false){ -# Write-Output "[PWSH] Exiting script because there errors while generating the doc files" -# Exit -# } -# Write-Output "Generated doc files successfully" -# Write-Output "Copying doc files..." -# $docs_src_path = Join-Path -Path $SRC_PATH -ChildPath 'docs\*.pdf' -# Copy-Item -Path $docs_src_path -Destination $ENV_PATH -Force -# if ($? -eq $false){ -# Write-Output "[PWSH] Exiting script because there were errors while copying the doc files" -# Exit -# } -# Write-Output "Copied doc files successfully" +# docs +Write-Output "Generate docs..." +.\scripts\cvt_docs.ps1 +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there errors while generating the doc files" + Exit +} +Write-Output "Generated doc files successfully" +Write-Output "Copying doc files..." +$docs_src_path = Join-Path -Path $SRC_PATH -ChildPath 'docs\*.pdf' +Copy-Item -Path $docs_src_path -Destination $ENV_PATH -Force +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors while copying the doc files" + Exit +} +Write-Output "Copied doc files successfully" Write-Output "Go into env directory..." @@ -90,6 +96,7 @@ if ($? -eq $false){ } Write-Output "Successfully installed package" +# TODO check removal # copy database file # Write-Output "Copying database files..." # $copy_file = Join-Path -Path $SRC_PATH -ChildPath 'data/db/wce_grunderfassung.db' @@ -129,16 +136,16 @@ if ($? -eq $false){ } Write-Output "Copied config file successfully" -# TODO check needed? + # copy startup script files -# Write-Output "Copying startup scripts..." -# $copy_file = Join-Path -Path $SRC_PATH -ChildPath 'scripts/start.bat' -# Copy-Item -Path $copy_file -Destination $ENV_PATH -Force -# if ($? -eq $false){ -# Write-Output "[PWSH] Exiting script because there were errors while copying startup scripts" -# Exit -# } -# Write-Output "Copied startup scripts successfully" +Write-Output "Copying startup scripts..." 
+$copy_file = Join-Path -Path $SRC_PATH -ChildPath 'deployment/startup*' +Copy-Item -Path $copy_file -Destination $ENV_PATH -Force +if ($? -eq $false){ + Write-Output "[PWSH] Exiting script because there were errors while copying startup scripts" + Exit +} +Write-Output "Copied startup scripts successfully" # env preparation -- 2.34.1 From cda409ffab423e19648d2b250b083dc47f5e3a92 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 08:17:47 +0200 Subject: [PATCH 41/48] add option to show short state messages with startup script --- README.md | 4 +++- deployment/startup.ps1 | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 40265d5..b3ea080 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ *erstellt für: WATTANA GmbH, Gewerbering 19, 09337 Hohenstein-Ernstthal* -*Datum der letzten Änderung: 12.06.2026* +*Datum der letzten Änderung: 16.06.2026* ## Zweck der Anwendung @@ -22,6 +22,8 @@ Die Verarbeitungs-Pipeline ist aktuell **zustandslos**, was bedeutet, dass sie Die Anwendung legt unter dem Pfad ``"data → logs"`` Log-Dateien an. Damit können Fehler identifiziert und die Anwendung debuggt werden. Standardmäßig ist die Log-Ausgabe für ``STDOUT`` oder ``STDERR`` deaktiviert. Bei der Ausführung des Skripts sind deshalb im Terminal keine Ausgaben ersichtlich. Sollte das aktuelle Verhalten nicht gewünscht sein, so kann dies in Abstimmung mit d-opt angepasst werden. +Das Start-Skript "``startup.ps1``" kann mit der Option "``-enableOutput``" aufgerufen werden. Damit werden nach Abschluss der Verarbeitungs-Pipeline kurze Rückmeldungen im Terminal ausgegeben, ob die Verarbeitung erfolgreich war oder Fehler aufgetreten sind. 
+ ### Datenfluss #### Wattana-Datenbank diff --git a/deployment/startup.ps1 b/deployment/startup.ps1 index d81116a..def3094 100644 --- a/deployment/startup.ps1 +++ b/deployment/startup.ps1 @@ -1 +1,14 @@ +param( + [switch]$enableOutput +) + .\python\python.exe -m wattanalyse.external_interface + +if ($enableOutput) { + if ($LASTEXITCODE -eq 0) { + Write-Host "Die Verarbeitungspipeline wurde erfolgreich abgeschlossen." + } + else { + Write-Host "Bei der Verarbeitung ist ein Fehler aufgetreten. Details sind den Logs zu entnehmen." + } +} \ No newline at end of file -- 2.34.1 From 7d6dd86ceeda33c9e1ca6181fce73754c81a5282 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 08:52:35 +0200 Subject: [PATCH 42/48] improve README --- README.md | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index b3ea080..2607b1d 100644 --- a/README.md +++ b/README.md @@ -16,23 +16,21 @@ Die Anwendung erlaubt die Auswertung gesammelter Produktionsstandmeldungen, also Die Anwendung besteht aus einer isolierten Python-Umgebung mit allen erforderlichen Abhängigkeiten sowie zusätzlichen Ordnern, in denen Daten sowie Konfiguration abliegen. Den Kern bildet eine Datenverarbeitungs-Pipeline, die über einen Modulaufruf angestoßen wird. Um die Pipeline zu starten, befindet sich im Wurzelverzeichnis ein Startup-PowerShell-Skript mit dem Namen "``startup.ps1``". -Die Verarbeitungs-Pipeline ist aktuell **zustandslos**, was bedeutet, dass sie über das Startup-Skript angestoßen wird und nach erfolgtem Durchlauf automatisch endet. Es wird kein Prozess gestartet, der manuell wieder beendet werden müsste. Ebenso existieren keine externen Trigger, um die Pipeline zu starten. Dies geschieht stets über den Modulaufruf über das Skript. +Die Verarbeitungs-Pipeline ist aktuell **zustandslos**, was bedeutet, dass sie über das Startup-Skript angestoßen wird und nach erfolgtem Durchlauf automatisch endet. 
Es wird kein Prozess gestartet, der manuell wieder beendet werden müsste. Ebenso existieren keine externen Trigger, um die Pipeline zu starten. Dies geschieht stets über den Modulaufruf mithilfe des Skripts. ### Systemausgaben und Debugging -Die Anwendung legt unter dem Pfad ``"data → logs"`` Log-Dateien an. Damit können Fehler identifiziert und die Anwendung debuggt werden. Standardmäßig ist die Log-Ausgabe für ``STDOUT`` oder ``STDERR`` deaktiviert. Bei der Ausführung des Skripts sind deshalb im Terminal keine Ausgaben ersichtlich. Sollte das aktuelle Verhalten nicht gewünscht sein, so kann dies in Abstimmung mit d-opt angepasst werden. - -Das Start-Skript "``startup.ps1``" kann mit der Option "``-enableOutput``" aufgerufen werden. Damit werden nach Abschluss der Verarbeitungs-Pipeline kurze Rückmeldungen im Terminal ausgegeben, ob die Verarbeitung erfolgreich war oder Fehler aufgetreten sind. +Die Anwendung legt unter dem Pfad ``"data → logs"`` Log-Dateien an. Damit können Fehler identifiziert und die Anwendung debuggt werden. Standardmäßig ist die Log-Ausgabe für ``STDOUT`` oder ``STDERR`` deaktiviert. Bei der Ausführung des Skripts sind deshalb im Terminal normalerweise keine Ausgaben ersichtlich. Sollte das aktuelle Verhalten nicht gewünscht sein, so kann dies in Abstimmung mit d-opt angepasst werden. Darüber hinaus kann das Start-Skript "``startup.ps1``" auch mit der Option "``-enableOutput``" aufgerufen werden. Damit werden nach Abschluss der Verarbeitungs-Pipeline kurze Rückmeldungen im Terminal ausgegeben, ob die Verarbeitung erfolgreich war oder Fehler aufgetreten sind. Diese enthalten jedoch keine Fehler-Details. Diese sind ausschließlich in den Log-Dateien vorzufinden. ### Datenfluss #### Wattana-Datenbank -Die Anwendung muss die zu verarbeitenden Daten aus Wattanas Datenbank abrufen und auch dort hineinschreiben. Hierbei handelt es sich um eine *Oracle-Datenbank*. 
Die Anwendung nutzt zur Kommunikation den offiziell von Oracle bereitgestellten und gewarteten Datenbanktreiber. Durch Wattana muss ein Datenbank-Nutzer eingerichtet werden, mit dem die erforderlichen Daten abgerufen und auch wieder zurückgeschrieben werden. *Die Konfiguration der Datenbankverbindung und des Nutzers erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration → IT")**.* +Die Anwendung muss die zu verarbeitenden Daten aus Wattanas Datenbank abrufen und auch dort hineinschreiben. Hierbei handelt es sich um eine *Oracle-Datenbank*. Die Anwendung nutzt zur Kommunikation den offiziell von Oracle bereitgestellten und gewarteten Datenbanktreiber. Durch Wattana muss ein Datenbank-Nutzer eingerichtet werden, mit dem die erforderlichen Daten abgerufen und auch wieder zurückgeschrieben werden. *Die Konfiguration der Datenbankverbindung und des Nutzers erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration → IT")**.* Details zu notwendigen Tabellen und Views sind den nachfolgenden Abschnitten zu entnehmen. #### Import (von Wattana) -Die Anwendung benötigt Zugriff auf eine Tabelle oder View mit den Produktionsstandmeldungen sowie die Auftragsübersicht aus dem internen ERP-System *"MIS"*, wie sie bereits durch Wattana als CSV-Auszüge bereitgestellt wurden. *Die Konfiguration der Namen der Views und Tabellen erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration")**.* +Die Anwendung benötigt Zugriff auf eine Tabelle oder View mit den Produktionsstandmeldungen sowie mit der Auftragsübersicht aus dem internen ERP-System *"MIS"*, wie sie bereits durch Wattana als CSV-Auszüge bereitgestellt wurden. *Die Konfiguration der Namen der Views und Tabellen erfolgt über die vorhandene Konfigurationsdatei **(siehe Abschnitt "Konfiguration")**.* So ist sichergestellt, dass im Programmcode die korrekten SQL-Abfragen formuliert werden können. 
#### Export (zu Wattana) @@ -85,7 +83,7 @@ Die Konfiguration erfolgt über eine TOML-Datei, die über das Applikationsverze ### IT -*relevanter Abschnitt der Konfiguration:* ``"Datenbank"`` +*relevanter Abschnitt der Konfiguration:* "``Datenbank``" In diesem Teil der Konfiguration werden alle IT-seitigen Einstellungen vorgenommen. Das betrifft gegenwärtig die Konfiguration von: @@ -105,26 +103,26 @@ In diesem Teil der Konfiguration werden alle IT-seitigen Einstellungen vorgenomm ### Auswertung -*relevanter Abschnitt der Konfiguration:* ``"Datenpipelines_PSM"`` +*relevanter Abschnitt der Konfiguration:* "``Datenpipelines_PSM``" In diesem Teil der Konfiguration werden alle Einstellungen vorgenommen, die das Verhalten bei den Auswertungen beeinflussen. Das betrifft gegenwärtig die Konfiguration von: - Vorverarbeitung über: - ``Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig``: Dieser Parameter bestimmt, ab wann ein Datum in der Zukunft unzulässig ist. Ausgehend vom aktuellen Datum wird der dort als Ganzzahl angegebene Wert als Anzahl der Jahre in die Zukunft interpretiert. Alle Datumswerte darüber, werden als unzulässig markiert und in der Auswertung nicht berücksichtigt. *Aktuell wird dieser Filter für folgende Merkmale angewandt: **Meldezeitpunkt, Wareneingang am, Zuschnitt am**.* - Auswertung der Terminabweichungen über: - - ``Terminabweichung_untere_Schranke``: Schranke in Anzahl an Tagen, ab wann ein Auftrag als verfrüht gilt - - ``Terminabweichung_obere_Schranke``: Schranke in Anzahl an Tagen, ab wann ein Auftrag als verspätet gilt - - ``Nutze_Schranken_Terminabweichung_KPI_Berechnung``: kann ``true`` (aktiviert) oder ``false`` (deaktiviert) sein. Dieser Parameter gibt an, ob der KPI-Berechnung die benutzerdefinierten Schranken (siehe beide Anstriche zuvor) genutzt werden sollen oder nicht. Ist diese Option deaktiviert, wird eine Abweichung ``< 0`` als verfrüht und eine ``> 0`` als verspätet gewertet. 
-- Bewertung der Datenqualität für zurückgemeldeten Produktionsvolumen. Entgegen der ursprünglichen Excel-Tabelle wurde nun zur Qualitätsbewertung der gemeldeten Stückzahlen ein Scoring-System eingeführt, da es die Vergleichbarkeit verbessert. Es gibt drei Kategorien, für die ein nutzerdefinierter Score festgelegt werden kann. **Der Maximalwert eines Scores sollte kleiner als 10 sein.** Die Definition geschieht über: + - ``Terminabweichung_untere_Schranke``: Schranke in Anzahl an Tagen, ab wann ein Auftrag als verfrüht gilt. Dieser Wert muss kleiner oder gleich der oberen Schranke sein (siehe nachfolgender Parameter), sonst kommt es zu einem Abbruch. + - ``Terminabweichung_obere_Schranke``: Schranke in Anzahl an Tagen, ab wann ein Auftrag als verspätet gilt. Dieser Wert muss größer oder gleich der unteren Schranke sein (siehe vorangegangener Parameter), sonst kommt es zu einem Abbruch. + - ``Nutze_Schranken_Terminabweichung_KPI_Berechnung``: kann ``true`` (aktiviert) oder ``false`` (deaktiviert) sein. Dieser Parameter gibt an, ob bei der KPI-Berechnung die benutzerdefinierten Schranken (siehe beide Parameter zuvor) genutzt werden sollen oder nicht. Ist diese Option deaktiviert, wird eine Abweichung ``< 0`` als verfrüht und eine ``> 0`` als verspätet gewertet. +- Bewertung der Datenqualität für zurückgemeldeten Produktionsvolumen. Entgegen der ursprünglichen KPI-Excel-Tabelle wurde nun zur Qualitätsbewertung der gemeldeten Stückzahlen ein Scoring-System eingeführt, da es die Vergleichbarkeit verbessert. Es gibt drei Kategorien, für die ein nutzerdefinierter Score festgelegt werden kann. **Der Maximalwert eines Scores sollte kleiner als 10 sein.** Die Definition geschieht über: - ``Score_Qualitaet_Produktionsmengen_fehlend``: Dieser Score wird vergeben, wenn alle Einträge für die Stückzahlen als "0" oder gar nicht zurückgemeldet wurden. 
- ``Score_Qualitaet_Produktionsmengen_unplausibel``: Dieser Score wird vergeben, wenn die Einträge für die Stückzahlen nicht plausibel sind. Plausible Einträge sind so definiert, dass die zurückgemeldeten Stückzahlen gemäß dem Fortschritt in der Fertigung anwachsen müssen. Die Stückzahlen eines vorgelagerter Prozessschritts dürfen nicht kleiner als die eines nachgelagerten sein. - - ``Score_Qualitaet_Produktionsmengen_plausibel``: Dieser Score wird vergeben, wenn die Einträge für die Stückzahlen plausibel sind. + - ``Score_Qualitaet_Produktionsmengen_plausibel``: Dieser Score wird vergeben, wenn die Einträge für die Stückzahlen plausibel sind. Dieser Score sollte immer der Maximal- oder Minimalwert über alle drei Kategorien sein. Erreicht ein Konfektionär einen durchschnittlichen Wert nahe diesem, gelten seine gemeldeten Produktionsvolumina als zuverlässig. ## Systemanforderungen ### CPU -Die Anwendung stellt keine besonderen Anforderungen an das ausführende System. Es werden gängige x86-CPU-Generationen unterstützt. Die im Hintergrund verwendeten Bibliotheken benötigen zum Teil moderne CPU-Befehlssätze, um die maximale Geschwindigkeit zu erreichen. Häufig haben diese aber auch eine Rückfalloption auf ältere Instruktionen, wodurch die Ausführungsgeschwindigkeit minimal sinken kann. Für neuere Prozessoren ab 2018 sollte das jedoch keine Rolle spielen. +Die Anwendung stellt keine besonderen Anforderungen an das ausführende System. Es werden gängige x86-CPU-Generationen unterstützt. Die im Hintergrund verwendeten Bibliotheken benötigen zum Teil moderne CPU-Befehlssätze, um die maximale Geschwindigkeit zu erreichen. Häufig haben diese aber auch eine Rückfalloption auf ältere Instruktionen, wodurch die Ausführungsgeschwindigkeit minimal sinken kann. Für neuere Prozessoren ab Baujahr 2018 sollte das jedoch keine Rolle spielen. 
### RAM -- 2.34.1 From 94cdb7ee9726a94afc39dc0442149f091005f997 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 08:52:49 +0200 Subject: [PATCH 43/48] fix correct result handling for pipeline --- src/wattanalyse/external_interface.py | 20 ++++++++++++++++++-- src/wattanalyse/pipelines.py | 9 ++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py index fa397e6..cbd5df4 100644 --- a/src/wattanalyse/external_interface.py +++ b/src/wattanalyse/external_interface.py @@ -21,6 +21,7 @@ def pipeline_KPI_calculation( conn: OracleConnection, ) -> int: return_code: int = 0 + export_status_code: int = 0 PIPELINE_NAME: Final[str] = "KPI_calculation" logger_pipeline.info("Start pipeline >%s<", PIPELINE_NAME) @@ -33,12 +34,27 @@ def pipeline_KPI_calculation( if res_pipe.status != STATUS_HANDLER.SUCCESS: logger_pipeline.error( - ("[PIPELINE: %s] An error occurred during the procedure --- Status:\n%s"), + ( + "[PIPELINE: %s] An immediate pipeline error occurred during the procedure " + "--- Status:\n%s" + ), PIPELINE_NAME, res_pipe.status, stack_info=True, ) return_code = 1 + export_status_code = res_pipe.status.code + + internal_pipe_state = res_pipe.unwrap() + if internal_pipe_state != STATUS_HANDLER.SUCCESS: + logger_pipeline.error( + ("[PIPELINE: %s] An error occurred during the procedure --- Status:\n%s"), + PIPELINE_NAME, + internal_pipe_state, + stack_info=True, + ) + return_code = 1 + export_status_code = internal_pipe_state.code logger_database.info("Prepare collected metadata...") dur_sek = (t2 - t1) / 1e9 @@ -49,7 +65,7 @@ def pipeline_KPI_calculation( gestartet_um=start, beendet_um=stop, dauer_sek=dur_sek, - status_code=res_pipe.status.code, + status_code=export_status_code, ) res_metadata = pipelines.write_metadata(metadata) diff --git a/src/wattanalyse/pipelines.py b/src/wattanalyse/pipelines.py index 8b6c5d3..d8cd0f4 100644 --- a/src/wattanalyse/pipelines.py +++ 
b/src/wattanalyse/pipelines.py @@ -76,6 +76,8 @@ LOWER_BOUND_DATE_DEVIATION: Final[int] = ( UPPER_BOUND_DATE_DEVIATION: Final[int] = ( USER_CFG.Datenpipelines_PSM.Terminabweichung_obere_Schranke ) + + NUMBER_YEARS_UPPER_BOUND_DATES: Final[int] = ( USER_CFG.Datenpipelines_PSM.Vorverarbeitung_Anzahl_Jahre_in_Zukunft_zulaessig ) @@ -117,6 +119,11 @@ def load_PSM_data( def preprocess_psm( data: pl.LazyFrame, ) -> PreProcessResult: + if LOWER_BOUND_DATE_DEVIATION > UPPER_BOUND_DATE_DEVIATION: + raise ValueError( + "Lower bound for date deviation must not be greater than upper bound." + ) + data = data.rename(RENAMING_SCHEME_PSM) data = data.drop(DROP_COLUMNS, strict=False) REGEX_PATTERN = r"^[\s\-#+/$]+$" @@ -660,7 +667,7 @@ def oracle_save_polars( conn.commit() -@wrap_result(code_on_error=1, logger=logger) +@wrap_result(code_on_error=1) def KPI_calculation( conn: OracleConnection, ) -> Status: -- 2.34.1 From 731d3419c490fd3c497f8fb07cddc11db33b08e2 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 08:58:15 +0200 Subject: [PATCH 44/48] fix logging message --- src/wattanalyse/external_interface.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py index cbd5df4..3414173 100644 --- a/src/wattanalyse/external_interface.py +++ b/src/wattanalyse/external_interface.py @@ -82,7 +82,12 @@ def pipeline_KPI_calculation( return return_code logger_database.info("Successfully saved metadata to database") - logger_pipeline.info("Pipeline >%s< ended successfully", PIPELINE_NAME) + if export_status_code == 0: + logger_pipeline.info("Pipeline >%s< ended successfully", PIPELINE_NAME) + else: + logger_pipeline.info( + "Pipeline >%s< ended with error. 
Code: %d", PIPELINE_NAME, export_status_code + ) logger_pipeline.info( "Pipeline >%s<: execution duration was %.4f seconds", PIPELINE_NAME, -- 2.34.1 From 15ec3721214bfb2be86b04508b09354fe600e813 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 09:01:52 +0200 Subject: [PATCH 45/48] fix logging order --- src/wattanalyse/external_interface.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/wattanalyse/external_interface.py b/src/wattanalyse/external_interface.py index 3414173..473a90b 100644 --- a/src/wattanalyse/external_interface.py +++ b/src/wattanalyse/external_interface.py @@ -56,6 +56,13 @@ def pipeline_KPI_calculation( return_code = 1 export_status_code = internal_pipe_state.code + if export_status_code == 0: + logger_pipeline.info("Pipeline >%s< ended successfully", PIPELINE_NAME) + else: + logger_pipeline.info( + "Pipeline >%s< ended with error. Code: %d", PIPELINE_NAME, export_status_code + ) + logger_database.info("Prepare collected metadata...") dur_sek = (t2 - t1) / 1e9 dur = dopt_dt.timedelta_from_val(dur_sek, dopt_dt.TimeUnitsTimedelta.SECONDS) @@ -82,12 +89,6 @@ def pipeline_KPI_calculation( return return_code logger_database.info("Successfully saved metadata to database") - if export_status_code == 0: - logger_pipeline.info("Pipeline >%s< ended successfully", PIPELINE_NAME) - else: - logger_pipeline.info( - "Pipeline >%s< ended with error. 
Code: %d", PIPELINE_NAME, export_status_code - ) logger_pipeline.info( "Pipeline >%s<: execution duration was %.4f seconds", PIPELINE_NAME, -- 2.34.1 From 6de723ecabf90534dac7964b007cdcb94ac29514 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 09:04:04 +0200 Subject: [PATCH 46/48] add changelog --- docs/Versionshistorie.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Versionshistorie.md b/docs/Versionshistorie.md index 745dabf..8772415 100644 --- a/docs/Versionshistorie.md +++ b/docs/Versionshistorie.md @@ -1,5 +1,5 @@ # Versionshistorie (Changelog) -## XX.06.2026 (Version: v0.1.1dev) (Tag: ExtTest-2026XXXX) +## 16.06.2026 (Version: v0.1.1dev) (Tag: ExtTest-20260616) - initiale Version für erste Feedback-Schleife -- 2.34.1 From 0144f63339b48023f5659e74b1df395443098db5 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 09:23:13 +0200 Subject: [PATCH 47/48] small name fix in archive creation --- scripts/build.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build.ps1 b/scripts/build.ps1 index 1e3c6f9..8de4e5f 100644 --- a/scripts/build.ps1 +++ b/scripts/build.ps1 @@ -189,7 +189,7 @@ else { } Write-Output "Packaging whole standalone environment in a ZIP file" -$dest_path = Join-Path -Path $DEPLOYMENT_PATH -ChildPath "dopt_nafka_wce-crm_v$version.zip" +$dest_path = Join-Path -Path $DEPLOYMENT_PATH -ChildPath "dopt_wattana_data-analytics_v$version.zip" $compress = @{ Path = "$ENV_PATH\**" CompressionLevel = "Optimal" -- 2.34.1 From 78cc8d203ab0a5d205acb1b9b0a6e1e093167c57 Mon Sep 17 00:00:00 2001 From: foefl Date: Tue, 16 Jun 2026 09:23:19 +0200 Subject: [PATCH 48/48] bump version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eff43ae..a4ee869 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "wattanalyse" -version = "0.1.1dev5" +version = "0.1.1dev8" description = "analysis of 
production state messages obtained from customers" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, @@ -71,7 +71,7 @@ directory = "reports/coverage" [tool.bumpversion] -current_version = "0.1.1dev5" +current_version = "0.1.1dev8" parse = """(?x) (?P<major>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\. -- 2.34.1