diff --git a/pdm.lock b/pdm.lock index efcf931..cc8b8e4 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,11 +5,72 @@ groups = ["default", "dev", "lint", "nb", "tests"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:19e06fc0367f0208bc7ff972401f8297331c48c0a153b393b4d92e595eabc852" +content_hash = "sha256:8f138c1407dc86bdf19aa5a6ce42cb158c9b9963fbb8cf7f4c85f453799f5a10" [[metadata.targets]] requires_python = ">=3.11" +[[package]] +name = "adbc-driver-manager" +version = "1.11.0" +requires_python = ">=3.10" +summary = "A generic entrypoint for ADBC drivers." +groups = ["default"] +dependencies = [ + "typing-extensions", +] +files = [ + {file = "adbc_driver_manager-1.11.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:3eb5d6dd94d14e9f1abd340b0bc04bde6d16d692f598ada5ceef3186c6a90eaf"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:07469c219d79645a6b2f3df0b8c176c0abbaf7d2b20725e15531735972f65db1"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8863a841ac362c26217e9ed69d1d1eb7add881c452382676c3fd4f19b562186c"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4641430ca41c1b570083aeb7771766fa51d963ac5a4bb11b208b51b96ed7f58"}, + {file = "adbc_driver_manager-1.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:c6efa733bf219582bf0f9402f7a8034b113555b1edf178e4743caa69a736ddc5"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:08d3008cd6fee3d27b6265864b134902baacf00cd441dc750fb738615290004f"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:08f0a6e8030676b7fda5ffe095c33a819a15114541089b8d0fa8281d2dee2079"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb33beabe3a697a54ffcc9593b94705688f33b64741a17f7bdd37690f85a0ecf"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dba5306b90932e8af5e4a71756eec2f717f5fe283b1ad7cc7fb094fe4ef3f0f9"}, + {file = "adbc_driver_manager-1.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5e9962e6e737e1c028cacb38c08141a8730f5c90cd397537413012ece901cc5"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:300b07f4c1113b113e18dddcb9d96dd8b84f09fa35f8e4e3e8a2f112f291142c"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f577be7c4730a43bae08f88105317d7e1d519d02a94aaa98da694358084a4735"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c980f81730752cdb98881357c238e87110e1810e4a69c7627c2211bd576b6230"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cbc93830500a2f0db7b32501a4f88678fac14b9a9921d94d919439a5b65099e6"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c27cff12cdf074d9052bf8c4775ed1904053189a70497fa7b5746f0dbe326d8"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-macosx_10_15_x86_64.whl", hash = "sha256:d8fdeb10ea464dce88feffe23f35cc37a44ac6bad4e90e793416a3c60afb354f"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cc565ed5d9f8c7974bbaff60c30c8330dae5a903592618a303291db4227b3d54"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9523ca4e8943aa7b43958762bc9d1cb0b5355cd84855359a91c54a4bae9a75df"}, + {file = "adbc_driver_manager-1.11.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:54dc142fc8065e13c6347fb3f2acb48430e3cab6863f27276a2b53594cc055b5"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6fcd6fe4f82f8f2fc83948ed2b0b549d0831253d449f5734603cc03850e4f47"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4b4293fc88d0683b6ea9fe1b7d7498c5ae9b4f53a93369c760cfa753a22039c0"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a2d6d1971ce104e41e3969afee8d5782ebcb06bf496606aa4eed2005fbead43"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24ef0e33bab3b0480e85d954f88664b578ea045efdc644681c5a487982818e5f"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:830efd3f212a6360ad66c09fd95171a26a1006a51c893f72238dfb50e0f35e13"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b5e97d4cb3f5a798e18c802dd1f3d1bf7b77d763cdc707ac295907bf223d1ae8"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2e4e155cae12667aa383750d879e177ada3ab0c351f8306d96e33fbe6949f6f4"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfb736661f95eb8fc185a4b9951b2e61734633c7448e8d3d937e93ef1d9e5c08"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e87a6f2b70baf21d3c52b280a17e2e8516197a4670b9a080a07dd255f2ab6e9d"}, + {file = "adbc_driver_manager-1.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b853e613c6c8afbe7a3fcea0098c88b935a4d1e1b046813aed1fe7363c7b8fc7"}, + {file = "adbc_driver_manager-1.11.0.tar.gz", hash = "sha256:c64aaabeb5810109ab3d2961008f1b014e9f2d87b3df4416c2a080a40237af50"}, +] + +[[package]] +name = "adbc-driver-sqlite" +version = "1.11.0" +requires_python = ">=3.10" +summary = "An ADBC driver for working with SQLite." +groups = ["default"] +dependencies = [ + "adbc-driver-manager", + "importlib-resources>=1.3", +] +files = [ + {file = "adbc_driver_sqlite-1.11.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:d227ab10a56b0b5f106d9f85f3f8bce8b75c2b34a28ad962b71e8a3a0b6dc0ed"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:98fd35e14c85e44eeffae1ef9a56466169719ad7bd15e314c2ff88c342e50d9d"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c28401c31d775d5506ed1188b73de9f7ed1a292927157f2171c7dca67f6cb9e"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:2bcab0cfe9380c1691cf995430f8b0b56bf8b9875d8fd9d69a5aecf2b72159e6"}, + {file = "adbc_driver_sqlite-1.11.0-py3-none-win_amd64.whl", hash = "sha256:e41246c5bf929bb5d768227606eb10add420171134ae6ba7928136376f5842fd"}, + {file = "adbc_driver_sqlite-1.11.0.tar.gz", hash = "sha256:a4c6b4962610f7cd67cd754c42dd74e18a2c11fabeec9488c5501d73ae62dc62"}, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -978,6 +1039,20 @@ files = [ {file = "idna-3.18.tar.gz", hash = "sha256:ffb385a7e039654cef1ab9ef32c6fafe283c0c0467bba1d9029738ce4a14a848"}, ] +[[package]] +name = "importlib-resources" +version = "7.1.0" +requires_python = ">=3.10" +summary = "Read resources from Python packages" +groups = ["default"] +dependencies = [ + "zipp>=3.1.0; python_version < \"3.10\"", +] +files = [ + {file = "importlib_resources-7.1.0-py3-none-any.whl", hash = "sha256:1bd7b48b4088eddb2cd16382150bb515af0bd2c70128194392725f82ad2c96a1"}, + {file = "importlib_resources-7.1.0.tar.gz", hash = "sha256:0722d4c6212489c530f2a145a34c0a7a3b4721bc96a15fada5930e2a0b760708"}, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -1859,6 +1934,58 @@ files = [ {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, ] +[[package]] +name = "pyarrow" +version = "24.0.0" +requires_python = ">=3.10" +summary = "Python library for Apache Arrow" +groups = ["default"] +files = [ + {file = "pyarrow-24.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b0e131f880cda8d04e076cee175a46fc0e8bc8b65c99c6c09dff6669335fde74"}, + {file = "pyarrow-24.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:1b2fe7f9a5566401a0ef2571f197eb92358925c1f0c8dba305d6e43ea0871bb3"}, + {file = "pyarrow-24.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:0b3537c00fb8d384f15ac1e79b6eb6db04a16514c8c1d22e59a9b95c8ba42868"}, + {file = "pyarrow-24.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:14e31a3c9e35f1ab6356c6378f6f72830e6d2d5f1791df3774a7b097d18a6a1e"}, + {file = "pyarrow-24.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b7d9a514e73bc42711e6a35aaccf3587c520024fe0a25d830a1a8a27c15f4f57"}, + {file = "pyarrow-24.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b196eb3f931862af3fa84c2a253514d859c08e0d8fe020e07be12e75a5a9780c"}, + {file = "pyarrow-24.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:35405aecb474e683fb36af650618fd5340ee5471fc65a21b36076a18bbc6c981"}, + {file = "pyarrow-24.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:6233c9ed9ab9d1db47de57d9753256d9dcffbf42db341576099f0fd9f6bf4810"}, + {file = "pyarrow-24.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:f7616236ec1bc2b15bfdec22a71ab38851c86f8f05ff64f379e1278cf20c634a"}, + {file = "pyarrow-24.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1617043b99bd33e5318ae18eb2919af09c71322ef1ca46566cdafc6e6712fb66"}, + {file = "pyarrow-24.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6165461f55ef6314f026de6638d661188e3455d3ec49834556a0ebbdbace18bb"}, + {file = "pyarrow-24.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3b13dedfe76a0ad2d1d859b0811b53827a4e9d93a0bcb05cf59333ab4980cc7e"}, + {file = "pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6"}, + {file = "pyarrow-24.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:295f0a7f2e242dabd513737cf076007dc5b2d59237e3eca37b05c0c6446f3826"}, + {file = "pyarrow-24.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:02b001b3ed4723caa44f6cd1af2d5c86aa2cf9971dacc2ffa55b21237713dfba"}, + {file = "pyarrow-24.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:04920d6a71aabd08a0417709efce97d45ea8e6fb733d9ca9ecffb13c67839f68"}, + {file = "pyarrow-24.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:a964266397740257f16f7bb2e4f08a0c81454004beab8ff59dd531b73610e9f2"}, + {file = "pyarrow-24.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6f066b179d68c413374294bc1735f68475457c933258df594443bb9d88ddc2a0"}, + {file = "pyarrow-24.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1183baeb14c5f587b1ec52831e665718ce632caab84b7cd6b85fd44f96114495"}, + {file = "pyarrow-24.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:806f24b4085453c197a5078218d1ee08783ebbba271badd153d1ae22a3ee804f"}, + {file = "pyarrow-24.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:e4505fc6583f7b05ab854934896bcac8253b04ac1171a77dfb73efef92076d91"}, + {file = "pyarrow-24.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:1a4e45017efbf115032e4475ee876d525e0e36c742214fbe405332480ecd6275"}, + {file = "pyarrow-24.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:7986f1fa71cee060ad00758bcc79d3a93bab8559bf978fab9e53472a2e25a17b"}, + {file = "pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d3e0b61e8efb24ed38898e5cdc5fffa9124be480008d401a1f8071500494ae42"}, + {file = "pyarrow-24.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:55a3bc1e3df3b5567b7d27ef551b2283f0c68a5e86f1cd56abc569da4f31335b"}, + {file = "pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:641f795b361874ac9da5294f8f443dfdbee355cf2bd9e3b8d97aaac2306b9b37"}, + {file = "pyarrow-24.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8adc8e6ce5fccf5dc707046ae4914fd537def529709cc0d285d37a7f9cd442ca"}, + {file = "pyarrow-24.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:9b18371ad2f44044b81a8d23bc2d8a9b6a6226dca775e8e16cfee640473d6c5d"}, + {file = "pyarrow-24.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:1cc9057f0319e26333b357e17f3c2c022f1a83739b48a88b25bfd5fa2dc18838"}, + {file = "pyarrow-24.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e6f1278ee4785b6db21229374a1c9e54ec7c549de5d1efc9630b6207de7e170b"}, + {file = "pyarrow-24.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:adbbedc55506cbdabb830890444fb856bfb0060c46c6f8026c6c2f2cf86ae795"}, + {file = "pyarrow-24.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:ae8a1145af31d903fa9bb166824d7abe9b4681a000b0159c9fb99c11bc11ad26"}, + {file = "pyarrow-24.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d7027eba1df3b2069e2e8d80f644fa0918b68c46432af3d088ddd390d063ecde"}, + {file = "pyarrow-24.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e56a1ffe9bf7b727432b89104cc0849c21582949dd7bdcb34f17b2001a351a76"}, + {file = "pyarrow-24.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:38be1808cdd068605b787e6ca9119b27eb275a0234e50212c3492331680c3b1e"}, + {file = "pyarrow-24.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:418e48ce50a45a6a6c73c454677203a9c75c966cb1e92ca3370959185f197a05"}, + {file = "pyarrow-24.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:2f16197705a230a78270cdd4ea8a1d57e86b2fdcbc34a1f6aebc72e65c986f9a"}, + {file = "pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fb24ac194bfc5e86839d7dcd52092ee31e5fe6733fe11f5e3b06ef0812b20072"}, + {file = "pyarrow-24.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9700ebd9a51f5895ce75ff4ac4b3c47a7d4b42bc618be8e713e5d56bacf5f931"}, + {file = "pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d8ddd2768da81d3ee08cfea9b597f4abb4e8e1dc8ae7e204b608d23a0d3ab699"}, + {file = "pyarrow-24.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:61a3d7eaa97a14768b542f3d284dc6400dd2470d9f080708b13cd46b6ae18136"}, + {file = "pyarrow-24.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:c91d00057f23b8d353039520dc3a6c09d8608164c692e9f59a175a42b2ae0c19"}, + {file = "pyarrow-24.0.0.tar.gz", hash = "sha256:85fe721a14dd823aca09127acbb06c3ca723efbd436c004f16bca601b04dcc83"}, +] + [[package]] name = "pycparser" version = "3.0" diff --git a/prototypes/01_first-look_20260603.py b/prototypes/01_first-look_20260603.py index b7591cb..afa61a5 100644 --- a/prototypes/01_first-look_20260603.py +++ b/prototypes/01_first-look_20260603.py @@ -1,6 +1,7 @@ # %% import datetime import enum +import importlib import json from pathlib import Path from typing import Any @@ -10,12 +11,14 @@ import sqlalchemy as sql from wattanalyse import db +importlib.reload(db) + # %% PROJECT_BASE = Path(__file__).parents[1] -DATA = PROJECT_BASE / "data" -assert DATA.exists() +DATA_PTH = PROJECT_BASE / "data" +assert DATA_PTH.exists() # %% -data_t1 = DATA / "PSM/20260507" +data_t1 = DATA_PTH / "PSM/20260507" assert data_t1.exists() # %% data_t1_jobs = data_t1 / "MIS-Auträge_22.csv" @@ -24,7 +27,7 @@ data_t1_PSM = data_t1 / "Produktionsstandsmeldungen.csv" assert data_t1_PSM.exists() # %% # // MIS-Aufträge -pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") +# pl.read_csv(data_t1_jobs, encoding="windows-1252", separator=";") # %% @@ -35,46 +38,26 @@ class QualityPsm(enum.StrEnum): PLAUSIBEL = enum.auto() -# %% -schema_PSM: dict[str, type[pl.DataType]] = { - "VK Auftrag": pl.UInt32, - "Artikelbez.": pl.String, - "Auftragsmenge": pl.UInt32, - "Kunde": pl.String, - "PA": pl.UInt64, - "PA Pos": pl.UInt32, - "PSM gemeldet am": pl.Datetime, - "Konfektionär": pl.String, - "Artikelnr.": pl.String, - "LT Kunde bestätigt": pl.Date, - "Export Ist": pl.Date, - "1.bestät. Import Konfektionär": pl.Date, - "Import Ist": pl.Date, - "Ablief.(Import Ist+Transport)": pl.Date, - "Wareneingang am": pl.Date, - "Wareneingang geprüft": pl.String, - "Täglicher Ausstoss": pl.Int64, - "Zuschnitt am": pl.Date, - "Teile in Zuschnitt": pl.UInt64, - "Teile im Nähband": pl.UInt64, - "Fertigware aus Nähband": pl.UInt64, - "Teile kontrolliert": pl.UInt64, - "Teile verpackt in Karton": pl.UInt64, - "Anzahl Bänder": pl.UInt16, - "Anzahl Näher": pl.UInt16, - "Arbeitsstunden pro Näher": pl.UInt8, - "Anzahl Arbeitstage pro Woche": pl.UInt8, - "Blockauftrag": pl.String, +PSM_SCORES: dict[QualityPsm, int] = { + QualityPsm.FEHLEND: 1, + QualityPsm.UNPLAUSIBEL: 0, + QualityPsm.PLAUSIBEL: 2, } -# psm = pl.read_csv(data_t1_PSM, encoding="windows-1252", separator=";") +# %% psm = pl.read_csv( data_t1_PSM, encoding="windows-1252", separator=";", - schema_overrides=schema_PSM, + schema_overrides=db.extern_prod_order_t_schema, null_values=["01.01.1111 00:00:00"], ) + +# %% +# // save data as raw +target = DATA_PTH / "PSM_20260507.arrow" +psm.write_ipc(target) + # %% psm.filter(pl.col("Konfektionär").str.contains("MEMTEKS")) # %% @@ -201,11 +184,6 @@ df_marked = tmp_1.with_columns( .alias("Produktionsstückzahlen_valide") ) -PSM_SCORES: dict[QualityPsm, int] = { - QualityPsm.FEHLEND: 1, - QualityPsm.UNPLAUSIBEL: 0, - QualityPsm.PLAUSIBEL: 2, -} df_score = df_marked.with_columns( pl.when(pl.col("is_empty")) @@ -241,10 +219,10 @@ renaming_scheme: dict[str, str] = { "Teile verpackt in Karton": "Prod-EP50_Historie", } -KEYS = ["PA", "PA_Pos"] +PRIM_KEYS = ["PA", "PA_Pos"] tmp = tmp.rename(renaming_scheme) -tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False) +tmp = tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) plausi_features_all = [ @@ -325,10 +303,10 @@ tmp = tmp.with_columns( # need additional "alias" on "Prod-Start_Historie" # duration since last report in days -tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( +tmp = tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( ( pl.col("Meldezeitpunkt_Historie") - - pl.col("Meldezeitpunkt_Historie").shift(1).over(KEYS) + - pl.col("Meldezeitpunkt_Historie").shift(1).over(PRIM_KEYS) ) .dt.total_days() .alias("Tage_zu_letzter_PSM_Historie") @@ -341,9 +319,9 @@ tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_column # aggregate "Import-Ist_Historie" and use "drop_nulls" "last" # need additional "alias" on "Import-Ist_Historie" -tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( +tmp = tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_columns( # Prüfen: Ist das aktuelle Datum ungleich dem vorherigen Datum derselben Position? - (pl.col("Import-Ist_Historie") != pl.col("Import-Ist_Historie").shift(1).over(KEYS)) + (pl.col("Import-Ist_Historie") != pl.col("Import-Ist_Historie").shift(1).over(PRIM_KEYS)) .fill_null(False) # Der allererste Eintrag hat keinen Vorgänger -> Ist keine Änderung .alias("Import-Ist_geaendert") ) @@ -357,8 +335,8 @@ tmp = tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False).with_column # whole aggregates see DB schema tmp = ( - tmp.sort(KEYS + ["Meldezeitpunkt_Historie"], descending=False) - .group_by(KEYS + ["Konfektionär"]) + tmp.sort(PRIM_KEYS + ["Meldezeitpunkt_Historie"], descending=False) + .group_by(PRIM_KEYS + ["Konfektionär"]) .agg( pl.col("Meldezeitpunkt_Historie"), pl.col("Liefertermin_Soll").drop_nulls().first(), @@ -390,27 +368,41 @@ tmp LOWER_BOUND_DATE_DEVIATION = 0 UPPER_BOUND_DATE_DEVIATION = 0 -tmp = tmp.with_columns( - pl.when( - (pl.col("Liefertermin_Ist").is_not_null()) - & (pl.col("Liefertermin_Soll").is_not_null()) +tmp = ( + tmp.with_columns( + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) + & (pl.col("Liefertermin_Soll").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) + .otherwise(None) + .alias("Terminabweichung_Anzahl_Tage") + ) + .with_columns( + pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminunterschreitung"), + pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) + .then(pl.lit(True)) + .otherwise(pl.lit(False)) + .alias("Terminüberschreitung"), + pl.when( + (pl.col("Liefertermin_Ist").is_not_null()) & (pl.col("Prod-Start").is_not_null()) + ) + .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) + .otherwise(None) + .alias("Durchlaufzeit_Anzahl_Tage"), + ) + .with_columns( + pl.when( + (pl.col("Durchlaufzeit_Anzahl_Tage").is_not_null()) + & (pl.col("Durchlaufzeit_Anzahl_Tage") < 0) + ) + .then(None) + .otherwise(pl.col("Durchlaufzeit_Anzahl_Tage")) + .alias("Durchlaufzeit_Anzahl_Tage") ) - .then((pl.col("Liefertermin_Ist") - pl.col("Liefertermin_Soll")).dt.total_days()) - .otherwise(None) - .alias("Terminabweichung_Anzahl_Tage") -).with_columns( - pl.when(pl.col("Terminabweichung_Anzahl_Tage") < LOWER_BOUND_DATE_DEVIATION) - .then(pl.lit(True)) - .otherwise(pl.lit(False)) - .alias("Terminunterschreitung"), - pl.when(pl.col("Terminabweichung_Anzahl_Tage") > UPPER_BOUND_DATE_DEVIATION) - .then(pl.lit(True)) - .otherwise(pl.lit(False)) - .alias("Terminüberschreitung"), - pl.when((pl.col("Liefertermin_Ist").is_not_null()) & (pl.col("Prod-Start").is_not_null())) - .then((pl.col("Liefertermin_Ist") - pl.col("Prod-Start")).dt.total_days()) - .otherwise(None) - .alias("Durchlaufzeit_Anzahl_Tage"), ) tmp @@ -419,28 +411,105 @@ tmp # // dump to database -def _parse_to_json(value: Any) -> str: +def _json_default( + value: Any, +) -> str: if isinstance(value, (datetime.date, datetime.datetime)): return value.isoformat() else: raise TypeError -parsed_lists = tmp.with_columns( +def _parse_to_json( + x: pl.Series | None, +) -> str | None: + if x is None: + return None + + return json.dumps(x.to_list(), default=_json_default) + + +staging_data = tmp.with_columns( pl.col(pl.List) .map_elements( - lambda x: json.dumps(x.to_list(), default=_parse_to_json) if x is not None else None, + _parse_to_json, return_dtype=pl.String, ) .name.keep() ) -parsed_lists +staging_data # %% -parsed_lists["Import-Ist_Historie"].item(0) +rows_inserted = staging_data.write_database( + "Produktionsauftrag-Einzelsicht_Staging", + connection=db.DB_URI, + engine="adbc", + if_table_exists="replace", +) +assert rows_inserted == staging_data.height +# %% # TODO make UPSERT with staging +all_columns = staging_data.columns +update_columns = [col for col in all_columns if col not in PRIM_KEYS] + +sql_column_list_str = ", ".join([f'"{c}"' for c in all_columns]) +sql_pk_list_str = ", ".join([f'"{c}"' for c in PRIM_KEYS]) +sql_update_rules_str = ", ".join([f'"{c}" = EXCLUDED."{c}"' for c in update_columns]) + +upsert_sql = f""" +INSERT INTO "Produktionsauftrag-Einzelsicht" ({sql_column_list_str}) +SELECT {sql_column_list_str} FROM "Produktionsauftrag-Einzelsicht_Staging" WHERE 1=1 +ON CONFLICT({sql_pk_list_str}) DO UPDATE SET + {sql_update_rules_str}; +""" + +# %% +with db.ENGINE_INTERNAL.begin() as conn: + res = conn.execute(sql.text(upsert_sql)) + conn.execute(sql.text('DROP TABLE IF EXISTS "Produktionsauftrag-Einzelsicht_Staging";')) + +# %% +# ** test if loaded correctly +stmt = sql.select(db.intern_prod_order_t) + +with db.ENGINE_INTERNAL.connect() as conn: + ret = conn.execute(stmt) + +ret.fetchall() + +# %% +# // database loading + +df = pl.read_database_uri( + 'SELECT * FROM "Produktionsauftrag-Einzelsicht"', + uri=db.DB_URI, + engine="adbc", + schema_overrides=db.intern_prod_order_t_schema, +) + +list_cols_to_type: dict[str, type[pl.DataType]] = { + "Meldezeitpunkt_Historie": pl.Datetime, + "Bestaetigter-Import_Historie": pl.Date, + "Import-Ist_Historie": pl.Date, + "Tage_zu_letzter_PSM_Historie": pl.Int64, + "Prod-EP10_Historie": pl.UInt64, + "Prod-EP20_Historie": pl.UInt64, + "Prod-EP30_Historie": pl.UInt64, + "Prod-EP40_Historie": pl.UInt64, + "Prod-EP50_Historie": pl.UInt64, + "Prod-Qualitaet_Historie": pl.Int32, + "Prod-Start_Historie": pl.Date, +} + +list_col_parse_conds = { + col: pl.col(col).str.json_decode(pl.List(list_type)) + for col, list_type in list_cols_to_type.items() +} + +df.with_columns(**list_col_parse_conds) + ######################################################## # %% @@ -450,6 +519,9 @@ tmp_1 = tmp_1.with_columns( ) tmp_1 +# %% + + # %% tmp_1 = tmp.with_columns( # Aktuelles Datum minus verschobenes Datum (isoliert je Auftrag) diff --git a/pyproject.toml b/pyproject.toml index 4927bdf..9978a19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "analysis of production state messages obtained from customers" authors = [ {name = "d-opt GmbH, resp. Florian Förster", email = "f.foerster@d-opt.com"}, ] -dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6"] +dependencies = ["polars>=1.41.2", "sqlalchemy[asyncio]>=2.0.50", "python-dotenv>=1.2.2", "dopt-basics>=0.2.6", "adbc-driver-sqlite>=1.11.0", "pyarrow>=24.0.0"] requires-python = ">=3.11" readme = "README.md" license = {text = "LicenseRef-Proprietary"}