From 64b0216c626fc3f37be291313492946a8f45d3f6 Mon Sep 17 00:00:00 2001
From: foefl
Date: Thu, 2 Apr 2026 16:28:27 +0200
Subject: [PATCH] ONNX optimisations

---
 ONNX_quantize.ps1 |  53 ++++++++++++++
 pdm.lock          | 169 +++++++++++++++++++++-------------
 pyproject.toml    |   3 +-
 3 files changed, 153 insertions(+), 72 deletions(-)
 create mode 100644 ONNX_quantize.ps1

diff --git a/ONNX_quantize.ps1 b/ONNX_quantize.ps1
new file mode 100644
index 0000000..a1ed7a8
--- /dev/null
+++ b/ONNX_quantize.ps1
@@ -0,0 +1,53 @@
+# Export the BAAI/bge-reranker-v2-m3 model to ONNX and quantize the result.
+# Run via PowerShell from a directory whose sibling "..\models" holds the model cache.
+# Needs "optimum[onnxruntime]" in the PDM environment. Exits with code 1 on any failure.
+
+$TARGET_PTH = "B:\projects\MOP-TOM\models\models--BAAI--bge-reranker-v2-m3\snapshots\953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e"
+$MODEL_NAME = 'BAAI/bge-reranker-v2-m3' # informational only; not passed to optimum-cli
+$base = Get-Location
+# No -Resolve here: on a missing directory it errors and yields no path, so the
+# check below could never print the location it was looking for.
+$model_path = Join-Path $base "..\models"
+
+Write-Host "Path to base: $base"
+
+if (-not (Test-Path -Path $model_path -PathType Container)) {
+    Write-Host "Model directory not found: $model_path"
+    exit 1
+}
+$model_path = (Resolve-Path -Path $model_path).Path
+Write-Host "Path to model folder: $model_path"
+Write-Host "Model directory found"
+
+if (-not (Test-Path -Path $TARGET_PTH)) {
+    Write-Host "Model snapshot not found: $TARGET_PTH"
+    exit 1
+}
+
+$MODEL_EXPORT = Join-Path $model_path "onnx_test"
+$MODEL_EXPORT_QUANT = Join-Path $MODEL_EXPORT "quant"
+
+# Point the Hugging Face / sentence-transformers caches at the local model folder.
+$Env:SENTENCE_TRANSFORMERS_HOME = $model_path
+$Env:TRANSFORMERS_CACHE = $model_path
+$Env:HF_HOME = $model_path
+
+# Step 1: export to ONNX with O1 graph optimization. Set to $false to skip.
+if ($true) {
+    pdm run optimum-cli export onnx -m $TARGET_PTH --task text-classification --optimize O1 $MODEL_EXPORT
+    # pdm run optimum-cli export onnx -m $TARGET_PTH --task text-classification $MODEL_EXPORT
+    if ($LASTEXITCODE -ne 0) {
+        Write-Host "ONNX export failed (exit code: $LASTEXITCODE)"
+        exit 1
+    }
+}
+
+# Step 2: quantize the exported model for AVX2 CPUs. Set to $false to skip.
+if ($true) {
+    pdm run optimum-cli onnxruntime quantize --onnx_model $MODEL_EXPORT -o $MODEL_EXPORT_QUANT --avx2
+    # pdm run optimum-cli onnxruntime quantize --onnx_model $TARGET_PTH -o $MODEL_EXPORT_QUANT --avx2
+    if ($LASTEXITCODE -ne 0) {
+        Write-Host "ONNX quantization failed (exit code: $LASTEXITCODE)"
+        exit 1
+    }
+}
diff --git a/pdm.lock b/pdm.lock
index 37920fb..5d50a85 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -5,7 +5,7 @@
 groups = ["default", "data", "dev", "lint", "nb", "tests"]
 strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
-content_hash = "sha256:a00615420189e1aaf1b5740745f49e22f1d24a168c4a4b5eb32f658a83d0c333"
+content_hash
= "sha256:66d54cb438ad288d6972ddeb9674920ffcc236de9765927820687e21bf95f6bc" [[metadata.targets]] requires_python = ">=3.11,<3.14" @@ -30,17 +30,6 @@ files = [ {file = "accelerate-1.13.0.tar.gz", hash = "sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236"}, ] -[[package]] -name = "annotated-doc" -version = "0.0.4" -requires_python = ">=3.8" -summary = "Document parameters, class attributes, return types, and variables inline, with Annotated." -groups = ["default", "dev"] -files = [ - {file = "annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"}, - {file = "annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4"}, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -60,7 +49,7 @@ name = "anyio" version = "4.12.1" requires_python = ">=3.9" summary = "High-level concurrency and networking framework on top of asyncio or Trio" -groups = ["default", "dev", "nb"] +groups = ["dev", "nb"] dependencies = [ "exceptiongroup>=1.0.2; python_version < \"3.11\"", "idna>=2.8", @@ -375,7 +364,7 @@ name = "charset-normalizer" version = "3.4.4" requires_python = ">=3.7" summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-groups = ["nb"] +groups = ["default", "dev", "nb"] files = [ {file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"}, {file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"}, @@ -450,7 +439,7 @@ name = "click" version = "8.3.1" requires_python = ">=3.10" summary = "Composable command line interface toolkit" -groups = ["default", "dev"] +groups = ["dev"] dependencies = [ "colorama; platform_system == \"Windows\"", ] @@ -909,7 +898,7 @@ files = [ name = "flatbuffers" version = "25.12.19" summary = "The FlatBuffers serialization format for Python" -groups = ["default"] +groups = ["default", "dev"] files = [ {file = "flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4"}, ] @@ -944,7 +933,7 @@ name = "h11" version = "0.16.0" requires_python = ">=3.8" summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -groups = ["default", "dev", "nb"] +groups = ["dev", "nb"] files = [ {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, @@ -956,7 +945,7 @@ version = "1.4.0" requires_python = ">=3.8" summary = "Fast transfer of large files with the Hugging Face Hub." 
groups = ["default", "dev"] -marker = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" +marker = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" files = [ {file = "hf_xet-1.4.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:76725fcbc5f59b23ac778f097d3029d6623e3cf6f4057d99d1fce1a7e3cff8fc"}, {file = "hf_xet-1.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:76f1f73bee81a6e6f608b583908aa24c50004965358ac92c1dc01080a21bcd09"}, @@ -982,7 +971,7 @@ name = "httpcore" version = "1.0.9" requires_python = ">=3.8" summary = "A minimal low-level HTTP client." -groups = ["default", "dev", "nb"] +groups = ["dev", "nb"] dependencies = [ "certifi", "h11>=0.16", @@ -997,7 +986,7 @@ name = "httpx" version = "0.28.1" requires_python = ">=3.8" summary = "The next generation HTTP client." -groups = ["default", "dev", "nb"] +groups = ["dev", "nb"] dependencies = [ "anyio", "certifi", @@ -1011,24 +1000,23 @@ files = [ [[package]] name = "huggingface-hub" -version = "1.6.0" -requires_python = ">=3.9.0" +version = "0.36.2" +requires_python = ">=3.8.0" summary = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" groups = ["default", "dev"] dependencies = [ - "filelock>=3.10.0", + "filelock", "fsspec>=2023.5.0", - "hf-xet<2.0.0,>=1.3.2; platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"", - "httpx<1,>=0.23.0", + "hf-xet<2.0.0,>=1.1.3; platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"", "packaging>=20.9", "pyyaml>=5.1", + "requests", "tqdm>=4.42.1", - "typer", - "typing-extensions>=4.1.0", + "typing-extensions>=3.7.4.3", ] files = [ - 
{file = "huggingface_hub-1.6.0-py3-none-any.whl", hash = "sha256:ef40e2d5cb85e48b2c067020fa5142168342d5108a1b267478ed384ecbf18961"}, - {file = "huggingface_hub-1.6.0.tar.gz", hash = "sha256:d931ddad8ba8dfc1e816bf254810eb6f38e5c32f60d4184b5885662a3b167325"}, + {file = "huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270"}, + {file = "huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a"}, ] [[package]] @@ -1576,7 +1564,7 @@ name = "markdown-it-py" version = "4.0.0" requires_python = ">=3.10" summary = "Python port of markdown-it. Markdown parsing, done right!" -groups = ["default", "dev"] +groups = ["dev"] dependencies = [ "mdurl~=0.1", ] @@ -1680,7 +1668,7 @@ name = "mdurl" version = "0.1.2" requires_python = ">=3.7" summary = "Markdown URL utilities" -groups = ["default", "dev"] +groups = ["dev"] files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, @@ -2171,7 +2159,7 @@ name = "onnxruntime" version = "1.24.4" requires_python = ">=3.11" summary = "ONNX Runtime is a runtime accelerator for Machine Learning models" -groups = ["default"] +groups = ["default", "dev"] dependencies = [ "flatbuffers", "numpy>=1.21.6", @@ -2199,6 +2187,72 @@ files = [ {file = "onnxruntime-1.24.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c74e268dc808e61e63784d43f9ddcdaf50a776c2819e8bd1d1b11ef64bf7e36"}, ] +[[package]] +name = "optimum" +version = "2.1.0" +requires_python = ">=3.9.0" +summary = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality." 
+groups = ["dev"] +dependencies = [ + "huggingface-hub>=0.8.0", + "numpy", + "packaging", + "torch>=1.11", + "transformers>=4.29", +] +files = [ + {file = "optimum-2.1.0-py3-none-any.whl", hash = "sha256:bc3af32e1236a9b2c2ca1d27ed9d3ab1b6591e24c6bcd47f9671a8198a30ea88"}, + {file = "optimum-2.1.0.tar.gz", hash = "sha256:0a2a13f91500e41d34863ffdb08fcb886b3ce68a84a386e59653e3064a45dd4b"}, +] + +[[package]] +name = "optimum-onnx" +version = "0.1.0" +requires_python = ">=3.9.0" +summary = "Optimum ONNX is an interface between the Hugging Face libraries and ONNX / ONNX Runtime" +groups = ["dev"] +dependencies = [ + "onnx", + "optimum~=2.1.0", + "transformers<4.58.0,>=4.36", +] +files = [ + {file = "optimum_onnx-0.1.0-py3-none-any.whl", hash = "sha256:0301ec7a6ec5c77a57581e9970d380a6dc104bdb8f15b282e05af40d829c2eda"}, + {file = "optimum_onnx-0.1.0.tar.gz", hash = "sha256:182c54b25eddaded1618af7b58516da34749393a987ec7111f74677f249676f9"}, +] + +[[package]] +name = "optimum-onnx" +version = "0.1.0" +extras = ["onnxruntime"] +requires_python = ">=3.9.0" +summary = "Optimum ONNX is an interface between the Hugging Face libraries and ONNX / ONNX Runtime" +groups = ["dev"] +dependencies = [ + "onnxruntime>=1.18.0", + "optimum-onnx==0.1.0", +] +files = [ + {file = "optimum_onnx-0.1.0-py3-none-any.whl", hash = "sha256:0301ec7a6ec5c77a57581e9970d380a6dc104bdb8f15b282e05af40d829c2eda"}, + {file = "optimum_onnx-0.1.0.tar.gz", hash = "sha256:182c54b25eddaded1618af7b58516da34749393a987ec7111f74677f249676f9"}, +] + +[[package]] +name = "optimum" +version = "2.1.0" +extras = ["onnxruntime"] +requires_python = ">=3.9.0" +summary = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality." 
+groups = ["dev"] +dependencies = [ + "optimum-onnx[onnxruntime]", + "optimum==2.1.0", +] +files = [ + {file = "optimum-2.1.0-py3-none-any.whl", hash = "sha256:bc3af32e1236a9b2c2ca1d27ed9d3ab1b6591e24c6bcd47f9671a8198a30ea88"}, + {file = "optimum-2.1.0.tar.gz", hash = "sha256:0a2a13f91500e41d34863ffdb08fcb886b3ce68a84a386e59653e3064a45dd4b"}, +] + [[package]] name = "overrides" version = "7.7.0" @@ -2689,7 +2743,7 @@ name = "pygments" version = "2.19.2" requires_python = ">=3.8" summary = "Pygments is a syntax highlighting package written in Python." -groups = ["default", "dev", "nb", "tests"] +groups = ["dev", "nb", "tests"] files = [ {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, @@ -3050,7 +3104,7 @@ name = "requests" version = "2.32.5" requires_python = ">=3.9" summary = "Python HTTP for Humans." 
-groups = ["nb"] +groups = ["default", "dev", "nb"] dependencies = [ "certifi>=2017.4.17", "charset-normalizer<4,>=2", @@ -3106,7 +3160,7 @@ name = "rich" version = "14.3.3" requires_python = ">=3.8.0" summary = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -groups = ["default", "dev"] +groups = ["dev"] dependencies = [ "markdown-it-py>=2.2.0", "pygments<3.0.0,>=2.13.0", @@ -3430,17 +3484,6 @@ files = [ {file = "setuptools-82.0.0.tar.gz", hash = "sha256:22e0a2d69474c6ae4feb01951cb69d515ed23728cf96d05513d36e42b62b37cb"}, ] -[[package]] -name = "shellingham" -version = "1.5.4" -requires_python = ">=3.7" -summary = "Tool to Detect Surrounding Shell" -groups = ["default", "dev"] -files = [ - {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, - {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, -] - [[package]] name = "six" version = "1.17.0" @@ -3733,24 +3776,25 @@ files = [ [[package]] name = "transformers" -version = "5.3.0" -requires_python = ">=3.10.0" -summary = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training." 
+version = "4.57.6" +requires_python = ">=3.9.0" +summary = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" groups = ["default", "dev"] dependencies = [ - "huggingface-hub<2.0,>=1.3.0", + "filelock", + "huggingface-hub<1.0,>=0.34.0", "numpy>=1.17", "packaging>=20.0", "pyyaml>=5.1", "regex!=2019.12.17", + "requests", "safetensors>=0.4.3", "tokenizers<=0.23.0,>=0.22.0", "tqdm>=4.27", - "typer", ] files = [ - {file = "transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a"}, - {file = "transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557"}, + {file = "transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550"}, + {file = "transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3"}, ] [[package]] @@ -3789,23 +3833,6 @@ files = [ {file = "triton_windows-3.6.0.post26-cp313-cp313-win_amd64.whl", hash = "sha256:033f3d50c6a0e4539a3ccfa042304dbf76bf79155f382f9c09d010323d5a9a32"}, ] -[[package]] -name = "typer" -version = "0.24.1" -requires_python = ">=3.10" -summary = "Typer, build great CLIs. Easy to code. Based on Python type hints." -groups = ["default", "dev"] -dependencies = [ - "annotated-doc>=0.0.2", - "click>=8.2.1", - "rich>=12.3.0", - "shellingham>=1.3.0", -] -files = [ - {file = "typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e"}, - {file = "typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45"}, -] - [[package]] name = "typing-extensions" version = "4.15.0" @@ -3858,7 +3885,7 @@ name = "urllib3" version = "2.6.3" requires_python = ">=3.9" summary = "HTTP library with thread-safe connection pooling, file post, and more." 
-groups = ["default", "nb"] +groups = ["default", "dev", "nb"] files = [ {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, diff --git a/pyproject.toml b/pyproject.toml index 2e94072..6436bac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -157,13 +157,14 @@ dev = [ "pdoc3>=0.11.5", "bump-my-version>=1.1.1", "nox>=2025.2.9", - "huggingface-hub>=1.6.0", + "huggingface-hub<1.0", "tqdm>=4.66.5", "peft>=0.18.1", "einops>=0.8.2", "onnx>=1.20.1", "triton-windows>=3.6.0.post26", "xformers>=0.0.34", + "optimum[onnxruntime]>=2.1.0", ] nb = [ "jupyterlab>=4.3.5",