ONNX optimisations

This commit is contained in:
Florian Förster 2026-04-02 16:28:27 +02:00
parent 809f1a02c4
commit 64b0216c62
3 changed files with 129 additions and 72 deletions

29
ONNX_quantize.ps1 Normal file
View File

@ -0,0 +1,29 @@
# Export a locally stored reranker snapshot to ONNX with optimum-cli and then
# quantize the exported model. All relative paths are based on the current
# working directory: a "models" directory is expected one level above it.
# The script exits with a non-zero code as soon as a prerequisite or step fails.

# Snapshot directory that is handed to the exporter through -m.
$TARGET_PTH = "B:\projects\MOP-TOM\models\models--BAAI--bge-reranker-v2-m3\snapshots\953dc6f6f85a1b2dbfca4c34a2796e7dde08d41e"
# Hub-style model identifier; not referenced by the commands below.
$MODEL_NAME = 'BAAI/bge-reranker-v2-m3'

# Step switches: set one to $false to skip that step.
$RUN_EXPORT = $true
$RUN_QUANTIZE = $true

$base = Get-Location
# Build the candidate path WITHOUT -Resolve first. -Resolve reports an error and
# returns nothing for a missing path, which would leave every derived path empty
# and make the existence check below meaningless.
$model_candidate = Join-Path $base "..\models"

Write-Host "Path to base: $base"
if (-not (Test-Path -Path $model_candidate -PathType Container)) {
    Write-Host "Model directory not found: $model_candidate"
    # Non-zero so callers (CI, wrapper scripts) can detect the failure.
    exit 1
}

# The directory exists, so resolving is safe now and yields the normalized path.
$model_path = Join-Path $base "..\models" -Resolve
$MODEL_EXPORT = Join-Path $model_path "onnx_test"
$MODEL_EXPORT_QUANT = Join-Path $MODEL_EXPORT "quant"
Write-Host "Path to model folder: $model_path"
Write-Host "Model directory found"

# Both steps are launched through pdm; fail early with a clear message instead
# of relying on a stale $LASTEXITCODE when the executable cannot be started.
if (-not (Get-Command pdm -ErrorAction SilentlyContinue)) {
    Write-Host "Required command not found on PATH: pdm"
    exit 1
}

# Point the Hugging Face / sentence-transformers cache locations at the model folder.
$Env:SENTENCE_TRANSFORMERS_HOME = $model_path
$Env:TRANSFORMERS_CACHE = $model_path
$Env:HF_HOME = $model_path

if ($RUN_EXPORT) {
    pdm run optimum-cli export onnx -m $TARGET_PTH --task text-classification --optimize O1 $MODEL_EXPORT
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ONNX export failed with exit code $LASTEXITCODE"
        exit $LASTEXITCODE
    }
    # Variant without the --optimize flag:
    # pdm run optimum-cli export onnx -m $TARGET_PTH --task text-classification $MODEL_EXPORT
}

if ($RUN_QUANTIZE) {
    pdm run optimum-cli onnxruntime quantize --onnx_model $MODEL_EXPORT -o $MODEL_EXPORT_QUANT --avx2
    if ($LASTEXITCODE -ne 0) {
        Write-Host "ONNX quantization failed with exit code $LASTEXITCODE"
        exit $LASTEXITCODE
    }
    # Variant that passes $TARGET_PTH as --onnx_model instead of the export directory:
    # pdm run optimum-cli onnxruntime quantize --onnx_model $TARGET_PTH -o $MODEL_EXPORT_QUANT --avx2
}

169
pdm.lock generated
View File

@ -5,7 +5,7 @@
groups = ["default", "data", "dev", "lint", "nb", "tests"]
strategy = ["inherit_metadata"]
lock_version = "4.5.0"
content_hash = "sha256:a00615420189e1aaf1b5740745f49e22f1d24a168c4a4b5eb32f658a83d0c333"
content_hash = "sha256:66d54cb438ad288d6972ddeb9674920ffcc236de9765927820687e21bf95f6bc"
[[metadata.targets]]
requires_python = ">=3.11,<3.14"
@ -30,17 +30,6 @@ files = [
{file = "accelerate-1.13.0.tar.gz", hash = "sha256:d631b4e0f5b3de4aff2d7e9e6857d164810dfc3237d54d017f075122d057b236"},
]
[[package]]
name = "annotated-doc"
version = "0.0.4"
requires_python = ">=3.8"
summary = "Document parameters, class attributes, return types, and variables inline, with Annotated."
groups = ["default", "dev"]
files = [
{file = "annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"},
{file = "annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4"},
]
[[package]]
name = "annotated-types"
version = "0.7.0"
@ -60,7 +49,7 @@ name = "anyio"
version = "4.12.1"
requires_python = ">=3.9"
summary = "High-level concurrency and networking framework on top of asyncio or Trio"
groups = ["default", "dev", "nb"]
groups = ["dev", "nb"]
dependencies = [
"exceptiongroup>=1.0.2; python_version < \"3.11\"",
"idna>=2.8",
@ -375,7 +364,7 @@ name = "charset-normalizer"
version = "3.4.4"
requires_python = ">=3.7"
summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
groups = ["nb"]
groups = ["default", "dev", "nb"]
files = [
{file = "charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8"},
{file = "charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0"},
@ -450,7 +439,7 @@ name = "click"
version = "8.3.1"
requires_python = ">=3.10"
summary = "Composable command line interface toolkit"
groups = ["default", "dev"]
groups = ["dev"]
dependencies = [
"colorama; platform_system == \"Windows\"",
]
@ -909,7 +898,7 @@ files = [
name = "flatbuffers"
version = "25.12.19"
summary = "The FlatBuffers serialization format for Python"
groups = ["default"]
groups = ["default", "dev"]
files = [
{file = "flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4"},
]
@ -944,7 +933,7 @@ name = "h11"
version = "0.16.0"
requires_python = ">=3.8"
summary = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
groups = ["default", "dev", "nb"]
groups = ["dev", "nb"]
files = [
{file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"},
{file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"},
@ -956,7 +945,7 @@ version = "1.4.0"
requires_python = ">=3.8"
summary = "Fast transfer of large files with the Hugging Face Hub."
groups = ["default", "dev"]
marker = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""
marker = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""
files = [
{file = "hf_xet-1.4.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:76725fcbc5f59b23ac778f097d3029d6623e3cf6f4057d99d1fce1a7e3cff8fc"},
{file = "hf_xet-1.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:76f1f73bee81a6e6f608b583908aa24c50004965358ac92c1dc01080a21bcd09"},
@ -982,7 +971,7 @@ name = "httpcore"
version = "1.0.9"
requires_python = ">=3.8"
summary = "A minimal low-level HTTP client."
groups = ["default", "dev", "nb"]
groups = ["dev", "nb"]
dependencies = [
"certifi",
"h11>=0.16",
@ -997,7 +986,7 @@ name = "httpx"
version = "0.28.1"
requires_python = ">=3.8"
summary = "The next generation HTTP client."
groups = ["default", "dev", "nb"]
groups = ["dev", "nb"]
dependencies = [
"anyio",
"certifi",
@ -1011,24 +1000,23 @@ files = [
[[package]]
name = "huggingface-hub"
version = "1.6.0"
requires_python = ">=3.9.0"
version = "0.36.2"
requires_python = ">=3.8.0"
summary = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
groups = ["default", "dev"]
dependencies = [
"filelock>=3.10.0",
"filelock",
"fsspec>=2023.5.0",
"hf-xet<2.0.0,>=1.3.2; platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"",
"httpx<1,>=0.23.0",
"hf-xet<2.0.0,>=1.1.3; platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"",
"packaging>=20.9",
"pyyaml>=5.1",
"requests",
"tqdm>=4.42.1",
"typer",
"typing-extensions>=4.1.0",
"typing-extensions>=3.7.4.3",
]
files = [
{file = "huggingface_hub-1.6.0-py3-none-any.whl", hash = "sha256:ef40e2d5cb85e48b2c067020fa5142168342d5108a1b267478ed384ecbf18961"},
{file = "huggingface_hub-1.6.0.tar.gz", hash = "sha256:d931ddad8ba8dfc1e816bf254810eb6f38e5c32f60d4184b5885662a3b167325"},
{file = "huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270"},
{file = "huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a"},
]
[[package]]
@ -1576,7 +1564,7 @@ name = "markdown-it-py"
version = "4.0.0"
requires_python = ">=3.10"
summary = "Python port of markdown-it. Markdown parsing, done right!"
groups = ["default", "dev"]
groups = ["dev"]
dependencies = [
"mdurl~=0.1",
]
@ -1680,7 +1668,7 @@ name = "mdurl"
version = "0.1.2"
requires_python = ">=3.7"
summary = "Markdown URL utilities"
groups = ["default", "dev"]
groups = ["dev"]
files = [
{file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
{file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
@ -2171,7 +2159,7 @@ name = "onnxruntime"
version = "1.24.4"
requires_python = ">=3.11"
summary = "ONNX Runtime is a runtime accelerator for Machine Learning models"
groups = ["default"]
groups = ["default", "dev"]
dependencies = [
"flatbuffers",
"numpy>=1.21.6",
@ -2199,6 +2187,72 @@ files = [
{file = "onnxruntime-1.24.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c74e268dc808e61e63784d43f9ddcdaf50a776c2819e8bd1d1b11ef64bf7e36"},
]
[[package]]
name = "optimum"
version = "2.1.0"
requires_python = ">=3.9.0"
summary = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality."
groups = ["dev"]
dependencies = [
"huggingface-hub>=0.8.0",
"numpy",
"packaging",
"torch>=1.11",
"transformers>=4.29",
]
files = [
{file = "optimum-2.1.0-py3-none-any.whl", hash = "sha256:bc3af32e1236a9b2c2ca1d27ed9d3ab1b6591e24c6bcd47f9671a8198a30ea88"},
{file = "optimum-2.1.0.tar.gz", hash = "sha256:0a2a13f91500e41d34863ffdb08fcb886b3ce68a84a386e59653e3064a45dd4b"},
]
[[package]]
name = "optimum-onnx"
version = "0.1.0"
requires_python = ">=3.9.0"
summary = "Optimum ONNX is an interface between the Hugging Face libraries and ONNX / ONNX Runtime"
groups = ["dev"]
dependencies = [
"onnx",
"optimum~=2.1.0",
"transformers<4.58.0,>=4.36",
]
files = [
{file = "optimum_onnx-0.1.0-py3-none-any.whl", hash = "sha256:0301ec7a6ec5c77a57581e9970d380a6dc104bdb8f15b282e05af40d829c2eda"},
{file = "optimum_onnx-0.1.0.tar.gz", hash = "sha256:182c54b25eddaded1618af7b58516da34749393a987ec7111f74677f249676f9"},
]
[[package]]
name = "optimum-onnx"
version = "0.1.0"
extras = ["onnxruntime"]
requires_python = ">=3.9.0"
summary = "Optimum ONNX is an interface between the Hugging Face libraries and ONNX / ONNX Runtime"
groups = ["dev"]
dependencies = [
"onnxruntime>=1.18.0",
"optimum-onnx==0.1.0",
]
files = [
{file = "optimum_onnx-0.1.0-py3-none-any.whl", hash = "sha256:0301ec7a6ec5c77a57581e9970d380a6dc104bdb8f15b282e05af40d829c2eda"},
{file = "optimum_onnx-0.1.0.tar.gz", hash = "sha256:182c54b25eddaded1618af7b58516da34749393a987ec7111f74677f249676f9"},
]
[[package]]
name = "optimum"
version = "2.1.0"
extras = ["onnxruntime"]
requires_python = ">=3.9.0"
summary = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality."
groups = ["dev"]
dependencies = [
"optimum-onnx[onnxruntime]",
"optimum==2.1.0",
]
files = [
{file = "optimum-2.1.0-py3-none-any.whl", hash = "sha256:bc3af32e1236a9b2c2ca1d27ed9d3ab1b6591e24c6bcd47f9671a8198a30ea88"},
{file = "optimum-2.1.0.tar.gz", hash = "sha256:0a2a13f91500e41d34863ffdb08fcb886b3ce68a84a386e59653e3064a45dd4b"},
]
[[package]]
name = "overrides"
version = "7.7.0"
@ -2689,7 +2743,7 @@ name = "pygments"
version = "2.19.2"
requires_python = ">=3.8"
summary = "Pygments is a syntax highlighting package written in Python."
groups = ["default", "dev", "nb", "tests"]
groups = ["dev", "nb", "tests"]
files = [
{file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
{file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
@ -3050,7 +3104,7 @@ name = "requests"
version = "2.32.5"
requires_python = ">=3.9"
summary = "Python HTTP for Humans."
groups = ["nb"]
groups = ["default", "dev", "nb"]
dependencies = [
"certifi>=2017.4.17",
"charset-normalizer<4,>=2",
@ -3106,7 +3160,7 @@ name = "rich"
version = "14.3.3"
requires_python = ">=3.8.0"
summary = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
groups = ["default", "dev"]
groups = ["dev"]
dependencies = [
"markdown-it-py>=2.2.0",
"pygments<3.0.0,>=2.13.0",
@ -3430,17 +3484,6 @@ files = [
{file = "setuptools-82.0.0.tar.gz", hash = "sha256:22e0a2d69474c6ae4feb01951cb69d515ed23728cf96d05513d36e42b62b37cb"},
]
[[package]]
name = "shellingham"
version = "1.5.4"
requires_python = ">=3.7"
summary = "Tool to Detect Surrounding Shell"
groups = ["default", "dev"]
files = [
{file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"},
{file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
]
[[package]]
name = "six"
version = "1.17.0"
@ -3733,24 +3776,25 @@ files = [
[[package]]
name = "transformers"
version = "5.3.0"
requires_python = ">=3.10.0"
summary = "Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training."
version = "4.57.6"
requires_python = ">=3.9.0"
summary = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
groups = ["default", "dev"]
dependencies = [
"huggingface-hub<2.0,>=1.3.0",
"filelock",
"huggingface-hub<1.0,>=0.34.0",
"numpy>=1.17",
"packaging>=20.0",
"pyyaml>=5.1",
"regex!=2019.12.17",
"requests",
"safetensors>=0.4.3",
"tokenizers<=0.23.0,>=0.22.0",
"tqdm>=4.27",
"typer",
]
files = [
{file = "transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a"},
{file = "transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557"},
{file = "transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550"},
{file = "transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3"},
]
[[package]]
@ -3789,23 +3833,6 @@ files = [
{file = "triton_windows-3.6.0.post26-cp313-cp313-win_amd64.whl", hash = "sha256:033f3d50c6a0e4539a3ccfa042304dbf76bf79155f382f9c09d010323d5a9a32"},
]
[[package]]
name = "typer"
version = "0.24.1"
requires_python = ">=3.10"
summary = "Typer, build great CLIs. Easy to code. Based on Python type hints."
groups = ["default", "dev"]
dependencies = [
"annotated-doc>=0.0.2",
"click>=8.2.1",
"rich>=12.3.0",
"shellingham>=1.3.0",
]
files = [
{file = "typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e"},
{file = "typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45"},
]
[[package]]
name = "typing-extensions"
version = "4.15.0"
@ -3858,7 +3885,7 @@ name = "urllib3"
version = "2.6.3"
requires_python = ">=3.9"
summary = "HTTP library with thread-safe connection pooling, file post, and more."
groups = ["default", "nb"]
groups = ["default", "dev", "nb"]
files = [
{file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"},
{file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"},

View File

@ -157,13 +157,14 @@ dev = [
"pdoc3>=0.11.5",
"bump-my-version>=1.1.1",
"nox>=2025.2.9",
"huggingface-hub>=1.6.0",
"huggingface-hub<1.0",
"tqdm>=4.66.5",
"peft>=0.18.1",
"einops>=0.8.2",
"onnx>=1.20.1",
"triton-windows>=3.6.0.post26",
"xformers>=0.0.34",
"optimum[onnxruntime]>=2.1.0",
]
nb = [
"jupyterlab>=4.3.5",