:: llama-cpp-wheels/build_llama-cpp-python.bat
:: (66 lines, 1.9 KiB, Batchfile)
@echo off
SETLOCAL EnableDelayedExpansion
:: --- Visual Studio build environment ---
:: Path to the VS Build Tools env script (example: VS 2022 Community).
:: Needed because cl.exe / the MSVC toolchain are not on PATH in a default terminal.
set "VS_PATH=C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
if exist "%VS_PATH%" (
call "%VS_PATH%"
) else (
echo [X] Setup script for Visual Studio env variables could not be found!
pause
:: exit with an explicit non-zero code — a bare "exit /b" would return the
:: current errorlevel (usually 0), hiding the failure from any caller
exit /b 1
)
:: --- Configuration ---
set "REPO_URL=https://github.com/abetlen/llama-cpp-python.git"
set "SOURCE_DIR=llama_cpp_python_src"
set "WHEEL_DIR=.\dist"
:: Optional: specify the CUDA architecture (e.g. "89" for an RTX 1000 Ada Laptop GPU).
:: "native" is only possible if the device is identified properly.
:: See https://developer.nvidia.com/cuda-gpus for compute capability values.
:: set "CMAKE_ARGS=-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=native"
:: NOTE: no quotes around the value — cmd passes single quotes through literally,
:: so '89' would reach CMake as the invalid architecture "'89'".
set "CMAKE_ARGS=-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=89"
set "FORCE_CMAKE=1"
:: Cap parallel compile jobs (CUDA compiles are memory-hungry).
set "CMAKE_BUILD_PARALLEL_LEVEL=4"
set "MAX_JOBS=4"
echo [*] Check source directory...
if exist "%SOURCE_DIR%" (
echo [*] Directory exists. Update repository...
:: /d also switches the drive in case the script is invoked from another one
cd /d "%SOURCE_DIR%"
git fetch --all
:: hard-reset to the remote branch; this already syncs the working tree,
:: so a separate "git pull origin main" afterwards would be redundant
git reset --hard origin/main
:: update submodules (contains the actual llama.cpp)
git submodule update --init --recursive
) else (
echo [*] Clone repository...
git clone --recurse-submodules %REPO_URL% %SOURCE_DIR%
if errorlevel 1 (
echo [X] ERROR: git clone failed.
pause
exit /b 1
)
cd /d "%SOURCE_DIR%"
)
echo [*] Start compilation and wheel generation...
:: Build the wheel from the current directory (.) using the pdm-managed
:: interpreter of the parent project; -vv gives verbose compiler output.
:: Debug helpers (uncomment to verify the interpreter pdm resolves):
@REM pdm run -p ..\ python --version
@REM pdm run -p ..\ python -m pip --version
pdm run -p ..\ python -m pip wheel . --wheel-dir="..\%WHEEL_DIR%" --no-cache-dir --no-deps -vv
if %ERRORLEVEL% EQU 0 (
echo.
echo [!] SUCCESS: Wheel with CUDA support placed in: %WHEEL_DIR%
dir "..\%WHEEL_DIR%\llama_cpp_python*.whl"
cd ..
pause
ENDLOCAL
exit /b 0
) else (
echo.
echo [X] ERROR: Build process failed.
cd ..
pause
ENDLOCAL
:: propagate the failure to any calling script/CI step
exit /b 1
)