{
    "version": "b9010",
    "description": "Inference of LLaMA model in pure C/C++ (CUDA 12.4 version)",
    "homepage": "https://github.com/ggml-org/llama.cpp",
    "license": "MIT",
    "architecture": {
        "64bit": {
            "url": "https://github.com/ggml-org/llama.cpp/releases/download/b9010/llama-b9010-bin-win-cuda-12.4-x64.zip",
            "hash": "411c4d17a6505c210f4b977450420f630fbe7d9db7942dea809f077976968ef9"
        }
    },
    "bin": [
        "llama-batched-bench.exe",
        "llama-bench.exe",
        "llama-cli.exe",
        "llama-completion.exe",
        "llama-fit-params.exe",
        "llama-gemma3-cli.exe",
        "llama-gguf-split.exe",
        "llama-imatrix.exe",
        "llama-llava-cli.exe",
        "llama-minicpmv-cli.exe",
        "llama-mtmd-cli.exe",
        "llama-perplexity.exe",
        "llama-quantize.exe",
        "llama-qwen2vl-cli.exe",
        "llama-server.exe",
        "llama-tokenize.exe",
        "llama-tts.exe"
    ],
    "persist": "models",
    "checkver": {
        "github": "https://github.com/ggml-org/llama.cpp",
        "regex": "/releases/tag/(b[\\d]+)"
    },
    "autoupdate": {
        "architecture": {
            "64bit": {
                "url": "https://github.com/ggml-org/llama.cpp/releases/download/$version/llama-$version-bin-win-cuda-12.4-x64.zip"
            }
        }
    }
}