CI : Enable CUDA and Vulkan ARM64 runners and fix CI/CD (#21122)

* CI: Enable CUDA and Vulkan ARM64 runners and fix CI/CD Co-authored-by: Ts-sound <44093942+Ts-sound@users.noreply.github.com> * Obtain source tag name from git tag Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Co-authored-by: Ts-sound <44093942+Ts-sound@users.noreply.github.com> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
2026-03-31 03:24:37 +09:00
parent ead417f01c
commit 84ae8434d0
10 changed files with 436 additions and 205 deletions
@@ -36,7 +36,7 @@ RUN mkdir -p /app/full \
 FROM ubuntu:$UBUNTU_VERSION AS base
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=24.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=13.1.0
+ARG CUDA_VERSION=13.1.1
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
@@ -12,7 +12,9 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 ARG CUDA_DOCKER_ARCH=default
 RUN apt-get update && \
-    apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
+    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1
 ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14
 WORKDIR /app
@@ -39,7 +41,7 @@ RUN mkdir -p /app/full \
 FROM ${BASE_CUDA_RUN_CONTAINER} AS base
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -1,6 +1,6 @@
-ARG UBUNTU_VERSION=22.04
+ARG UBUNTU_VERSION=24.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=12.4.0
+ARG CUDA_VERSION=12.8.1
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
@@ -12,7 +12,9 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 ARG CUDA_DOCKER_ARCH=default
 RUN apt-get update && \
-    apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
+    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1
 ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14
 WORKDIR /app
@@ -39,7 +41,7 @@ RUN mkdir -p /app/full \
 FROM ${BASE_CUDA_RUN_CONTAINER} AS base
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -60,7 +62,8 @@ RUN apt-get update \
    git \
    python3 \
    python3-pip \
-    && pip install --upgrade pip setuptools wheel \
+    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
@@ -51,7 +51,7 @@ RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
  && dpkg --install *.deb
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -46,7 +46,7 @@ RUN mkdir -p /app/full \
 FROM ${BASE_MUSA_RUN_CONTAINER} AS base
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -78,7 +78,7 @@ ARG http_proxy
 ARG https_proxy
 RUN apt-get update \
-    && apt-get install -y libgomp1 libtbb12 curl\
+    && apt-get install -y libgomp1 libtbb12 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -58,7 +58,7 @@ RUN mkdir -p /app/full \
 FROM ${BASE_ROCM_DEV_CONTAINER} AS base
 RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
+    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -79,7 +79,7 @@ RUN apt-get update \
    git \
    python3-pip \
    python3 \
-    python3-wheel\
+    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
@@ -49,17 +49,20 @@ COPY --from=build /app/full /app
 WORKDIR /app
 ENV PATH="/root/.venv/bin:/root/.local/bin:${PATH}"
 # Flag for compatibility with pip
 ARG UV_INDEX_STRATEGY="unsafe-best-match"
 RUN apt-get update \
    && apt-get install -y \
    build-essential \
    curl \
    git \
-    python3.13 \
+    ca-certificates \
-    python3.13-dev \
+    && curl -LsSf https://astral.sh/uv/install.sh | sh \
-    python3-pip \
+    && uv python install 3.13 \
-    python3-wheel \
+    && uv venv --python 3.13 /root/.venv \
-    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.13 100 \
+    && uv pip install --python /root/.venv/bin/python -r requirements.txt \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
@@ -25,184 +25,13 @@ permissions:
  packages: write
 jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ${{ matrix.config.runs_on }}
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      fail-fast: false
      matrix:
        config:
          # Multi-stage build
          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/arm64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" }
          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" }
          - { tag: "cuda cuda12", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true,  runs_on: "ubuntu-24.04", cuda_version: "12.4.0", ubuntu_version: "22.04" }
          - { tag: "cuda13", dockerfile: ".devops/cuda-new.Dockerfile",  platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true,  runs_on: "ubuntu-24.04", cuda_version: "13.1.0", ubuntu_version: "24.04" }
          - { tag: "musa",   dockerfile: ".devops/musa.Dockerfile",   platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true,  runs_on: "ubuntu-24.04" }
          - { tag: "intel",  dockerfile: ".devops/intel.Dockerfile",  platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true,  runs_on: "ubuntu-24.04" }
          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" }
          - { tag: "s390x",  dockerfile: ".devops/s390x.Dockerfile",  platforms: "linux/s390x", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04-s390x" }
          - { tag: "rocm",   dockerfile: ".devops/rocm.Dockerfile",   platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true,  runs_on: "ubuntu-24.04" }
          - { tag: "openvino", dockerfile: ".devops/openvino.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false, runs_on: "ubuntu-24.04" }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v6
        with:
          fetch-depth: 0 # preserve git history, so we can determine the build number
      - name: Set up QEMU
        if: ${{ matrix.config.tag != 's390x' }}
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3
        with:
          image: tonistiigi/binfmt:qemu-v10.2.1
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
      - name: Log in to Docker Hub
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Determine source tag name
        id: srctag
        uses: ./.github/actions/get-tag-name
        env:
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
      - name: Determine image tag name
        id: tag
        shell: bash
        run: |
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}"  # to lower case
          REPO_NAME="${{ github.event.repository.name }}"
          PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
          # list all tags possible
          tags="${{ matrix.config.tag }}"
          for tag in $tags; do
              if [[ "$tag" == "cpu" ]]; then
                  TYPE=""
              else
                  TYPE="-$tag"
              fi
              CACHETAGS="${PREFIX}buildcache${TYPE}"
              FULLTAGS="${FULLTAGS:+$FULLTAGS,}${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
              LIGHTTAGS="${LIGHTTAGS:+$LIGHTTAGS,}${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
              SERVERTAGS="${SERVERTAGS:+$SERVERTAGS,}${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
          done
          echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
          echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
          echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
          echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
          echo "cache_output_tags=$CACHETAGS"  # print out for debugging
          echo "full_output_tags=$FULLTAGS"  # print out for debugging
          echo "light_output_tags=$LIGHTTAGS"  # print out for debugging
          echo "server_output_tags=$SERVERTAGS"  # print out for debugging
        env:
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
      - name: Free Disk Space (Ubuntu)
        if: ${{ matrix.config.free_disk_space == true }}
        uses: ggml-org/free-disk-space@v1.3.1
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true
      - name: Build and push Full Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.full_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: full
          provenance: false
          build-args: |
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
          # using github experimental cache
          #cache-from: type=gha
          #cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
          # using registry cache (no storage limit)
          cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
          cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
      - name: Build and push Light Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.light_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: light
          provenance: false
          build-args: |
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
          # using github experimental cache
          #cache-from: type=gha
          #cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
          # using registry cache (no storage limit)
          cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
          cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
      - name: Build and push Server Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.server_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: server
          provenance: false
          build-args: |
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
          # using github experimental cache
          #cache-from: type=gha
          #cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
          # using registry cache (no storage limit)
          cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
          cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
  create_tag:
    name: Create and push git tag
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-slim
    permissions:
      contents: write
    outputs:
      source_tag: ${{ steps.srctag.outputs.name }}
    steps:
      - name: Clone
@@ -223,3 +52,391 @@ jobs:
        run: |
          git tag ${{ steps.srctag.outputs.name }} || exit 0
          git push origin ${{ steps.srctag.outputs.name }} || exit 0
  prepare_matrices:
    name: Prepare Docker matrices
    runs-on: ubuntu-24.04
    outputs:
      build_matrix: ${{ steps.matrices.outputs.build_matrix }}
      merge_matrix: ${{ steps.matrices.outputs.merge_matrix }}
    steps:
      - name: Generate build and merge matrices
        id: matrices
        shell: bash
        run: |
          set -euo pipefail
          # Keep all build targets in one place and derive merge targets from it.
          cat > build-matrix.json <<'JSON'
          [
            { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
            { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
            { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
            { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
            { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
            { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
            { "tag": "cuda13", "dockerfile": ".devops/cuda-new.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
            { "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
            { "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
            { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
            { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
            { "tag": "rocm", "dockerfile": ".devops/rocm.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
            { "tag": "openvino", "dockerfile": ".devops/openvino.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" }
          ]
          JSON
          BUILD_MATRIX="$(jq -c . build-matrix.json)"
          MERGE_MATRIX="$(jq -c '
            reduce .[] as $entry ({}; .[$entry.tag] |= (
              . // {
                tag: $entry.tag,
                arches: [],
                full: false,
                light: false,
                server: false
              }
              | .full = (.full or ($entry.full // false))
              | .light = (.light or ($entry.light // false))
              | .server = (.server or ($entry.server // false))
              | .arches += [($entry.platforms | sub("^linux/"; ""))]
            ))
            # Backward compatibility: s390x tags are aliases of cpu for the linux/s390x platform.
            | if (has("cpu") and (((.cpu.arches // []) | index("s390x")) != null)) then
                . + {
                  s390x: {
                    tag: "s390x",
                    arches: ["s390x"],
                    full: .cpu.full,
                    light: .cpu.light,
                    server: .cpu.server
                  }
                }
              else
                .
              end
            | [.[] | .arches = (.arches | unique | sort | join(" "))]
          ' build-matrix.json)"
          echo "build_matrix=$BUILD_MATRIX" >> "$GITHUB_OUTPUT"
          echo "merge_matrix=$MERGE_MATRIX" >> "$GITHUB_OUTPUT"
  push_to_registry:
    name: Push Docker image to Docker Registry
    needs: [prepare_matrices, create_tag]
    runs-on: ${{ matrix.config.runs_on }}
    strategy:
      fail-fast: false
      matrix:
        config: ${{ fromJSON(needs.prepare_matrices.outputs.build_matrix) }}
    steps:
      - name: Check out the repo
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          ref: ${{ needs.create_tag.outputs.source_tag }}
      - name: Set up QEMU
        if: ${{ contains(matrix.config.platforms, 'linux/amd64') }}
        uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4
        with:
          image: tonistiigi/binfmt:qemu-v10.2.1
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4
      - name: Log in to Docker Registry
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Determine image metadata
        id: meta
        shell: bash
        run: |
          set -euo pipefail
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}"  # to lower case
          REPO_NAME="${{ github.event.repository.name }}"
          IMAGE_REPO="ghcr.io/${REPO_OWNER}/${REPO_NAME}"
          PREFIX="${IMAGE_REPO}:"
          PLATFORM="${{ matrix.config.platforms }}"
          ARCH_SUFFIX="${PLATFORM#linux/}"
          # list all tags possible
          tags="${{ matrix.config.tag }}"
          for tag in $tags; do
              if [[ "$tag" == "cpu" ]]; then
                  TYPE=""
              else
                  TYPE="-$tag"
              fi
              CACHETAG="${PREFIX}buildcache${TYPE}-${ARCH_SUFFIX}"
          done
          SAFE_TAGS="$(echo "$tags" | tr ' ' '_')"
          echo "image_repo=$IMAGE_REPO" >> $GITHUB_OUTPUT
          echo "arch_suffix=$ARCH_SUFFIX" >> $GITHUB_OUTPUT
          echo "cache_output_tag=$CACHETAG" >> $GITHUB_OUTPUT
          echo "digest_artifact_suffix=${SAFE_TAGS}-${ARCH_SUFFIX}" >> $GITHUB_OUTPUT
          echo "cache_output_tag=$CACHETAG"  # print out for debugging
        env:
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
      - name: Free Disk Space (Ubuntu)
        if: ${{ matrix.config.free_disk_space == true }}
        uses: ggml-org/free-disk-space@v1.3.1
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true
      - name: Build and push Full Docker image by digest
        id: build_full
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7
        with:
          context: .
          platforms: ${{ matrix.config.platforms }}
          outputs: type=image,name=${{ steps.meta.outputs.image_repo }},push-by-digest=true,name-canonical=true,push=true
          file: ${{ matrix.config.dockerfile }}
          target: full
          provenance: false
          build-args: |
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
          # using github experimental cache
          #cache-from: type=gha
          #cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
          # using registry cache (no storage limit)
          cache-from: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }}
          cache-to: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }},mode=max
      - name: Build and push Light Docker image by digest
        id: build_light
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7
        with:
          context: .
          platforms: ${{ matrix.config.platforms }}
          outputs: type=image,name=${{ steps.meta.outputs.image_repo }},push-by-digest=true,name-canonical=true,push=true
          file: ${{ matrix.config.dockerfile }}
          target: light
          provenance: false
          build-args: |
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
          # using github experimental cache
          #cache-from: type=gha
          #cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
          # using registry cache (no storage limit)
          cache-from: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }}
          cache-to: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }},mode=max
      - name: Build and push Server Docker image by digest
        id: build_server
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7
        with:
          context: .
          platforms: ${{ matrix.config.platforms }}
          outputs: type=image,name=${{ steps.meta.outputs.image_repo }},push-by-digest=true,name-canonical=true,push=true
          file: ${{ matrix.config.dockerfile }}
          target: server
          provenance: false
          build-args: |
            ${{ matrix.config.ubuntu_version && format('UBUNTU_VERSION={0}', matrix.config.ubuntu_version) || '' }}
            ${{ matrix.config.cuda_version && format('CUDA_VERSION={0}', matrix.config.cuda_version) || '' }}
          # using github experimental cache
          #cache-from: type=gha
          #cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
          # using registry cache (no storage limit)
          cache-from: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }}
          cache-to: type=registry,ref=${{ steps.meta.outputs.cache_output_tag }},mode=max
      - name: Export digest metadata
        shell: bash
        run: |
            set -euo pipefail
            TAGS="${{ matrix.config.tag }}"
            ARCH_SUFFIX="${{ steps.meta.outputs.arch_suffix }}"
            DIGEST_FILE="/tmp/digests/${{ steps.meta.outputs.digest_artifact_suffix }}.tsv"
            mkdir -p /tmp/digests
            add_digest_rows() {
                local image_type="$1"
                local digest="$2"
                if [[ -z "$digest" ]]; then
                  echo "Missing digest for image_type=${image_type}" >&2
                  exit 1
                fi
                for tag in $TAGS; do
                    printf '%s\t%s\t%s\t%s\n' "$tag" "$ARCH_SUFFIX" "$image_type" "$digest" >> "$DIGEST_FILE"
                done
            }
            if [[ "${{ matrix.config.full }}" == "true" ]]; then
                add_digest_rows "full" "${{ steps.build_full.outputs.digest }}"
            fi
            if [[ "${{ matrix.config.light }}" == "true" ]]; then
                add_digest_rows "light" "${{ steps.build_light.outputs.digest }}"
            fi
            if [[ "${{ matrix.config.server }}" == "true" ]]; then
                add_digest_rows "server" "${{ steps.build_server.outputs.digest }}"
            fi
      - name: Upload digest metadata
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
        with:
          name: digests-${{ steps.meta.outputs.digest_artifact_suffix }}
          path: /tmp/digests/${{ steps.meta.outputs.digest_artifact_suffix }}.tsv
          if-no-files-found: error
  merge_arch_tags:
    name: Create shared tags from digests
    needs: [prepare_matrices, push_to_registry, create_tag]
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        config: ${{ fromJSON(needs.prepare_matrices.outputs.merge_matrix) }}
    steps:
      - name: Check out the repo
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Download digest metadata
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
        with:
          pattern: digests-*
          path: /tmp/digests
          merge-multiple: true
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4
      - name: Log in to Docker Registry
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Create tags from digests
        shell: bash
        run: |
          set -euo pipefail
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}"  # to lower case
          REPO_NAME="${{ github.event.repository.name }}"
          IMAGE_REPO="ghcr.io/${REPO_OWNER}/${REPO_NAME}"
          PREFIX="${IMAGE_REPO}:"
          SRC_TAG="${{ needs.create_tag.outputs.source_tag }}"
          TAGS="${{ matrix.config.tag }}"
          ARCHES="${{ matrix.config.arches }}"
          DIGEST_GLOB="/tmp/digests/*.tsv"
          if ! ls ${DIGEST_GLOB} >/dev/null 2>&1; then
              echo "No digest metadata found in /tmp/digests" >&2
              exit 1
          fi
          if [[ -z "$SRC_TAG" ]]; then
              echo "Missing source tag from create_tag" >&2
              exit 1
          fi
          find_digest() {
              local tag_name="$1"
              local arch="$2"
              local image_type="$3"
              local digest
              digest="$(awk -F '\t' -v t="$tag_name" -v a="$arch" -v i="$image_type" '$1 == t && $2 == a && $3 == i { print $4; exit }' ${DIGEST_GLOB})"
              # Backward compatibility: s390x tags are aliases of cpu for the linux/s390x platform.
              if [[ -z "$digest" && "$tag_name" == "s390x" && "$arch" == "s390x" ]]; then
                digest="$(awk -F '\t' -v t="cpu" -v a="$arch" -v i="$image_type" '$1 == t && $2 == a && $3 == i { print $4; exit }' ${DIGEST_GLOB})"
              fi
              if [[ -z "$digest" ]]; then
                echo "Missing digest for tag=${tag_name} arch=${arch} image_type=${image_type}" >&2
                exit 1
              fi
              echo "$digest"
          }
          create_manifest_tags() {
              local image_type="$1"
              local tag_name="$2"
              local suffix="$3"
              local merged_tag="${PREFIX}${image_type}${suffix}"
              local merged_versioned_tag="${merged_tag}-${SRC_TAG}"
              local refs=()
              for arch in $ARCHES; do
                  local digest
                  digest="$(find_digest "$tag_name" "$arch" "$image_type")"
                  refs+=("${IMAGE_REPO}@${digest}")
              done
              echo "Creating ${merged_tag} from ${refs[*]}"
              docker buildx imagetools create --tag "${merged_tag}" "${refs[@]}"
              echo "Creating ${merged_versioned_tag} from ${refs[*]}"
              docker buildx imagetools create --tag "${merged_versioned_tag}" "${refs[@]}"
          }
          for tag in $TAGS; do
              if [[ "$tag" == "cpu" ]]; then
                  TYPE=""
              else
                  TYPE="-$tag"
              fi
              if [[ "${{ matrix.config.full }}" == "true" ]]; then
                  create_manifest_tags "full" "$tag" "$TYPE"
              fi
              if [[ "${{ matrix.config.light }}" == "true" ]]; then
                  create_manifest_tags "light" "$tag" "$TYPE"
              fi
              if [[ "${{ matrix.config.server }}" == "true" ]]; then
                  create_manifest_tags "server" "$tag" "$TYPE"
              fi
          done
        env:
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
@@ -13,24 +13,30 @@ We have three Docker images available for this project:
 Additionally, there the following images, similar to the above:
- `ghcr.io/ggml-org/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA 12 support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:full-cuda13`: Same as `full` but compiled with CUDA 13 support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA 12 support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggml-org/llama.cpp:light-cuda13`: Same as `light` but compiled with CUDA 13 support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggml-org/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA 12 support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggml-org/llama.cpp:server-cuda13`: Same as `server` but compiled with CUDA 13 support. (platforms: `linux/amd64`, `linux/arm64`)
 - `ghcr.io/ggml-org/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:full-musa`: Same as `full` but compiled with MUSA support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:light-musa`: Same as `light` but compiled with MUSA support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:server-musa`: Same as `server` but compiled with MUSA support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:full-intel`: Same as `full` but compiled with SYCL support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:light-intel`: Same as `light` but compiled with SYCL support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:server-intel`: Same as `server` but compiled with SYCL support. (platforms: `linux/amd64`)
- `ghcr.io/ggml-org/llama.cpp:full-vulkan`: Same as `full` but compiled with Vulkan support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:full-vulkan`: Same as `full` but compiled with Vulkan support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:light-vulkan`: Same as `light` but compiled with Vulkan support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:light-vulkan`: Same as `light` but compiled with Vulkan support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggml-org/llama.cpp:server-vulkan`: Same as `server` but compiled with Vulkan support. (platforms: `linux/amd64`)
+- `ghcr.io/ggml-org/llama.cpp:server-vulkan`: Same as `server` but compiled with Vulkan support. (platforms: `linux/amd64`, `linux/arm64`)
 - `ghcr.io/ggml-org/llama.cpp:full-openvino`: Same as `full` but compiled with OpenVino support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:light-openvino`: Same as `light` but compiled with OpenVino support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:server-openvino`: Same as `server` but compiled with OpenVino support. (platforms: `linux/amd64`)
 - `ghcr.io/ggml-org/llama.cpp:full-s390x`: Identical to `full`, an alias for the `s390x` platform. (platforms: `linux/s390x`)
 - `ghcr.io/ggml-org/llama.cpp:light-s390x`: Identical to `light`, an alias for the `s390x` platform. (platforms: `linux/s390x`)
 - `ghcr.io/ggml-org/llama.cpp:server-s390x`: Identical to `server`, an alias for the `s390x` platform. (platforms: `linux/s390x`)
 The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library, you'll need to build the images locally for now).
@@ -82,7 +88,7 @@ You may want to pass in some different `ARGS`, depending on the CUDA environment
 The defaults are:
- `CUDA_VERSION` set to `12.4.0`
+- `CUDA_VERSION` set to `12.8.1`
 - `CUDA_DOCKER_ARCH` set to the cmake build default, which includes all the supported architectures
 The resulting images, are essentially the same as the non-CUDA images: