Enable testing on Snapdragon devices (#21051)

* Add the tests that we want to run on external CI * remove extra files * Fixes python issues, remove the deadlock on CI * remove unnecessary changes * use override to ty.toml * fix pre-commit and try tests with secret in external repo not upstream * skip if key is unavailable * Fix feedback * switch hexagon to snapdragon * cleanup * fix secrets * remove the copyrights at the top of the files
2026-04-23 13:08:10 -07:00
parent 185cbff6f1
commit 187a456370
11 changed files with 764 additions and 99 deletions
@@ -0,0 +1,113 @@
 # Builds llama.cpp for Snapdragon Android and runs on-device tests (see jobs).
 name: CI (snapdragon)
 on:
  # Manual runs.
  workflow_dispatch:
  push:
    branches:
      - master
    # Only pushes that touch one of these paths trigger the workflow.
    paths:
      - '.github/workflows/build-and-test-snapdragon.yml'
      - 'ggml/include/ggml-hexagon.h'
      - 'ggml/src/ggml-hexagon/**'
      - 'docs/backend/snapdragon/**'
      - 'scripts/snapdragon/**'
      - 'CMakePresets.json'
  pull_request:
    types: [opened, synchronize, reopened]
    # Same path filter as for pushes.
    paths:
      - '.github/workflows/build-and-test-snapdragon.yml'
      - 'ggml/include/ggml-hexagon.h'
      - 'ggml/src/ggml-hexagon/**'
      - 'docs/backend/snapdragon/**'
      - 'scripts/snapdragon/**'
      - 'CMakePresets.json'
 concurrency:
  # When head_ref is set the group is keyed by the ref, so runs for the same
  # ref share a group; otherwise run_id makes the group unique per run.
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  # A newer run in the same group cancels the one still in progress.
  cancel-in-progress: true
 jobs:
  # Builds llama.cpp with the Snapdragon preset inside the toolchain container
  # and uploads the install tree as an artifact.
  android-ndk-snapdragon:
    runs-on: ubuntu-latest
    container:
      image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
    defaults:
      run:
        shell: bash
    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: false
      # CMakeUserPresets.json is copied to the repository root before
      # configuring with the arm64-android-snapdragon-release preset; the
      # result is installed into pkg-adb/llama.cpp.
      - name: Build Llama.CPP for Snapdragon Android
        id: build_llama_cpp_snapdragon_android
        run: |
          cp docs/backend/snapdragon/CMakeUserPresets.json .
          cmake --preset arm64-android-snapdragon-release -B build
          cmake --build build
          cmake --install build --prefix pkg-adb/llama.cpp
      # Upload only when the build step itself succeeded.
      - name: Upload Llama.CPP Snapdragon Android Build Artifact
        if: ${{ always() && steps.build_llama_cpp_snapdragon_android.outcome == 'success' }}
        uses: actions/upload-artifact@v6
        with:
          name: llama-cpp-android-arm64-snapdragon
          path: pkg-adb/llama.cpp
  # Exposes, as the job output `has-key`, whether the QDC_API_KEY secret is
  # non-empty, so dependent jobs can be skipped when it is unavailable.
  check-secret:
    runs-on: ubuntu-latest
    outputs:
      has-key: ${{ steps.check.outputs.has-key }}
    steps:
      # The expression is rendered to `true` or `false` before the shell runs.
      - id: check
        run: echo "has-key=${{ secrets.QDC_API_KEY != '' }}" >> "$GITHUB_OUTPUT"
  # Runs the QDC test driver once per device in the matrix, using the package
  # built by android-ndk-snapdragon.
  test-snapdragon-qdc:
    name: Test on QDC Android Device (${{ matrix.device }})
    needs: [android-ndk-snapdragon, check-secret]
    # Skipped entirely when check-secret reported that no API key is set.
    if: needs.check-secret.outputs.has-key == 'true'
    runs-on: ubuntu-latest
    strategy:
      # A failure on one device does not cancel the other matrix entries.
      fail-fast: false
      matrix:
        device: [SM8750, SM8650, SM8850]
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      # The artifact name must match the one uploaded by the build job.
      - name: Download build artifact
        uses: actions/download-artifact@v4
        with:
          name: llama-cpp-android-arm64-snapdragon
          path: pkg-snapdragon/
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
          cache: pip
      # The SDK is downloaded as a zip and the wheel inside it is installed.
      - name: Install QDC SDK wheel
        run: |
          curl -fSL -o qdc_sdk.zip https://softwarecenter.qualcomm.com/api/download/software/tools/Qualcomm_Device_Cloud_SDK/All/0.2.3/qualcomm_device_cloud_sdk-0.2.3.zip
          unzip qdc_sdk.zip -d qdc_sdk
          pip install qdc_sdk/qualcomm_device_cloud_sdk-0.2.3-py3-none-any.whl
      # QDC_API_KEY is passed through the environment, where run_qdc_jobs.py
      # reads it.
      - name: Run QDC tests (${{ matrix.device }})
        run: |
          python scripts/snapdragon/qdc/run_qdc_jobs.py \
              --test       all \
              --pkg-dir    pkg-snapdragon/llama.cpp \
              --model-url  "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
              --device     ${{ matrix.device }}
        env:
          QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
      # Runs even when earlier steps failed.
      - name: Cleanup
        if: always()
        run: rm -rf pkg-snapdragon qdc_sdk qdc_sdk.zip
@@ -1,26 +1,24 @@
 name: CI (android)
 on:
-  workflow_dispatch: # allows manual triggering
+  workflow_dispatch:
  push:
    branches:
      - master
-    paths: [
+    paths:
-      '.github/workflows/build-android.yml',
+      - '.github/workflows/build-android.yml'
-      '**/CMakeLists.txt',
+      - '**/CMakeLists.txt'
-      '**/.cmake',
+      - '**/.cmake'
-      '**/*.h',
+      - '**/*.h'
-      '**/*.hpp',
+      - '**/*.hpp'
-      '**/*.c',
+      - '**/*.c'
-      '**/*.cpp'
+      - '**/*.cpp'
    ]
  pull_request:
    types: [opened, synchronize, reopened]
-    paths: [
+    paths:
-      '.github/workflows/build-android.yml',
+      - '.github/workflows/build-android.yml'
-      'examples/llama.android/**'
+      - 'examples/llama.android/**'
    ]
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -67,35 +65,24 @@ jobs:
    defaults:
      run:
        shell: bash
    strategy:
      matrix:
        include:
          - build: 'arm64-cpu'
            defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF'
          - build: 'arm64-snapdragon'
            defines: '--preset arm64-android-snapdragon-release'
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          lfs: false
-      - name: Build Llama.CPP for Hexagon Android
+      - name: Build
-        id: build_llama_cpp_hexagon_android
+        id: ndk_build
        run: |
-          if [[ "${{ matrix.build }}" == "arm64-snapdragon" ]]; then
+          cmake -D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF -B build
            cp docs/backend/snapdragon/CMakeUserPresets.json .
          fi
          cmake ${{ matrix.defines }} -B build
          cmake --build build
          cmake --install build --prefix pkg-adb/llama.cpp
-      - name: Upload Llama.CPP Hexagon Android Build Artifact
+      - name: Upload Android Build Artifact
-        if: ${{ always() && steps.build_llama_cpp_hexagon_android.outcome == 'success' }}
+        if: ${{ always() && steps.ndk_build.outcome == 'success' }}
        uses: actions/upload-artifact@v6
        with:
-          name: llama-cpp-android-${{ matrix.build }}
+          name: llama-cpp-android-arm64-cpu
          path: pkg-adb/llama.cpp
@@ -1 +0,0 @@
 This directory includes pytest based scripts for running CI jobs on Qualcomm Device Cloud (QDC).
@@ -8,12 +8,9 @@ iniconfig==2.1.0
 outcome==1.3.0.post0
 packaging==25.0
 pluggy==1.6.0
 Pygments==2.19.2
 PySocks==1.7.1
 pytest==8.4.2
 pytest-dependency==0.6.0
 selenium==4.36.0
 setuptools==80.9.0
 sniffio==1.3.1
 sortedcontainers==2.4.0
 tomli==2.3.0
@@ -0,0 +1,401 @@
 """Run llama.cpp Hexagon Android tests in a single QDC Appium job.
 Bundles test scripts into one artifact and submits a single QDC job:
  1. run_bench_tests_posix.py — llama-completion and llama-bench on CPU / GPU / NPU
  2. run_backend_ops_posix.py — test-backend-ops on HTP0
                                (both from scripts/snapdragon/qdc/tests/; selected with --test)
 Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
 Prerequisites:
  pip install /path/to/qualcomm_device_cloud_sdk*.whl
 Required environment variables:
  QDC_API_KEY   API key from QDC UI -> Users -> Settings -> API Keys
 Usage:
  python run_qdc_jobs.py \\
      --pkg-dir    pkg-snapdragon/llama.cpp \\
      --model-url  https://.../Llama-3.2-1B-Instruct-Q4_0.gguf \\
      --device     SM8750
 """
 from __future__ import annotations
 import argparse
 import logging
 import os
 import re
 import shutil
 import sys
 import tempfile
 import time
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass, field
 from pathlib import Path
 from qualcomm_device_cloud_sdk.api import qdc_api  # ty: ignore[unresolved-import]
 from qualcomm_device_cloud_sdk.logging import configure_logging  # ty: ignore[unresolved-import]
 from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework  # ty: ignore[unresolved-import]
 configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
 log = logging.getLogger(__name__)
 # Polling and timeout settings, all in seconds.
 POLL_INTERVAL        = 30    # sleep between job-status / log-upload polls
 JOB_TIMEOUT          = 3600  # default for --job-timeout
 LOG_UPLOAD_TIMEOUT   = 600   # how long wait_for_log_upload keeps polling
 CAPACITY_TIMEOUT     = 1800  # how long wait_for_capacity keeps polling
 CAPACITY_POLL        = 60    # sleep between capacity polls
 # Default ceiling on active jobs used by wait_for_capacity.
 MAX_CONCURRENT_JOBS  = 5
 # Job states that end polling in wait_for_job.
 TERMINAL_STATES     = {JobState.COMPLETED, JobState.CANCELED}
 # Job states counted as "active" by wait_for_capacity.
 NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
 # Files bundled into the QDC artifact, located relative to this script.
 _SCRIPTS_DIR      = Path(__file__).parent
 _TESTS_DIR        = _SCRIPTS_DIR / "tests"
 _RUN_BENCH        = _TESTS_DIR / "run_bench_tests_posix.py"
 _RUN_BACKEND_OPS  = _TESTS_DIR / "run_backend_ops_posix.py"
 _UTILS            = _TESTS_DIR / "utils.py"
 _CONFTEST         = _TESTS_DIR / "conftest.py"
 _REQUIREMENTS     = _SCRIPTS_DIR / "requirements.txt"
 # Matches a pytest result line; group 1 is the test name (optional
 # "file.py::" and "Class::" prefixes are skipped), group 2 is the outcome.
 _PYTEST_LINE_RE = re.compile(
    r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
 )
 # Log files that fetch_logs_and_parse_tests neither prints nor keeps.
 _EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
 # Raw values of the non-terminal states, for comparison with the `state`
 # attribute of entries in the job list.
 _NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
@dataclass
 class JobResult:
    """Outcome of one QDC job, in the form consumed by write_summary."""
    # Overall verdict shown in the summary headline.
    passed: bool
    # Test name -> True when the test did not fail.
    tests: dict[str, bool] = field(default_factory=dict)
    # Log file name -> log text, rendered under "Raw Logs".
    raw_logs: dict[str, str] = field(default_factory=dict)
    # Test name -> failure text, rendered under "Failures".
    failure_details: dict[str, str] = field(default_factory=dict)
 def build_artifact_zip(
    pkg_dir: Path,
    stage_dir: Path,
    *,
    test_mode: str = "bench",
    model_url: str | None = None,
 ) -> Path:
    """Bundle everything into a single QDC artifact zip.

    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
      llama_cpp_bundle/            installed package (adb pushed to /data/local/tmp/)
      tests/
        utils.py                   shared helpers (paths, run_adb_command, …)
        conftest.py                shared pytest fixtures (driver)
        test_bench_posix.py        bench + cli tests (<<MODEL_URL>> substituted)
          AND/OR
        test_backend_ops_posix.py  test-backend-ops -b HTP0
      requirements.txt
      pytest.ini

    Args:
        pkg_dir: Installed llama.cpp package directory to bundle.
        stage_dir: Existing scratch directory; the bundle is staged in a
            ``bundle`` sub-directory and the archive is written next to it.
        test_mode: ``"bench"``, ``"backend-ops"`` or ``"all"``.
        model_url: Model URL substituted into the bench test script; required
            for the ``bench`` and ``all`` modes.

    Returns:
        Path of the created archive, ``stage_dir / "artifact.zip"``.

    Raises:
        ValueError: If a bench run is requested without ``model_url``.
    """
    needs_bench = test_mode in ("bench", "all")
    if needs_bench and model_url is None:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise ValueError("--model-url is required for bench/all test modes")

    # Stage into a sub-directory: the archive is created inside stage_dir, and
    # archiving stage_dir itself would pull the half-written zip into itself.
    bundle_root = stage_dir / "bundle"
    bundle_root.mkdir()
    shutil.copytree(pkg_dir, bundle_root / "llama_cpp_bundle")
    tests_dir = bundle_root / "tests"
    tests_dir.mkdir()
    shutil.copy(_UTILS,    tests_dir / "utils.py")
    shutil.copy(_CONFTEST, tests_dir / "conftest.py")
    if needs_bench:
        bench_source = _RUN_BENCH.read_text(encoding="utf-8")
        (tests_dir / "test_bench_posix.py").write_text(
            bench_source.replace("<<MODEL_URL>>", model_url),
            encoding="utf-8",
        )
    if test_mode in ("backend-ops", "all"):
        shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
    shutil.copy(_REQUIREMENTS, bundle_root / "requirements.txt")
    (bundle_root / "pytest.ini").write_text(
        "[pytest]\naddopts = --junitxml=results.xml\n", encoding="utf-8"
    )
    zip_base = str(stage_dir / "artifact")
    shutil.make_archive(zip_base, "zip", bundle_root)
    return Path(f"{zip_base}.zip")
 def wait_for_job(client, job_id: str, timeout: int) -> str:
    """Poll a QDC job until it reaches a terminal state.

    The status is fetched once per POLL_INTERVAL seconds of budget. Statuses
    that are not valid JobState values are compared as-is.

    Returns:
        The raw status string, lower-cased.

    Raises:
        TimeoutError: If no terminal state is seen within the polling budget.
    """
    for _ in range(0, timeout, POLL_INTERVAL):
        reported = qdc_api.get_job_status(client, job_id)
        try:
            state = JobState(reported)
        except ValueError:
            state = reported
        if state in TERMINAL_STATES:
            return reported.lower()
        log.info("Job %s: %s", job_id, reported)
        time.sleep(POLL_INTERVAL)
    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
 def wait_for_log_upload(client, job_id: str) -> None:
    """Wait until QDC reports the job's log upload as completed or failed.

    Polls every POLL_INTERVAL seconds. After LOG_UPLOAD_TIMEOUT seconds a
    warning is logged and the function returns anyway.
    """
    finished = {"completed", "failed"}
    # The +1 keeps the final poll at exactly LOG_UPLOAD_TIMEOUT seconds.
    for _ in range(0, LOG_UPLOAD_TIMEOUT + 1, POLL_INTERVAL):
        upload_state = qdc_api.get_job_log_upload_status(client, job_id) or ""
        upload_state = upload_state.lower()
        if upload_state in finished:
            return
        log.info("Waiting for log upload (status=%s) ...", upload_state)
        time.sleep(POLL_INTERVAL)
    log.warning("Timed out waiting for log upload after %ds", LOG_UPLOAD_TIMEOUT)
 def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
    """Block until fewer than max_jobs listed QDC jobs are in a non-terminal state.

    Only the first page (50 entries) of the job list is inspected. If the list
    cannot be retrieved, or CAPACITY_TIMEOUT seconds pass, a warning is logged
    and the function returns so the caller proceeds anyway.
    """
    for _ in range(0, CAPACITY_TIMEOUT, CAPACITY_POLL):
        page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
        if page is None:
            log.warning("Could not retrieve job list; proceeding without capacity check")
            return
        listed = getattr(page, "data", []) or []
        busy = len([
            job for job in listed
            if getattr(job, "state", None) in _NON_TERMINAL_STATE_VALUES
        ])
        if busy >= max_jobs:
            log.info("Active QDC jobs: %d / %d — waiting %ds ...", busy, max_jobs, CAPACITY_POLL)
            time.sleep(CAPACITY_POLL)
            continue
        log.info("Active QDC jobs: %d / %d — proceeding", busy, max_jobs)
        return
    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
 def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
    try:
        root = ET.fromstring(content)
    except ET.ParseError:
        return {}, {}
    results: dict[str, bool] = {}
    failures: dict[str, str] = {}
    for tc in root.iter("testcase"):
        name = tc.get("name", "")
        if classname := tc.get("classname", ""):
            name = f"{classname}.{name}"
        failure_el = tc.find("failure")
        if failure_el is None:
            failure_el = tc.find("error")
        results[name] = failure_el is None
        if failure_el is not None:
            parts = [failure_el.get("message", ""), failure_el.text or ""]
            failures[name] = "\n".join(p for p in parts if p).strip()
    return results, failures
 def _parse_pytest_output(content: str) -> dict[str, bool]:
    """Extract per-test outcomes from plain pytest console output.

    Used as a fallback when no JUnit XML results are available.

    Returns:
        Test name -> True for PASSED and SKIPPED lines, False for FAILED and
        ERROR lines. When a name appears more than once the last line wins.
    """
    # A skipped test is not a failure. This matches _parse_junit_xml, where a
    # testcase counts as failed only when it has a <failure> or <error> child.
    non_failing = ("PASSED", "SKIPPED")
    results: dict[str, bool] = {}
    for m in _PYTEST_LINE_RE.finditer(content):
        results[m.group(1)] = m.group(2) in non_failing
    return results
 def fetch_logs_and_parse_tests(
    client, job_id: str
 ) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
    """Download a job's log archives and extract test results from them.

    Every ``.xml`` file is parsed as JUnit XML. Every ``.log`` file that is not
    in _EXCLUDED_LOGS is logged, kept as a raw log and scanned for pytest
    result lines.

    Returns:
        ``(test_results, raw_logs, failure_details)``. ``test_results`` comes
        from the JUnit XML when any was found, otherwise from the pytest
        console-output fallback. All three are empty when the job has no log
        files.
    """
    log_files = qdc_api.get_job_log_files(client, job_id)
    if not log_files:
        log.warning("No log files returned for job %s", job_id)
        return {}, {}, {}
    test_results: dict[str, bool] = {}
    pytest_fallback: dict[str, bool] = {}
    raw_logs: dict[str, str] = {}
    failure_details: dict[str, str] = {}
    with tempfile.TemporaryDirectory() as tmpdir:
        for lf in log_files:
            log.info("Downloading log file: %s", lf.filename)
            zip_path = os.path.join(tmpdir, "log.zip")
            qdc_api.download_job_log_files(client, lf.filename, zip_path)
            try:
                shutil.unpack_archive(zip_path, tmpdir, "zip")
            except Exception as e:
                # Best effort: one unreadable archive must not hide the others.
                log.warning("Could not unpack %s as zip: %s", lf.filename, e)
        for root_dir, _, files in os.walk(tmpdir):
            for fname in sorted(files):
                is_xml = fname.endswith(".xml")
                is_log = fname.endswith(".log")
                # Decide from the name first so the downloaded log.zip,
                # excluded logs and unrelated files are never read.
                if not (is_xml or is_log):
                    continue
                if is_log and fname in _EXCLUDED_LOGS:
                    continue
                content = Path(os.path.join(root_dir, fname)).read_text(errors="replace")
                if is_xml:
                    results, failures = _parse_junit_xml(content)
                    test_results.update(results)
                    failure_details.update(failures)
                else:
                    log.info("--- %s ---", fname)
                    log.info("%s", content)
                    raw_logs[fname] = content
                    pytest_fallback.update(_parse_pytest_output(content))
    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
 def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
    """Append a Markdown report for ``result`` to the GitHub step summary.

    Does nothing when the GITHUB_STEP_SUMMARY environment variable is unset or
    empty. The report holds the overall verdict, a per-test table (tests whose
    name contains "test_install" are left out), collapsible failure details,
    and collapsible raw logs.

    Args:
        result: Job outcome to render.
        title: Heading of the report section.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        return
    icon = "✅" if result.passed else "❌"
    lines = [
        f"## {title}\n",
        f"Overall: {icon} {'PASSED' if result.passed else 'FAILED'}\n",
    ]
    reportable = {n: ok for n, ok in result.tests.items() if "test_install" not in n}
    if reportable:
        lines += ["| Test | Result |", "| ---- | ------ |"]
        for name, ok in reportable.items():
            lines.append(f"| `{name}` | {'✅' if ok else '❌'} |")
        passed_n = sum(1 for v in reportable.values() if v)
        failed_n = len(reportable) - passed_n
        lines += ["", f"**{passed_n} passed, {failed_n} failed**"]
    else:
        lines.append("_No per-test data available._")
    failed_names = [n for n, ok in reportable.items() if not ok]
    if failed_names:
        lines += ["", "### Failures"]
        for name in failed_names:
            detail = result.failure_details.get(name)
            # Failures without captured detail are listed only in the table.
            if detail:
                lines += [
                    f"<details><summary><code>{name}</code></summary>",
                    "",
                    "```",
                    detail,
                    "```",
                    "",
                    "</details>",
                ]
    if result.raw_logs:
        lines += ["", "### Raw Logs"]
        for fname, content in sorted(result.raw_logs.items()):
            lines += [
                f"<details><summary>{fname}</summary>",
                "",
                "```",
                content.rstrip(),
                "```",
                "",
                "</details>",
            ]
    # The report contains emoji and arbitrary log text, so pin the encoding
    # rather than relying on the locale default.
    with open(summary_path, "a", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
 def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Exits through ``parser.error`` when ``--test`` is ``bench`` or ``all`` and
    no ``--model-url`` was given.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        "--pkg-dir",
        type=Path,
        required=True,
        help="Installed llama.cpp package directory (contains bin/ and lib/)",
    )
    parser.add_argument(
        "--model-url",
        help="Direct URL to the GGUF model file (required for --test bench)",
    )
    parser.add_argument(
        "--device",
        required=True,
        help="QDC chipset name, e.g. SM8750",
    )
    parser.add_argument(
        "--test",
        default="bench",
        choices=["bench", "backend-ops", "all"],
        help="Test suite to run (default: bench)",
    )
    parser.add_argument(
        "--job-timeout",
        type=int,
        metavar="SECONDS",
        default=JOB_TIMEOUT,
        help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})",
    )
    parsed = parser.parse_args()
    needs_model = parsed.test in ("bench", "all")
    if needs_model and not parsed.model_url:
        parser.error("--model-url is required when --test bench or --test all")
    return parsed
 def main() -> int:
    """Build and upload the test artifact, run one QDC job and report the result.

    Returns:
        Process exit code: 0 when the job completed and no parsed test failed,
        1 otherwise (including configuration, upload, submission and timeout
        errors).
    """
    args = parse_args()
    api_key = os.environ.get("QDC_API_KEY")
    if not api_key:
        log.error("QDC_API_KEY environment variable must be set")
        return 1
    if not args.pkg_dir.is_dir():
        log.error("--pkg-dir %s does not exist", args.pkg_dir)
        return 1
    client = qdc_api.get_public_api_client_using_api_key(
        api_key_header=api_key,
        app_name_header="llama-cpp-ci",
        on_behalf_of_header="llama-cpp-ci",
        client_type_header="Python",
    )
    target_id = qdc_api.get_target_id(client, args.device)
    if target_id is None:
        log.error("Could not find QDC target for device %r", args.device)
        return 1
    # The zip lives in the temporary directory, so the upload has to happen
    # before the `with` block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        log.info("Building artifact ...")
        zip_path = build_artifact_zip(
            args.pkg_dir, Path(tmpdir),
            test_mode=args.test, model_url=args.model_url,
        )
        log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
        artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
    if artifact_id is None:
        log.error("Artifact upload failed")
        return 1
    # Hold off submitting while too many of the user's jobs are still active.
    wait_for_capacity(client)
    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name="llama.cpp Hexagon tests",
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # --job-timeout is in seconds; it is converted to whole minutes with a
        # minimum of 1 — presumably the unit submit_job expects; confirm
        # against the SDK.
        timeout=max(1, args.job_timeout // 60),
        test_framework=TestFramework.APPIUM,
        entry_script=None,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        log.error("Job submission failed")
        return 1
    log.info("Job submitted: %s  (device=%s)", job_id, args.device)
    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        log.error("%s", e)
        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
        return 1
    log.info("Job %s finished: %s", job_id, job_status)
    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
    # wait_for_job returns the status lower-cased, so compare in lower case.
    passed = job_status == JobState.COMPLETED.value.lower()
    # With no parsed per-test results the verdict rests on the job status alone.
    if tests:
        passed = passed and all(tests.values())
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
    if args.test == "backend-ops":
        title = f"Backend Ops — HTP0 ({args.device})"
    elif args.test == "all":
        title = f"QDC Tests ({args.device})"
    else:
        title = f"QDC Test Results ({args.device})"
    write_summary(result, title=title)
    return 0 if passed else 1
 if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,20 @@
 """Shared pytest fixtures for QDC on-device test runners."""
 import os
 import pytest
 from appium import webdriver
 from utils import options, write_qdc_log
@pytest.fixture(scope="session", autouse=True)
 def driver():
    return webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options)
 @pytest.hookimpl(trylast=True)
 def pytest_sessionfinish(session, exitstatus):
    """Publish the JUnit XML report as a QDC log once the session is over.

    Registered with ``trylast`` so it runs after other ``pytest_sessionfinish``
    implementations, including the one that writes the --junitxml file. That
    way the report is complete when it is read, regardless of plugin
    registration order.
    """
    xml_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
    if os.path.exists(xml_path):
        with open(xml_path) as f:
            write_qdc_log("results.xml", f.read())
@@ -0,0 +1,41 @@
 """
 On-device test-backend-ops runner for llama.cpp (HTP0 backend).
 Executed by QDC's Appium test framework on the QDC runner.
 The runner has ADB access to the allocated device.
 """
 import os
 import sys
 import pytest
 from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
@pytest.fixture(scope="session", autouse=True)
 def install(driver):
    push_bundle_if_needed(f"{BIN_PATH}/test-backend-ops")
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
 def test_backend_ops_htp0(type_a):
    cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
    if type_a == "q4_0":
        cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
    else:
        cmd += f" -p type_a={type_a}"
    result = run_adb_command(
        cmd,
        check=False,
    )
    write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
    assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
 if __name__ == "__main__":
    # Direct execution: run this file under pytest, publish the JUnit report
    # as a QDC log when one was written, then exit with pytest's status.
    exit_code = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
    report_exists = os.path.exists("results.xml")
    if report_exists:
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(exit_code)
@@ -0,0 +1,76 @@
 """
 On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
 Executed by QDC's Appium test framework on the QDC runner.
 The runner has ADB access to the allocated device.
 Placeholders replaced at artifact creation time by run_qdc_jobs.py:
  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
 """
 import os
 import subprocess
 import sys
 import pytest
 from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
 # Path used inside `adb shell` commands: curl output target and the -m argument.
 MODEL_PATH = "/data/local/tmp/model.gguf"
 # Prompt passed to llama-completion with -p.
 PROMPT     = "What is the capital of France?"
 # Options shared by every llama-completion run.
 CLI_OPTS   = "--batch-size 128 -n 128 -no-cnv --seed 42"
@pytest.fixture(scope="session", autouse=True)
 def install(driver):
    push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
    # Skip model download if already present
    check = subprocess.run(
        ["adb", "shell", f"ls {MODEL_PATH}"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    if check.returncode != 0:
        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
@pytest.mark.parametrize("device,extra_flags", [
    pytest.param("none",      "-ctk q8_0 -ctv q8_0", id="cpu"),
    pytest.param("GPUOpenCL", "",                     id="gpu"),
    pytest.param("HTP0",      "-ctk q8_0 -ctv q8_0", id="npu"),
 ])
 def test_llama_completion(device, extra_flags):
    result = run_adb_command(
        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
        f' -p "{PROMPT}"',
        check=False,
    )
    write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
 _DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
@pytest.mark.parametrize("device", [
    pytest.param("none",      id="cpu"),
    pytest.param("GPUOpenCL", id="gpu"),
    pytest.param("HTP0",      id="npu"),
 ])
 def test_llama_bench(device):
    result = run_adb_command(
        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
        check=False,
    )
    write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
 if __name__ == "__main__":
    # Direct execution: run this file under pytest, publish the JUnit report
    # as a QDC log when one was written, then exit with pytest's status.
    exit_code = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
    report_exists = os.path.exists("results.xml")
    if report_exists:
        with open("results.xml") as report:
            write_qdc_log("results.xml", report.read())
    sys.exit(exit_code)
@@ -1,63 +0,0 @@
 import pytest
 import subprocess
 import sys
 tmp_path='/data/local/tmp'
 pkg_path=f'{tmp_path}/llama.cpp'
 lib_path=f'{pkg_path}/lib'
 bin_path=f'{pkg_path}/bin'
 model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
 cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'
 def run_cmd(cmd):
    p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
    sys.stdout.write(p.stdout)
    assert(p.returncode == 0)
@pytest.mark.dependency()
 def test_install():
    run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}'])
    run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*'])
 ## Basic cli tests
 def run_llama_cli(dev, opts):
    prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
    opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts
    run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"'])
@pytest.mark.dependency(depends=['test_install'])
 def test_llama_cli_cpu():
    run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on')
@pytest.mark.dependency(depends=['test_install'])
 def test_llama_cli_gpu():
    run_llama_cli('GPUOpenCL', '-fa on')
@pytest.mark.dependency(depends=['test_install'])
 def test_llama_cli_npu():
    run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on')
 ## Basic bench tests
 def run_llama_bench(dev):
    run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32'])
@pytest.mark.dependency(depends=['test_install'])
 def test_llama_bench_cpu():
    run_llama_bench('none')
 def test_llama_bench_gpu():
    run_llama_bench('GPUOpenCL')
 def test_llama_bench_npu():
    run_llama_bench('HTP0')
@@ -0,0 +1,93 @@
 """Shared helpers for QDC on-device test runners."""
 import logging
 import os
 import subprocess
 import tempfile
 from appium.options.common import AppiumOptions
 log = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # On-device paths
 # ---------------------------------------------------------------------------
 # Where the bundle ends up after push_bundle_if_needed pushes
 # llama_cpp_bundle/ into /data/local/tmp.
 BUNDLE_PATH  = "/data/local/tmp/llama_cpp_bundle"
 # Directory write_qdc_log pushes log files into.
 QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
 LIB_PATH    = f"{BUNDLE_PATH}/lib"
 BIN_PATH    = f"{BUNDLE_PATH}/bin"
 # Shell prefix: exports the library search paths and marks the binaries
 # executable. It ends in "&&" so the actual command can be appended directly.
 ENV_PREFIX  = (
    f"export LD_LIBRARY_PATH={LIB_PATH} && "
    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
    f"chmod +x {BIN_PATH}/* &&"
 )
 # Full prefix for test commands: change into the bundle, then ENV_PREFIX.
 CMD_PREFIX  = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
 # ---------------------------------------------------------------------------
 # Appium session options
 # ---------------------------------------------------------------------------
 # Capabilities for the Appium session created by the `driver` fixture.
 options = AppiumOptions()
 options.set_capability("automationName", "UiAutomator2")
 options.set_capability("platformName", "Android")
 # deviceName comes from the ANDROID_DEVICE_VERSION environment variable
 # (None when the variable is unset).
 options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
 # ---------------------------------------------------------------------------
 # ADB helpers
 # ---------------------------------------------------------------------------
 def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
    """Run ``cmd`` through ``adb shell`` and recover its on-device exit code.

    Args:
        cmd: Shell command line to execute on the device.
        check: When true, assert that the command exited with status 0.

    Returns:
        A CompletedProcess whose ``stdout`` is the combined stdout/stderr with
        the exit-code sentinel removed and whose ``returncode`` is the
        on-device exit code. If no sentinel is found, adb's own exit code and
        the unmodified output are returned.

    Raises:
        AssertionError: If ``check`` is true and the exit code is non-zero.
    """
    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
    # the on-device exit code (older ADB versions always return 0).
    raw = subprocess.run(
        ["adb", "shell", f"{cmd}; echo __RC__:$?"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    stdout = raw.stdout
    returncode = raw.returncode
    if stdout:
        # Search from the end instead of requiring the sentinel on a line of
        # its own: if the command's output does not end with a newline, the
        # sentinel is glued to the last output line.
        head, found, tail = stdout.rstrip().rpartition("__RC__:")
        if found and tail.isdigit():
            returncode = int(tail)
            stdout = head
    log.info("%s", stdout)
    result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
    if check:
        assert returncode == 0, f"Command failed (exit {returncode})"
    return result
 def write_qdc_log(filename: str, content: str) -> None:
    """Store ``content`` as ``filename`` under QDC_LOGS_PATH on the device.

    The text is written to a local temporary file, pushed with ``adb push`` and
    the temporary file is removed afterwards. The adb exit codes are not
    checked.
    """
    quiet = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
    subprocess.run(["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"], **quiet)
    staged = tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False)
    with staged:
        staged.write(content)
    local_copy = staged.name
    remote_copy = f"{QDC_LOGS_PATH}/{filename}"
    try:
        subprocess.run(["adb", "push", local_copy, remote_copy], **quiet)
    finally:
        os.unlink(local_copy)
 def push_bundle_if_needed(check_binary: str) -> None:
    """Push llama_cpp_bundle to the device if check_binary is not already present.

    Args:
        check_binary: On-device path whose presence means the bundle has
            already been pushed.
    """
    # Probe through run_adb_command: it recovers the on-device exit code,
    # which a bare `adb shell` does not reliably propagate.
    probe = run_adb_command(f"ls {check_binary}", check=False)
    if probe.returncode == 0:
        return
    pushed = subprocess.run(
        ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    )
    if pushed.returncode != 0:
        # Report the failure here; otherwise it only shows up later as
        # confusing "not found" errors in the tests.
        log.error("adb push of llama_cpp_bundle failed (exit %d): %s", pushed.returncode, pushed.stdout)
@@ -1,5 +1,5 @@
 [environment]
-extra-paths = ["./gguf-py", "./examples/model-conversion/scripts", "./tools/server/tests"]
+extra-paths = ["./gguf-py", "./examples/model-conversion/scripts", "./tools/server/tests", "./scripts/snapdragon/qdc/tests"]
 python-version = "3.10"
 [rules]
@@ -13,6 +13,7 @@ exclude = [
 [[overrides]]
 include = [
    "./tools/server/tests/**",
    "./scripts/snapdragon/qdc/tests/**",
 ]
 [overrides.rules]
		`@@ -1 +0,0 @@`
			`This directory includes pytest based scripts for running CI jobs on Qualcomm Device Cloud (QDC).`