diff --git a/.github/workflows/build-and-test-snapdragon.yml b/.github/workflows/build-and-test-snapdragon.yml new file mode 100644 index 000000000..7eb204ea2 --- /dev/null +++ b/.github/workflows/build-and-test-snapdragon.yml @@ -0,0 +1,113 @@ +name: CI (snapdragon) + +on: + workflow_dispatch: + push: + branches: + - master + paths: + - '.github/workflows/build-and-test-snapdragon.yml' + - 'ggml/include/ggml-hexagon.h' + - 'ggml/src/ggml-hexagon/**' + - 'docs/backend/snapdragon/**' + - 'scripts/snapdragon/**' + - 'CMakePresets.json' + + pull_request: + types: [opened, synchronize, reopened] + paths: + - '.github/workflows/build-and-test-snapdragon.yml' + - 'ggml/include/ggml-hexagon.h' + - 'ggml/src/ggml-hexagon/**' + - 'docs/backend/snapdragon/**' + - 'scripts/snapdragon/**' + - 'CMakePresets.json' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + android-ndk-snapdragon: + runs-on: ubuntu-latest + container: + image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3' + defaults: + run: + shell: bash + + steps: + - name: Clone + uses: actions/checkout@v6 + with: + fetch-depth: 0 + lfs: false + + - name: Build Llama.CPP for Snapdragon Android + id: build_llama_cpp_snapdragon_android + run: | + cp docs/backend/snapdragon/CMakeUserPresets.json . + cmake --preset arm64-android-snapdragon-release -B build + cmake --build build + cmake --install build --prefix pkg-adb/llama.cpp + + - name: Upload Llama.CPP Snapdragon Android Build Artifact + if: ${{ always() && steps.build_llama_cpp_snapdragon_android.outcome == 'success' }} + uses: actions/upload-artifact@v6 + with: + name: llama-cpp-android-arm64-snapdragon + path: pkg-adb/llama.cpp + + check-secret: + runs-on: ubuntu-latest + outputs: + has-key: ${{ steps.check.outputs.has-key }} + steps: + - id: check + run: echo "has-key=${{ secrets.QDC_API_KEY != '' }}" >> "$GITHUB_OUTPUT" + + test-snapdragon-qdc: + name: Test on QDC Android Device (${{ matrix.device }}) + needs: [android-ndk-snapdragon, check-secret] + if: needs.check-secret.outputs.has-key == 'true' + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + device: [SM8750, SM8650, SM8850] + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Download build artifact + uses: actions/download-artifact@v4 + with: + name: llama-cpp-android-arm64-snapdragon + path: pkg-snapdragon/ + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + cache: pip + + - name: Install QDC SDK wheel + run: | + curl -fSL -o qdc_sdk.zip https://softwarecenter.qualcomm.com/api/download/software/tools/Qualcomm_Device_Cloud_SDK/All/0.2.3/qualcomm_device_cloud_sdk-0.2.3.zip + unzip qdc_sdk.zip -d qdc_sdk + pip install qdc_sdk/qualcomm_device_cloud_sdk-0.2.3-py3-none-any.whl + + - name: Run QDC tests (${{ matrix.device }}) + run: | + python scripts/snapdragon/qdc/run_qdc_jobs.py \ + --test all \ + --pkg-dir pkg-snapdragon/llama.cpp \ + --model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \ + --device ${{ matrix.device }} + env: + QDC_API_KEY: ${{ secrets.QDC_API_KEY }} + + - name: Cleanup + if: always() + run: rm -rf pkg-snapdragon qdc_sdk qdc_sdk.zip diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index b38a793f1..5d88305a4 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -1,26 +1,24 @@ name: CI (android) on: - workflow_dispatch: # allows manual triggering + workflow_dispatch: push: branches: - master - paths: [ - '.github/workflows/build-android.yml', - '**/CMakeLists.txt', - '**/.cmake', - '**/*.h', - '**/*.hpp', - '**/*.c', - '**/*.cpp' - ] + paths: + - '.github/workflows/build-android.yml' + - '**/CMakeLists.txt' + - '**/.cmake' + - '**/*.h' + - '**/*.hpp' + - '**/*.c' + - '**/*.cpp' pull_request: types: [opened, synchronize, reopened] - paths: [ - '.github/workflows/build-android.yml', - 'examples/llama.android/**' - ] + paths: + - '.github/workflows/build-android.yml' + - 'examples/llama.android/**' concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -67,35 +65,24 @@ jobs: defaults: run: shell: bash - strategy: - matrix: - include: - - build: 'arm64-cpu' - defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF' - - build: 'arm64-snapdragon' - defines: '--preset arm64-android-snapdragon-release' steps: - name: Clone - id: checkout uses: actions/checkout@v6 with: fetch-depth: 0 lfs: false - - name: Build Llama.CPP for Hexagon Android - id: build_llama_cpp_hexagon_android + - name: Build + id: ndk_build run: | - if [[ "${{ matrix.build }}" == "arm64-snapdragon" ]]; then - cp docs/backend/snapdragon/CMakeUserPresets.json . - fi - cmake ${{ matrix.defines }} -B build + cmake -D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF -B build cmake --build build cmake --install build --prefix pkg-adb/llama.cpp - - name: Upload Llama.CPP Hexagon Android Build Artifact - if: ${{ always() && steps.build_llama_cpp_hexagon_android.outcome == 'success' }} + - name: Upload Android Build Artifact + if: ${{ always() && steps.ndk_build.outcome == 'success' }} uses: actions/upload-artifact@v6 with: - name: llama-cpp-android-${{ matrix.build }} + name: llama-cpp-android-arm64-cpu path: pkg-adb/llama.cpp diff --git a/scripts/snapdragon/qdc/readme.md b/scripts/snapdragon/qdc/readme.md deleted file mode 100644 index b92cf243a..000000000 --- a/scripts/snapdragon/qdc/readme.md +++ /dev/null @@ -1 +0,0 @@ -This directory includes pytest based scripts for running CI jobs on Qualcomm Device Cloud (QDC). diff --git a/scripts/snapdragon/qdc/requirements.txt b/scripts/snapdragon/qdc/requirements.txt index f04bd682e..5e0f85917 100644 --- a/scripts/snapdragon/qdc/requirements.txt +++ b/scripts/snapdragon/qdc/requirements.txt @@ -8,12 +8,9 @@ iniconfig==2.1.0 outcome==1.3.0.post0 packaging==25.0 pluggy==1.6.0 -Pygments==2.19.2 PySocks==1.7.1 pytest==8.4.2 -pytest-dependency==0.6.0 selenium==4.36.0 -setuptools==80.9.0 sniffio==1.3.1 sortedcontainers==2.4.0 tomli==2.3.0 diff --git a/scripts/snapdragon/qdc/run_qdc_jobs.py b/scripts/snapdragon/qdc/run_qdc_jobs.py new file mode 100644 index 000000000..b4eede3d0 --- /dev/null +++ b/scripts/snapdragon/qdc/run_qdc_jobs.py @@ -0,0 +1,401 @@ +"""Run llama.cpp Hexagon Android tests in a single QDC Appium job. + +Bundles test scripts into one artifact and submits a single QDC job: + + 1. run_bench_tests_posix.py — llama-cli and llama-bench on CPU / GPU / NPU + (from scripts/snapdragon/qdc/) + +Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions). + +Prerequisites: + pip install /path/to/qualcomm_device_cloud_sdk*.whl + +Required environment variables: + QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys + +Usage: + python run_qdc_jobs.py \\ + --pkg-dir pkg-snapdragon/llama.cpp \\ + --model-url https://.../Llama-3.2-1B-Instruct-Q4_0.gguf \\ + --device SM8750 +""" + +from __future__ import annotations + +import argparse +import logging +import os +import re +import shutil +import sys +import tempfile +import time +import xml.etree.ElementTree as ET +from dataclasses import dataclass, field +from pathlib import Path + +from qualcomm_device_cloud_sdk.api import qdc_api # ty: ignore[unresolved-import] +from qualcomm_device_cloud_sdk.logging import configure_logging # ty: ignore[unresolved-import] +from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework # ty: ignore[unresolved-import] + +configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()]) +log = logging.getLogger(__name__) + +POLL_INTERVAL = 30 +JOB_TIMEOUT = 3600 +LOG_UPLOAD_TIMEOUT = 600 +CAPACITY_TIMEOUT = 1800 +CAPACITY_POLL = 60 +MAX_CONCURRENT_JOBS = 5 +TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED} +NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED} + +_SCRIPTS_DIR = Path(__file__).parent +_TESTS_DIR = _SCRIPTS_DIR / "tests" +_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py" +_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py" +_UTILS = _TESTS_DIR / "utils.py" +_CONFTEST = _TESTS_DIR / "conftest.py" +_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt" + +_PYTEST_LINE_RE = re.compile( + r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)" +) +_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"} +_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES} + + +@dataclass +class JobResult: + passed: bool + tests: dict[str, bool] = field(default_factory=dict) + raw_logs: dict[str, str] = field(default_factory=dict) + failure_details: dict[str, str] = field(default_factory=dict) + + +def build_artifact_zip( + pkg_dir: Path, + stage_dir: Path, + *, + test_mode: str = "bench", + model_url: str | None = None, +) -> Path: + """Bundle everything into a single QDC artifact zip. + + Zip structure (extracted by QDC to /qdc/appium/ on the runner): + llama_cpp_bundle/ installed package (adb pushed to /data/local/tmp/) + tests/ + utils.py shared helpers (paths, run_adb_command, …) + conftest.py shared pytest fixtures (driver) + test_bench_posix.py bench + cli tests (<> substituted) + AND/OR + test_backend_ops_posix.py test-backend-ops -b HTP0 + requirements.txt + """ + shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle") + + tests_dir = stage_dir / "tests" + tests_dir.mkdir() + + shutil.copy(_UTILS, tests_dir / "utils.py") + shutil.copy(_CONFTEST, tests_dir / "conftest.py") + + if test_mode in ("bench", "all"): + assert model_url is not None, "--model-url is required for bench/all test modes" + (tests_dir / "test_bench_posix.py").write_text( + _RUN_BENCH.read_text().replace("<>", model_url) + ) + if test_mode in ("backend-ops", "all"): + shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py") + + shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt") + (stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n") + + zip_base = str(stage_dir / "artifact") + shutil.make_archive(zip_base, "zip", stage_dir) + return Path(f"{zip_base}.zip") + + +def wait_for_job(client, job_id: str, timeout: int) -> str: + elapsed = 0 + while elapsed < timeout: + raw = qdc_api.get_job_status(client, job_id) + try: + status = JobState(raw) + except ValueError: + status = raw + if status in TERMINAL_STATES: + return raw.lower() + log.info("Job %s: %s", job_id, raw) + time.sleep(POLL_INTERVAL) + elapsed += POLL_INTERVAL + raise TimeoutError(f"Job {job_id} did not finish within {timeout}s") + + +def wait_for_log_upload(client, job_id: str) -> None: + elapsed = 0 + while elapsed <= LOG_UPLOAD_TIMEOUT: + status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower() + if status in {"completed", "failed"}: + return + log.info("Waiting for log upload (status=%s) ...", status) + time.sleep(POLL_INTERVAL) + elapsed += POLL_INTERVAL + log.warning("Timed out waiting for log upload after %ds", LOG_UPLOAD_TIMEOUT) + + +def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None: + """Block until the user's active (non-terminal) QDC job count is below max_jobs.""" + elapsed = 0 + while elapsed < CAPACITY_TIMEOUT: + jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50) + if jobs_page is None: + log.warning("Could not retrieve job list; proceeding without capacity check") + return + items = getattr(jobs_page, "data", []) or [] + active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES) + if active < max_jobs: + log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs) + return + log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL) + time.sleep(CAPACITY_POLL) + elapsed += CAPACITY_POLL + log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT) + + +def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]: + try: + root = ET.fromstring(content) + except ET.ParseError: + return {}, {} + results: dict[str, bool] = {} + failures: dict[str, str] = {} + for tc in root.iter("testcase"): + name = tc.get("name", "") + if classname := tc.get("classname", ""): + name = f"{classname}.{name}" + failure_el = tc.find("failure") + if failure_el is None: + failure_el = tc.find("error") + results[name] = failure_el is None + if failure_el is not None: + parts = [failure_el.get("message", ""), failure_el.text or ""] + failures[name] = "\n".join(p for p in parts if p).strip() + return results, failures + + +def _parse_pytest_output(content: str) -> dict[str, bool]: + results: dict[str, bool] = {} + for m in _PYTEST_LINE_RE.finditer(content): + results[m.group(1)] = m.group(2) == "PASSED" + return results + + +def fetch_logs_and_parse_tests( + client, job_id: str +) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]: + """Returns (test_results, raw_logs, failure_details).""" + log_files = qdc_api.get_job_log_files(client, job_id) + if not log_files: + log.warning("No log files returned for job %s", job_id) + return {}, {}, {} + + test_results: dict[str, bool] = {} + pytest_fallback: dict[str, bool] = {} + raw_logs: dict[str, str] = {} + failure_details: dict[str, str] = {} + + with tempfile.TemporaryDirectory() as tmpdir: + for lf in log_files: + log.info("Downloading log file: %s", lf.filename) + zip_path = os.path.join(tmpdir, "log.zip") + qdc_api.download_job_log_files(client, lf.filename, zip_path) + try: + shutil.unpack_archive(zip_path, tmpdir, "zip") + except Exception as e: + log.warning("Could not unpack %s as zip: %s", lf.filename, e) + + for root_dir, _, files in os.walk(tmpdir): + for fname in sorted(files): + fpath = os.path.join(root_dir, fname) + content = Path(fpath).read_text(errors="replace") + if fname.endswith(".xml"): + results, failures = _parse_junit_xml(content) + test_results.update(results) + failure_details.update(failures) + elif fname.endswith(".log"): + if fname in _EXCLUDED_LOGS: + continue + log.info("--- %s ---", fname) + log.info("%s", content) + raw_logs[fname] = content + pytest_fallback.update(_parse_pytest_output(content)) + + return (test_results if test_results else pytest_fallback), raw_logs, failure_details + + +def write_summary(result: JobResult, title: str = "QDC Test Results") -> None: + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_path: + return + + icon = "✅" if result.passed else "❌" + + lines = [ + f"## {title}\n", + f"Overall: {icon} {'PASSED' if result.passed else 'FAILED'}\n", + ] + reportable = {n: ok for n, ok in result.tests.items() if "test_install" not in n} + if reportable: + lines += ["| Test | Result |", "| ---- | ------ |"] + for name, ok in reportable.items(): + lines.append(f"| `{name}` | {'✅' if ok else '❌'} |") + passed_n = sum(1 for v in reportable.values() if v) + failed_n = sum(1 for v in reportable.values() if not v) + lines += ["", f"**{passed_n} passed, {failed_n} failed**"] + else: + lines.append("_No per-test data available._") + + failed_names = [n for n, ok in reportable.items() if not ok] + if failed_names: + lines += ["", "### Failures"] + for name in failed_names: + detail = result.failure_details.get(name) + if detail: + lines += [ + f"
{name}", + "", + "```", + detail, + "```", + "", + "
", + ] + + if result.raw_logs: + lines += ["", "### Raw Logs"] + for fname, content in sorted(result.raw_logs.items()): + lines += [ + f"
{fname}", + "", + "```", + content.rstrip(), + "```", + "", + "
", + ] + + with open(summary_path, "a") as f: + f.write("\n".join(lines) + "\n") + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p.add_argument("--pkg-dir", required=True, type=Path, + help="Installed llama.cpp package directory (contains bin/ and lib/)") + p.add_argument("--model-url", + help="Direct URL to the GGUF model file (required for --test bench)") + p.add_argument("--device", required=True, + help="QDC chipset name, e.g. SM8750") + p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench", + help="Test suite to run (default: bench)") + p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS", + help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})") + args = p.parse_args() + if args.test in ("bench", "all") and not args.model_url: + p.error("--model-url is required when --test bench or --test all") + return args + + +def main() -> int: + args = parse_args() + + api_key = os.environ.get("QDC_API_KEY") + if not api_key: + log.error("QDC_API_KEY environment variable must be set") + return 1 + if not args.pkg_dir.is_dir(): + log.error("--pkg-dir %s does not exist", args.pkg_dir) + return 1 + + client = qdc_api.get_public_api_client_using_api_key( + api_key_header=api_key, + app_name_header="llama-cpp-ci", + on_behalf_of_header="llama-cpp-ci", + client_type_header="Python", + ) + + target_id = qdc_api.get_target_id(client, args.device) + if target_id is None: + log.error("Could not find QDC target for device %r", args.device) + return 1 + + with tempfile.TemporaryDirectory() as tmpdir: + log.info("Building artifact ...") + zip_path = build_artifact_zip( + args.pkg_dir, Path(tmpdir), + test_mode=args.test, model_url=args.model_url, + ) + log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000) + artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT) + + if artifact_id is None: + log.error("Artifact upload failed") + return 1 + + wait_for_capacity(client) + + job_id = qdc_api.submit_job( + public_api_client=client, + target_id=target_id, + job_name="llama.cpp Hexagon tests", + external_job_id=None, + job_type=JobType.AUTOMATED, + job_mode=JobMode.APPLICATION, + timeout=max(1, args.job_timeout // 60), + test_framework=TestFramework.APPIUM, + entry_script=None, + job_artifacts=[artifact_id], + monkey_events=None, + monkey_session_timeout=None, + job_parameters=[JobSubmissionParameter.WIFIENABLED], + ) + if job_id is None: + log.error("Job submission failed") + return 1 + log.info("Job submitted: %s (device=%s)", job_id, args.device) + + try: + job_status = wait_for_job(client, job_id, timeout=args.job_timeout) + except TimeoutError as e: + log.error("%s", e) + write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})") + return 1 + log.info("Job %s finished: %s", job_id, job_status) + + wait_for_log_upload(client, job_id) + tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id) + + passed = job_status == JobState.COMPLETED.value.lower() + if tests: + passed = passed and all(tests.values()) + if not passed: + log.error("Job did not complete successfully or tests failed (status=%s)", job_status) + + result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details) + if args.test == "backend-ops": + title = f"Backend Ops — HTP0 ({args.device})" + elif args.test == "all": + title = f"QDC Tests ({args.device})" + else: + title = f"QDC Test Results ({args.device})" + write_summary(result, title=title) + + return 0 if passed else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/snapdragon/qdc/tests/conftest.py b/scripts/snapdragon/qdc/tests/conftest.py new file mode 100644 index 000000000..0fc5b3e5f --- /dev/null +++ b/scripts/snapdragon/qdc/tests/conftest.py @@ -0,0 +1,20 @@ +"""Shared pytest fixtures for QDC on-device test runners.""" + +import os + +import pytest +from appium import webdriver + +from utils import options, write_qdc_log + + +@pytest.fixture(scope="session", autouse=True) +def driver(): + return webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options) + + +def pytest_sessionfinish(session, exitstatus): + xml_path = getattr(session.config.option, "xmlpath", None) or "results.xml" + if os.path.exists(xml_path): + with open(xml_path) as f: + write_qdc_log("results.xml", f.read()) diff --git a/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py b/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py new file mode 100644 index 000000000..958fc0747 --- /dev/null +++ b/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py @@ -0,0 +1,41 @@ +""" +On-device test-backend-ops runner for llama.cpp (HTP0 backend). + +Executed by QDC's Appium test framework on the QDC runner. +The runner has ADB access to the allocated device. +""" + +import os +import sys + +import pytest + +from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log + + +@pytest.fixture(scope="session", autouse=True) +def install(driver): + push_bundle_if_needed(f"{BIN_PATH}/test-backend-ops") + + +@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"]) +def test_backend_ops_htp0(type_a): + cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT" + if type_a == "q4_0": + cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"' + else: + cmd += f" -p type_a={type_a}" + result = run_adb_command( + cmd, + check=False, + ) + write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "") + assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})" + + +if __name__ == "__main__": + ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)]) + if os.path.exists("results.xml"): + with open("results.xml") as f: + write_qdc_log("results.xml", f.read()) + sys.exit(ret) diff --git a/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py b/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py new file mode 100644 index 000000000..44802c313 --- /dev/null +++ b/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py @@ -0,0 +1,76 @@ +""" +On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends). + +Executed by QDC's Appium test framework on the QDC runner. +The runner has ADB access to the allocated device. + +Placeholders replaced at artifact creation time by run_qdc_jobs.py: + <> Direct URL to the GGUF model file (downloaded on-device via curl) +""" + +import os +import subprocess +import sys + +import pytest + +from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log + +MODEL_PATH = "/data/local/tmp/model.gguf" +PROMPT = "What is the capital of France?" +CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42" + + +@pytest.fixture(scope="session", autouse=True) +def install(driver): + push_bundle_if_needed(f"{BIN_PATH}/llama-cli") + + # Skip model download if already present + check = subprocess.run( + ["adb", "shell", f"ls {MODEL_PATH}"], + text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + if check.returncode != 0: + run_adb_command(f'curl -L -J --output {MODEL_PATH} "<>"') + + +@pytest.mark.parametrize("device,extra_flags", [ + pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"), + pytest.param("GPUOpenCL", "", id="gpu"), + pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"), +]) +def test_llama_completion(device, extra_flags): + result = run_adb_command( + f'{CMD_PREFIX} {BIN_PATH}/llama-completion' + f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on' + f' -p "{PROMPT}"', + check=False, + ) + write_qdc_log(f"llama_completion_{device}.log", result.stdout or "") + assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})" + + +_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"} + + +@pytest.mark.parametrize("device", [ + pytest.param("none", id="cpu"), + pytest.param("GPUOpenCL", id="gpu"), + pytest.param("HTP0", id="npu"), +]) +def test_llama_bench(device): + result = run_adb_command( + f"{CMD_PREFIX} {BIN_PATH}/llama-bench" + f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32", + check=False, + ) + write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "") + assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})" + + +if __name__ == "__main__": + ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)]) + if os.path.exists("results.xml"): + with open("results.xml") as f: + write_qdc_log("results.xml", f.read()) + sys.exit(ret) diff --git a/scripts/snapdragon/qdc/tests/test_bench.py b/scripts/snapdragon/qdc/tests/test_bench.py deleted file mode 100644 index 651ab5b71..000000000 --- a/scripts/snapdragon/qdc/tests/test_bench.py +++ /dev/null @@ -1,63 +0,0 @@ -import pytest -import subprocess -import sys - -tmp_path='/data/local/tmp' -pkg_path=f'{tmp_path}/llama.cpp' -lib_path=f'{pkg_path}/lib' -bin_path=f'{pkg_path}/bin' - -model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf' -cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}' - - -def run_cmd(cmd): - p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) - sys.stdout.write(p.stdout) - assert(p.returncode == 0) - - -@pytest.mark.dependency() -def test_install(): - run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}']) - run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*']) - - -## Basic cli tests -def run_llama_cli(dev, opts): - prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.' - opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts - run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"']) - - -@pytest.mark.dependency(depends=['test_install']) -def test_llama_cli_cpu(): - run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on') - - -@pytest.mark.dependency(depends=['test_install']) -def test_llama_cli_gpu(): - run_llama_cli('GPUOpenCL', '-fa on') - - -@pytest.mark.dependency(depends=['test_install']) -def test_llama_cli_npu(): - run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on') - - -## Basic bench tests -def run_llama_bench(dev): - run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32']) - - -@pytest.mark.dependency(depends=['test_install']) -def test_llama_bench_cpu(): - run_llama_bench('none') - - -def test_llama_bench_gpu(): - run_llama_bench('GPUOpenCL') - - -def test_llama_bench_npu(): - run_llama_bench('HTP0') diff --git a/scripts/snapdragon/qdc/tests/utils.py b/scripts/snapdragon/qdc/tests/utils.py new file mode 100644 index 000000000..00f0f1b2f --- /dev/null +++ b/scripts/snapdragon/qdc/tests/utils.py @@ -0,0 +1,93 @@ +"""Shared helpers for QDC on-device test runners.""" + +import logging +import os +import subprocess +import tempfile + +from appium.options.common import AppiumOptions + +log = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# On-device paths +# --------------------------------------------------------------------------- + +BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle" +QDC_LOGS_PATH = "/data/local/tmp/QDC_logs" +LIB_PATH = f"{BUNDLE_PATH}/lib" +BIN_PATH = f"{BUNDLE_PATH}/bin" +ENV_PREFIX = ( + f"export LD_LIBRARY_PATH={LIB_PATH} && " + f"export ADSP_LIBRARY_PATH={LIB_PATH} && " + f"chmod +x {BIN_PATH}/* &&" +) +CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}" + +# --------------------------------------------------------------------------- +# Appium session options +# --------------------------------------------------------------------------- + +options = AppiumOptions() +options.set_capability("automationName", "UiAutomator2") +options.set_capability("platformName", "Android") +options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION")) + +# --------------------------------------------------------------------------- +# ADB helpers +# --------------------------------------------------------------------------- + + +def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess: + # Append exit-code sentinel because `adb shell` doesn't reliably propagate + # the on-device exit code (older ADB versions always return 0). + raw = subprocess.run( + ["adb", "shell", f"{cmd}; echo __RC__:$?"], + text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + stdout = raw.stdout + returncode = raw.returncode + if stdout: + lines = stdout.rstrip("\n").split("\n") + if lines and lines[-1].startswith("__RC__:"): + try: + returncode = int(lines[-1][7:]) + stdout = "\n".join(lines[:-1]) + "\n" + except ValueError: + pass + log.info("%s", stdout) + result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout) + if check: + assert returncode == 0, f"Command failed (exit {returncode})" + return result + + +def write_qdc_log(filename: str, content: str) -> None: + """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection.""" + subprocess.run( + ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f: + f.write(content) + tmp_path = f.name + try: + subprocess.run( + ["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"], + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + finally: + os.unlink(tmp_path) + + +def push_bundle_if_needed(check_binary: str) -> None: + """Push llama_cpp_bundle to the device if check_binary is not already present.""" + result = subprocess.run( + ["adb", "shell", f"ls {check_binary}"], + text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) + if result.returncode != 0: + subprocess.run( + ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"], + text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + ) diff --git a/ty.toml b/ty.toml index bcd23db9b..a07d7485d 100644 --- a/ty.toml +++ b/ty.toml @@ -1,5 +1,5 @@ [environment] -extra-paths = ["./gguf-py", "./examples/model-conversion/scripts", "./tools/server/tests"] +extra-paths = ["./gguf-py", "./examples/model-conversion/scripts", "./tools/server/tests", "./scripts/snapdragon/qdc/tests"] python-version = "3.10" [rules] @@ -13,6 +13,7 @@ exclude = [ [[overrides]] include = [ "./tools/server/tests/**", + "./scripts/snapdragon/qdc/tests/**", ] [overrides.rules]