Enable testing on Snapdragon devices (#21051)
* Add the tests that we want to run on external CI * remove extra files * Fixes python issues, reove the deadlock on CI * remove unecessary changes * use override to ty.toml * fix pre-commit and try tests with secret in external repo not upstream * skip if key is unavailable * Fix feedback * switch hexagon to snapdragon * cleanup * fix secrets * remove the copyrights at the top of the files
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
"""Shared pytest fixtures for QDC on-device test runners."""
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from appium import webdriver
|
||||
|
||||
from utils import options, write_qdc_log
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def driver():
|
||||
return webdriver.Remote(command_executor="http://127.0.0.1:4723/wd/hub", options=options)
|
||||
|
||||
|
||||
def pytest_sessionfinish(session, exitstatus):
|
||||
xml_path = getattr(session.config.option, "xmlpath", None) or "results.xml"
|
||||
if os.path.exists(xml_path):
|
||||
with open(xml_path) as f:
|
||||
write_qdc_log("results.xml", f.read())
|
||||
@@ -0,0 +1,41 @@
|
||||
"""
|
||||
On-device test-backend-ops runner for llama.cpp (HTP0 backend).
|
||||
|
||||
Executed by QDC's Appium test framework on the QDC runner.
|
||||
The runner has ADB access to the allocated device.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def install(driver):
|
||||
push_bundle_if_needed(f"{BIN_PATH}/test-backend-ops")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
|
||||
def test_backend_ops_htp0(type_a):
|
||||
cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
|
||||
if type_a == "q4_0":
|
||||
cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
|
||||
else:
|
||||
cmd += f" -p type_a={type_a}"
|
||||
result = run_adb_command(
|
||||
cmd,
|
||||
check=False,
|
||||
)
|
||||
write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
|
||||
assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
|
||||
if os.path.exists("results.xml"):
|
||||
with open("results.xml") as f:
|
||||
write_qdc_log("results.xml", f.read())
|
||||
sys.exit(ret)
|
||||
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
|
||||
|
||||
Executed by QDC's Appium test framework on the QDC runner.
|
||||
The runner has ADB access to the allocated device.
|
||||
|
||||
Placeholders replaced at artifact creation time by run_qdc_jobs.py:
|
||||
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
|
||||
|
||||
MODEL_PATH = "/data/local/tmp/model.gguf"
|
||||
PROMPT = "What is the capital of France?"
|
||||
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def install(driver):
|
||||
push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
|
||||
|
||||
# Skip model download if already present
|
||||
check = subprocess.run(
|
||||
["adb", "shell", f"ls {MODEL_PATH}"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
if check.returncode != 0:
|
||||
run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device,extra_flags", [
|
||||
pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
|
||||
pytest.param("GPUOpenCL", "", id="gpu"),
|
||||
pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
|
||||
])
|
||||
def test_llama_completion(device, extra_flags):
|
||||
result = run_adb_command(
|
||||
f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
|
||||
f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
|
||||
f' -p "{PROMPT}"',
|
||||
check=False,
|
||||
)
|
||||
write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
|
||||
assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
|
||||
|
||||
|
||||
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device", [
|
||||
pytest.param("none", id="cpu"),
|
||||
pytest.param("GPUOpenCL", id="gpu"),
|
||||
pytest.param("HTP0", id="npu"),
|
||||
])
|
||||
def test_llama_bench(device):
|
||||
result = run_adb_command(
|
||||
f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
|
||||
f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
|
||||
check=False,
|
||||
)
|
||||
write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
|
||||
assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ret = pytest.main(["-s", "--junitxml=results.xml", os.path.realpath(__file__)])
|
||||
if os.path.exists("results.xml"):
|
||||
with open("results.xml") as f:
|
||||
write_qdc_log("results.xml", f.read())
|
||||
sys.exit(ret)
|
||||
@@ -1,63 +0,0 @@
|
||||
import pytest
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
tmp_path='/data/local/tmp'
|
||||
pkg_path=f'{tmp_path}/llama.cpp'
|
||||
lib_path=f'{pkg_path}/lib'
|
||||
bin_path=f'{pkg_path}/bin'
|
||||
|
||||
model='../gguf/Llama-3.2-1B-Instruct-Q4_0.gguf'
|
||||
cli_pref=f'cd {pkg_path} && LD_LIBRARY_PATH={lib_path} ADSP_LIBRARY_PATH={lib_path} {bin_path}'
|
||||
|
||||
|
||||
def run_cmd(cmd):
|
||||
p = subprocess.run(cmd, text = True, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
|
||||
sys.stdout.write(p.stdout)
|
||||
assert(p.returncode == 0)
|
||||
|
||||
|
||||
@pytest.mark.dependency()
|
||||
def test_install():
|
||||
run_cmd(['adb', 'push', 'llama.cpp', f'{tmp_path}'])
|
||||
run_cmd(['adb', 'shell', f'chmod 755 {bin_path}/*'])
|
||||
|
||||
|
||||
## Basic cli tests
|
||||
def run_llama_cli(dev, opts):
|
||||
prompt='what is the most popular cookie in the world?\nPlease provide a very brief bullet point summary.\nBegin your answer with **BEGIN**.'
|
||||
opts = '--batch-size 128 -n 128 -no-cnv --seed 42 ' + opts
|
||||
run_cmd(['adb', 'shell', f'{cli_pref}/llama-cli -m {model} --device {dev} -ngl 99 -t 4 {opts} -p "{prompt}"'])
|
||||
|
||||
|
||||
@pytest.mark.dependency(depends=['test_install'])
|
||||
def test_llama_cli_cpu():
|
||||
run_llama_cli('none', '-ctk q8_0 -ctv q8_0 -fa on')
|
||||
|
||||
|
||||
@pytest.mark.dependency(depends=['test_install'])
|
||||
def test_llama_cli_gpu():
|
||||
run_llama_cli('GPUOpenCL', '-fa on')
|
||||
|
||||
|
||||
@pytest.mark.dependency(depends=['test_install'])
|
||||
def test_llama_cli_npu():
|
||||
run_llama_cli('HTP0', '-ctk q8_0 -ctv q8_0 -fa on')
|
||||
|
||||
|
||||
## Basic bench tests
|
||||
def run_llama_bench(dev):
|
||||
run_cmd(['adb', 'shell', f'{cli_pref}/llama-bench -m {model} --device {dev} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32'])
|
||||
|
||||
|
||||
@pytest.mark.dependency(depends=['test_install'])
|
||||
def test_llama_bench_cpu():
|
||||
run_llama_bench('none')
|
||||
|
||||
|
||||
def test_llama_bench_gpu():
|
||||
run_llama_bench('GPUOpenCL')
|
||||
|
||||
|
||||
def test_llama_bench_npu():
|
||||
run_llama_bench('HTP0')
|
||||
@@ -0,0 +1,93 @@
|
||||
"""Shared helpers for QDC on-device test runners."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
from appium.options.common import AppiumOptions
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# On-device paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle"
|
||||
QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
|
||||
LIB_PATH = f"{BUNDLE_PATH}/lib"
|
||||
BIN_PATH = f"{BUNDLE_PATH}/bin"
|
||||
ENV_PREFIX = (
|
||||
f"export LD_LIBRARY_PATH={LIB_PATH} && "
|
||||
f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
|
||||
f"chmod +x {BIN_PATH}/* &&"
|
||||
)
|
||||
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Appium session options
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
options = AppiumOptions()
|
||||
options.set_capability("automationName", "UiAutomator2")
|
||||
options.set_capability("platformName", "Android")
|
||||
options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ADB helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
|
||||
# Append exit-code sentinel because `adb shell` doesn't reliably propagate
|
||||
# the on-device exit code (older ADB versions always return 0).
|
||||
raw = subprocess.run(
|
||||
["adb", "shell", f"{cmd}; echo __RC__:$?"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
stdout = raw.stdout
|
||||
returncode = raw.returncode
|
||||
if stdout:
|
||||
lines = stdout.rstrip("\n").split("\n")
|
||||
if lines and lines[-1].startswith("__RC__:"):
|
||||
try:
|
||||
returncode = int(lines[-1][7:])
|
||||
stdout = "\n".join(lines[:-1]) + "\n"
|
||||
except ValueError:
|
||||
pass
|
||||
log.info("%s", stdout)
|
||||
result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
|
||||
if check:
|
||||
assert returncode == 0, f"Command failed (exit {returncode})"
|
||||
return result
|
||||
|
||||
|
||||
def write_qdc_log(filename: str, content: str) -> None:
|
||||
"""Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
|
||||
subprocess.run(
|
||||
["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
|
||||
f.write(content)
|
||||
tmp_path = f.name
|
||||
try:
|
||||
subprocess.run(
|
||||
["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
finally:
|
||||
os.unlink(tmp_path)
|
||||
|
||||
|
||||
def push_bundle_if_needed(check_binary: str) -> None:
|
||||
"""Push llama_cpp_bundle to the device if check_binary is not already present."""
|
||||
result = subprocess.run(
|
||||
["adb", "shell", f"ls {check_binary}"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
subprocess.run(
|
||||
["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
Reference in New Issue
Block a user