Skip to content

Commit

Permalink
ggml-qnn:submit source code of ggml-qnn PR in kantv-ai/kantv#246
Browse files Browse the repository at this point in the history
  • Loading branch information
zhouwg committed Feb 10, 2025
1 parent d7b31a9 commit 30717d7
Show file tree
Hide file tree
Showing 12 changed files with 4,242 additions and 1 deletion.
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ set(CMAKE_WARN_UNUSED_CLI YES)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Emit full compiler command lines; useful while bringing up the QNN backend.
set(CMAKE_VERBOSE_MAKEFILE ON)

if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    # Enable the QNN and CPU backends plus the llamafile SGEMM path for
    # Android builds. add_compile_definitions() is the modern replacement
    # for add_definitions(), which also accepted arbitrary compile flags.
    add_compile_definitions(GGML_USE_QNN GGML_USE_CPU GGML_USE_LLAMAFILE)

    # NOTE(review): the original additionally forced -D__aarch64__,
    # -D__linux__ and -D__ARM_NEON. Those are compiler-predefined macros for
    # the arm64-v8a Android target and must never be defined manually:
    # redundant on arm64, and actively wrong for any other Android ABI
    # (e.g. armeabi-v7a, x86_64), so they are dropped here.
endif()

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# About ggml-qnn
Please refer to <a href="https://github.com/zhouwg/kantv/blob/master/README-qnn.md">README-qnn.md</a>.

I made a git mistake when submitting the ggml-qnn source code in this forked project, which caused the test case test-backend-ops to fail. <a href="https://github.com/zhouwg/kantv/tree/master">Project kantv</a> is the main playground of ggml-qnn, and this backend works well on a Xiaomi 14 (an Android phone equipped with a Qualcomm Snapdragon Gen 3). This backend can be verified with project kantv.

# llama.cpp

![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
Expand Down
202 changes: 202 additions & 0 deletions build-run-android.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#!/bin/bash
# Copyright (c) 2024- KanTV Authors
#
# Build/deploy helper for the ggml-qnn backend on Android.
# NOTE: the shebang must be the very first line of the file, otherwise the
# kernel ignores it and the script runs under the caller's default shell;
# the original placed the copyright line above it.

set -e

# Checkout root. Do not clobber the shell's special PWD variable as the
# original did — bash keeps PWD in sync with the current directory, so
# overwriting it invites subtle breakage after any 'cd'.
PROJECT_ROOT_DIR=$(pwd)
ANDROID_PLATFORM=android-34
ANDROID_NDK=${PROJECT_ROOT_DIR}/android-ndk-r26c
# Directory on the device where binaries and QNN libs are pushed.
REMOTE_PATH=/data/local/tmp/

#QNN SDK could be found at:
#https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
#https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
QNN_SDK_PATH=/opt/qcom/aistack/qairt/2.31.0.250130/

LLAMA_CLI=llama-cli

# Print the toolchain/SDK locations this script will use.
function dump_vars()
{
    local var
    for var in ANDROID_NDK QNN_SDK_PATH; do
        echo -e "${var}: ${!var}"
    done
}


# Report the directory the script is currently executing in.
function show_pwd()
{
    local cwd
    cwd=$(pwd)
    echo -e "current working path:${cwd}\n"
}


# Abort with a download hint when the QNN SDK is not installed at
# ${QNN_SDK_PATH}. Called before any build or device-deploy step.
function check_qnn_sdk()
{
    # Quote the expansion so the test cannot word-split or glob.
    if [ ! -d "${QNN_SDK_PATH}" ]; then
        echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} not exist, pls check or download it from https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct...\n"
        exit 1
    fi
}


# Ensure a usable android-ndk-r26c checkout exists at ${ANDROID_NDK},
# downloading and unpacking it from dl.google.com when missing.
function check_and_download_ndk()
{
    is_android_ndk_exist=1

    # The directory must exist AND contain the cmake toolchain file,
    # otherwise treat the NDK as absent.
    if [ ! -d "${ANDROID_NDK}" ]; then
        is_android_ndk_exist=0
    fi

    if [ ! -f "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" ]; then
        is_android_ndk_exist=0
    fi

    if [ ${is_android_ndk_exist} -eq 0 ]; then

        if [ ! -f android-ndk-r26c-linux.zip ]; then
            # Under 'set -e' a failed wget would abort the script silently,
            # so test it explicitly to emit a useful diagnostic first.
            if ! wget --no-config --quiet --show-progress -O android-ndk-r26c-linux.zip https://dl.google.com/android/repository/android-ndk-r26c-linux.zip; then
                printf "failed to download android ndk to %s \n" "${ANDROID_NDK}"
                exit 1
            fi
        fi

        # BUG FIX: the original ran 'unzip' as a bare statement and tested
        # $? afterwards — under 'set -e' the script had already exited on
        # failure, so that check was dead code. Guard the command directly.
        if ! unzip android-ndk-r26c-linux.zip; then
            printf "failed to unzip android ndk to %s \n" "${ANDROID_NDK}"
            exit 1
        fi

        printf "android ndk saved to ${ANDROID_NDK} \n\n"
    else
        printf "android ndk already exist:${ANDROID_NDK} \n\n"
    fi
}


# Configure and build llama.cpp for arm64-v8a Android with the QNN backend,
# then stage the produced llama-cli next to the script as llama-cli-android.
function build_arm64()
{
    # -S/-B is the documented replacement for the undocumented -H./-B form.
    # CONSISTENCY FIX: use ${ANDROID_PLATFORM} (declared at the top of this
    # script) instead of the hardcoded "latest" the original passed, so the
    # target API level is controlled in exactly one place.
    cmake -S . -B ./out/android \
        -DGGML_USE_QNN=ON \
        -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
        -DANDROID_ABI=arm64-v8a \
        -DANDROID_PLATFORM=${ANDROID_PLATFORM} \
        -DCMAKE_C_FLAGS=-march=armv8.7-a \
        -DGGML_QNN=ON \
        -DGGML_QNN_SDK_PATH="${QNN_SDK_PATH}"
    cd out/android
    make -j16
    show_pwd

    ls -lah bin/${LLAMA_CLI}
    /bin/cp -fv bin/${LLAMA_CLI} ../../${LLAMA_CLI}-android
    cd -
}


# Delete the ./out build tree when a previous build left one behind.
function remove_temp_dir()
{
    if [ ! -d out ]; then
        return
    fi
    echo "remove out directory in `pwd`"
    rm -rf out
}


# Push the QNN runtime libs to the device unless a cached copy is already
# present under ${REMOTE_PATH}.
function check_qnn_libs()
{
    # BUG FIX: the original ran 'adb shell ls' as a bare statement and then
    # tested $? — under 'set -e' a missing lib terminated the whole script
    # before the fallback could run. Testing the command inside 'if' makes
    # a non-zero exit immune to set -e.
    if adb shell ls ${REMOTE_PATH}/libQnnCpu.so; then
        printf "QNN libs already exist on Android phone\n"
    else
        update_qnn_libs
    fi
}


# Copy every QNN runtime library the backend needs from the host SDK onto
# the device at ${REMOTE_PATH}.
function update_qnn_libs()
{
    local lib
    for lib in libQnnSystem.so libQnnCpu.so libQnnGpu.so \
               libQnnHtp.so libQnnHtpNetRunExtensions.so \
               libQnnHtpPrepare.so libQnnHtpV75Stub.so; do
        adb push ${QNN_SDK_PATH}/lib/aarch64-android/${lib} ${REMOTE_PATH}/
    done

    # The HTP skeleton lives in a different SDK subdirectory (hexagon-v75).
    adb push ${QNN_SDK_PATH}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${REMOTE_PATH}/
}


# Top-level "build" entry point: validate prerequisites (NDK, QNN SDK),
# report the configuration, wipe any stale ./out tree, then build
# llama-cli for arm64 Android.
function build_ggml_qnn()
{
show_pwd
check_and_download_ndk
check_qnn_sdk
dump_vars
remove_temp_dir
build_arm64
}


# Top-level "run" entry point: deploy the freshly built backend libs plus
# llama-cli to the device, then run a short prompt against a local model.
function run_llamacli()
{
check_qnn_libs

# Deploy the built shared libraries and the CLI binary.
adb push ./out/android/bin/*.so ${REMOTE_PATH}/
adb push ${LLAMA_CLI}-android ${REMOTE_PATH}/${LLAMA_CLI}
adb shell chmod +x ${REMOTE_PATH}/${LLAMA_CLI}

# NOTE(review): -mg 2 presumably selects QNN device index 2 (the NPU, per
# enum QNNBackend in ggml-qnn.h) — confirm against the CLI's option parsing.
# The model file /sdcard/kantv/gemma-2b.Q8_0.gguf is assumed to already
# exist on the device.
adb shell "cd ${REMOTE_PATH} \
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
&& ${REMOTE_PATH}/${LLAMA_CLI} -mg 2 -m /sdcard/kantv/gemma-2b.Q8_0.gguf -p \"introduce the movie Once Upon a Time in America briefly.\n\""

}

# Top-level "run_testop" entry point: deploy the backend libs plus the
# test-backend-ops binary to the device and run its "test" mode.
function run_test-backend-ops()
{
check_qnn_libs

# Deploy the built shared libraries and the test binary.
adb push ./out/android/bin/*.so ${REMOTE_PATH}/
adb push ./out/android/bin/test-backend-ops ${REMOTE_PATH}/
adb shell chmod +x ${REMOTE_PATH}/test-backend-ops

# LD_LIBRARY_PATH must point at the pushed libs so the loader finds the
# ggml/QNN shared objects at runtime.
adb shell "cd ${REMOTE_PATH} \
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
&& ${REMOTE_PATH}/test-backend-ops test"

}


# Print the accepted command-line invocations.
function show_usage()
{
    cat <<EOF
Usage:
  $0 build
  $0 updateqnnlib
  $0 run
  $0 run_testop
EOF
    echo -e "\n\n\n"
}


show_pwd

check_qnn_sdk

# Exactly one argument is accepted; anything else prints usage and fails.
if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi

# Dispatch on the single supported command. An unrecognized argument falls
# through the case with status 0, matching the original elif chain.
case "$1" in
    -h|help)
        show_usage
        exit 1
        ;;
    build)
        build_ggml_qnn
        exit 0
        ;;
    run)
        run_llamacli
        exit 0
        ;;
    run_testop)
        run_test-backend-ops
        exit 0
        ;;
    updateqnnlib)
        update_qnn_libs
        exit 0
        ;;
esac
62 changes: 62 additions & 0 deletions build-run-x86.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) 2024- KanTV Authors
#
# Build helper: compile llama-cli for the host x86 machine.
# NOTE: the shebang must be the very first line of the file, otherwise the
# kernel ignores it; the original placed the copyright line above it.
# The original also clobbered the shell's special PWD variable with a value
# it never used afterwards — that assignment is dropped.

set -e

LLAMA_CLI=llama-cli

# Report the directory the script is currently executing in.
function show_pwd()
{
    local cwd
    cwd=$(pwd)
    echo -e "current working path:${cwd}\n"
}

# Configure and build llama.cpp for the host, then stage the produced
# llama-cli next to the script as llama-cli-x86.
function build_x86()
{
    # -S/-B is the documented replacement for the undocumented -H./-B form.
    cmake -S . -B ./out/x86 -DBUILD_SHARED_LIBS=OFF -DGGML_BACKEND_DL=OFF
    cd out/x86
    make -j16

    ls -lah bin/${LLAMA_CLI}
    /bin/cp -fv bin/${LLAMA_CLI} ../../${LLAMA_CLI}-x86
    cd -
}


# Delete the ./out build tree when a previous build left one behind.
function remove_temp_dir()
{
    if [ ! -d out ]; then
        return
    fi
    echo "remove out directory in `pwd`"
    rm -rf out
}


# Print the accepted command-line invocations.
function show_usage()
{
    cat <<EOF
Usage:
  $0 build
EOF
    echo -e "\n\n\n"
}


show_pwd

# Exactly one argument is accepted; anything else prints usage and fails.
if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi

# Dispatch on the single supported command. An unrecognized argument falls
# through the case with status 0, matching the original elif chain.
case "$1" in
    -h|help)
        show_usage
        exit 1
        ;;
    build)
        build_x86
        exit 0
        ;;
esac
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ option(GGML_OPENCL "ggml: use OpenCL"
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
option(GGML_QNN "ggml: use QNN" ON)

# toolchain for vulkan-shaders-gen
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
Expand Down
81 changes: 81 additions & 0 deletions ggml/include/ggml-qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) 2024- KanTV Authors
*
* this is new implementation of ggml-qnn(ggml backend of Qualcomm Neural Network), https://github.com/zhouwg/kantv/issues/246
*
* Qualcomm QNN SDK and reference tech guides could be found at:
* https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
* https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct
* https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
*
*/

/*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif


// number of real QNN device slots (CPU/GPU/NPU below); QNN_BACKEND_GGML is a
// pseudo-backend and presumably not counted here — confirm in the .cpp
#define GGML_QNN_MAX_DEVICES 3
// name under which this backend registers itself with ggml
#define GGML_QNN_BACKEND_NAME "qnn"

// selects which QNN accelerator a backend instance is bound to; values are
// consecutive from 0 so they double as device indices (see dev_num below)
enum QNNBackend {
QNN_BACKEND_CPU,
QNN_BACKEND_GPU,
QNN_BACKEND_NPU,
QNN_BACKEND_GGML, //"fake" QNN backend for compare performance between QNN backend and cpu backend
};

// create a backend bound to QNN device dev_num (a QNNBackend value);
// qnn_lib_path presumably names the directory holding the QNN runtime
// libraries on the target — confirm against the implementation
GGML_BACKEND_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path);

// true when 'backend' was created by ggml_backend_qnn_init()
GGML_BACKEND_API bool ggml_backend_is_qnn(ggml_backend_t backend);

// NOTE(review): presumably sets the worker-thread count used by the backend
// — declaration only, verify semantics in the implementation
GGML_BACKEND_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);

// number of QNN devices available (at most GGML_QNN_MAX_DEVICES, presumably)
GGML_BACKEND_API int ggml_backend_qnn_get_device_count(void);

// entry point used by ggml's backend registry to discover this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_qnn_reg(void);

// Map a QNN device index (enum QNNBackend) to a human-readable name.
// Unknown indices yield "unknown".
inline const char * ggml_backend_qnn_get_devname(size_t dev_num) {
    if (dev_num == QNN_BACKEND_CPU) {
        return "QNN-CPU";
    }
    if (dev_num == QNN_BACKEND_GPU) {
        return "QNN-GPU";
    }
    if (dev_num == QNN_BACKEND_NPU) {
        return "QNN-NPU";
    }
    if (dev_num == QNN_BACKEND_GGML) {
        // pseudo-backend used to compare QNN against the stock ggml CPU path
        return "ggml";
    }
    return "unknown";
}

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(OpenCL)
ggml_add_backend(QNN)

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
Expand Down
Loading

0 comments on commit 30717d7

Please sign in to comment.