Skip to content

Commit

Permalink
ggml-qnn:submit source code of ggml-qnn PR in kantv-ai/kantv#246
Browse files Browse the repository at this point in the history
  • Loading branch information
zhouwg committed Feb 10, 2025
1 parent d7b31a9 commit 30717d7
Show file tree
Hide file tree
Showing 12 changed files with 4,242 additions and 1 deletion.
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ set(CMAKE_WARN_UNUSED_CLI YES)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Emit full compiler command lines; useful while bringing up the QNN backend.
set(CMAKE_VERBOSE_MAKEFILE ON)

if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    # Enable the QNN and CPU backends plus the llamafile SGEMM path for
    # Android builds. add_compile_definitions() is the modern replacement
    # for add_definitions(), which also accepted arbitrary compile flags.
    add_compile_definitions(GGML_USE_QNN GGML_USE_CPU GGML_USE_LLAMAFILE)

    # NOTE(review): the original additionally forced -D__aarch64__,
    # -D__linux__ and -D__ARM_NEON. Those are compiler-predefined macros for
    # the arm64-v8a Android target and must never be defined manually:
    # redundant on arm64, and actively wrong for any other Android ABI
    # (e.g. armeabi-v7a, x86_64), so they are dropped here.
endif()

if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# About ggml-qnn
Please refer to <a href="https://github.com/zhouwg/kantv/blob/master/README-qnn.md">README-qnn.md</a>.

I made a git mistake when submitting the ggml-qnn source code in this forked project, which caused the test case test-backend-ops to fail. <a href="https://github.com/zhouwg/kantv/tree/master">Project kantv</a> is the main playground of ggml-qnn, and this backend works well on a Xiaomi 14 (an Android phone equipped with a Qualcomm Snapdragon Gen 3). This backend can be verified with project kantv.

# llama.cpp

![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
Expand Down
202 changes: 202 additions & 0 deletions build-run-android.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#!/bin/bash
# Copyright (c) 2024- KanTV Authors
#
# Build/deploy helper for the ggml-qnn backend on Android.
# NOTE: the shebang must be the very first line of the file, otherwise the
# kernel ignores it and the script runs under the caller's default shell;
# the original placed the copyright line above it.

set -e

# Checkout root. Do not clobber the shell's special PWD variable as the
# original did — bash keeps PWD in sync with the current directory, so
# overwriting it invites subtle breakage after any 'cd'.
PROJECT_ROOT_DIR=$(pwd)
ANDROID_PLATFORM=android-34
ANDROID_NDK=${PROJECT_ROOT_DIR}/android-ndk-r26c
# Directory on the device where binaries and QNN libs are pushed.
REMOTE_PATH=/data/local/tmp/

#QNN SDK could be found at:
#https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
#https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
QNN_SDK_PATH=/opt/qcom/aistack/qairt/2.31.0.250130/

LLAMA_CLI=llama-cli

# Print the toolchain/SDK locations this script will use.
function dump_vars()
{
    local var
    for var in ANDROID_NDK QNN_SDK_PATH; do
        echo -e "${var}: ${!var}"
    done
}


# Report the directory the script is currently executing in.
function show_pwd()
{
    local cwd
    cwd=$(pwd)
    echo -e "current working path:${cwd}\n"
}


# Abort with a download hint when the QNN SDK is not installed at
# ${QNN_SDK_PATH}. Called before any build or device-deploy step.
function check_qnn_sdk()
{
    # Quote the expansion so the test cannot word-split or glob.
    if [ ! -d "${QNN_SDK_PATH}" ]; then
        echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} not exist, pls check or download it from https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct...\n"
        exit 1
    fi
}


# Ensure a usable android-ndk-r26c checkout exists at ${ANDROID_NDK},
# downloading and unpacking it from dl.google.com when missing.
function check_and_download_ndk()
{
    is_android_ndk_exist=1

    # The directory must exist AND contain the cmake toolchain file,
    # otherwise treat the NDK as absent.
    if [ ! -d "${ANDROID_NDK}" ]; then
        is_android_ndk_exist=0
    fi

    if [ ! -f "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" ]; then
        is_android_ndk_exist=0
    fi

    if [ ${is_android_ndk_exist} -eq 0 ]; then

        if [ ! -f android-ndk-r26c-linux.zip ]; then
            # Under 'set -e' a failed wget would abort the script silently,
            # so test it explicitly to emit a useful diagnostic first.
            if ! wget --no-config --quiet --show-progress -O android-ndk-r26c-linux.zip https://dl.google.com/android/repository/android-ndk-r26c-linux.zip; then
                printf "failed to download android ndk to %s \n" "${ANDROID_NDK}"
                exit 1
            fi
        fi

        # BUG FIX: the original ran 'unzip' as a bare statement and tested
        # $? afterwards — under 'set -e' the script had already exited on
        # failure, so that check was dead code. Guard the command directly.
        if ! unzip android-ndk-r26c-linux.zip; then
            printf "failed to unzip android ndk to %s \n" "${ANDROID_NDK}"
            exit 1
        fi

        printf "android ndk saved to ${ANDROID_NDK} \n\n"
    else
        printf "android ndk already exist:${ANDROID_NDK} \n\n"
    fi
}


# Configure and build llama.cpp for arm64-v8a Android with the QNN backend,
# then stage the produced llama-cli next to the script as llama-cli-android.
function build_arm64()
{
    # -S/-B is the documented replacement for the undocumented -H./-B form.
    # CONSISTENCY FIX: use ${ANDROID_PLATFORM} (declared at the top of this
    # script) instead of the hardcoded "latest" the original passed, so the
    # target API level is controlled in exactly one place.
    cmake -S . -B ./out/android \
        -DGGML_USE_QNN=ON \
        -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
        -DANDROID_ABI=arm64-v8a \
        -DANDROID_PLATFORM=${ANDROID_PLATFORM} \
        -DCMAKE_C_FLAGS=-march=armv8.7-a \
        -DGGML_QNN=ON \
        -DGGML_QNN_SDK_PATH="${QNN_SDK_PATH}"
    cd out/android
    make -j16
    show_pwd

    ls -lah bin/${LLAMA_CLI}
    /bin/cp -fv bin/${LLAMA_CLI} ../../${LLAMA_CLI}-android
    cd -
}


# Delete the ./out build tree when a previous build left one behind.
function remove_temp_dir()
{
    if [ ! -d out ]; then
        return
    fi
    echo "remove out directory in `pwd`"
    rm -rf out
}


# Push the QNN runtime libs to the device unless a cached copy is already
# present under ${REMOTE_PATH}.
function check_qnn_libs()
{
    # BUG FIX: the original ran 'adb shell ls' as a bare statement and then
    # tested $? — under 'set -e' a missing lib terminated the whole script
    # before the fallback could run. Testing the command inside 'if' makes
    # a non-zero exit immune to set -e.
    if adb shell ls ${REMOTE_PATH}/libQnnCpu.so; then
        printf "QNN libs already exist on Android phone\n"
    else
        update_qnn_libs
    fi
}


# Copy every QNN runtime library the backend needs from the host SDK onto
# the device at ${REMOTE_PATH}.
function update_qnn_libs()
{
    local lib
    for lib in libQnnSystem.so libQnnCpu.so libQnnGpu.so \
               libQnnHtp.so libQnnHtpNetRunExtensions.so \
               libQnnHtpPrepare.so libQnnHtpV75Stub.so; do
        adb push ${QNN_SDK_PATH}/lib/aarch64-android/${lib} ${REMOTE_PATH}/
    done

    # The HTP skeleton lives in a different SDK subdirectory (hexagon-v75).
    adb push ${QNN_SDK_PATH}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${REMOTE_PATH}/
}


# Top-level "build" entry point: validate prerequisites (NDK, QNN SDK),
# report the configuration, wipe any stale ./out tree, then build
# llama-cli for arm64 Android.
function build_ggml_qnn()
{
show_pwd
check_and_download_ndk
check_qnn_sdk
dump_vars
remove_temp_dir
build_arm64
}


# Top-level "run" entry point: deploy the freshly built backend libs plus
# llama-cli to the device, then run a short prompt against a local model.
function run_llamacli()
{
check_qnn_libs

# Deploy the built shared libraries and the CLI binary.
adb push ./out/android/bin/*.so ${REMOTE_PATH}/
adb push ${LLAMA_CLI}-android ${REMOTE_PATH}/${LLAMA_CLI}
adb shell chmod +x ${REMOTE_PATH}/${LLAMA_CLI}

# NOTE(review): -mg 2 presumably selects QNN device index 2 (the NPU, per
# enum QNNBackend in ggml-qnn.h) — confirm against the CLI's option parsing.
# The model file /sdcard/kantv/gemma-2b.Q8_0.gguf is assumed to already
# exist on the device.
adb shell "cd ${REMOTE_PATH} \
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
&& ${REMOTE_PATH}/${LLAMA_CLI} -mg 2 -m /sdcard/kantv/gemma-2b.Q8_0.gguf -p \"introduce the movie Once Upon a Time in America briefly.\n\""

}

# Top-level "run_testop" entry point: deploy the backend libs plus the
# test-backend-ops binary to the device and run its "test" mode.
function run_test-backend-ops()
{
check_qnn_libs

# Deploy the built shared libraries and the test binary.
adb push ./out/android/bin/*.so ${REMOTE_PATH}/
adb push ./out/android/bin/test-backend-ops ${REMOTE_PATH}/
adb shell chmod +x ${REMOTE_PATH}/test-backend-ops

# LD_LIBRARY_PATH must point at the pushed libs so the loader finds the
# ggml/QNN shared objects at runtime.
adb shell "cd ${REMOTE_PATH} \
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
&& ${REMOTE_PATH}/test-backend-ops test"

}


# Print the accepted command-line invocations.
function show_usage()
{
    cat <<EOF
Usage:
  $0 build
  $0 updateqnnlib
  $0 run
  $0 run_testop
EOF
    echo -e "\n\n\n"
}


show_pwd

check_qnn_sdk

# Exactly one argument is accepted; anything else prints usage and fails.
if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi

# Dispatch on the single supported command. An unrecognized argument falls
# through the case with status 0, matching the original elif chain.
case "$1" in
    -h|help)
        show_usage
        exit 1
        ;;
    build)
        build_ggml_qnn
        exit 0
        ;;
    run)
        run_llamacli
        exit 0
        ;;
    run_testop)
        run_test-backend-ops
        exit 0
        ;;
    updateqnnlib)
        update_qnn_libs
        exit 0
        ;;
esac
62 changes: 62 additions & 0 deletions build-run-x86.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Copyright (c) 2024- KanTV Authors
#
# Build helper: compile llama-cli for the host x86 machine.
# NOTE: the shebang must be the very first line of the file, otherwise the
# kernel ignores it; the original placed the copyright line above it.
# The original also clobbered the shell's special PWD variable with a value
# it never used afterwards — that assignment is dropped.

set -e

LLAMA_CLI=llama-cli

# Report the directory the script is currently executing in.
function show_pwd()
{
    local cwd
    cwd=$(pwd)
    echo -e "current working path:${cwd}\n"
}

# Configure and build llama.cpp for the host, then stage the produced
# llama-cli next to the script as llama-cli-x86.
function build_x86()
{
    # -S/-B is the documented replacement for the undocumented -H./-B form.
    cmake -S . -B ./out/x86 -DBUILD_SHARED_LIBS=OFF -DGGML_BACKEND_DL=OFF
    cd out/x86
    make -j16

    ls -lah bin/${LLAMA_CLI}
    /bin/cp -fv bin/${LLAMA_CLI} ../../${LLAMA_CLI}-x86
    cd -
}


# Delete the ./out build tree when a previous build left one behind.
function remove_temp_dir()
{
    if [ ! -d out ]; then
        return
    fi
    echo "remove out directory in `pwd`"
    rm -rf out
}


# Print the accepted command-line invocations.
function show_usage()
{
    cat <<EOF
Usage:
  $0 build
EOF
    echo -e "\n\n\n"
}


show_pwd

# Exactly one argument is accepted; anything else prints usage and fails.
if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi

# Dispatch on the single supported command. An unrecognized argument falls
# through the case with status 0, matching the original elif chain.
case "$1" in
    -h|help)
        show_usage
        exit 1
        ;;
    build)
        build_x86
        exit 0
        ;;
esac
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ option(GGML_OPENCL "ggml: use OpenCL"
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
option(GGML_QNN "ggml: use QNN" ON)

# toolchain for vulkan-shaders-gen
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
Expand Down
81 changes: 81 additions & 0 deletions ggml/include/ggml-qnn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) 2024- KanTV Authors
*
* this is new implementation of ggml-qnn(ggml backend of Qualcomm Neural Network), https://github.com/zhouwg/kantv/issues/246
*
* Qualcomm QNN SDK and reference tech guides could be found at:
* https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
* https://qpm.qualcomm.com/#/main/tools/details/qualcomm_ai_engine_direct
* https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
*
*/

/*
* Copyright (c) 2023-2024 The ggml authors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif


// number of real QNN device slots (CPU/GPU/NPU below); QNN_BACKEND_GGML is a
// pseudo-backend and presumably not counted here — confirm in the .cpp
#define GGML_QNN_MAX_DEVICES 3
// name under which this backend registers itself with ggml
#define GGML_QNN_BACKEND_NAME "qnn"

// selects which QNN accelerator a backend instance is bound to; values are
// consecutive from 0 so they double as device indices (see dev_num below)
enum QNNBackend {
QNN_BACKEND_CPU,
QNN_BACKEND_GPU,
QNN_BACKEND_NPU,
QNN_BACKEND_GGML, //"fake" QNN backend for compare performance between QNN backend and cpu backend
};

// create a backend bound to QNN device dev_num (a QNNBackend value);
// qnn_lib_path presumably names the directory holding the QNN runtime
// libraries on the target — confirm against the implementation
GGML_BACKEND_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path);

// true when 'backend' was created by ggml_backend_qnn_init()
GGML_BACKEND_API bool ggml_backend_is_qnn(ggml_backend_t backend);

// NOTE(review): presumably sets the worker-thread count used by the backend
// — declaration only, verify semantics in the implementation
GGML_BACKEND_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);

// number of QNN devices available (at most GGML_QNN_MAX_DEVICES, presumably)
GGML_BACKEND_API int ggml_backend_qnn_get_device_count(void);

// entry point used by ggml's backend registry to discover this backend
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_qnn_reg(void);

// Map a QNN device index (enum QNNBackend) to a human-readable name.
// Unknown indices yield "unknown".
inline const char * ggml_backend_qnn_get_devname(size_t dev_num) {
    if (dev_num == QNN_BACKEND_CPU) {
        return "QNN-CPU";
    }
    if (dev_num == QNN_BACKEND_GPU) {
        return "QNN-GPU";
    }
    if (dev_num == QNN_BACKEND_NPU) {
        return "QNN-NPU";
    }
    if (dev_num == QNN_BACKEND_GGML) {
        // pseudo-backend used to compare QNN against the stock ggml CPU path
        return "ggml";
    }
    return "unknown";
}

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(OpenCL)
ggml_add_backend(QNN)

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
Expand Down
Loading

0 comments on commit 30717d7

Please sign in to comment.