 import android.app.Dialog;
 import android.content.Context;
 import android.os.Bundle;
+import android.system.ErrnoException;
 import android.text.method.ScrollingMovementMethod;
 import android.util.Log;
 import android.util.Pair;
 import android.widget.Toast;
 
 import java.io.File;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+import java.nio.LongBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import ai.onnxruntime.genai.Generator;
 import ai.onnxruntime.genai.GeneratorParams;
 import ai.onnxruntime.genai.Sequences;
+import ai.onnxruntime.genai.Tensor;
 import ai.onnxruntime.genai.TokenizerStream;
 import ai.onnxruntime.genai.demo.databinding.ActivityMainBinding;
 import ai.onnxruntime.genai.Model;
@@ -45,7 +51,7 @@ public class MainActivity extends AppCompatActivity implements Consumer<String>
     private TextView progressText;
     private ImageButton settingsButton;
     private static final String TAG = "genai.demo.MainActivity";
-    private int maxLength = 100;
+    private int maxLength = 256;
     private float lengthPenalty = 1.0f;
 
     private static boolean fileExists(Context context, String fileName) {
@@ -55,6 +61,14 @@ private static boolean fileExists(Context context, String fileName) {
 
     @Override
     protected void onCreate(Bundle savedInstanceState) {
+        try {
+            // set ADSP_LIBRARY_PATH, QNN-specific
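+            // (the Hexagon DSP runtime looks up QNN's skel libraries through this variable,
+            // so it is pointed at the app's native library directory before anything else runs)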
+            String adspLibraryPath = getApplicationContext().getApplicationInfo().nativeLibraryDir;
+            android.system.Os.setenv("ADSP_LIBRARY_PATH", adspLibraryPath, true);
+        } catch (ErrnoException e) {
+            throw new RuntimeException(e);
+        }
+
         super.onCreate(savedInstanceState);
 
         binding = ActivityMainBinding.inflate(getLayoutInflater());
@@ -69,8 +83,8 @@ protected void onCreate(Bundle savedInstanceState) {
 
         // Trigger the download operation when the application is created
        try {
-            downloadModels(
-                    getApplicationContext());
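+            // load a QNN model that is assumed to have already been pushed to the device (e.g. via adb push) instead of downloading one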
+            createModelFromPath("/data/local/tmp/phi3.5_qnn_qc/phi3.5-split-qnn-qc");
+            //downloadModels(getApplicationContext());
         } catch (GenAIException e) {
             throw new RuntimeException(e);
         }
@@ -135,17 +149,63 @@ public void run() {
                 GeneratorParams generatorParams = null;
                 Generator generator = null;
                 Sequences encodedPrompt = null;
+                Tensor attentionMask = null, positionIds = null;
                 try {
+                    encodedPrompt = tokenizer.encode(promptQuestion_formatted);
+
                     stream = tokenizer.createStream();
 
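+                    // fixed sizes assumed to match the static shapes the QNN model was exported with:
+                    // the prompt is left-padded into a 128-token window, while the attention mask spans the full 4096-token context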
+                    int maxSequenceLength = 128;
+                    int contextLength = 4096;
+
+                    int[] promptTokens = encodedPrompt.getSequence(0);
+                    int numPromptTokens = promptTokens.length;
+
+                    if (numPromptTokens > maxSequenceLength) {
+                        throw new RuntimeException("numPromptTokens is greater than maxSequenceLength");
+                    }
+                    if (numPromptTokens > contextLength) {
+                        throw new RuntimeException("numPromptTokens is greater than contextLength");
+                    }
+
+                    int paddingSize = maxSequenceLength - numPromptTokens;
+
+                    // paddedInputIds
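+                    // left-pad with token id 0 so the real prompt occupies the last numPromptTokens slots of the fixed-length window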
+                    int[] paddedInputIds = new int[maxSequenceLength];
+                    for (int i = 0; i < maxSequenceLength; ++i) {
+                        paddedInputIds[i] = i < paddingSize ? 0 : promptTokens[i - paddingSize];
+                    }
+
+                    ByteOrder nativeOrder = ByteOrder.nativeOrder();
+
+                    // attentionMask
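+                    // float32 mask over the whole context window: 0.0f for unused positions, 1.0f for the prompt tokens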
+                    int attentionMaskPaddingSize = contextLength - numPromptTokens;
+                    ByteBuffer attentionMaskBuffer = ByteBuffer.allocateDirect(contextLength * 4);
+                    attentionMaskBuffer.order(nativeOrder);
+                    FloatBuffer attentionMaskFloatBuffer = attentionMaskBuffer.asFloatBuffer();
+                    for (int i = 0; i < contextLength; i++) {
+                        attentionMaskFloatBuffer.put(i < attentionMaskPaddingSize ? 0.0f : 1.0f);
+                    }
+                    attentionMask = new Tensor(attentionMaskBuffer, new long[]{1, contextLength}, Tensor.ElementType.float32);
+
+                    // positionIds
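+                    // int64 positions for the padded window: padding slots stay at 0, prompt tokens count up from 0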
+                    ByteBuffer positionIdsBuffer = ByteBuffer.allocateDirect(maxSequenceLength * 8);
+                    positionIdsBuffer.order(nativeOrder);
+                    LongBuffer positionIdsLongBuffer = positionIdsBuffer.asLongBuffer();
+                    for (int i = 0; i < maxSequenceLength; ++i) {
+                        positionIdsLongBuffer.put(i < paddingSize ? 0 : i - paddingSize);
+                    }
+                    positionIds = new Tensor(positionIdsBuffer, new long[]{1, maxSequenceLength}, Tensor.ElementType.int64);
+
                     generatorParams = model.createGeneratorParams();
                     //examples for optional parameters to format AI response
                     // https://onnxruntime.ai/docs/genai/reference/config.html
                     generatorParams.setSearchOption("length_penalty", lengthPenalty);
                     generatorParams.setSearchOption("max_length", maxLength);
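+                    // pass the precomputed mask and position ids as extra named model inputs
+                    // (the input names below must match what the exported QNN model expects)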
+                    generatorParams.setInput("attention_mask_before_processor", attentionMask);
+                    generatorParams.setInput("position_ids", positionIds);
 
-                    encodedPrompt = tokenizer.encode(promptQuestion_formatted);
-                    generatorParams.setInput(encodedPrompt);
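+                    // feed the left-padded token ids directly instead of the raw encoded prompt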
+                    generatorParams.setInput(paddedInputIds, maxSequenceLength, 1);
 
                     generator = new Generator(model, generatorParams);
 
@@ -175,7 +235,7 @@ public void run() {
                     long totalTime = System.currentTimeMillis() - firstTokenTime;
 
                     float promptProcessingTime = (firstTokenTime - startTime) / 1000.0f;
-                    float tokensPerSecond = (1000 * (numTokens - 1)) / totalTime;
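+                    // compute the rate in floating point so integer division no longer truncates it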
+                    float tokensPerSecond = (1000.0f * (numTokens - 1)) / totalTime;
 
                     runOnUiThread(() -> {
                         sendMsgIB.setEnabled(true);
@@ -192,6 +252,8 @@ public void run() {
                     Log.e(TAG, "Exception occurred during model query: " + e.getMessage());
                 }
                 finally {
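+                    // release the native buffers backing the extra input tensors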
+                    if (positionIds != null) positionIds.close();
+                    if (attentionMask != null) attentionMask.close();
                     if (generator != null) generator.close();
                     if (encodedPrompt != null) encodedPrompt.close();
                     if (stream != null) stream.close();
@@ -217,8 +279,12 @@ protected void onDestroy() {
         super.onDestroy();
     }
 
-    private void downloadModels(Context context) throws GenAIException {
+    private void createModelFromPath(String path) throws GenAIException {
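+        // build the Model and Tokenizer from a model folder that is already present on the device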
+        model = new Model(path);
+        tokenizer = model.createTokenizer();
+    }
 
+    private void downloadModels(Context context) throws GenAIException {
         final String baseUrl = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx/resolve/main/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/";
         List<String> files = Arrays.asList(
                 "added_tokens.json",