diff --git a/.github/workflows/build_triton_and_ft.yml b/.github/workflows/build_triton_and_ft.yml index 973ce5f6c5c9..beeff41a592a 100644 --- a/.github/workflows/build_triton_and_ft.yml +++ b/.github/workflows/build_triton_and_ft.yml @@ -27,7 +27,7 @@ jobs: pip3 install requests python3 build.py --enable-logging --enable-metrics --enable-stats --enable-cpu-metrics --enable-gpu --endpoint http - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/codeql-analysis-java.yml b/.github/workflows/codeql-analysis-java.yml index 0331d4bd1c4a..19fe93e17372 100644 --- a/.github/workflows/codeql-analysis-java.yml +++ b/.github/workflows/codeql-analysis-java.yml @@ -47,7 +47,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -58,7 +58,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl @@ -72,11 +72,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 - - publish-success-metric: - needs: [ analyze ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-CodeQL-Failure + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/continuous.yml b/.github/workflows/continuous.yml index e3d6aa915c2a..53a532dfa28e 100644 --- a/.github/workflows/continuous.yml +++ b/.github/workflows/continuous.yml @@ -10,6 +10,15 @@ on: - "**.js" - "**.css" - "android/**" + push: + paths-ignore: + - "**.md" + - "**.ipynb" + - "**.json" + - "**.html" + - "**.js" + - "**.css" + - "android/**" jobs: build: @@ -17,7 +26,7 @@ jobs: runs-on: ${{ matrix.operating-system }} strategy: matrix: - operating-system: [ ubuntu-latest, macos-12 ] + operating-system: [ ubuntu-latest, macos-13 ] steps: - uses: actions/checkout@v4 @@ -41,6 +50,9 @@ jobs: filters: | src: - 'extensions/sentencepiece/**' + - name: install libomp on macos + if: ${{ runner.os == 'macOS' }} + run: brew install libomp - name: Compile Sentencepiece JNI if: steps.sentencepiece_changes.outputs.src == 'true' run: ./gradlew :extensions:sentencepiece:compileJNI @@ -100,6 +112,12 @@ jobs: files: ./jacoco/build/reports/jacoco/testCodeCoverageReport/testCodeCoverageReport.xml fail_ci_if_error: false path_to_write_report: ./codecov_report.txt + - name: Submit Dependencies + if: github.event_name == 'push' + uses: gradle/actions/dependency-submission@v3 + env: + # Exclude dependencies that are only resolved in test classpaths + DEPENDENCY_GRAPH_EXCLUDE_CONFIGURATIONS: '.*[Tt]est(Compile|Runtime)Classpath' # Windows platform for testing hybrid engines build-windows: diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml index 31c6e33ae482..a85ea409f4e7 100644 --- a/.github/workflows/docker_publish.yml +++ b/.github/workflows/docker_publish.yml @@ -19,7 +19,7 @@ jobs: -
name: Setup Docker buildx uses: docker/setup-buildx-action@v2 - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -57,10 +57,3 @@ jobs: file: docker/spark/Dockerfile build-args: DJL_VERSION=${DJL_VERSION} tags: deepjavalibrary/djl-spark:${{ env.DJL_VERSION }}-cpu - - publish-success-metric: - needs: [ publish ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-SparkDockerPublish-Failure diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 7af0339cabe3..42b458ebe1dc 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -55,7 +55,7 @@ jobs: mkdocs build --site-dir ../../site - name: Configure AWS Credentials if: github.event_name != 'pull_request' - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -65,10 +65,3 @@ jobs: run: | aws s3 sync ../site s3://djl-ai/documentation/nightly --delete aws cloudfront create-invalidation --distribution-id E733IIDCG0G5U --paths "/*" - - publish-success-metric: - needs: [ documentation ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-DocumentationPublish-Failure diff --git a/.github/workflows/native_jni_s3_paddle.yml b/.github/workflows/native_jni_s3_paddle.yml index de904db2736b..752ba39c685a 100644 --- a/.github/workflows/native_jni_s3_paddle.yml +++ b/.github/workflows/native_jni_s3_paddle.yml @@ -54,13 +54,13 @@ jobs: yum -y install patch cmake3 ln -sf /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 
with: distribution: 'corretto' java-version: 17 - - uses: actions/cache@v4 + - uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -98,7 +98,7 @@ jobs: name: jnilib-Linux path: jnilib - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -108,10 +108,3 @@ jobs: PADDLE_VERSION="$(cat gradle.properties | awk -F '=' '/paddlepaddle_version/ {print $2}')" aws s3 sync jnilib s3://djl-ai/publish/paddlepaddle-${PADDLE_VERSION}/jnilib aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/paddlepaddle-${PADDLE_VERSION}/jnilib*" - - publish-success-metric: - needs: [ publish ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-NativeJNIPaddleS3Publish-Failure diff --git a/.github/workflows/native_jni_s3_pytorch.yml b/.github/workflows/native_jni_s3_pytorch.yml index 0104f23296a9..1f1a220e8c25 100644 --- a/.github/workflows/native_jni_s3_pytorch.yml +++ b/.github/workflows/native_jni_s3_pytorch.yml @@ -36,7 +36,7 @@ jobs: ./gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=$PYTORCH_VERSION ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch" - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -54,13 +54,13 @@ jobs: runs-on: ubuntu-latest container: nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: 'corretto' java-version: 17 - - uses: actions/cache@v4 + - 
uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -91,7 +91,7 @@ jobs: if [[ "$PYTORCH_VERSION" == "1.12.1" ]]; then ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu10 -Ppt_version=$PYTORCH_VERSION; fi if [[ "$PYTORCH_VERSION" == "1.11.0" ]]; then ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu10 -Ppt_version=$PYTORCH_VERSION; fi - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v3 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -115,13 +115,13 @@ jobs: yum -y install devtoolset-7 rh-git218 patch cmake3 ln -s /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: 'corretto' java-version: 17 - - uses: actions/cache@v4 + - uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -140,7 +140,7 @@ jobs: rm -rf ~/.djl.ai ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Pprecxx11 -Ppt_version=$PYTORCH_VERSION - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -191,7 +191,7 @@ jobs: set "PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%" gradlew :engines:pytorch:pytorch-native:cleanJNI :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Ppt_version=${{ github.event.inputs.pt_version }} - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ 
secrets.AWS_SECRET_ACCESS_KEY }} @@ -230,7 +230,7 @@ jobs: ./gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=$PYTORCH_VERSION ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch" - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -273,7 +273,7 @@ jobs: yum -y install patch git cmake3 python3-devel java-17-amazon-corretto-devel ln -sf /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Release JNI prep run: | export JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.aarch64 @@ -284,7 +284,7 @@ jobs: ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pprecxx11 -Ppt_version=$PYTORCH_VERSION ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch" - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -306,10 +306,3 @@ jobs: cd /home/ubuntu/djl_benchmark_script/scripts instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }} ./stop_instance.sh $instance_id - - publish-success-metric: - needs: [ build-pytorch-jni-macos, build-pytorch-jni-linux, build-pytorch-jni-precxx11, build-pytorch-jni-windows, build-pytorch-jni-arm64-macos, stop-runners ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-NativeJNIPytorchS3Publish-Failure diff --git a/.github/workflows/native_jni_s3_pytorch_android.yml b/.github/workflows/native_jni_s3_pytorch_android.yml index e283b8165c85..0376856bb333 100644 --- 
a/.github/workflows/native_jni_s3_pytorch_android.yml +++ b/.github/workflows/native_jni_s3_pytorch_android.yml @@ -26,14 +26,14 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - name: Install NDK - run: echo "y" | sudo ${ANDROID_HOME}/tools/bin/sdkmanager --install "ndk;${NDK_VERSION}" + run: echo "y" | sudo ${ANDROID_HOME}/cmdline-tools/latest/bin/sdkmanager --install "ndk;${NDK_VERSION}" - name: build android run: | export ANDROID_NDK=${ANDROID_SDK_ROOT}/ndk/${NDK_VERSION} PYTORCH_VERSION=${PYTORCH_VERSION:-$(cat gradle.properties | awk -F '=' '/pytorch_version/ {print $2}')} ./gradlew :engines:pytorch:pytorch-native:compileAndroidJNI -Ppt_version=${PYTORCH_VERSION} - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -43,10 +43,3 @@ jobs: PYTORCH_VERSION=${PYTORCH_VERSION:-$(cat gradle.properties | awk -F '=' '/pytorch_version/ {print $2}')} aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*" - - publish-success-metric: - needs: [ build-pytorch-jni-android ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-NativeJNIPytorchAndroidS3Publish-Failure diff --git a/.github/workflows/native_jni_s3_tensorrt.yml b/.github/workflows/native_jni_s3_tensorrt.yml index b3da8c59593a..fff8406958a8 100644 --- a/.github/workflows/native_jni_s3_tensorrt.yml +++ b/.github/workflows/native_jni_s3_tensorrt.yml @@ -25,7 +25,7 @@ jobs: - name: Release JNI prep run: ./gradlew :engines:tensorrt:compileJNI - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ 
secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_fasttext.yml b/.github/workflows/native_s3_fasttext.yml index 1c4a3e57aeec..8dd34e6d3c68 100644 --- a/.github/workflows/native_s3_fasttext.yml +++ b/.github/workflows/native_s3_fasttext.yml @@ -24,7 +24,7 @@ jobs: ./gradlew :extensions:fasttext:compileJNI ./gradlew -Pjni :extensions:fasttext:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -45,13 +45,13 @@ jobs: yum -y install patch cmake3 ln -sf /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: 'corretto' java-version: 17 - - uses: actions/cache@v4 + - uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -63,7 +63,7 @@ jobs: ./gradlew :extensions:fasttext:compileJNI ./gradlew -Pjni :extensions:fasttext:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -95,7 +95,7 @@ jobs: ./gradlew :extensions:fasttext:compileJNI ./gradlew -Pjni :extensions:fasttext:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_huggingface.yml b/.github/workflows/native_s3_huggingface.yml index 6e2ae782d4d2..4a21be7ac0c0 100644 --- 
a/.github/workflows/native_s3_huggingface.yml +++ b/.github/workflows/native_s3_huggingface.yml @@ -27,7 +27,7 @@ jobs: ./gradlew :extensions:tokenizers:compileJNI ./gradlew -Pjni :extensions:tokenizers:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -53,13 +53,13 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: distribution: 'corretto' java-version: 17 - - uses: actions/cache@v4 + - uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -72,7 +72,7 @@ jobs: ./gradlew :extensions:tokenizers:compileJNI PYTORCH_PRECXX11=true ./gradlew -Pjni :extensions:tokenizers:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -110,7 +110,7 @@ jobs: call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 gradlew :extensions:tokenizer:compileJNI - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -147,7 +147,7 @@ jobs: ./gradlew :extensions:tokenizers:compileJNI ./gradlew -Pjni :extensions:tokenizers:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -191,14 +191,14 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: java-version: 17 distribution: corretto architecture: aarch64 - - uses: actions/cache@v4 + - uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -209,7 +209,7 @@ jobs: ./gradlew :extensions:tokenizers:compileJNI PYTORCH_PRECXX11=true ./gradlew -Pjni :extensions:tokenizers:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_llama.yml b/.github/workflows/native_s3_llama.yml index eea8cf278d66..8172fc4bbefb 100644 --- a/.github/workflows/native_s3_llama.yml +++ b/.github/workflows/native_s3_llama.yml @@ -24,7 +24,7 @@ jobs: ./gradlew :engines:llama:compileJNI ./gradlew -Pjni :engines:llama:test -Dnightly=true - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -46,7 +46,7 @@ jobs: yum -y install devtoolset-7 git patch cmake3 libstdc++-static ln -s /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: @@ -58,7 +58,7 @@ jobs: ./gradlew :engines:llama:compileJNI ./gradlew -Pjni :engines:llama:test -Dnightly=true - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ 
secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -94,7 +94,7 @@ jobs: gradlew :engines:llama:compileJNI gradlew -Pjni :engines:llama:test -Dnightly=true - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -128,7 +128,7 @@ jobs: ./gradlew :engines:llama:compileJNI ./gradlew -Pjni :engines:llama:test -Dnightly=true - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -169,7 +169,7 @@ jobs: yum -y install patch perl-IPC-Cmd cmake3 ln -s /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: @@ -181,7 +181,7 @@ jobs: ./gradlew :engines:llama:compileJNI ./gradlew -Pjni :engines:llama:test -Dnightly=true - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_pytorch.yml b/.github/workflows/native_s3_pytorch.yml index 549084e89e75..54f4dba31194 100644 --- a/.github/workflows/native_s3_pytorch.yml +++ b/.github/workflows/native_s3_pytorch.yml @@ -21,7 +21,7 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff 
--git a/.github/workflows/native_s3_pytorch_android.yml b/.github/workflows/native_s3_pytorch_android.yml index 3c386c63d3ab..d0d920f1f4d2 100644 --- a/.github/workflows/native_s3_pytorch_android.yml +++ b/.github/workflows/native_s3_pytorch_android.yml @@ -39,7 +39,7 @@ jobs: cd build_android zip -r ${{ matrix.format }}_native.zip install/include lib - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_sentencepiece.yml b/.github/workflows/native_s3_sentencepiece.yml index 1ecc92fcee0e..134cf012a308 100644 --- a/.github/workflows/native_s3_sentencepiece.yml +++ b/.github/workflows/native_s3_sentencepiece.yml @@ -25,7 +25,7 @@ jobs: ./gradlew :extensions:sentencepiece:compileJNI ./gradlew -Pjni :extensions:sentencepiece:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -48,7 +48,7 @@ jobs: yum -y install devtoolset-7 git patch cmake3 libstdc++-static ln -sf /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: @@ -66,7 +66,7 @@ jobs: ./gradlew :extensions:sentencepiece:compileJNI ./gradlew -Pjni :extensions:sentencepiece:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -98,7 +98,7 @@ jobs: ./gradlew :extensions:sentencepiece:compileJNI ./gradlew -Pjni :extensions:sentencepiece:test 
- name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -135,7 +135,7 @@ jobs: ./gradlew :extensions:sentencepiece:compileJNI ./gradlew -Pjni :extensions:sentencepiece:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -163,9 +163,9 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Set up JDK 17 - uses: actions/setup-java@v4 + uses: actions/setup-java@v3 with: java-version: 17 distribution: corretto @@ -182,7 +182,7 @@ jobs: ./gradlew :extensions:sentencepiece:compileJNI ./gradlew -Pjni :extensions:sentencepiece:test - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_tensorflow.yml b/.github/workflows/native_s3_tensorflow.yml index 01d590a9c374..4d88337b34f7 100644 --- a/.github/workflows/native_s3_tensorflow.yml +++ b/.github/workflows/native_s3_tensorflow.yml @@ -21,7 +21,7 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_tflite.yml b/.github/workflows/native_s3_tflite.yml index 801c9dd42378..8298880e14c5 100644 --- 
a/.github/workflows/native_s3_tflite.yml +++ b/.github/workflows/native_s3_tflite.yml @@ -33,7 +33,7 @@ jobs: cd tensorflow bazel build -c opt //tensorflow/lite/java:tensorflowlitelib //tensorflow/lite/delegates/flex:delegate - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -57,12 +57,12 @@ jobs: ln -sf /usr/bin/cmake3 /usr/bin/cmake pip3 install awscli --upgrade pip3 install numpy --upgrade - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Get TFLITE_VERSION run: | TFLITE_VERSION="$(cat gradle.properties | awk -F '=' '/tflite_version/ {print $2}')" echo "TFLITE_VERSION=${TFLITE_VERSION}" >> $GITHUB_ENV - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 with: repository: tensorflow/tensorflow ref: v${{ env.TFLITE_VERSION }} @@ -75,7 +75,7 @@ jobs: bash bazel.sh bazel build -c opt //tensorflow/lite/java:tensorflowlitelib //tensorflow/lite/delegates/flex:delegate - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/native_s3_xgboost.yml b/.github/workflows/native_s3_xgboost.yml index 7be2b1496da8..2e83554a253e 100644 --- a/.github/workflows/native_s3_xgboost.yml +++ b/.github/workflows/native_s3_xgboost.yml @@ -29,7 +29,7 @@ jobs: timeout-minutes: 30 needs: create-aarch64-runner steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 - name: Install Environment run: | yum -y update @@ -55,7 +55,7 @@ jobs: python3 create_jni.py cd ../.. 
- name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v2 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/nightly_android.yml b/.github/workflows/nightly_android.yml index dc66b56845cd..b46528bc29f6 100644 --- a/.github/workflows/nightly_android.yml +++ b/.github/workflows/nightly_android.yml @@ -31,10 +31,3 @@ jobs: emulator-options: -no-snapshot-save -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none disable-animations: true script: cd android/core && ./gradlew cAT - - publish-success-metric: - needs: [ build ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-AndroidIntegrationTests-Failure diff --git a/.github/workflows/nightly_publish.yml b/.github/workflows/nightly_publish.yml index 7b916f6d5740..52c77a34d725 100644 --- a/.github/workflows/nightly_publish.yml +++ b/.github/workflows/nightly_publish.yml @@ -105,14 +105,15 @@ jobs: run: | yum -y update yum install -y tar gzip - - uses: actions/checkout@v4 + # checkout@v4 requires GLIBC 2.27 + - uses: actions/checkout@v3 - name: Set up JDK 17 uses: actions/setup-java@v3 with: java-version: 17 distribution: corretto architecture: aarch64 - - uses: actions/cache@v4 + - uses: actions/cache@v3 with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} @@ -210,7 +211,7 @@ jobs: ORG_GRADLE_PROJECT_ossrhUsername: ${{ secrets.ORG_GRADLE_PROJECT_ossrhUsername }} ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.ORG_GRADLE_PROJECT_ossrhPassword }} - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -255,10 +256,3 @@ jobs: cd 
/home/ubuntu/djl_benchmark_script/scripts instance_id=${{ needs.create-runners.outputs.gpu_1_instance_id }} ./stop_instance.sh $instance_id - - publish-success-metric: - needs: [ publish, stop-runners ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-NightlyIntegrationTestsPublish-Failure diff --git a/.github/workflows/publish-job-success.yml b/.github/workflows/publish-job-success.yml index 7b2f7ffc7fd4..ec75dd635956 100644 --- a/.github/workflows/publish-job-success.yml +++ b/.github/workflows/publish-job-success.yml @@ -1,27 +1,37 @@ name: Publish Job Success Metric to CloudWatch on: - workflow_call: - inputs: - metric-name: - description: "The name of the job to publish a metric for" - type: string - required: true + workflow_run: + workflows: "*" + types: + - completed + branches: + - master + +permissions: + id-token: write + contents: read jobs: publish-job-success-to-cloudwatch: - if: ${{ github.event_name == 'schedule' }} - runs-on: [ self-hosted, scheduler ] + if: ${{ github.event.workflow_run.event == 'schedule' }} + runs-on: ubuntu-latest steps: - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2 + uses: aws-actions/configure-aws-credentials@v4 with: + role-to-assume: arn:aws:iam::185921645874:role/djl-github-cloudwatch-ci-metrics aws-region: us-west-2 - name: Publish Job Success Metric + env: + WORKFLOW_NAME: ${{ github.event.workflow_run.display_title }} + REPO_NAME: ${{ github.event.workflow_run.repository.name }} + CONCLUSION: ${{ github.event.workflow_run.conclusion }} run: | - [[ ${{ job.status }} == "success" ]] - failedBuild=$? + workflow_name=$(echo "$WORKFLOW_NAME" | tr -d ' ') + metric_name="${REPO_NAME}-${workflow_name}-Failure" + failedBuild=$([ "$CONCLUSION" == "success" ]; echo $?) 
aws cloudwatch put-metric-data --namespace GithubCI \ - --metric-name ${{ inputs.metric-name }} \ + --metric-name "$metric_name" \ --value $failedBuild \ --unit Count diff --git a/.github/workflows/publish_android_packages.yml b/.github/workflows/publish_android_packages.yml index 2c12b0c1b4f2..4ae8bcefcb24 100644 --- a/.github/workflows/publish_android_packages.yml +++ b/.github/workflows/publish_android_packages.yml @@ -50,10 +50,3 @@ jobs: ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.ORG_GRADLE_PROJECT_signingPassword }} ORG_GRADLE_PROJECT_ossrhUsername: ${{ secrets.ORG_GRADLE_PROJECT_ossrhUsername }} ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.ORG_GRADLE_PROJECT_ossrhPassword }} - - publish-success-metric: - needs: [ release-android ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-AndroidPublish-Failurei diff --git a/.github/workflows/serving_publish.yml b/.github/workflows/serving_publish.yml index 25c5b2f2e0e4..380666487a79 100644 --- a/.github/workflows/serving_publish.yml +++ b/.github/workflows/serving_publish.yml @@ -41,7 +41,7 @@ jobs: restore-keys: | ${{ runner.os }}-gradle- - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1-node16 + uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -105,10 +105,3 @@ jobs: ORG_GRADLE_PROJECT_ossrhUsername: ${{ secrets.ORG_GRADLE_PROJECT_ossrhUsername }} ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.ORG_GRADLE_PROJECT_ossrhPassword }} DJL_STAGING: ${{ github.event.inputs.repo-id }} - - publish-success-metric: - needs: [ publish ] - if: always() - uses: ./.github/workflows/publish-job-success.yml - with: - metric-name: DJL-DJLServingPublish-Failure diff --git a/android/core/build.gradle b/android/core/build.gradle index 11b8c473c00a..fc549d5afb22 100644 --- a/android/core/build.gradle +++ b/android/core/build.gradle @@ 
-109,3 +109,7 @@ dependencies { androidTestImplementation 'androidx.test.ext:junit:1.1.5' androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' } + +configurations.configureEach { + exclude group: "org.apache.commons", module: "commons-compress" +} diff --git a/android/gradle.properties b/android/gradle.properties index 8ad177db1bf4..bc78729d35c9 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -17,5 +17,5 @@ org.gradle.jvmargs=-Xmx1536m android.useAndroidX=true # Automatically convert third-party libraries to use AndroidX android.enableJetifier=true -djl_version=0.25.0 -pytorch_version=1.13.1 +djl_version=0.26.0 +pytorch_version=2.1.1 diff --git a/api/src/main/java/ai/djl/BaseModel.java b/api/src/main/java/ai/djl/BaseModel.java index 572ab65508c5..db2f0d3dd708 100644 --- a/api/src/main/java/ai/djl/BaseModel.java +++ b/api/src/main/java/ai/djl/BaseModel.java @@ -339,8 +339,12 @@ protected Path paramPathResolver(String prefix, Map options) throws I protected boolean readParameters(Path paramFile, Map options) throws IOException, MalformedModelException { logger.debug("Try to load model from {}", paramFile); - try (DataInputStream dis = - new DataInputStream(new BufferedInputStream(Files.newInputStream(paramFile)))) { + return readParameters(Files.newInputStream(paramFile), options); + } + + protected boolean readParameters(InputStream paramStream, Map options) + throws IOException, MalformedModelException { + try (DataInputStream dis = new DataInputStream(new BufferedInputStream(paramStream))) { byte[] buf = new byte[4]; dis.readFully(buf); if (!"DJL@".equals(new String(buf, StandardCharsets.US_ASCII))) { diff --git a/api/src/main/java/ai/djl/modality/cv/translator/YoloV8Translator.java b/api/src/main/java/ai/djl/modality/cv/translator/YoloV8Translator.java index d47f7a4a14a5..faf31ab31888 100644 --- a/api/src/main/java/ai/djl/modality/cv/translator/YoloV8Translator.java +++ 
b/api/src/main/java/ai/djl/modality/cv/translator/YoloV8Translator.java @@ -64,6 +64,11 @@ protected DetectedObjects processFromBoxOutput(NDList list) { float[] buf = reshapedResult.toFloatArray(); int numberRows = Math.toIntExact(shape.get(0)); int nClasses = Math.toIntExact(shape.get(1)); + int padding = nClasses - classes.size(); + if (padding != 0 && padding != 4) { + throw new IllegalStateException( + "Expected classes: " + (nClasses - 4) + ", got " + classes.size()); + } ArrayList intermediateResults = new ArrayList<>(); // reverse order search in heap; searches through #maxBoxes for optimization when set @@ -78,6 +83,7 @@ protected DetectedObjects processFromBoxOutput(NDList list) { maxIndex = c; } } + maxIndex -= padding; if (maxClassProb > threshold) { float xPos = buf[index]; // center x diff --git a/basicdataset/src/main/resources/imagenet/extract_imagenet.py b/basicdataset/src/main/resources/imagenet/extract_imagenet.py index c618fe05e443..2f161b5757a2 100644 --- a/basicdataset/src/main/resources/imagenet/extract_imagenet.py +++ b/basicdataset/src/main/resources/imagenet/extract_imagenet.py @@ -14,6 +14,7 @@ _VAL_TAR = 'ILSVRC2012_img_val.tar' _VAL_TAR_SHA1 = '5f3f73da3395154b60528b2b2a2caf2374f5f178' + def download(url, path=None, overwrite=False, sha1_hash=None): """Download an given URL Parameters @@ -42,26 +43,29 @@ def download(url, path=None, overwrite=False, sha1_hash=None): else: fname = path - if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): + if overwrite or not os.path.exists(fname) or ( + sha1_hash and not check_sha1(fname, sha1_hash)): dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) if not os.path.exists(dirname): os.makedirs(dirname) - print('Downloading %s from %s...'%(fname, url)) + print('Downloading %s from %s...' 
% (fname, url)) r = requests.get(url, stream=True) if r.status_code != 200: - raise RuntimeError("Failed downloading url %s"%url) + raise RuntimeError("Failed downloading url %s" % url) total_length = r.headers.get('content-length') with open(fname, 'wb') as f: - if total_length is None: # no content length header + if total_length is None: # no content length header for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks + if chunk: # filter out keep-alive new chunks f.write(chunk) else: total_length = int(total_length) for chunk in tqdm(r.iter_content(chunk_size=1024), total=int(total_length / 1024. + 0.5), - unit='KB', unit_scale=False, dynamic_ncols=True): + unit='KB', + unit_scale=False, + dynamic_ncols=True): f.write(chunk) if sha1_hash and not check_sha1(fname, sha1_hash): @@ -72,25 +76,34 @@ def download(url, path=None, overwrite=False, sha1_hash=None): return fname + def parse_args(): parser = argparse.ArgumentParser( description='Setup the ImageNet dataset.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--download-dir', required=True, - help="The directory that contains downloaded tar files") + parser.add_argument( + '--download-dir', + required=True, + help="The directory that contains downloaded tar files") parser.add_argument('--target-dir', help="The directory to store extracted images") - parser.add_argument('--checksum', action='store_true', + parser.add_argument('--checksum', + action='store_true', help="If check integrity before extracting.") - parser.add_argument('--with-rec', action='store_true', + parser.add_argument('--with-rec', + action='store_true', help="If build image record files.") - parser.add_argument('--num-thread', type=int, default=1, - help="Number of threads to use when building image record file.") + parser.add_argument( + '--num-thread', + type=int, + default=1, + help="Number of threads to use when building image record file.") args = parser.parse_args() if 
args.target_dir is None: args.target_dir = args.download_dir return args + def check_sha1(filename, sha1_hash): """Check whether the sha1 hash of the file content matches the expected hash. @@ -116,11 +129,13 @@ def check_sha1(filename, sha1_hash): return sha1.hexdigest() == sha1_hash + def check_file(filename, checksum, sha1): if not os.path.exists(filename): - raise ValueError('File not found: '+filename) + raise ValueError('File not found: ' + filename) if checksum and not check_sha1(filename, sha1): - raise ValueError('Corrupted file: '+filename) + raise ValueError('Corrupted file: ' + filename) + def build_rec_process(img_dir, train=False, num_thread=1): rec_dir = os.path.abspath(os.path.join(img_dir, '../rec')) @@ -141,14 +156,8 @@ def build_rec_process(img_dir, train=False, num_thread=1): # execution import sys cmd = [ - sys.executable, - script_path, - rec_dir, - img_dir, - '--recursive', - '--pass-through', - '--pack-label', - '--num-thread', + sys.executable, script_path, rec_dir, img_dir, '--recursive', + '--pass-through', '--pack-label', '--num-thread', str(num_thread) ] subprocess.call(cmd) @@ -156,87 +165,75 @@ def build_rec_process(img_dir, train=False, num_thread=1): os.remove(lst_path) print('ImageRecord file for ' + prefix + ' has been built!') + +def is_within_directory(directory, target): + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + prefix = os.path.commonprefix([abs_directory, abs_target]) + return prefix == abs_directory + + +def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + tar.extractall(path, members, numeric_owner=numeric_owner) + + def extract_train(tar_fname, target_dir, with_rec=False, num_thread=1): os.makedirs(target_dir) with tarfile.open(tar_fname) as tar: - print("Extracting 
"+tar_fname+"...") + print("Extracting " + tar_fname + "...") # extract each class one-by-one pbar = tqdm(total=len(tar.getnames())) for class_tar in tar: - pbar.set_description('Extract '+class_tar.name) - tar.extract(class_tar, target_dir) + pbar.set_description('Extract ' + class_tar.name) class_fname = os.path.join(target_dir, class_tar.name) + if not is_within_directory(target_dir, class_fname): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extract(class_tar, target_dir) class_dir = os.path.splitext(class_fname)[0] os.mkdir(class_dir) with tarfile.open(class_fname) as f: - def is_within_directory(directory, target): - - abs_directory = os.path.abspath(directory) - abs_target = os.path.abspath(target) - - prefix = os.path.commonprefix([abs_directory, abs_target]) - - return prefix == abs_directory - - def safe_extract(tar, path=".", members=None, *, numeric_owner=False): - - for member in tar.getmembers(): - member_path = os.path.join(path, member.name) - if not is_within_directory(path, member_path): - raise Exception("Attempted Path Traversal in Tar File") - - tar.extractall(path, members, numeric_owner=numeric_owner) - - safe_extract(f, class_dir) + os.remove(class_fname) pbar.update(1) pbar.close() if with_rec: build_rec_process(target_dir, True, num_thread) + def extract_val(tar_fname, target_dir, with_rec=False, num_thread=1): os.makedirs(target_dir) print('Extracting ' + tar_fname) with tarfile.open(tar_fname) as tar: - def is_within_directory(directory, target): - - abs_directory = os.path.abspath(directory) - abs_target = os.path.abspath(target) - - prefix = os.path.commonprefix([abs_directory, abs_target]) - - return prefix == abs_directory - - def safe_extract(tar, path=".", members=None, *, numeric_owner=False): - - for member in tar.getmembers(): - member_path = os.path.join(path, member.name) - if not is_within_directory(path, member_path): - raise Exception("Attempted Path Traversal in Tar File") - - tar.extractall(path, 
members, numeric_owner=numeric_owner) - - safe_extract(tar, target_dir) + # build rec file before images are moved into subfolders if with_rec: build_rec_process(target_dir, False, num_thread) # move images to proper subfolders - val_maps_file = os.path.join(os.path.dirname(__file__), 'imagenet_val_maps.pklz') + val_maps_file = os.path.join(os.path.dirname(__file__), + 'imagenet_val_maps.pklz') with gzip.open(val_maps_file, 'rb') as f: dirs, mappings = pickle.load(f) for d in dirs: os.makedirs(os.path.join(target_dir, d)) for m in mappings: - os.rename(os.path.join(target_dir, m[0]), os.path.join(target_dir, m[1], m[0])) + os.rename(os.path.join(target_dir, m[0]), + os.path.join(target_dir, m[1], m[0])) + def main(): args = parse_args() target_dir = os.path.expanduser(args.target_dir) if os.path.exists(target_dir): - raise ValueError('Target dir ['+target_dir+'] exists. Remove it first') + raise ValueError('Target dir [' + target_dir + + '] exists. Remove it first') download_dir = os.path.expanduser(args.download_dir) train_tar_fname = os.path.join(download_dir, _TRAIN_TAR) @@ -247,8 +244,11 @@ def main(): build_rec = args.with_rec if build_rec: os.makedirs(os.path.join(target_dir, 'rec')) - extract_train(train_tar_fname, os.path.join(target_dir, 'train'), build_rec, args.num_thread) - extract_val(val_tar_fname, os.path.join(target_dir, 'val'), build_rec, args.num_thread) + extract_train(train_tar_fname, os.path.join(target_dir, 'train'), + build_rec, args.num_thread) + extract_val(val_tar_fname, os.path.join(target_dir, 'val'), build_rec, + args.num_thread) + if __name__ == '__main__': main() diff --git a/basicdataset/src/test/resources/mlrepo/dataset/cv/ai/djl/basicdataset/mnist/metadata.json b/basicdataset/src/test/resources/mlrepo/dataset/cv/ai/djl/basicdataset/mnist/metadata.json index 5e5c1b81a95b..0b5f61d1d32c 100644 --- a/basicdataset/src/test/resources/mlrepo/dataset/cv/ai/djl/basicdataset/mnist/metadata.json +++ 
b/basicdataset/src/test/resources/mlrepo/dataset/cv/ai/djl/basicdataset/mnist/metadata.json @@ -19,23 +19,23 @@ "snapshot": false, "files": { "train_data": { - "uri": "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/train-images-idx3-ubyte.gz", - "sha1Hash": "6c95f4b05d2bf285e1bfb0e7960c31bd3b3f8a7d", - "size": 9912422 + "uri": "https://mlrepo.djl.ai/dataset/cv/ai/djl/basicdataset/mnist/1.0/train-images-idx3-ubyte.gz", + "sha1Hash": "0e0d45c28981154deda73aabc437dc09aa5a4fd1", + "size": 9822052 }, "train_labels": { - "uri": "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/train-labels-idx1-ubyte.gz", - "sha1Hash": "2a80914081dc54586dbdf242f9805a6b8d2a15fc", - "size": 28881 + "uri": "https://mlrepo.djl.ai/dataset/cv/ai/djl/basicdataset/mnist/1.0/train-labels-idx1-ubyte.gz", + "sha1Hash": "af3fbf34a4396c1ee1a6128dfde57812d8abe06e", + "size": 28902 }, "test_data": { - "uri": "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/t10k-images-idx3-ubyte.gz", - "sha1Hash": "c3a25af1f52dad7f726cce8cacb138654b760d48", - "size": 1648877 + "uri": "https://mlrepo.djl.ai/dataset/cv/ai/djl/basicdataset/mnist/1.0/t10k-images-idx3-ubyte.gz", + "sha1Hash": "5a939b565aa3e5063d816efc7f3dfb721135648d", + "size": 1634335 }, "test_labels": { - "uri": "https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/t10k-labels-idx1-ubyte.gz", - "sha1Hash": "763e7fa3757d93b0cdec073cef058b2004252c17", + "uri": "https://mlrepo.djl.ai/dataset/cv/ai/djl/basicdataset/mnist/1.0/t10k-labels-idx1-ubyte.gz", + "sha1Hash": "0e4e66587e3a14f5775793e2ae10d1c48be8ae46", "size": 4542 } } diff --git a/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/wikitext-2/metadata.json b/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/wikitext-2/metadata.json index f9c64dc8028b..1f31ac3afcd4 100644 --- 
a/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/wikitext-2/metadata.json +++ b/basicdataset/src/test/resources/mlrepo/dataset/nlp/ai/djl/basicdataset/wikitext-2/metadata.json @@ -20,10 +20,10 @@ "name": "wikitext-2", "files": { "wikitext-2": { - "uri": "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip", - "sha1Hash": "3c914d17d80b1459be871a5039ac23e752a53cbe", + "uri": "https://mlrepo.djl.ai/dataset/nlp/ai/djl/basicdataset/wikitext-2/1.0/wikitext-2-v1.zip", + "sha1Hash": "46965bdeca1d8165e688598752ca467bb5bee018", "name": "", - "size": 4475746 + "size": 4475596 } } } diff --git a/docs/development/cache_management.md b/docs/development/cache_management.md index b0b56460e54c..2bdacb9a6995 100644 --- a/docs/development/cache_management.md +++ b/docs/development/cache_management.md @@ -30,10 +30,10 @@ ONNXRuntime will extract native libraries into system default temporary-file dir ### Huggingface tokenizer -If the `TOKENIZERS_CACHE` environment variable is set, Huggingface tokenizer will store cache files in it. +If the `HF_HOME` or `HF_HUB_CACHE` environment variable is set, Huggingface tokenizer will store cache files in it. It is the responsibility of the user to make sure this path is correct. 
Otherwise, we try to use the default cache directory as defined for each OS: -- macOS: `/Users/{user}/Library/Caches/huggingface/tokenizers` -- linux: `/home/{user}/.cache/huggingface/tokenizers` -- windows: `C:\Users\{user}\AppData\Local\huggingface\tokenizers` +- macOS: `/Users/{user}/.cache/huggingface/hub` +- linux: `/home/{user}/.cache/huggingface/hub` +- windows: `C:\Users\{user}\.cache\huggingface\hub` diff --git a/docs/development/inference_performance_optimization.md b/docs/development/inference_performance_optimization.md index 27bccfd3f3e7..0fdc67d999c3 100644 --- a/docs/development/inference_performance_optimization.md +++ b/docs/development/inference_performance_optimization.md @@ -85,6 +85,23 @@ You can enable it by setting the environment variable: You might see an exception if a data type or operator is not supported with the oneDNN device. +#### oneDNN(MKLDNN) tuning on AWS Graviton3 +AWS Graviton3(E) (e.g. c7g/m7g/r7g, c7gn and Hpc7g instances) supports BF16 format for ML acceleration. This can be enabled in oneDNN by setting the below environment variable +``` +grep -q bf16 /proc/cpuinfo && export DNNL_DEFAULT_FPMATH_MODE=BF16 +``` +To avoid redundant primitive creation latency overhead, enable primitive caching by setting the LRU cache capacity. Please note this caching feature increases the memory footprint. It is recommended to tune the capacity to an optimal value for a given use case. + +``` +export LRU_CACHE_CAPACITY=1024 +``` + +In addition to avoiding the redundant allocations, tensor memory allocation latencies can be optimized with Linux transparent huge pages (THP). To enable THP allocations, set the following torch environment variable. +``` +export THP_MEM_ALLOC_ENABLE=1 +``` +Please refer to [PyTorch Graviton tutorial](https://pytorch.org/tutorials/recipes/inference_tuning_on_aws_graviton.html) for more details on how to achieve the best PyTorch inference performance on AWS Graviton3 instances. 
+ #### CuDNN acceleration PyTorch has a special flag that is used for a CNN or related network speed up. If your input size won't change frequently, you may benefit from enabling this configuration in your model: diff --git a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/engine/PtModel.java b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/engine/PtModel.java index 35e95f7de860..db0e95e79c17 100644 --- a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/engine/PtModel.java +++ b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/engine/PtModel.java @@ -33,6 +33,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.Predicate; @@ -132,7 +133,8 @@ public void load(Path modelPath, String prefix, Map options) /** {@inheritDoc} */ @Override - public void load(InputStream modelStream, Map options) throws IOException { + public void load(InputStream modelStream, Map options) + throws IOException, MalformedModelException { boolean mapLocation = false; if (options != null) { mapLocation = Boolean.parseBoolean((String) options.get("mapLocation")); @@ -146,11 +148,26 @@ public void load(InputStream modelStream, Map options) throws IOExcep * @param modelStream the stream of the model file * @param mapLocation force load to specified device if true * @throws IOException model loading error + * @throws MalformedModelException if model file is corrupted */ - public void load(InputStream modelStream, boolean mapLocation) throws IOException { - modelDir = Files.createTempDirectory("pt-model"); - modelDir.toFile().deleteOnExit(); - block = JniUtils.loadModule((PtNDManager) manager, modelStream, mapLocation, false); + public void load(InputStream modelStream, boolean mapLocation) + throws IOException, MalformedModelException { + wasLoaded = true; + if (block == null) { + modelDir = 
Files.createTempDirectory("pt-model"); + modelDir.toFile().deleteOnExit(); + block = JniUtils.loadModule((PtNDManager) manager, modelStream, mapLocation, false); + + /* + * By default, the parameters are frozen, since the previous version before adding this + * trainParam, they were frozen due to the setting JITCallGuard guard, which disables + * autograd. Also, the pretrained parameters usually should not be updated too much. It + * is safe to freeze it. Users may unfreeze it and set their learning rate small. + */ + block.freezeParameters(true); + } else { + readParameters(modelStream, Collections.emptyMap()); + } } private Path findModelFile(String... prefixes) { diff --git a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java index 03835b6ca68f..b208c79bb950 100644 --- a/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java +++ b/engines/pytorch/pytorch-engine/src/main/java/ai/djl/pytorch/jni/LibUtils.java @@ -65,6 +65,7 @@ public final class LibUtils { private static final Pattern VERSION_PATTERN = Pattern.compile("(\\d+\\.\\d+\\.\\d+(-[a-z]+)?)(-SNAPSHOT)?(-\\d+)?"); + private static final Pattern LIB_PATTERN = Pattern.compile("(.*\\.(so(\\.\\d+)*|dll|dylib))"); private static LibTorch libTorch; @@ -136,7 +137,9 @@ private static void loadLibTorch(LibTorch libTorch) { paths.filter( path -> { String name = path.getFileName().toString(); - if (!isCuda + if (!LIB_PATTERN.matcher(name).matches()) { + return false; + } else if (!isCuda && name.contains("nvrtc") && name.contains("cudart") && name.contains("nvTools")) { diff --git a/extensions/spark/setup/djl_spark/util/files_util.py b/extensions/spark/setup/djl_spark/util/files_util.py index 5e31fc9e1777..dd9224000cf7 100644 --- a/extensions/spark/setup/djl_spark/util/files_util.py +++ b/extensions/spark/setup/djl_spark/util/files_util.py @@ -70,6 +70,21 @@ def download_and_extract(url, 
path): :param url: The url of the tar file. :param path: The path to the file to download to. """ + + def is_within_directory(directory, target): + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + prefix = os.path.commonprefix([abs_directory, abs_target]) + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + if not os.path.exists(path): os.makedirs(path) if not os.listdir(path): @@ -78,9 +93,9 @@ def download_and_extract(url, path): if url.startswith("s3://"): s3_download(url, tmp_file) with tarfile.open(name=tmp_file, mode="r:gz") as t: - t.extractall(path=path) + safe_extract(t, path=path) elif url.startswith("http://") or url.startswith("https://"): with urlopen(url) as response, open(tmp_file, 'wb') as f: shutil.copyfileobj(response, f) with tarfile.open(name=tmp_file, mode="r:gz") as t: - t.extractall(path=path) + safe_extract(t, path=path) diff --git a/integration/src/main/java/ai/djl/integration/tests/training/ModelTest.java b/integration/src/main/java/ai/djl/integration/tests/training/ModelTest.java index ca680129062b..3ace9c2bdf5a 100644 --- a/integration/src/main/java/ai/djl/integration/tests/training/ModelTest.java +++ b/integration/src/main/java/ai/djl/integration/tests/training/ModelTest.java @@ -27,6 +27,7 @@ import org.testng.annotations.Test; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Paths; public class ModelTest { @@ -37,7 +38,9 @@ public void testModelSaveAndLoad() throws IOException, MalformedModelException { block.add(Conv2d.builder().setKernelShape(new Shape(1, 1)).setFilters(10).build()); block.add(BatchNorm.builder().build()); try (Model saveModel = 
Model.newInstance("saveModel", TestUtils.getEngine()); - Model loadModel = Model.newInstance("loadModel", TestUtils.getEngine())) { + Model loadModel = Model.newInstance("loadModel", TestUtils.getEngine()); + Model loadStreamModel = + Model.newInstance("loadStreamModel", TestUtils.getEngine()); ) { block.initialize(saveModel.getNDManager(), DataType.FLOAT32, new Shape(1, 3, 32, 32)); ParameterList savedParameters = block.getParameters(); saveModel.setBlock(block); @@ -48,6 +51,13 @@ public void testModelSaveAndLoad() throws IOException, MalformedModelException { loadModel.load(Paths.get("build/tmp/test/models"), "saveAndLoad"); ParameterList loadedParameters = loadModel.getBlock().getParameters(); compareParameters(savedParameters, loadedParameters); + + loadStreamModel.setBlock(block); + loadStreamModel.load( + Files.newInputStream( + Paths.get("build/tmp/test/models/saveAndLoad-0000.params"))); + loadedParameters = loadStreamModel.getBlock().getParameters(); + compareParameters(savedParameters, loadedParameters); } }