
Commit 70307b8

* Upgrade presets for TensorRT 10.6.0.26, ONNX Runtime 1.20.0
1 parent 159ba39 commit 70307b8


43 files changed: +1283 -909 lines

.github/actions/deploy-ubuntu/action.yml (+4 -4)

@@ -213,16 +213,16 @@ runs:
       if [[ "$CI_DEPLOY_PLATFORM" == "linux-arm64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then
         echo Installing TensorRT
         # python3 -m gdown 1LZRCv4ZAGiDQAu4pvADJIGntq4cGl5tU
-        curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz
-        $SUDO tar -hxvf TensorRT-10.5.0.18.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C /usr/local/
+        curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz
+        $SUDO tar -hxvf TensorRT-10.6.0.26.Ubuntu-24.04.aarch64-gnu.cuda-12.6.tar.gz -C /usr/local/
         $SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt
       fi

       if [[ "$CI_DEPLOY_PLATFORM" == "linux-x86_64" ]] && [[ "$CI_DEPLOY_MODULE" == "tensorrt" ]]; then
         echo Installing TensorRT
         # python3 -m gdown 1dVhD-DEYY42QbZe1GXl-vxe3k6KqWGsL
-        curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/tars/TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz
-        $SUDO tar -hxvf TensorRT-10.5.0.18.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/
+        curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/tars/TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz
+        $SUDO tar -hxvf TensorRT-10.6.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz -C /usr/local/
         $SUDO ln -sf /usr/local/TensorRT* /usr/local/tensorrt
       fi

.github/actions/deploy-windows/action.yml (+3 -3)

@@ -142,9 +142,9 @@ runs:
       if "%CI_DEPLOY_MODULE%"=="tensorrt" (
         echo Installing TensorRT
         rem python -m gdown 1GfmJ1BKbacLpUU-0i_mGu0sjrAS0Xzzi
-        curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.5.0/zip/TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip
-        unzip TensorRT-10.5.0.18.Windows.win10.cuda-12.6.zip
-        move TensorRT-10.5.0.18 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT"
+        curl -LO https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.6.0/zip/TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip
+        unzip TensorRT-10.6.0.26.Windows.win10.cuda-12.6.zip
+        move TensorRT-10.6.0.26 "%ProgramFiles%\NVIDIA GPU Computing Toolkit\TensorRT"
       )

       if "%CI_DEPLOY_MODULE%"=="mkl" (

CHANGELOG.md (+1 -1)

@@ -9,7 +9,7 @@
 * Build FFmpeg with zimg to enable zscale filter ([pull #1481](https://github.com/bytedeco/javacpp-presets/pull/1481))
 * Enable PulseAudio support for FFmpeg on Linux ([pull #1472](https://github.com/bytedeco/javacpp-presets/pull/1472))
 * Virtualize `btCollisionWorld`, `btOverlapFilterCallback`, `btOverlapCallback` from Bullet Physics SDK ([pull #1475](https://github.com/bytedeco/javacpp-presets/pull/1475))
-* Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2025.0, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.3, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.1.0.6, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.1 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.5.0.18, Triton Inference Server 2.51.0, ONNX 1.17.0, ONNX Runtime 1.19.2, TVM 0.18.0, and their dependencies
+* Upgrade presets for OpenCV 4.10.0, FFmpeg 7.1, Spinnaker 4.0.0.116 ([pull #1524](https://github.com/bytedeco/javacpp-presets/pull/1524)), MKL 2025.0, DNNL 3.6, OpenBLAS 0.3.28, CMINPACK 1.3.11, GSL 2.8, CPython 3.13.0, NumPy 2.1.2, SciPy 1.14.1, LLVM 19.1.3, LibRaw 0.21.2 ([pull #1520](https://github.com/bytedeco/javacpp-presets/pull/1520)), Leptonica 1.85.0, Tesseract 5.4.1, libffi 3.4.6, CUDA 12.6.2, cuDNN 9.5.1, NCCL 2.23.4, nvCOMP 4.1.0.6, OpenCL 3.0.16, NVIDIA Video Codec SDK 12.2.72, PyTorch 2.5.1 ([pull #1466](https://github.com/bytedeco/javacpp-presets/pull/1466)), SentencePiece 0.2.0, TensorFlow Lite 2.18.0, TensorRT 10.6.0.26, Triton Inference Server 2.51.0, ONNX 1.17.0, ONNX Runtime 1.20.0, TVM 0.18.0, and their dependencies

 ### January 29, 2024 version 1.5.10
 * Introduce `macosx-arm64` builds for PyTorch ([pull #1463](https://github.com/bytedeco/javacpp-presets/pull/1463))

README.md (+2 -2)

@@ -227,13 +227,13 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
 * SentencePiece 0.2.0 https://github.com/google/sentencepiece
 * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow
 * TensorFlow Lite 2.18.x https://github.com/tensorflow/tensorflow
-* TensorRT 10.5.x https://developer.nvidia.com/tensorrt
+* TensorRT 10.6.x https://developer.nvidia.com/tensorrt
 * Triton Inference Server 2.51.x https://developer.nvidia.com/nvidia-triton-inference-server
 * The Arcade Learning Environment 0.8.x https://github.com/mgbellemare/Arcade-Learning-Environment
 * DepthAI 2.24.x https://github.com/luxonis/depthai-core
 * ONNX 1.17.x https://github.com/onnx/onnx
 * nGraph 0.26.0 https://github.com/NervanaSystems/ngraph
-* ONNX Runtime 1.19.x https://github.com/microsoft/onnxruntime
+* ONNX Runtime 1.20.x https://github.com/microsoft/onnxruntime
 * TVM 0.18.x https://github.com/apache/tvm
 * Bullet Physics SDK 3.25 https://pybullet.org
 * LiquidFun http://google.github.io/liquidfun/

onnxruntime/README.md (+3 -3)

@@ -9,7 +9,7 @@ Introduction
 ------------
 This directory contains the JavaCPP Presets module for:

-* ONNX Runtime 1.19.2 https://microsoft.github.io/onnxruntime/
+* ONNX Runtime 1.20.0 https://microsoft.github.io/onnxruntime/

 Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

@@ -46,14 +46,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
     <dependency>
      <groupId>org.bytedeco</groupId>
      <artifactId>onnxruntime-platform</artifactId>
-     <version>1.19.2-1.5.11-SNAPSHOT</version>
+     <version>1.20.0-1.5.11-SNAPSHOT</version>
    </dependency>

    <!-- Additional dependencies required to use CUDA and cuDNN -->
    <dependency>
      <groupId>org.bytedeco</groupId>
      <artifactId>onnxruntime-platform-gpu</artifactId>
-     <version>1.19.2-1.5.11-SNAPSHOT</version>
+     <version>1.20.0-1.5.11-SNAPSHOT</version>
    </dependency>

    <!-- Additional dependencies to use bundled CUDA and cuDNN -->
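
As a usage note (not part of the diff): the bumped artifacts are consumed exactly as before. Below is a minimal sketch of opening a model with the C++-style API these presets generate, patterned after the programs in onnxruntime/samples; the "model.onnx" path is a placeholder, and constructor overloads should be verified against the generated sources.

import org.bytedeco.javacpp.*;
import org.bytedeco.onnxruntime.*;
import static org.bytedeco.onnxruntime.global.onnxruntime.*;

public class QuickStart {
    public static void main(String[] args) {
        try (Env env = new Env(ORT_LOGGING_LEVEL_WARNING, "quickstart");
             SessionOptions options = new SessionOptions()) {
            // ORTCHAR_T is wchar_t on Windows and char elsewhere, hence the two pointer types
            Pointer modelPath = Loader.getPlatform().startsWith("windows")
                    ? new CharPointer("model.onnx") : new BytePointer("model.onnx");
            Session session = new Session(env, modelPath, options);
            System.out.println("model inputs: " + session.GetInputCount());
        }
    }
}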

onnxruntime/cppbuild.sh (+4 -2)

@@ -22,7 +22,7 @@ if [[ "$EXTENSION" == *gpu ]]; then
     GPU_FLAGS="--use_cuda"
 fi

-ONNXRUNTIME=1.19.2
+ONNXRUNTIME=1.20.0

 mkdir -p "$PLATFORM$EXTENSION"
 cd "$PLATFORM$EXTENSION"
@@ -84,7 +84,7 @@ sedinplace 's/MLAS_CPUIDINFO::GetCPUIDInfo().HasArmNeon_I8MM()/false/g' onnxrunt

 # work around toolchain issues on Mac and Windows
 patch -p1 < ../../../onnxruntime.patch
-patch -p1 < ../../../onnxruntime-cuda.patch # https://github.com/microsoft/onnxruntime/pull/22316
+#patch -p1 < ../../../onnxruntime-cuda.patch # https://github.com/microsoft/onnxruntime/pull/22316
 #patch -p1 < ../../../onnxruntime-windows.patch # https://github.com/microsoft/onnxruntime/pull/7883
 sedinplace '/--Werror/d' cmake/CMakeLists.txt
 sedinplace '/-DCMAKE_CUDA_COMPILER=/d' tools/ci_build/build.py
@@ -113,6 +113,8 @@ sedinplace 's/, data_dims);/);/g' onnxruntime/core/providers/dnnl/subgraph/dnnl_
 sedinplace 's/, dims);/);/g' onnxruntime/contrib_ops/cuda/quantization/qordered_ops/qordered_qdq.cc
 sedinplace '/omp_get_max_threads/d' onnxruntime/core/providers/dnnl/dnnl_execution_provider.cc
 sedinplace '/omp_set_num_threads/d' onnxruntime/core/providers/dnnl/dnnl_execution_provider.cc
+sedinplace '/cvtfp16Avx/d' cmake/onnxruntime_mlas.cmake
+sedinplace 's/MlasCastF16ToF32KernelAvx;/MlasCastF16ToF32KernelAvx2;/g' onnxruntime/core/mlas/lib/platform.cpp

 # use PTX instead of compiling for all CUDA archs to reduce library size
 sedinplace 's/-gencode=arch=compute_52,code=sm_52/-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_90,code=sm_90/g' cmake/CMakeLists.txt

onnxruntime/platform/gpu/pom.xml (+1 -1)

@@ -12,7 +12,7 @@

   <groupId>org.bytedeco</groupId>
   <artifactId>onnxruntime-platform-gpu</artifactId>
-  <version>1.19.2-${project.parent.version}</version>
+  <version>1.20.0-${project.parent.version}</version>
   <name>JavaCPP Presets Platform GPU for ONNX Runtime</name>

   <properties>

onnxruntime/platform/pom.xml (+1 -1)

@@ -12,7 +12,7 @@

   <groupId>org.bytedeco</groupId>
   <artifactId>onnxruntime-platform</artifactId>
-  <version>1.19.2-${project.parent.version}</version>
+  <version>1.20.0-${project.parent.version}</version>
   <name>JavaCPP Presets Platform for ONNX Runtime</name>

   <properties>

onnxruntime/pom.xml (+1 -1)

@@ -11,7 +11,7 @@

   <groupId>org.bytedeco</groupId>
   <artifactId>onnxruntime</artifactId>
-  <version>1.19.2-${project.parent.version}</version>
+  <version>1.20.0-${project.parent.version}</version>
   <name>JavaCPP Presets for ONNX Runtime</name>

   <properties>

onnxruntime/samples/pom.xml (+2 -2)

@@ -12,14 +12,14 @@
     <dependency>
       <groupId>org.bytedeco</groupId>
       <artifactId>onnxruntime-platform</artifactId>
-      <version>1.19.2-1.5.11-SNAPSHOT</version>
+      <version>1.20.0-1.5.11-SNAPSHOT</version>
     </dependency>

     <!-- Additional dependencies required to use CUDA and cuDNN -->
     <dependency>
       <groupId>org.bytedeco</groupId>
       <artifactId>onnxruntime-platform-gpu</artifactId>
-      <version>1.19.2-1.5.11-SNAPSHOT</version>
+      <version>1.20.0-1.5.11-SNAPSHOT</version>
     </dependency>

     <!-- Additional dependencies to use bundled CUDA and cuDNN -->

onnxruntime/src/gen/java/org/bytedeco/onnxruntime/BaseOrtLoraAdapter.java (new file, +50)

@@ -0,0 +1,50 @@
+// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE
+
+package org.bytedeco.onnxruntime;
+
+import java.nio.*;
+import org.bytedeco.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+import static org.bytedeco.javacpp.presets.javacpp.*;
+import org.bytedeco.opencl.*;
+import static org.bytedeco.opencl.global.OpenCL.*;
+import org.bytedeco.dnnl.*;
+import static org.bytedeco.dnnl.global.dnnl.*;
+
+import static org.bytedeco.onnxruntime.global.onnxruntime.*;
+
+@Name("Ort::detail::Base<OrtLoraAdapter>") @NoOffset @Properties(inherit = org.bytedeco.onnxruntime.presets.onnxruntime.class)
+public class BaseOrtLoraAdapter extends Pointer {
+    static { Loader.load(); }
+    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
+    public BaseOrtLoraAdapter(Pointer p) { super(p); }
+    /** Native array allocator. Access with {@link Pointer#position(long)}. */
+    public BaseOrtLoraAdapter(long size) { super((Pointer)null); allocateArray(size); }
+    private native void allocateArray(long size);
+    @Override public BaseOrtLoraAdapter position(long position) {
+        return (BaseOrtLoraAdapter)super.position(position);
+    }
+    @Override public BaseOrtLoraAdapter getPointer(long i) {
+        return new BaseOrtLoraAdapter((Pointer)this).offsetAddress(i);
+    }
+
+    public BaseOrtLoraAdapter() { super((Pointer)null); allocate(); }
+    private native void allocate();
+    public BaseOrtLoraAdapter(@Cast("Ort::detail::Base<OrtLoraAdapter>::contained_type*") OrtLoraAdapter p) { super((Pointer)null); allocate(p); }
+    @NoException(true) private native void allocate(@Cast("Ort::detail::Base<OrtLoraAdapter>::contained_type*") OrtLoraAdapter p);
+
+    public BaseOrtLoraAdapter(@ByRef(true) BaseOrtLoraAdapter v) { super((Pointer)null); allocate(v); }
+    @NoException(true) private native void allocate(@ByRef(true) BaseOrtLoraAdapter v);
+    public native @ByRef @Name("operator =") @NoException(true) BaseOrtLoraAdapter put(@ByRef(true) BaseOrtLoraAdapter v);
+
+    public native @Cast("Ort::detail::Base<OrtLoraAdapter>::contained_type*") @Name("operator Ort::detail::Base<OrtLoraAdapter>::contained_type*") @NoException(true) OrtLoraAdapter asOrtLoraAdapter();
+
+    /** \brief Relinquishes ownership of the contained C object pointer
+     *  The underlying object is not destroyed */
+    public native @Cast("Ort::detail::Base<OrtLoraAdapter>::contained_type*") OrtLoraAdapter release();
+}

onnxruntime/src/gen/java/org/bytedeco/onnxruntime/LoraAdapter.java (new file, +57)

@@ -0,0 +1,57 @@
+// Targeted by JavaCPP version 1.5.11-SNAPSHOT: DO NOT EDIT THIS FILE
+
+package org.bytedeco.onnxruntime;
+
+import java.nio.*;
+import org.bytedeco.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+import static org.bytedeco.javacpp.presets.javacpp.*;
+import org.bytedeco.opencl.*;
+import static org.bytedeco.opencl.global.OpenCL.*;
+import org.bytedeco.dnnl.*;
+import static org.bytedeco.dnnl.global.dnnl.*;
+
+import static org.bytedeco.onnxruntime.global.onnxruntime.*;
+
+
+/** \brief LoraAdapter holds a set of Lora Parameters loaded from a single file */
+@Namespace("Ort") @Properties(inherit = org.bytedeco.onnxruntime.presets.onnxruntime.class)
+public class LoraAdapter extends BaseOrtLoraAdapter {
+    static { Loader.load(); }
+    /** Default native constructor. */
+    public LoraAdapter() { super((Pointer)null); allocate(); }
+    /** Native array allocator. Access with {@link Pointer#position(long)}. */
+    public LoraAdapter(long size) { super((Pointer)null); allocateArray(size); }
+    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
+    public LoraAdapter(Pointer p) { super(p); }
+    private native void allocate();
+    private native void allocateArray(long size);
+    @Override public LoraAdapter position(long position) {
+        return (LoraAdapter)super.position(position);
+    }
+    @Override public LoraAdapter getPointer(long i) {
+        return new LoraAdapter((Pointer)this).offsetAddress(i);
+    }
+
+    /** \brief Wraps OrtApi::CreateLoraAdapter
+     *
+     *  The function attempts to load the adapter from the specified file
+     *  @param adapter_path The path to the Lora adapter
+     *  @param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
+     *  be copied to device if required by the model at inference time. */
+
+    ///
+    public static native @ByVal LoraAdapter CreateLoraAdapter(@Cast("const std::basic_string<ORTCHAR_T>*") @ByRef Pointer adapter_path,
+                OrtAllocator allocator);
+
+    /** \brief Wraps OrtApi::CreateLoraAdapterFromArray
+     *
+     *  The function attempts to load the adapter from the specified byte array.
+     *  @param bytes The byte array containing file LoraAdapter format
+     *  @param num_bytes The number of bytes in the byte array
+     *  @param allocator optional pointer to a device allocator. If nullptr, the data stays on CPU. It would still
+     *  be copied to device if required by the model at inference time. */
+    public static native @ByVal LoraAdapter CreateLoraAdapterFromArray(@Const Pointer bytes, @Cast("size_t") long num_bytes,
+                OrtAllocator allocator);
+}
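
The new LoraAdapter class mirrors Ort::LoraAdapter from the ONNX Runtime 1.20.0 C++ API. As a hedged sketch of how the binding could be exercised from Java (not code from this commit): read the adapter into memory and activate it for a run. The "adapter.onnx_adapter" path is a placeholder, and RunOptions.AddActiveLoraAdapter is assumed to be generated alongside this class in the same update; verify both against the generated sources.

import java.nio.file.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.onnxruntime.*;

public class LoraAdapterSketch {
    public static void main(String[] args) throws Exception {
        // placeholder path to an adapter exported in the ONNX Runtime LoraAdapter format
        byte[] bytes = Files.readAllBytes(Paths.get("adapter.onnx_adapter"));
        try (BytePointer data = new BytePointer(bytes)) {
            // a null allocator keeps the parameters on CPU; they are still copied
            // to the device at inference time if the model requires it
            LoraAdapter adapter = LoraAdapter.CreateLoraAdapterFromArray(data, bytes.length, null);
            RunOptions runOptions = new RunOptions();
            runOptions.AddActiveLoraAdapter(adapter); // assumed binding, mirrors Ort::RunOptions
        }
    }
}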
