Skip to content

Commit bcaa6df

Browse files
authored
Add VAD demo for Java API (#928)
1 parent b1c7d04 commit bcaa6df

14 files changed

+604
-0
lines changed

.github/workflows/run-java-test.yaml

+20
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,32 @@ jobs:
100100
-DBUILD_SHARED_LIBS=ON \
101101
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
102102
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
103+
-DBUILD_ESPEAK_NG_EXE=OFF \
103104
-DSHERPA_ONNX_ENABLE_JNI=ON \
104105
..
105106
106107
make -j4
107108
ls -lh lib
108109
110+
- name: Run java test (VAD + Non-streaming Paraformer)
111+
shell: bash
112+
run: |
113+
cd ./java-api-examples
114+
./run-vad-non-streaming-paraformer.sh
115+
rm *.onnx
116+
ls -lh *.wav
117+
rm *.wav
118+
rm -rf sherpa-onnx-*
119+
120+
- name: Run java test (VAD remove silence)
121+
shell: bash
122+
run: |
123+
cd ./java-api-examples
124+
./run-vad-remove-slience.sh
125+
rm *.onnx
126+
ls -lh *.wav
127+
rm *.wav
128+
109129
- name: Run java test (speaker identification)
110130
shell: bash
111131
run: |

java-api-examples/README.md

+12
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,15 @@ The punctuation model supports both English and Chinese.
5656
```bash
5757
./run-speaker-identification.sh
5858
```
59+
60+
## VAD (Remove silence)
61+
62+
```bash
63+
./run-vad-remove-slience.sh
64+
```
65+
66+
## VAD + Non-streaming Paraformer for speech recognition
67+
68+
```bash
69+
./run-vad-non-streaming-paraformer.sh
70+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Copyright 2024 Xiaomi Corporation
2+
3+
// This file shows how to use a silero_vad model with a non-streaming Paraformer
4+
// for speech recognition.
5+
6+
import com.k2fsa.sherpa.onnx.*;
7+
import java.util.Arrays;
8+
9+
public class VadNonStreamingParaformer {
10+
public static Vad createVad() {
11+
// please download ./silero_vad.onnx from
12+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
13+
String model = "./silero_vad.onnx";
14+
SileroVadModelConfig sileroVad =
15+
SileroVadModelConfig.builder()
16+
.setModel(model)
17+
.setThreshold(0.5f)
18+
.setMinSilenceDuration(0.25f)
19+
.setMinSpeechDuration(0.5f)
20+
.setWindowSize(512)
21+
.build();
22+
23+
VadModelConfig config =
24+
VadModelConfig.builder()
25+
.setSileroVadModelConfig(sileroVad)
26+
.setSampleRate(16000)
27+
.setNumThreads(1)
28+
.setDebug(true)
29+
.setProvider("cpu")
30+
.build();
31+
32+
return new Vad(config);
33+
}
34+
35+
public static OfflineRecognizer createOfflineRecognizer() {
36+
// please refer to
37+
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english
38+
// to download model files
39+
String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
40+
String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
41+
42+
String waveFilename = "./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav";
43+
44+
WaveReader reader = new WaveReader(waveFilename);
45+
46+
OfflineParaformerModelConfig paraformer =
47+
OfflineParaformerModelConfig.builder().setModel(model).build();
48+
49+
OfflineModelConfig modelConfig =
50+
OfflineModelConfig.builder()
51+
.setParaformer(paraformer)
52+
.setTokens(tokens)
53+
.setNumThreads(1)
54+
.setDebug(true)
55+
.build();
56+
57+
OfflineRecognizerConfig config =
58+
OfflineRecognizerConfig.builder()
59+
.setOfflineModelConfig(modelConfig)
60+
.setDecodingMethod("greedy_search")
61+
.build();
62+
63+
return new OfflineRecognizer(config);
64+
}
65+
66+
public static void main(String[] args) {
67+
68+
Vad vad = createVad();
69+
OfflineRecognizer recognizer = createOfflineRecognizer();
70+
71+
// You can download the test file from
72+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
73+
String testWaveFilename = "./lei-jun-test.wav";
74+
WaveReader reader = new WaveReader(testWaveFilename);
75+
76+
int numSamples = reader.getSamples().length;
77+
int numIter = numSamples / 512;
78+
79+
for (int i = 0; i != numIter; ++i) {
80+
int start = i * 512;
81+
int end = start + 512;
82+
float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
83+
vad.acceptWaveform(samples);
84+
if (vad.isSpeechDetected()) {
85+
while (!vad.empty()) {
86+
SpeechSegment segment = vad.front();
87+
float startTime = segment.getStart() / 16000.0f;
88+
float duration = segment.getSamples().length / 16000.0f;
89+
90+
OfflineStream stream = recognizer.createStream();
91+
stream.acceptWaveform(segment.getSamples(), 16000);
92+
recognizer.decode(stream);
93+
String text = recognizer.getResult(stream).getText();
94+
95+
if (!text.isEmpty()) {
96+
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
97+
}
98+
99+
vad.pop();
100+
}
101+
}
102+
}
103+
}
104+
}
+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright 2024 Xiaomi Corporation
2+
3+
// This file shows how to use a silero_vad model to remove silences from
4+
// a wave file.
5+
6+
import com.k2fsa.sherpa.onnx.*;
7+
import java.util.ArrayList;
8+
import java.util.Arrays;
9+
10+
public class VadRemoveSilence {
11+
public static void main(String[] args) {
12+
// please download ./silero_vad.onnx from
13+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
14+
String model = "./silero_vad.onnx";
15+
SileroVadModelConfig sileroVad =
16+
SileroVadModelConfig.builder()
17+
.setModel(model)
18+
.setThreshold(0.5f)
19+
.setMinSilenceDuration(0.25f)
20+
.setMinSpeechDuration(0.5f)
21+
.setWindowSize(512)
22+
.build();
23+
24+
VadModelConfig config =
25+
VadModelConfig.builder()
26+
.setSileroVadModelConfig(sileroVad)
27+
.setSampleRate(16000)
28+
.setNumThreads(1)
29+
.setDebug(true)
30+
.setProvider("cpu")
31+
.build();
32+
33+
Vad vad = new Vad(config);
34+
35+
// You can download the test file from
36+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
37+
String testWaveFilename = "./lei-jun-test.wav";
38+
WaveReader reader = new WaveReader(testWaveFilename);
39+
40+
int numSamples = reader.getSamples().length;
41+
int numIter = numSamples / 512;
42+
43+
ArrayList<float[]> segments = new ArrayList<float[]>();
44+
45+
for (int i = 0; i != numIter; ++i) {
46+
int start = i * 512;
47+
int end = start + 512;
48+
float[] samples = Arrays.copyOfRange(reader.getSamples(), start, end);
49+
vad.acceptWaveform(samples);
50+
if (vad.isSpeechDetected()) {
51+
while (!vad.empty()) {
52+
53+
// if you want to get the starting time of this segment, you can use
54+
/* float startTime = vad.front().getStart() / 16000.0f; */
55+
56+
segments.add(vad.front().getSamples());
57+
vad.pop();
58+
}
59+
}
60+
}
61+
62+
// get total number of samples
63+
int n = 0;
64+
for (float[] s : segments) {
65+
n += s.length;
66+
}
67+
68+
float[] allSamples = new float[n];
69+
int i = 0;
70+
for (float[] s : segments) {
71+
System.arraycopy(s, 0, allSamples, i, s.length);
72+
i += s.length;
73+
}
74+
75+
String outFilename = "lei-jun-test-no-silence.wav";
76+
WaveWriter.write(outFilename, allSamples, 16000);
77+
System.out.printf("Saved to %s\n", outFilename);
78+
}
79+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI library, sherpa-onnx.jar), downloads the
# models and the test wave, then runs VadNonStreamingParaformer.java.
#
# All paths are relative to the current directory, so run this script from the
# directory that contains it.

set -ex

# Build the JNI shared library unless a .dylib or .so is already present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it does not exist yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the VAD model, the test wave and the Paraformer model on demand.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

# Quote the library path so a working directory containing spaces is not
# split into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadNonStreamingParaformer.java
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/env bash
#
# Builds whatever is missing (JNI library, sherpa-onnx.jar), downloads the VAD
# model and the test wave, then runs VadRemoveSilence.java.
#
# All paths are relative to the current directory, so run this script from the
# directory that contains it.

set -ex

# Build the JNI shared library unless a .dylib or .so is already present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

# Build the Java API jar if it does not exist yet.
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# Download the VAD model and the test wave on demand.
if [ ! -f ./silero_vad.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

# Quote the library path so a working directory containing spaces is not
# split into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  ./VadRemoveSilence.java

sherpa-onnx/java-api/Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ out_jar := $(out_dir)/sherpa-onnx.jar
77
package_dir := com/k2fsa/sherpa/onnx
88

99
java_files := WaveReader.java
10+
java_files += WaveWriter.java
1011
java_files += EndpointRule.java
1112
java_files += EndpointConfig.java
1213
java_files += FeatureConfig.java
@@ -56,6 +57,11 @@ java_files += SpeakerEmbeddingExtractorConfig.java
5657
java_files += SpeakerEmbeddingExtractor.java
5758
java_files += SpeakerEmbeddingManager.java
5859

60+
java_files += SileroVadModelConfig.java
61+
java_files += VadModelConfig.java
62+
java_files += SpeechSegment.java
63+
java_files += Vad.java
64+
5965
class_files := $(java_files:%.java=%.class)
6066

6167
java_files := $(addprefix src/$(package_dir)/,$(java_files))

0 commit comments

Comments
 (0)