Skip to content

Commit eee5d8a

Browse files
authoredMay 11, 2024
Add node-addon-api for VAD (#864)
1 parent 677bc1d commit eee5d8a

15 files changed

+914
-10
lines changed
 

‎nodejs-addon-examples/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
crash.log

‎nodejs-addon-examples/README.md

+9
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,12 @@ node ./test_asr_streaming_transducer.js
3838

3939
node ./test_asr_streaming_transducer_microphone.js
4040
```
41+
42+
# VAD
43+
44+
```bash
45+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
46+
47+
node ./test_vad_microphone.js
48+
```
49+

‎nodejs-addon-examples/test_asr_streaming_transducer.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ console.log('Started')
3636
let start = performance.now();
3737
const stream = recognizer.createStream();
3838
const wave = sherpa_onnx.readWave(waveFilename);
39-
stream.acceptWaveform(wave.samples, wave.sampleRate);
39+
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
4040

4141
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
42-
stream.acceptWaveform(tailPadding, wave.sampleRate);
42+
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
4343

4444
while (recognizer.isReady(stream)) {
4545
recognizer.decode(stream);

‎nodejs-addon-examples/test_asr_streaming_transducer_microphone.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ const display = new sherpa_onnx.Display(50);
6060
ai.on('data', data => {
6161
const samples = new Float32Array(data.buffer);
6262

63-
stream.acceptWaveform(samples, recognizer.config.featConfig.sampleRate);
63+
stream.acceptWaveform(
64+
{sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
6465

6566
while (recognizer.isReady(stream)) {
6667
recognizer.decode(stream);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
const portAudio = require('naudiodon2');
4+
// console.log(portAudio.getDevices());
5+
6+
const sherpa_onnx = require('sherpa-onnx-node');
7+
8+
/**
 * Create the voice activity detector used by this example.
 *
 * The silero VAD model is loaded from ./silero_vad.onnx. Please download it
 * from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 *
 * @returns {object} a new sherpa_onnx.Vad built with a sampleRate of 16000 and
 *     a bufferSizeInSeconds argument of 60.
 */
function createVad() {
  // Settings specific to the silero VAD model.
  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  };

  // Second constructor argument of sherpa_onnx.Vad.
  const vadBufferSeconds = 60;

  return new sherpa_onnx.Vad(
      {sileroVad, sampleRate: 16000, debug: true, numThreads: 1},
      vadBufferSeconds);
}
28+
29+
// Declared with `const`: a bare `vad = ...` assignment creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
const vad = createVad();

// Staging buffer for microphone samples; its capacity is
// bufferSizeInSeconds * sampleRate.
const bufferSizeInSeconds = 30;
const buffer =
    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: vad.config.sampleRate,
  }
});

// `printed` ensures "Detected speech" is logged once per detection, not once
// per processed window. `index` numbers the saved segments.
let printed = false;
let index = 0;
ai.on('data', data => {
  const windowSize = vad.config.sileroVad.windowSize;
  // NOTE(review): this views the Buffer's entire underlying ArrayBuffer. If the
  // audio library ever delivers Buffers that are slices of a larger
  // ArrayBuffer, data.byteOffset / data.length would have to be passed too
  // -- confirm against naudiodon2.
  buffer.push(new Float32Array(data.buffer));

  // Feed the detector one window of windowSize samples at a time.
  while (buffer.size() > windowSize) {
    const samples = buffer.get(buffer.head(), windowSize);
    buffer.pop(windowSize);
    vad.acceptWaveform(samples);

    if (vad.isDetected() && !printed) {
      console.log(`${index}: Detected speech`);
      printed = true;
    }

    if (!vad.isDetected()) {
      printed = false;
    }

    // Drain every queued segment and save each one to its own wave file.
    while (!vad.isEmpty()) {
      const segment = vad.front();
      vad.pop();
      const filename = `${index}-${
          new Date()
              .toLocaleTimeString('en-US', {hour12: false})
              .split(' ')[0]}.wav`;
      sherpa_onnx.writeWave(
          filename,
          {samples: segment.samples, sampleRate: vad.config.sampleRate});
      const duration = segment.samples.length / vad.config.sampleRate;
      console.log(`${index} End of speech. Duration: ${duration} seconds`);
      console.log(`Saved to ${filename}`);
      index += 1;
    }
  }
});

ai.on('close', () => {
  console.log('Free resources');
});

ai.start();
console.log('Started! Please speak');

‎python-api-examples/offline-tts-play.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
--vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
4848
--vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
4949
--vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
50-
--tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
50+
--tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
5151
--vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
5252
--sid=2 \
5353
--output-filename=./test-2.wav \

‎python-api-examples/offline-tts.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
--vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
4949
--vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
5050
--vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
51-
--tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
51+
--tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
5252
--vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
5353
--sid=2 \
5454
--output-filename=./test-2.wav \

‎scripts/node-addon-api/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ include_directories(${CMAKE_JS_INC})
2020
set(srcs
2121
src/sherpa-onnx-node-addon-api.cc
2222
src/streaming-asr.cc
23+
src/vad.cc
2324
src/wave-reader.cc
25+
src/wave-writer.cc
2426
)
2527

2628
if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR})
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
const addon = require('./addon.js')
22
const streaming_asr = require('./streaming-asr.js');
3+
const vad = require('./vad.js');
34

45
module.exports = {
56
OnlineRecognizer: streaming_asr.OnlineRecognizer,
67
readWave: addon.readWave,
8+
writeWave: addon.writeWave,
79
Display: streaming_asr.Display,
10+
Vad: vad.Vad,
11+
CircularBuffer: vad.CircularBuffer,
812
}

‎scripts/node-addon-api/lib/streaming-asr.js

+4-3
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ class OnlineStream {
1515
this.handle = handle;
1616
}
1717

18+
// obj is {samples: samples, sampleRate: sampleRate}
1819
// samples is a float32 array containing samples in the range [-1, 1]
19-
acceptWaveform(samples, sampleRate) {
20-
addon.acceptWaveformOnline(
21-
this.handle, {samples: samples, sampleRate: sampleRate})
20+
// sampleRate is a number
21+
acceptWaveform(obj) {
22+
addon.acceptWaveformOnline(this.handle, obj)
2223
}
2324

2425
inputFinished() {

‎scripts/node-addon-api/lib/vad.js

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
const addon = require('./addon.js');
2+
3+
/**
 * JavaScript wrapper around the circular buffer provided by the native addon.
 *
 * Each method forwards to the corresponding addon.circularBuffer* function,
 * passing the native handle created in the constructor.
 */
class CircularBuffer {
  /**
   * @param {number} capacity forwarded to addon.createCircularBuffer
   */
  constructor(capacity) {
    const nativeHandle = addon.createCircularBuffer(capacity);
    this.handle = nativeHandle;
  }

  /**
   * Append samples to the buffer.
   * @param {Float32Array} samples a float32 array
   */
  push(samples) {
    const {handle} = this;
    addon.circularBufferPush(handle, samples);
  }

  /**
   * Read n items starting at startIndex.
   * @param {number} startIndex
   * @param {number} n
   * @returns {Float32Array} a float32 array
   */
  get(startIndex, n) {
    const {handle} = this;
    const samples = addon.circularBufferGet(handle, startIndex, n);
    return samples;
  }

  /**
   * Forward to addon.circularBufferPop and return its result.
   * @param {number} n
   */
  pop(n) {
    const {handle} = this;
    return addon.circularBufferPop(handle, n);
  }

  /** @returns the result of addon.circularBufferSize for this buffer */
  size() {
    const {handle} = this;
    return addon.circularBufferSize(handle);
  }

  /** @returns the result of addon.circularBufferHead for this buffer */
  head() {
    const {handle} = this;
    return addon.circularBufferHead(handle);
  }

  /** Forward to addon.circularBufferReset and return its result. */
  reset() {
    const {handle} = this;
    return addon.circularBufferReset(handle);
  }
}
34+
35+
/**
 * JavaScript wrapper around the voice activity detector provided by the
 * native addon. Each method forwards to the matching
 * addon.voiceActivityDetector* function with the native handle.
 */
class Vad {
  /*
config = {
  sileroVad: {
    model: "./silero_vad.onnx",
    threshold: 0.5,
  }
}
   */
  /**
   * @param {object} config see the example above; also kept as this.config
   * @param {number} bufferSizeInSeconds forwarded to the native constructor
   */
  constructor(config, bufferSizeInSeconds) {
    this.handle =
        addon.createVoiceActivityDetector(config, bufferSizeInSeconds);
    this.config = config;
  }

  /**
   * Feed samples to the detector.
   * @param {Float32Array} samples
   */
  acceptWaveform(samples) {
    addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  /** @returns the result of addon.voiceActivityDetectorIsEmpty */
  isEmpty() {
    return addon.voiceActivityDetectorIsEmpty(this.handle);
  }

  /** @returns the result of addon.voiceActivityDetectorIsDetected */
  isDetected() {
    return addon.voiceActivityDetectorIsDetected(this.handle);
  }

  /** Forward to addon.voiceActivityDetectorPop; returns nothing. */
  pop() {
    addon.voiceActivityDetectorPop(this.handle);
  }

  /**
   * Forward to the native clear function; returns nothing.
   *
   * The binding name follows the lower-camel-case scheme used by every other
   * method of this class. The previous `VoiceActivityDetectorClearWrapper`
   * looked like the C++ function name rather than the exported property.
   */
  clear() {
    addon.voiceActivityDetectorClear(this.handle);
  }

  /*
{
  samples: a 1-d float32 array,
  start: a int32
}
   */
  /** @returns the object described above, from addon.voiceActivityDetectorFront */
  front() {
    return addon.voiceActivityDetectorFront(this.handle);
  }

  /**
   * Forward to the native reset function and return its result.
   *
   * Uses the lower-camel-case binding name for the same reason as clear().
   */
  reset() {
    return addon.voiceActivityDetectorReset(this.handle);
  }
}
84+
85+
module.exports = {
86+
Vad,
87+
CircularBuffer,
88+
}

‎scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc

+4
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55

66
void InitStreamingAsr(Napi::Env env, Napi::Object exports);
77
void InitWaveReader(Napi::Env env, Napi::Object exports);
8+
void InitWaveWriter(Napi::Env env, Napi::Object exports);
9+
void InitVad(Napi::Env env, Napi::Object exports);
810

911
Napi::Object Init(Napi::Env env, Napi::Object exports) {
1012
InitStreamingAsr(env, exports);
1113
InitWaveReader(env, exports);
14+
InitWaveWriter(env, exports);
15+
InitVad(env, exports);
1216

1317
return exports;
1418
}

‎scripts/node-addon-api/src/streaming-asr.cc

+7-2
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,13 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
125125
config.provider = p;
126126
}
127127

128-
if (o.Has("debug") && o.Get("debug").IsNumber()) {
129-
config.debug = o.Get("debug").As<Napi::Number>().Int32Value();
128+
if (o.Has("debug") &&
129+
(o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
130+
if (o.Get("debug").IsBoolean()) {
131+
config.debug = o.Get("debug").As<Napi::Boolean>().Value();
132+
} else {
133+
config.debug = o.Get("debug").As<Napi::Number>().Int32Value();
134+
}
130135
}
131136

132137
if (o.Has("modelType") && o.Get("modelType").IsString()) {

0 commit comments

Comments
 (0)