Skip to content

Commit 384f96c

Browse files
authored
Add streaming CTC ASR APIs for node-addon-api (#867)
1 parent db85b2c commit 384f96c

15 files changed

+445
-31
lines changed

.github/scripts/test-nodejs-addon-npm.sh

+11-9
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,6 @@ set -ex
55
d=nodejs-addon-examples
66
echo "dir: $d"
77
cd $d
8-
npm install --verbose
9-
git status
10-
ls -lh
11-
ls -lh node_modules
12-
13-
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
14-
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
15-
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
16-
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
178

189
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
1910
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
@@ -22,3 +13,14 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
2213
node test_asr_streaming_transducer.js
2314

2415
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
16+
17+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
18+
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
19+
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
20+
21+
node ./test_asr_streaming_ctc.js
22+
23+
# To decode with HLG.fst
24+
node ./test_asr_streaming_ctc_hlg.js
25+
26+
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

.github/workflows/test-nodejs-addon-api.yaml

+16-10
Original file line numberDiff line numberDiff line change
@@ -152,17 +152,23 @@ jobs:
152152
153153
./node_modules/.bin/cmake-js compile --log-level verbose
154154
155-
- name: Test streaming transducer
155+
- name: Run tests
156156
shell: bash
157157
run: |
158158
export PATH=$PWD/build/install/lib:$PATH
159159
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
160-
161-
cd scripts/node-addon-api
162-
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
163-
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
164-
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
165-
166-
node test/test_asr_streaming_transducer.js
167-
168-
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
160+
d=nodejs-addon-examples
161+
cd $d
162+
files=$(ls *.js)
163+
echo $files
164+
for f in ${files[@]}; do
165+
echo $f
166+
sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
167+
done
168+
cd ..
169+
170+
cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
171+
cp -v scripts/node-addon-api/lib/*.js $d/
172+
cp -v ./build/install/lib/lib* $d/
173+
174+
.github/scripts/test-nodejs-addon-npm.sh

.github/workflows/test-nodejs-addon-npm.yaml

+15
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,19 @@ jobs:
6363
- name: Run tests
6464
shell: bash
6565
run: |
66+
d=nodejs-addon-examples
67+
echo "dir: $d"
68+
cd $d
69+
npm install --verbose
70+
git status
71+
ls -lh
72+
ls -lh node_modules
73+
74+
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
75+
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
76+
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
77+
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
78+
79+
cd ../
80+
6681
.github/scripts/test-nodejs-addon-npm.sh

nodejs-addon-examples/README.md

+24-6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,18 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
2727
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
2828
```
2929

30+
## Voice Activity Detection (VAD)
31+
32+
```bash
33+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
34+
35+
36+
# To run the test with a microphone, you need to install the package naudiodon2
37+
npm install naudiodon2
38+
39+
node ./test_vad_microphone.js
40+
```
41+
3042
## Streaming speech recognition with zipformer transducer
3143

3244
```bash
@@ -36,21 +48,27 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
3648

3749
node ./test_asr_streaming_transducer.js
3850

39-
# To run the test with microphone, you need to install the package naudiodon2
51+
# To run the test with a microphone, you need to install the package naudiodon2
4052
npm install naudiodon2
4153

4254
node ./test_asr_streaming_transducer_microphone.js
4355
```
4456

45-
# VAD
57+
## Streaming speech recognition with zipformer CTC
4658

4759
```bash
48-
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
60+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
61+
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
62+
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
4963

64+
node ./test_asr_streaming_ctc.js
5065

51-
# To run the test with microphone, you need to install the package naudiodon2
66+
# To decode with HLG.fst
67+
node ./test_asr_streaming_ctc_hlg.js
68+
69+
# To run the test with a microphone, you need to install the package naudiodon2
5270
npm install naudiodon2
5371

54-
node ./test_vad_microphone.js
72+
node ./test_asr_streaming_ctc_microphone.js
73+
node ./test_asr_streaming_ctc_hlg_microphone.js
5574
```
56-
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// Copyright (c) 2024 Xiaomi Corporation

// Decode a single wave file with a streaming zipformer CTC model
// (greedy search) and report the real-time factor (RTF).
//
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// Recognizer configuration: 16 kHz / 80-dim fbank features and an
// int8-quantized streaming zipformer2 CTC model.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
const start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Append ~0.4 s of trailing silence so the model flushes its final frames.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
// Fix: declare with const — the original assigned to an undeclared name,
// creating an implicit global.
const result = recognizer.getResult(stream);
const stop = performance.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
// Fix: corrected the 'secodns' typo in the user-visible output.
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// Copyright (c) 2024 Xiaomi Corporation

// Decode a single wave file with a streaming zipformer CTC model using an
// HLG.fst graph for decoding, and report the real-time factor (RTF).
//
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;

// Recognizer configuration: 16 kHz / 80-dim fbank features, an
// int8-quantized streaming zipformer2 CTC model, and an HLG decoding graph.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'ctcFstDecoderConfig': {
    'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  },
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');
const start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Append ~0.4 s of trailing silence so the model flushes its final frames.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}
// Fix: declare with const — the original assigned to an undeclared name,
// creating an implicit global.
const result = recognizer.getResult(stream);
const stop = performance.now();
console.log('Done');

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
// Fix: corrected the 'secodns' typo in the user-visible output.
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Real-time microphone demo: stream audio from the default input device
// into a streaming zipformer CTC recognizer (HLG.fst decoding) and print
// partial results as they become available.
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

// Build an online recognizer configured for 16 kHz / 80-dim fbank features,
// an int8-quantized streaming zipformer2 CTC model, an HLG decoding graph,
// and endpoint detection so utterances are segmented automatically.
function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformer2Ctc': {
        'model':
            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      'tokens':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'ctcFstDecoderConfig': {
      'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
    },
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

let lastText = '';      // last text we printed, to avoid duplicate updates
let segmentIndex = 0;   // index of the current utterance segment

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Fix: use strict inequality (!==) instead of the loose != comparison.
  if (text.length > 0 && lastText !== text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;  // an endpoint ends the current segment
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');

0 commit comments

Comments
 (0)