Commit 03c956a

Add keyword spotting API for node-addon-api (#877)

1 parent 75630b9 · commit 03c956a
18 files changed, +492 -26 lines changed

.github/scripts/node-addon/run.sh (+1, -1)

@@ -18,7 +18,7 @@ fi
 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
 
-# SHERPA_ONNX_VERSION=1.0.23
+# SHERPA_ONNX_VERSION=1.0.24
 
 if [ -z $owner ]; then
   owner=k2-fsa

.github/scripts/test-nodejs-addon-npm.sh (+9)

@@ -6,6 +6,15 @@ d=nodejs-addon-examples
 echo "dir: $d"
 cd $d
 
+echo "----------keyword spotting----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+
+node ./test_keyword_spotter_transducer.js
+rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
+
 echo "----------add punctuations----------"
 
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2

.github/workflows/npm-addon.yaml (+1, -1)

@@ -55,7 +55,7 @@ jobs:
 
 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
-# SHERPA_ONNX_VERSION=1.0.23
+# SHERPA_ONNX_VERSION=1.0.24
 
 src_dir=.github/scripts/node-addon
 sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json

nodejs-addon-examples/README.md (+21)

@@ -62,6 +62,13 @@ The following tables list the examples in this folder.
 |[./test_audio_tagging_zipformer.js](./test_audio_tagging_zipformer.js)| Audio tagging with a Zipformer model|
 |[./test_audio_tagging_ced.js](./test_audio_tagging_ced.js)| Audio tagging with a [CED](https://github.com/RicherMans/CED) model|
 
+## Keyword spotting
+
+|File| Description|
+|---|---|
+|[./test_keyword_spotter_transducer.js](./test_keyword_spotter_transducer.js)| Keyword spotting from a file using a Zipformer model|
+|[./test_keyword_spotter_transducer_microphone.js](./test_keyword_spotter_transducer_microphone.js)| Keyword spotting from a microphone using a Zipformer model|
+
 ## Streaming speech-to-text from files
 
 |File| Description|
@@ -325,3 +332,17 @@ rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
 
 node ./test_punctuation.js
 ```
+
+## Keyword spotting
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
+
+node ./test_keyword_spotter_transducer.js
+
+# To run keyword spotting using a microphone
+npm install naudiodon2
+node ./test_keyword_spotter_transducer_microphone.js
+```

nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js (-6)

@@ -79,11 +79,5 @@ ai.on('data', data => {
   }
 });
 
-ai.on('close', () => {
-  console.log('Free resources');
-  stream.free();
-  recognizer.free();
-});
-
 ai.start();
 console.log('Started! Please speak')

nodejs-addon-examples/test_asr_streaming_ctc_microphone.js (-5)

@@ -78,11 +78,6 @@ ai.on('data', data => {
   }
 });
 
-ai.on('close', () => {
-  console.log('Free resources');
-  stream.free();
-  recognizer.free();
-});
 
 ai.start();
 console.log('Started! Please speak')

nodejs-addon-examples/test_asr_streaming_paraformer_microphone.js (-6)

@@ -94,11 +94,5 @@ ai.on('data', data => {
   }
 });
 
-ai.on('close', () => {
-  console.log('Free resources');
-  stream.free();
-  recognizer.free();
-});
-
 ai.start();
 console.log('Started! Please speak')

nodejs-addon-examples/test_asr_streaming_transducer_microphone.js (-6)

@@ -82,11 +82,5 @@ ai.on('data', data => {
   }
 });
 
-ai.on('close', () => {
-  console.log('Free resources');
-  stream.free();
-  recognizer.free();
-});
-
 ai.start();
 console.log('Started! Please speak')

nodejs-addon-examples/test_keyword_spotter_transducer.js (+66)

@@ -0,0 +1,66 @@
+// Copyright (c) 2024 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx-node');
+const performance = require('perf_hooks').performance;
+
+
+// Please download test files from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+const config = {
+  'featConfig': {
+    'sampleRate': 16000,
+    'featureDim': 80,
+  },
+  'modelConfig': {
+    'transducer': {
+      'encoder':
+          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+      'decoder':
+          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+      'joiner':
+          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
+    },
+    'tokens':
+        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
+    'numThreads': 1,
+    'provider': 'cpu',
+    'debug': 1,
+  },
+  'keywordsFile':
+      './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt',
+};
+
+const waveFilename =
+    './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav';
+
+const kws = new sherpa_onnx.KeywordSpotter(config);
+console.log('Started')
+let start = performance.now();
+const stream = kws.createStream();
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
+
+const tailPadding = new Float32Array(wave.sampleRate * 0.4);
+stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
+
+const detectedKeywords = [];
+while (kws.isReady(stream)) {
+  const keyword = kws.getResult(stream).keyword;
+  if (keyword != '') {
+    detectedKeywords.push(keyword);
+  }
+  kws.decode(stream);
+}
+let stop = performance.now();
+
+console.log('Done')
+
+const elapsed_seconds = (stop - start) / 1000;
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds')
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3))
+console.log(waveFilename)
+console.log('result\n', detectedKeywords)

nodejs-addon-examples/test_keyword_spotter_transducer_microphone.js (+74)

@@ -0,0 +1,74 @@
+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const portAudio = require('naudiodon2');
+// console.log(portAudio.getDevices());
+
+const sherpa_onnx = require('sherpa-onnx-node');
+
+function createKeywordSpotter() {
+  const config = {
+    'featConfig': {
+      'sampleRate': 16000,
+      'featureDim': 80,
+    },
+    'modelConfig': {
+      'transducer': {
+        'encoder':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+        'decoder':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx',
+        'joiner':
+            './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx',
+      },
+      'tokens':
+          './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt',
+      'numThreads': 2,
+      'provider': 'cpu',
+      'debug': 1,
+    },
+    'keywordsFile':
+        './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt',
+  };
+
+  return new sherpa_onnx.KeywordSpotter(config);
+}
+
+const kws = createKeywordSpotter();
+const stream = kws.createStream();
+
+let lastText = '';
+let segmentIndex = 0;
+
+const ai = new portAudio.AudioIO({
+  inOptions: {
+    channelCount: 1,
+    closeOnError: true,  // Close the stream if an audio error is detected, if
+                         // set false then just log the error
+    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
+    sampleFormat: portAudio.SampleFormatFloat32,
+    sampleRate: kws.config.featConfig.sampleRate
+  }
+});
+
+const display = new sherpa_onnx.Display(50);
+
+ai.on('data', data => {
+  const samples = new Float32Array(data.buffer);
+
+  stream.acceptWaveform(
+      {sampleRate: kws.config.featConfig.sampleRate, samples: samples});
+
+  while (kws.isReady(stream)) {
+    kws.decode(stream);
+  }
+
+  const keyword = kws.getResult(stream).keyword
+  if (keyword != '') {
+    display.print(segmentIndex, keyword);
+    segmentIndex += 1;
+  }
+});
+
+ai.start();
+console.log('Started! Please speak.')
+console.log(`Only words from ${kws.config.keywordsFile} can be recognized`)

scripts/node-addon-api/CMakeLists.txt (+1)

@@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC})
 
 set(srcs
   src/audio-tagging.cc
+  src/keyword-spotting.cc
  src/non-streaming-asr.cc
  src/non-streaming-tts.cc
  src/punctuation.cc

scripts/node-addon-api/lib/keyword-spotter.js (+32)

@@ -0,0 +1,32 @@
+const addon = require('./addon.js');
+const streaming_asr = require('./streaming-asr.js');
+
+class KeywordSpotter {
+  constructor(config) {
+    this.handle = addon.createKeywordSpotter(config);
+    this.config = config
+  }
+
+  createStream() {
+    const handle = addon.createKeywordStream(this.handle);
+    return new streaming_asr.OnlineStream(handle);
+  }
+
+  isReady(stream) {
+    return addon.isKeywordStreamReady(this.handle, stream.handle);
+  }
+
+  decode(stream) {
+    addon.decodeKeywordStream(this.handle, stream.handle);
+  }
+
+  getResult(stream) {
+    const jsonStr = addon.getKeywordResultAsJson(this.handle, stream.handle);
+
+    return JSON.parse(jsonStr);
+  }
+}
+
+module.exports = {
+  KeywordSpotter,
+}
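
For orientation, here is a minimal sketch of how this wrapper is consumed from user code once it is re-exported through sherpa-onnx.js below. It mirrors test_keyword_spotter_transducer.js added in this commit; the model directory, model file names, and keyword-file path are taken from that example and are assumptions if you use a different pretrained model.

```js
// Minimal usage sketch of the KeywordSpotter API added in this commit.
const sherpa_onnx = require('sherpa-onnx-node');

// Path assumptions: the model directory used by the examples in this commit.
const modelDir = './sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01';

const kws = new sherpa_onnx.KeywordSpotter({
  featConfig: {sampleRate: 16000, featureDim: 80},
  modelConfig: {
    transducer: {
      encoder: `${modelDir}/encoder-epoch-12-avg-2-chunk-16-left-64.onnx`,
      decoder: `${modelDir}/decoder-epoch-12-avg-2-chunk-16-left-64.onnx`,
      joiner: `${modelDir}/joiner-epoch-12-avg-2-chunk-16-left-64.onnx`,
    },
    tokens: `${modelDir}/tokens.txt`,
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
  },
  keywordsFile: `${modelDir}/test_wavs/test_keywords.txt`,
});

// Feed a whole wave file, then drain the stream: call decode() while
// isReady() is true and read each result's .keyword field (an empty
// string means nothing was detected at that step).
const stream = kws.createStream();
const wave = sherpa_onnx.readWave(`${modelDir}/test_wavs/3.wav`);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

while (kws.isReady(stream)) {
  const keyword = kws.getResult(stream).keyword;
  if (keyword != '') {
    console.log('Detected:', keyword);
  }
  kws.decode(stream);
}
```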

scripts/node-addon-api/lib/sherpa-onnx.js (+2)

@@ -7,6 +7,7 @@ const slid = require('./spoken-language-identification.js');
 const sid = require('./speaker-identification.js');
 const at = require('./audio-tagg.js');
 const punct = require('./punctuation.js');
+const kws = require('./keyword-spotter.js');
 
 module.exports = {
   OnlineRecognizer: streaming_asr.OnlineRecognizer,
@@ -22,4 +23,5 @@ module.exports = {
   SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
   AudioTagging: at.AudioTagging,
   Punctuation: punct.Punctuation,
+  KeywordSpotter: kws.KeywordSpotter,
 }
