
Commit 7774e35

feat: add mic example for better compatibility (#1909)
Co-authored-by: wanghsinche <wanghsinche>
1 parent 94728bf commit 7774e35

File tree

3 files changed: +229 -1 lines changed

nodejs-examples/README.md
nodejs-examples/package.json
nodejs-examples/test-online-paraformer-microphone-mic.js

nodejs-examples/README.md (+22 -1)
@@ -1,6 +1,8 @@
 # Introduction
 
-Note: You need `Node >= 18`.
+Note: You need `Node >= 18`.
+
+Note: For Mac M1 and other Apple silicon chips, check the example `test-online-paraformer-microphone-mic.js`.
 
 This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
 
@@ -278,6 +280,26 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
 node ./test-online-paraformer-microphone.js
 ```
 
+## ./test-online-paraformer-microphone-mic.js
+
+[./test-online-paraformer-microphone-mic.js](./test-online-paraformer-microphone-mic.js)
+demonstrates how to do real-time speech recognition from a microphone
+with a streaming Paraformer model. In the code we use
+[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
+
+It uses `mic` for better compatibility; check its [npm page](https://www.npmjs.com/package/mic) before running it.
+
+You can use the following commands to run it:
+
+```bash
+wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
+tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
+rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
+node ./test-online-paraformer-microphone-mic.js
+```
+
 ## ./test-online-paraformer.js
 [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
 how to decode a file using a streaming Paraformer model. In the code we use
nodejs-examples/package.json (+1)

@@ -1,5 +1,6 @@
 {
   "dependencies": {
+    "mic": "^2.1.2",
     "naudiodon2": "^2.4.0",
     "sherpa-onnx": "^1.10.45",
     "wav": "^1.0.2"
nodejs-examples/test-online-paraformer-microphone-mic.js (new file, +206)

@@ -0,0 +1,206 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
// This example uses `mic` for better compatibility; see its npm page
// (https://www.npmjs.com/package/mic) before running it.
const mic = require('mic');
const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    // Endpointing (values in seconds): rule1 fires on trailing silence even
    // if nothing has been decoded, rule2 on trailing silence after some text
    // has been decoded, and rule3 when the utterance exceeds the given length.
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

/**
 * SpeechSession: a session manager used together with the formatOutput
 * function.
 * Sample output:

=== Automated Speech Recognition ===
Current Session #1
Time: 8:44:46 PM
------------------------
Recognized Sentences:
[8:44:43 PM] 1. it's so great three result is great great 她还支持中文
[8:44:46 PM] 2. 很厉
------------------------
Recognizing: 真的很厉害太厉害

 */
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  finalizeSentence() {
    if (this.currentText.trim()) {
      this.sentences.push({
        text: this.currentText.trim(),
        timestamp: new Date().toLocaleTimeString()
      });
    }
    this.currentText = '';
  }

  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > 10000;  // 10 seconds of silence
  }
}

function formatOutput() {
  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  // Show the sentences recognized so far
  if (currentSession.sentences.length > 0) {
    console.log('Recognized Sentences:');
    currentSession.sentences.forEach((sentence, index) => {
      console.log(`[${sentence.timestamp}] ${index + 1}. ${sentence.text}`);
    });
    console.log('------------------------');
  }

  // Show the text that is currently being recognized
  if (currentSession.currentText) {
    console.log('Recognizing:', currentSession.currentText);
  }
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let currentSession = new SpeechSession();
let sessionCount = 1;

function clearConsole() {
  process.stdout.write('\x1B[2J\x1B[0f');
}

function exitHandler(options, exitCode) {
  if (options.cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }
  if (exitCode || exitCode === 0) console.log('Exit code:', exitCode);
  if (options.exit) process.exit();
}

const micInstance = mic({
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false,  // disable debug output
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw'
});

const micInputStream = micInstance.getAudioStream();

function startMic() {
  return new Promise((resolve, reject) => {
    micInputStream.once('startComplete', () => {
      console.log('Microphone started.');
      resolve();
    });

    micInputStream.once('error', (err) => {
      console.error('Microphone start error:', err);
      reject(err);
    });

    micInstance.start();
  });
}

micInputStream.on('data', buffer => {
  // Convert raw 16-bit signed PCM to float samples in [-1, 1).
  // Respect the Buffer's byteOffset, since a Node Buffer may be a view
  // into a larger ArrayBuffer.
  const int16Array =
      new Int16Array(buffer.buffer, buffer.byteOffset, buffer.length / 2);
  const samples = new Float32Array(int16Array.length);

  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Check whether a new session should be started
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }

    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  if (isEndpoint) {
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});

micInputStream.on('error', err => {
  console.error('Audio stream error:', err);
});

micInputStream.on('close', () => {
  console.log('Microphone closed.');
});

process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));

async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (err) {
    console.error('Failed to initialize:', err);
    process.exit(1);
  }
}

main();
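
One detail worth calling out from the `data` handler above: dividing each 16-bit sample by 32768 maps the full signed range into [-1, 1), which is the float scale the recognizer's `acceptWaveform` consumes. A quick standalone check of that conversion (illustrative values only):

```js
// Sanity check: int16 PCM -> float32 in [-1, 1).
const int16 = new Int16Array([-32768, -16384, 0, 16384, 32767]);
const floats = Float32Array.from(int16, (s) => s / 32768.0);
console.log(Array.from(floats));
// -> [ -1, -0.5, 0, 0.5, 0.999969482421875 ]
```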
