Skip to content

Commit 6a97f8a

Browse files
authored
Add JavaScript (node-addon) API for speech enhancement GTCRN models (#1996)
1 parent fd78a48 commit 6a97f8a

21 files changed

+500
-119
lines changed

.github/scripts/test-nodejs-addon-npm.sh

+9
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()")
1010
platform=$(node -p "require('os').platform()")
1111
node_version=$(node -p "process.versions.node.split('.')[0]")
1212

13+
echo "----------non-streaming speech denoiser----------"
14+
15+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
16+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
17+
18+
node ./test_offline_speech_enhancement_gtcrn.js
19+
rm gtcrn_simple.onnx
20+
ls -lh *.wav
21+
1322
echo "----------non-streaming asr FireRedAsr----------"
1423
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
1524
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2

flutter/sherpa_onnx/example/example.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@ Hint: All of the following functions can be used in Flutter, even if some of the
2222
|Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux|
2323
|Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux|
2424
|Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux|
25-
|Add punctuions| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux|
25+
|Add punctuations| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux|
2626
|Speech enhancement/denoising| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speech-enhancement-gtcrn)| macOS, Windows, Linux|

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ add_library(sherpa_onnx SHARED
4444
keyword-spotting.cc
4545
non-streaming-asr.cc
4646
non-streaming-speaker-diarization.cc
47+
non-streaming-speech-denoiser.cc
4748
non-streaming-tts.cc
4849
punctuation.cc
4950
sherpa-onnx-node-addon-api.cc

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
121121
return {};
122122
}
123123

124-
SherpaOnnxAudioTagging *at =
124+
const SherpaOnnxAudioTagging *at =
125125
info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
126126

127127
const SherpaOnnxOfflineStream *stream =
@@ -169,10 +169,10 @@ static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
169169
return {};
170170
}
171171

172-
SherpaOnnxAudioTagging *at =
172+
const SherpaOnnxAudioTagging *at =
173173
info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
174174

175-
SherpaOnnxOfflineStream *stream =
175+
const SherpaOnnxOfflineStream *stream =
176176
info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
177177

178178
int32_t top_k = info[2].As<Napi::Number>().Int32Value();

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc

+5-5
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ static Napi::External<SherpaOnnxOfflineStream> CreateOfflineStreamWrapper(
341341
return {};
342342
}
343343

344-
SherpaOnnxOfflineRecognizer *recognizer =
344+
const SherpaOnnxOfflineRecognizer *recognizer =
345345
info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
346346

347347
const SherpaOnnxOfflineStream *stream =
@@ -373,7 +373,7 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) {
373373
return;
374374
}
375375

376-
SherpaOnnxOfflineStream *stream =
376+
const SherpaOnnxOfflineStream *stream =
377377
info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
378378

379379
if (!info[1].IsObject()) {
@@ -454,10 +454,10 @@ static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) {
454454
return;
455455
}
456456

457-
SherpaOnnxOfflineRecognizer *recognizer =
457+
const SherpaOnnxOfflineRecognizer *recognizer =
458458
info[0].As<Napi::External<SherpaOnnxOfflineRecognizer>>().Data();
459459

460-
SherpaOnnxOfflineStream *stream =
460+
const SherpaOnnxOfflineStream *stream =
461461
info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
462462

463463
SherpaOnnxDecodeOfflineStream(recognizer, stream);
@@ -482,7 +482,7 @@ static Napi::String GetOfflineStreamResultAsJsonWrapper(
482482
return {};
483483
}
484484

485-
SherpaOnnxOfflineStream *stream =
485+
const SherpaOnnxOfflineStream *stream =
486486
info[0].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
487487

488488
const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
// scripts/node-addon-api/src/non-streaming-speech-denoiser.cc
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
#include <sstream>
5+
6+
#include "macros.h" // NOLINT
7+
#include "napi.h" // NOLINT
8+
#include "sherpa-onnx/c-api/c-api.h"
9+
10+
// Builds the GTCRN part of the denoiser model config from a JS object.
//
// @param obj JS object that may carry an object-valued "gtcrn" property.
// @return A zero-filled config when "gtcrn" is absent or is not an object;
//         otherwise a config whose `model` field is filled from
//         obj.gtcrn.model by SHERPA_ONNX_ASSIGN_ATTR_STR.
//
// NOTE(review): CreateOfflineSpeechDenoiserWrapper releases `model` with
// SHERPA_ONNX_DELETE_C_STR, so the macro presumably heap-allocates the
// string -- confirm in macros.h.
static SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig
GetOfflineSpeechDenoiserGtcrnModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig c;
  memset(&c, 0, sizeof(c));

  const bool has_gtcrn = obj.Has("gtcrn") && obj.Get("gtcrn").IsObject();
  if (has_gtcrn) {
    // The locals must be called `o` and `c`: the macro below presumably
    // refers to them by name (verify against macros.h).
    Napi::Object o = obj.Get("gtcrn").As<Napi::Object>();

    SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  }

  return c;
}
25+
26+
// Builds the denoiser model config from the "model" property of a JS object.
//
// @param obj JS object that may carry an object-valued "model" property.
// @return A zero-filled config when "model" is absent or is not an object.
//         Otherwise the fields are taken from obj.model:
//           - gtcrn       <- GetOfflineSpeechDenoiserGtcrnModelConfig()
//           - num_threads <- numThreads
//           - debug       <- debug (boolean or number; other types ignored)
//           - provider    <- provider
static SherpaOnnxOfflineSpeechDenoiserModelConfig
GetOfflineSpeechDenoiserModelConfig(Napi::Object obj) {
  SherpaOnnxOfflineSpeechDenoiserModelConfig c;
  memset(&c, 0, sizeof(c));

  if (!(obj.Has("model") && obj.Get("model").IsObject())) {
    return c;
  }

  // Keep the names `o` and `c`: the SHERPA_ONNX_ASSIGN_ATTR_* macros
  // presumably refer to them by name (verify against macros.h).
  Napi::Object o = obj.Get("model").As<Napi::Object>();

  c.gtcrn = GetOfflineSpeechDenoiserGtcrnModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  if (o.Has("debug")) {
    // Accept both `debug: true` and `debug: 1`.
    Napi::Value debug_value = o.Get("debug");
    if (debug_value.IsBoolean()) {
      c.debug = debug_value.As<Napi::Boolean>().Value();
    } else if (debug_value.IsNumber()) {
      c.debug = debug_value.As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}
54+
55+
// JS binding: creates an offline speech denoiser from a config object.
//
// Arguments (from JS):
//   info[0]  config object; its "model" property is parsed by
//            GetOfflineSpeechDenoiserModelConfig().
//   info[1]  (HarmonyOS builds only) the NativeResourceManager.
//
// Returns an External wrapping the native denoiser; the attached finalizer
// calls SherpaOnnxDestroyOfflineSpeechDenoiser(). On a wrong argument count,
// a non-object config, or a failed creation, a JS TypeError is thrown and an
// empty value is returned.
static Napi::External<SherpaOnnxOfflineSpeechDenoiser>
CreateOfflineSpeechDenoiserWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

#if __OHOS__
  // the last argument is the NativeResourceManager
  const size_t expected_args = 2;
  const char *arity_msg = "Expect only 2 arguments. Given: ";
#else
  const size_t expected_args = 1;
  const char *arity_msg = "Expect only 1 argument. Given: ";
#endif

  if (info.Length() != expected_args) {
    std::ostringstream os;
    os << arity_msg << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxOfflineSpeechDenoiserConfig config;
  memset(&config, 0, sizeof(config));
  config.model =
      GetOfflineSpeechDenoiserModelConfig(info[0].As<Napi::Object>());

#if __OHOS__
  std::unique_ptr<NativeResourceManager,
                  decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
      mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
          &OH_ResourceManager_ReleaseNativeResourceManager);

  const SherpaOnnxOfflineSpeechDenoiser *denoiser =
      SherpaOnnxCreateOfflineSpeechDenoiserOHOS(&config, mgr.get());
#else
  const SherpaOnnxOfflineSpeechDenoiser *denoiser =
      SherpaOnnxCreateOfflineSpeechDenoiser(&config);
#endif

  // The string fields of the config are released here, before the result of
  // the creation call is inspected, so they are freed on both paths.
  SHERPA_ONNX_DELETE_C_STR(config.model.gtcrn.model);
  SHERPA_ONNX_DELETE_C_STR(config.model.provider);

  if (denoiser == nullptr) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();

    return {};
  }

  return Napi::External<SherpaOnnxOfflineSpeechDenoiser>::New(
      env, const_cast<SherpaOnnxOfflineSpeechDenoiser *>(denoiser),
      [](Napi::Env /*env*/, SherpaOnnxOfflineSpeechDenoiser *ptr) {
        SherpaOnnxDestroyOfflineSpeechDenoiser(ptr);
      });
}
121+
122+
// JS binding: runs the offline speech denoiser on a chunk of audio.
//
// Arguments (from JS):
//   info[0]  External created by CreateOfflineSpeechDenoiserWrapper().
//   info[1]  object with the properties
//              samples               typed array with the input samples
//              sampleRate            number
//              enableExternalBuffer  optional boolean, defaults to true
//
// Returns an object {samples: Float32Array, sampleRate: number} holding the
// denoised audio. When enableExternalBuffer is true the returned array wraps
// the native buffer directly and the native audio is released by the
// ArrayBuffer finalizer; otherwise the samples are copied into a JS-owned
// buffer and the native audio is released before returning.
//
// On invalid arguments a JS TypeError is thrown and an empty value is
// returned. If the native call yields no audio, a JS Error is thrown.
//
// NOTE(review): `samples` is only checked with IsTypedArray() and then viewed
// as a Float32Array; callers are presumably expected to pass a Float32Array.
static Napi::Object OfflineSpeechDenoiserRunWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speech denoiser pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeechDenoiser *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeechDenoiser>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("samples")) {
    Napi::TypeError::New(env, "The argument object should have a field samples")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("samples").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['samples'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sampleRate")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field sampleRate")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sampleRate").IsNumber()) {
    // Fixed: this message used to name object['samples'] by mistake.
    Napi::TypeError::New(env, "The object['sampleRate'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();

  const SherpaOnnxDenoisedAudio *audio = nullptr;

#if __OHOS__
  // Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by
  // sizeof(float) here
  audio = SherpaOnnxOfflineSpeechDenoiserRun(
      sd, samples.Data(), samples.ElementLength() / sizeof(float), sample_rate);
#else
  audio = SherpaOnnxOfflineSpeechDenoiserRun(
      sd, samples.Data(), samples.ElementLength(), sample_rate);
#endif

  // Defensive: never dereference a null result from the C API.
  if (audio == nullptr) {
    Napi::Error::New(env, "Failed to run the offline speech denoiser")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  // Read these once so that nothing below touches `audio` after ownership
  // has been handed to the finalizer or the audio has been destroyed.
  const int32_t num_samples = audio->n;
  const int32_t out_sample_rate = audio->sample_rate;

  Napi::Object ans = Napi::Object::New(env);

  if (enable_external_buffer) {
    // Zero-copy: the ArrayBuffer aliases the native samples; the finalizer
    // releases the native audio when the buffer is garbage collected.
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(audio->samples), sizeof(float) * num_samples,
        [](Napi::Env /*env*/, void * /*data*/,
           const SherpaOnnxDenoisedAudio *hint) {
          SherpaOnnxDestroyDenoisedAudio(hint);
        },
        audio);
    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, num_samples, arrayBuffer, 0);

    ans.Set(Napi::String::New(env, "samples"), float32Array);
  } else {
    // don't use external buffer: copy into a JS-owned buffer, then release
    // the native audio right away.
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * num_samples);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, num_samples, arrayBuffer, 0);

    std::copy(audio->samples, audio->samples + num_samples,
              float32Array.Data());

    SherpaOnnxDestroyDenoisedAudio(audio);

    ans.Set(Napi::String::New(env, "samples"), float32Array);
  }

  ans.Set(Napi::String::New(env, "sampleRate"), out_sample_rate);

  return ans;
}
238+
239+
// JS binding: returns the sample rate reported by the native denoiser.
//
// Arguments (from JS):
//   info[0]  External created by CreateOfflineSpeechDenoiserWrapper().
//
// Returns the value of SherpaOnnxOfflineSpeechDenoiserGetSampleRate() as a
// JS number. On a wrong argument count or a non-External argument, a JS
// TypeError is thrown and an empty value is returned.
static Napi::Number OfflineSpeechDenoiserGetSampleRateWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << num_args;

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speech denoiser pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeechDenoiser *denoiser =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeechDenoiser>>().Data();

  return Napi::Number::New(
      env, SherpaOnnxOfflineSpeechDenoiserGetSampleRate(denoiser));
}
267+
268+
// Registers the speech-denoiser bindings on the addon's `exports` object.
//
// Exported JS names:
//   createOfflineSpeechDenoiser
//   offlineSpeechDenoiserRunWrapper
//   offlineSpeechDenoiserGetSampleRateWrapper
void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports) {
  Napi::Function create_fn =
      Napi::Function::New(env, CreateOfflineSpeechDenoiserWrapper);
  exports.Set(Napi::String::New(env, "createOfflineSpeechDenoiser"),
              create_fn);

  Napi::Function run_fn =
      Napi::Function::New(env, OfflineSpeechDenoiserRunWrapper);
  exports.Set(Napi::String::New(env, "offlineSpeechDenoiserRunWrapper"),
              run_fn);

  Napi::Function get_sample_rate_fn =
      Napi::Function::New(env, OfflineSpeechDenoiserGetSampleRateWrapper);
  exports.Set(
      Napi::String::New(env, "offlineSpeechDenoiserGetSampleRateWrapper"),
      get_sample_rate_fn);
}

0 commit comments

Comments
 (0)