Skip to content

Commit b1c7d04

Browse files
authored
Support not using external buffers for node-addon (#925)
1 parent cd65e76 commit b1c7d04

File tree

11 files changed

+228
-82
lines changed

11 files changed

+228
-82
lines changed

.github/scripts/node-addon/run.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ fi
1818
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
1919
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
2020

21-
# SHERPA_ONNX_VERSION=1.0.25
21+
# SHERPA_ONNX_VERSION=1.0.27
2222

2323
if [ -z $owner ]; then
2424
owner=k2-fsa

.github/workflows/npm-addon.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ jobs:
5555
5656
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
5757
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
58-
# SHERPA_ONNX_VERSION=1.0.25
58+
# SHERPA_ONNX_VERSION=1.0.27
5959
6060
src_dir=.github/scripts/node-addon
6161
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json

nodejs-addon-examples/package.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
{
22
"dependencies": {
3-
"sherpa-onnx-node": "^1.0.25"
3+
"sherpa-onnx-node": "^1.0.27"
44
}
55
}

nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js

+6 -1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,12 @@ const tts = createOfflineTts();
2424
const text = 'Alles hat ein Ende, nur die Wurst hat zwei.'
2525

2626
let start = Date.now();
27-
const audio = tts.generate({text: text, sid: 0, speed: 1.0});
27+
const audio = tts.generate({
28+
text: text,
29+
sid: 0,
30+
speed: 1.0,
31+
enableExternalBuffer: true,
32+
});
2833
let stop = Date.now();
2934
const elapsed_seconds = (stop - start) / 1000;
3035
const duration = audio.samples.length / audio.sampleRate;

scripts/check_style_cpplint.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,7 @@ function do_check() {
9999
;;
100100
2)
101101
echo "Check all files"
102-
files=$(find $sherpa_onnx_dir/sherpa-onnx -name "*.h" -o -name "*.cc")
102+
files=$(find $sherpa_onnx_dir/sherpa-onnx/csrc $sherpa_onnx_dir/sherpa-onnx/python $sherpa_onnx_dir/scripts/node-addon-api/src $sherpa_onnx_dir/sherpa-onnx/jni $sherpa_onnx_dir/sherpa-onnx/c-api -name "*.h" -o -name "*.cc")
103103
;;
104104
*)
105105
echo "Check last commit"

scripts/node-addon-api/lib/speaker-identification.js

+2 -2
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@ class SpeakerEmbeddingExtractor {
1818
}
1919

2020
// return a float32 array
21-
compute(stream) {
21+
compute(stream, enableExternalBuffer = true) {
2222
return addon.speakerEmbeddingExtractorComputeEmbedding(
23-
this.handle, stream.handle);
23+
this.handle, stream.handle, enableExternalBuffer);
2424
}
2525
}
2626

scripts/node-addon-api/lib/vad.js

+11 -10
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,9 @@ class CircularBuffer {
1111
}
1212

1313
// return a float32 array
14-
get(startIndex, n) {
15-
return addon.circularBufferGet(this.handle, startIndex, n);
14+
get(startIndex, n, enableExternalBuffer = true) {
15+
return addon.circularBufferGet(
16+
this.handle, startIndex, n, enableExternalBuffer);
1617
}
1718

1819
pop(n) {
@@ -48,23 +49,23 @@ config = {
4849
}
4950

5051
acceptWaveform(samples) {
51-
addon.voiceActivityDetectorAcceptWaveform(this.handle, samples)
52+
addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
5253
}
5354

5455
isEmpty() {
55-
return addon.voiceActivityDetectorIsEmpty(this.handle)
56+
return addon.voiceActivityDetectorIsEmpty(this.handle);
5657
}
5758

5859
isDetected() {
59-
return addon.voiceActivityDetectorIsDetected(this.handle)
60+
return addon.voiceActivityDetectorIsDetected(this.handle);
6061
}
6162

6263
pop() {
63-
addon.voiceActivityDetectorPop(this.handle)
64+
addon.voiceActivityDetectorPop(this.handle);
6465
}
6566

6667
clear() {
67-
addon.VoiceActivityDetectorClearWrapper(this.handle)
68+
addon.VoiceActivityDetectorClearWrapper(this.handle);
6869
}
6970

7071
/*
@@ -73,12 +74,12 @@ config = {
7374
start: a int32
7475
}
7576
*/
76-
front() {
77-
return addon.voiceActivityDetectorFront(this.handle)
77+
front(enableExternalBuffer = true) {
78+
return addon.voiceActivityDetectorFront(this.handle, enableExternalBuffer);
7879
}
7980

8081
reset() {
81-
return addon.VoiceActivityDetectorResetWrapper(this.handle)
82+
return addon.VoiceActivityDetectorResetWrapper(this.handle);
8283
}
8384
}
8485

scripts/node-addon-api/src/non-streaming-tts.cc

+39 -14
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
//
33
// Copyright (c) 2024 Xiaomi Corporation
44

5+
#include <algorithm>
56
#include <sstream>
67

78
#include "macros.h" // NOLINT
@@ -265,6 +266,13 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
265266
return {};
266267
}
267268

269+
bool enable_external_buffer = true;
270+
if (obj.Has("enableExternalBuffer") &&
271+
obj.Get("enableExternalBuffer").IsBoolean()) {
272+
enable_external_buffer =
273+
obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
274+
}
275+
268276
Napi::String _text = obj.Get("text").As<Napi::String>();
269277
std::string text = _text.Utf8Value();
270278
int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
@@ -273,20 +281,37 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
273281
const SherpaOnnxGeneratedAudio *audio =
274282
SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);
275283

276-
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
277-
env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
278-
[](Napi::Env /*env*/, void * /*data*/,
279-
const SherpaOnnxGeneratedAudio *hint) {
280-
SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
281-
},
282-
audio);
283-
Napi::Float32Array float32Array =
284-
Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);
285-
286-
Napi::Object ans = Napi::Object::New(env);
287-
ans.Set(Napi::String::New(env, "samples"), float32Array);
288-
ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
289-
return ans;
284+
if (enable_external_buffer) {
285+
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
286+
env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
287+
[](Napi::Env /*env*/, void * /*data*/,
288+
const SherpaOnnxGeneratedAudio *hint) {
289+
SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
290+
},
291+
audio);
292+
Napi::Float32Array float32Array =
293+
Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);
294+
295+
Napi::Object ans = Napi::Object::New(env);
296+
ans.Set(Napi::String::New(env, "samples"), float32Array);
297+
ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
298+
return ans;
299+
} else {
300+
// don't use external buffer
301+
Napi::ArrayBuffer arrayBuffer =
302+
Napi::ArrayBuffer::New(env, sizeof(float) * audio->n);
303+
304+
Napi::Float32Array float32Array =
305+
Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);
306+
307+
std::copy(audio->samples, audio->samples + audio->n, float32Array.Data());
308+
309+
Napi::Object ans = Napi::Object::New(env);
310+
ans.Set(Napi::String::New(env, "samples"), float32Array);
311+
ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
312+
SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
313+
return ans;
314+
}
290315
}
291316

292317
void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {

scripts/node-addon-api/src/speaker-identification.cc

+35 -9
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// scripts/node-addon-api/src/speaker-identification.cc
22
//
33
// Copyright (c) 2024 Xiaomi Corporation
4+
#include <algorithm>
45
#include <sstream>
56

67
#include "macros.h" // NOLINT
@@ -175,9 +176,9 @@ static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper(
175176
static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
176177
const Napi::CallbackInfo &info) {
177178
Napi::Env env = info.Env();
178-
if (info.Length() != 2) {
179+
if (info.Length() != 2 && info.Length() != 3) {
179180
std::ostringstream os;
180-
os << "Expect only 2 arguments. Given: " << info.Length();
181+
os << "Expect only 2 or 3 arguments. Given: " << info.Length();
181182

182183
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
183184

@@ -199,6 +200,16 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
199200
return {};
200201
}
201202

203+
bool enable_external_buffer = true;
204+
if (info.Length() == 3) {
205+
if (info[2].IsBoolean()) {
206+
enable_external_buffer = info[2].As<Napi::Boolean>().Value();
207+
} else {
208+
Napi::TypeError::New(env, "Argument 2 should be a boolean.")
209+
.ThrowAsJavaScriptException();
210+
}
211+
}
212+
202213
SherpaOnnxSpeakerEmbeddingExtractor *extractor =
203214
info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
204215

@@ -210,14 +221,29 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
210221

211222
int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);
212223

213-
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
214-
env, const_cast<float *>(v), sizeof(float) * dim,
215-
[](Napi::Env /*env*/, void *data) {
216-
SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(
217-
reinterpret_cast<float *>(data));
218-
});
224+
if (enable_external_buffer) {
225+
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
226+
env, const_cast<float *>(v), sizeof(float) * dim,
227+
[](Napi::Env /*env*/, void *data) {
228+
SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(
229+
reinterpret_cast<float *>(data));
230+
});
231+
232+
return Napi::Float32Array::New(env, dim, arrayBuffer, 0);
233+
} else {
234+
// don't use external buffer
235+
Napi::ArrayBuffer arrayBuffer =
236+
Napi::ArrayBuffer::New(env, sizeof(float) * dim);
237+
238+
Napi::Float32Array float32Array =
239+
Napi::Float32Array::New(env, dim, arrayBuffer, 0);
219240

220-
return Napi::Float32Array::New(env, dim, arrayBuffer, 0);
241+
std::copy(v, v + dim, float32Array.Data());
242+
243+
SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v);
244+
245+
return float32Array;
246+
}
221247
}
222248

223249
static Napi::External<SherpaOnnxSpeakerEmbeddingManager>

0 commit comments

Comments
 (0)