diff --git a/.github/scripts/node-addon/run.sh b/.github/scripts/node-addon/run.sh index 5fd9a9b9ed..19c98abec0 100755 --- a/.github/scripts/node-addon/run.sh +++ b/.github/scripts/node-addon/run.sh @@ -18,7 +18,7 @@ fi SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" -# SHERPA_ONNX_VERSION=1.0.25 +# SHERPA_ONNX_VERSION=1.0.27 if [ -z $owner ]; then owner=k2-fsa diff --git a/.github/workflows/npm-addon.yaml b/.github/workflows/npm-addon.yaml index 2b4af16268..0564223332 100644 --- a/.github/workflows/npm-addon.yaml +++ b/.github/workflows/npm-addon.yaml @@ -55,7 +55,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - # SHERPA_ONNX_VERSION=1.0.25 + # SHERPA_ONNX_VERSION=1.0.27 src_dir=.github/scripts/node-addon sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json diff --git a/nodejs-addon-examples/package.json b/nodejs-addon-examples/package.json index 005514c07b..5fb5743af9 100644 --- a/nodejs-addon-examples/package.json +++ b/nodejs-addon-examples/package.json @@ -1,5 +1,5 @@ { "dependencies": { - "sherpa-onnx-node": "^1.0.25" + "sherpa-onnx-node": "^1.0.27" } } diff --git a/nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js b/nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js index 182cd95169..73ba4b61ce 100644 --- a/nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js +++ b/nodejs-addon-examples/test_tts_non_streaming_vits_coqui_de.js @@ -24,7 +24,12 @@ const tts = createOfflineTts(); const text = 'Alles hat ein Ende, nur die Wurst hat zwei.' 
let start = Date.now(); -const audio = tts.generate({text: text, sid: 0, speed: 1.0}); +const audio = tts.generate({ + text: text, + sid: 0, + speed: 1.0, + enableExternalBuffer: true, +}); let stop = Date.now(); const elapsed_seconds = (stop - start) / 1000; const duration = audio.samples.length / audio.sampleRate; diff --git a/scripts/check_style_cpplint.sh b/scripts/check_style_cpplint.sh index f81e02a2ae..eedc9afc1d 100755 --- a/scripts/check_style_cpplint.sh +++ b/scripts/check_style_cpplint.sh @@ -99,7 +99,7 @@ function do_check() { ;; 2) echo "Check all files" - files=$(find $sherpa_onnx_dir/sherpa-onnx -name "*.h" -o -name "*.cc") + files=$(find $sherpa_onnx_dir/sherpa-onnx/csrc $sherpa_onnx_dir/sherpa-onnx/python $sherpa_onnx_dir/scripts/node-addon-api/src $sherpa_onnx_dir/sherpa-onnx/jni $sherpa_onnx_dir/sherpa-onnx/c-api -name "*.h" -o -name "*.cc") ;; *) echo "Check last commit" diff --git a/scripts/node-addon-api/lib/speaker-identification.js b/scripts/node-addon-api/lib/speaker-identification.js index 4539cf535a..29a493a86b 100644 --- a/scripts/node-addon-api/lib/speaker-identification.js +++ b/scripts/node-addon-api/lib/speaker-identification.js @@ -18,9 +18,9 @@ class SpeakerEmbeddingExtractor { } // return a float32 array - compute(stream) { + compute(stream, enableExternalBuffer = true) { return addon.speakerEmbeddingExtractorComputeEmbedding( - this.handle, stream.handle); + this.handle, stream.handle, enableExternalBuffer); } } diff --git a/scripts/node-addon-api/lib/vad.js b/scripts/node-addon-api/lib/vad.js index 30ecc52736..977255206d 100644 --- a/scripts/node-addon-api/lib/vad.js +++ b/scripts/node-addon-api/lib/vad.js @@ -11,8 +11,9 @@ class CircularBuffer { } // return a float32 array - get(startIndex, n) { - return addon.circularBufferGet(this.handle, startIndex, n); + get(startIndex, n, enableExternalBuffer = true) { + return addon.circularBufferGet( + this.handle, startIndex, n, enableExternalBuffer); } pop(n) { @@ -48,23 +49,23 @@ 
config = { } acceptWaveform(samples) { - addon.voiceActivityDetectorAcceptWaveform(this.handle, samples) + addon.voiceActivityDetectorAcceptWaveform(this.handle, samples); } isEmpty() { - return addon.voiceActivityDetectorIsEmpty(this.handle) + return addon.voiceActivityDetectorIsEmpty(this.handle); } isDetected() { - return addon.voiceActivityDetectorIsDetected(this.handle) + return addon.voiceActivityDetectorIsDetected(this.handle); } pop() { - addon.voiceActivityDetectorPop(this.handle) + addon.voiceActivityDetectorPop(this.handle); } clear() { - addon.VoiceActivityDetectorClearWrapper(this.handle) + addon.VoiceActivityDetectorClearWrapper(this.handle); } /* @@ -73,12 +74,12 @@ config = { start: a int32 } */ - front() { - return addon.voiceActivityDetectorFront(this.handle) + front(enableExternalBuffer = true) { + return addon.voiceActivityDetectorFront(this.handle, enableExternalBuffer); } reset() { - return addon.VoiceActivityDetectorResetWrapper(this.handle) + return addon.VoiceActivityDetectorResetWrapper(this.handle); } } diff --git a/scripts/node-addon-api/src/non-streaming-tts.cc b/scripts/node-addon-api/src/non-streaming-tts.cc index 2adc0bbc18..c230b972a3 100644 --- a/scripts/node-addon-api/src/non-streaming-tts.cc +++ b/scripts/node-addon-api/src/non-streaming-tts.cc @@ -2,6 +2,7 @@ // // Copyright (c) 2024 Xiaomi Corporation +#include <algorithm> #include <sstream> #include "macros.h" // NOLINT @@ -265,6 +266,13 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { return {}; } + bool enable_external_buffer = true; + if (obj.Has("enableExternalBuffer") && + obj.Get("enableExternalBuffer").IsBoolean()) { + enable_external_buffer = + obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value(); + } + Napi::String _text = obj.Get("text").As<Napi::String>(); std::string text = _text.Utf8Value(); int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value(); @@ -273,20 +281,37 @@ static Napi::Object 
OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { const SherpaOnnxGeneratedAudio *audio = SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed); - Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( - env, const_cast<float *>(audio->samples), sizeof(float) * audio->n, - [](Napi::Env /*env*/, void * /*data*/, - const SherpaOnnxGeneratedAudio *hint) { - SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint); - }, - audio); - Napi::Float32Array float32Array = - Napi::Float32Array::New(env, audio->n, arrayBuffer, 0); - - Napi::Object ans = Napi::Object::New(env); - ans.Set(Napi::String::New(env, "samples"), float32Array); - ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate); - return ans; + if (enable_external_buffer) { + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast<float *>(audio->samples), sizeof(float) * audio->n, + [](Napi::Env /*env*/, void * /*data*/, + const SherpaOnnxGeneratedAudio *hint) { + SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint); + }, + audio); + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, audio->n, arrayBuffer, 0); + + Napi::Object ans = Napi::Object::New(env); + ans.Set(Napi::String::New(env, "samples"), float32Array); + ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate); + return ans; + } else { + // don't use external buffer + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * audio->n); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, audio->n, arrayBuffer, 0); + + std::copy(audio->samples, audio->samples + audio->n, float32Array.Data()); + + Napi::Object ans = Napi::Object::New(env); + ans.Set(Napi::String::New(env, "samples"), float32Array); + ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate); + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); + return ans; + } } void InitNonStreamingTts(Napi::Env env, Napi::Object exports) { diff --git 
a/scripts/node-addon-api/src/speaker-identification.cc b/scripts/node-addon-api/src/speaker-identification.cc index 5d5bb7cf2e..6a4b721d53 100644 --- a/scripts/node-addon-api/src/speaker-identification.cc +++ b/scripts/node-addon-api/src/speaker-identification.cc @@ -1,6 +1,7 @@ // scripts/node-addon-api/src/speaker-identification.cc // // Copyright (c) 2024 Xiaomi Corporation +#include <algorithm> #include <sstream> #include "macros.h" // NOLINT @@ -175,9 +176,9 @@ static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper( static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper( const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); - if (info.Length() != 2) { + if (info.Length() != 2 && info.Length() != 3) { std::ostringstream os; - os << "Expect only 2 arguments. Given: " << info.Length(); + os << "Expect only 2 or 3 arguments. Given: " << info.Length(); Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); @@ -199,6 +200,18 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper( return {}; } + bool enable_external_buffer = true; + if (info.Length() == 3) { + if (info[2].IsBoolean()) { + enable_external_buffer = info[2].As<Napi::Boolean>().Value(); + } else { + Napi::TypeError::New(env, "Argument 2 should be a boolean.") + .ThrowAsJavaScriptException(); + + return {}; + } + } + SherpaOnnxSpeakerEmbeddingExtractor *extractor = info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data(); @@ -210,14 +223,29 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper( int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor); - Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( - env, const_cast<float *>(v), sizeof(float) * dim, - [](Napi::Env /*env*/, void *data) { - SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding( - reinterpret_cast<float *>(data)); - }); + if (enable_external_buffer) { + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast<float 
*>(v), sizeof(float) * dim, + [](Napi::Env /*env*/, void *data) { + SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding( + reinterpret_cast<float *>(data)); + }); + + return Napi::Float32Array::New(env, dim, arrayBuffer, 0); + } else { + // don't use external buffer + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * dim); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, dim, arrayBuffer, 0); - return Napi::Float32Array::New(env, dim, arrayBuffer, 0); + std::copy(v, v + dim, float32Array.Data()); + + SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v); + + return float32Array; + } } static Napi::External<SherpaOnnxSpeakerEmbeddingManager> diff --git a/scripts/node-addon-api/src/vad.cc b/scripts/node-addon-api/src/vad.cc index 418c299a19..217033c9da 100644 --- a/scripts/node-addon-api/src/vad.cc +++ b/scripts/node-addon-api/src/vad.cc @@ -2,6 +2,7 @@ // // Copyright (c) 2024 Xiaomi Corporation +#include <algorithm> #include <sstream> #include "macros.h" // NOLINT @@ -75,9 +76,9 @@ static Napi::Float32Array CircularBufferGetWrapper( const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); - if (info.Length() != 3) { + if (info.Length() != 3 && info.Length() != 4) { std::ostringstream os; - os << "Expect only 3 arguments. Given: " << info.Length(); + os << "Expect only 3 or 4 arguments. 
Given: " << info.Length(); Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); @@ -108,21 +109,48 @@ static Napi::Float32Array CircularBufferGetWrapper( return {}; } + bool enable_external_buffer = true; + if (info.Length() == 4) { + if (info[3].IsBoolean()) { + enable_external_buffer = info[3].As<Napi::Boolean>().Value(); + } else { + Napi::TypeError::New(env, "Argument 3 should be a boolean.") + .ThrowAsJavaScriptException(); + + return {}; + } + } + int32_t start_index = info[1].As<Napi::Number>().Int32Value(); int32_t n = info[2].As<Napi::Number>().Int32Value(); const float *data = SherpaOnnxCircularBufferGet(buf, start_index, n); - Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( - env, const_cast<float *>(data), sizeof(float) * n, - [](Napi::Env /*env*/, void *p) { - SherpaOnnxCircularBufferFree(reinterpret_cast<const float *>(p)); - }); + if (enable_external_buffer) { + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast<float *>(data), sizeof(float) * n, + [](Napi::Env /*env*/, void *p) { + SherpaOnnxCircularBufferFree(reinterpret_cast<const float *>(p)); + }); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, n, arrayBuffer, 0); - Napi::Float32Array float32Array = - Napi::Float32Array::New(env, n, arrayBuffer, 0); + return float32Array; + } else { + // don't use external buffer + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * n); - return float32Array; + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, n, arrayBuffer, 0); + + std::copy(data, data + n, float32Array.Data()); + + SherpaOnnxCircularBufferFree(data); + + return float32Array; + } } static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) { @@ -470,9 +498,9 @@ static Napi::Object VoiceActivityDetectorFrontWrapper( const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); - if (info.Length() != 1) { + if (info.Length() != 1 && info.Length() != 2) { 
std::ostringstream os; - os << "Expect only 1 argument. Given: " << info.Length(); + os << "Expect only 1 or 2 arguments. Given: " << info.Length(); Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); @@ -486,28 +514,59 @@ static Napi::Object VoiceActivityDetectorFrontWrapper( return {}; } + bool enable_external_buffer = true; + if (info.Length() == 2) { + if (info[1].IsBoolean()) { + enable_external_buffer = info[1].As<Napi::Boolean>().Value(); + } else { + Napi::TypeError::New(env, "Argument 1 should be a boolean.") + .ThrowAsJavaScriptException(); + + return {}; + } + } + SherpaOnnxVoiceActivityDetector *vad = info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data(); const SherpaOnnxSpeechSegment *segment = SherpaOnnxVoiceActivityDetectorFront(vad); - Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( - env, const_cast<float *>(segment->samples), sizeof(float) * segment->n, - [](Napi::Env /*env*/, void * /*data*/, - const SherpaOnnxSpeechSegment *hint) { - SherpaOnnxDestroySpeechSegment(hint); - }, - segment); + if (enable_external_buffer) { + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast<float *>(segment->samples), sizeof(float) * segment->n, + [](Napi::Env /*env*/, void * /*data*/, + const SherpaOnnxSpeechSegment *hint) { + SherpaOnnxDestroySpeechSegment(hint); + }, + segment); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, segment->n, arrayBuffer, 0); - Napi::Float32Array float32Array = - Napi::Float32Array::New(env, segment->n, arrayBuffer, 0); + Napi::Object obj = Napi::Object::New(env); + obj.Set(Napi::String::New(env, "start"), segment->start); + obj.Set(Napi::String::New(env, "samples"), float32Array); - Napi::Object obj = Napi::Object::New(env); - obj.Set(Napi::String::New(env, "start"), segment->start); - obj.Set(Napi::String::New(env, "samples"), float32Array); + return obj; + } else { + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * segment->n); - return 
obj; + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, segment->n, arrayBuffer, 0); + + std::copy(segment->samples, segment->samples + segment->n, + float32Array.Data()); + + Napi::Object obj = Napi::Object::New(env); + obj.Set(Napi::String::New(env, "start"), segment->start); + obj.Set(Napi::String::New(env, "samples"), float32Array); + + SherpaOnnxDestroySpeechSegment(segment); + + return obj; + } } static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) { diff --git a/scripts/node-addon-api/src/wave-reader.cc b/scripts/node-addon-api/src/wave-reader.cc index b2c8c57b3c..874f61babb 100644 --- a/scripts/node-addon-api/src/wave-reader.cc +++ b/scripts/node-addon-api/src/wave-reader.cc @@ -2,6 +2,7 @@ // // Copyright (c) 2024 Xiaomi Corporation +#include <algorithm> #include <sstream> #include "napi.h" // NOLINT @@ -9,16 +10,17 @@ static Napi::Object ReadWaveWrapper(const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); - if (info.Length() != 1) { + if (info.Length() != 1 && info.Length() != 2) { std::ostringstream os; - os << "Expect only 1 argument. Given: " << info.Length(); + os << "Expect only 1 or 2 arguments. 
Given: " << info.Length(); Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); return {}; } + if (!info[0].IsString()) { - Napi::TypeError::New(env, "Argument should be a string") + Napi::TypeError::New(env, "Argument 0 should be a string") .ThrowAsJavaScriptException(); return {}; @@ -26,6 +28,18 @@ static Napi::Object ReadWaveWrapper(const Napi::CallbackInfo &info) { std::string filename = info[0].As<Napi::String>().Utf8Value(); + bool enable_external_buffer = true; + if (info.Length() == 2) { + if (info[1].IsBoolean()) { + enable_external_buffer = info[1].As<Napi::Boolean>().Value(); + } else { + Napi::TypeError::New(env, "Argument 1 should be a boolean") + .ThrowAsJavaScriptException(); + + return {}; + } + } + const SherpaOnnxWave *wave = SherpaOnnxReadWave(filename.c_str()); if (!wave) { std::ostringstream os; @@ -35,20 +49,40 @@ static Napi::Object ReadWaveWrapper(const Napi::CallbackInfo &info) { return {}; } - Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( - env, const_cast<float *>(wave->samples), - sizeof(float) * wave->num_samples, - [](Napi::Env /*env*/, void * /*data*/, const SherpaOnnxWave *hint) { - SherpaOnnxFreeWave(hint); - }, - wave); - Napi::Float32Array float32Array = - Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0); - - Napi::Object obj = Napi::Object::New(env); - obj.Set(Napi::String::New(env, "samples"), float32Array); - obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate); - return obj; + if (enable_external_buffer) { + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast<float *>(wave->samples), + sizeof(float) * wave->num_samples, + [](Napi::Env /*env*/, void * /*data*/, const SherpaOnnxWave *hint) { + SherpaOnnxFreeWave(hint); + }, + wave); + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0); + + Napi::Object obj = Napi::Object::New(env); + obj.Set(Napi::String::New(env, "samples"), float32Array); + 
obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate); + return obj; + } else { + // don't use external buffer + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * wave->num_samples); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0); + + std::copy(wave->samples, wave->samples + wave->num_samples, + float32Array.Data()); + + Napi::Object obj = Napi::Object::New(env); + obj.Set(Napi::String::New(env, "samples"), float32Array); + obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate); + + SherpaOnnxFreeWave(wave); + + return obj; + } } void InitWaveReader(Napi::Env env, Napi::Object exports) {