Skip to content

Commit 031134b

Browse files
authored
Add TTS for node-addon-api (#871)
1 parent 740d7ae commit 031134b

21 files changed

+691
-10
lines changed

.github/scripts/test-nodejs-addon-npm.sh

+36
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ d=nodejs-addon-examples
66
echo "dir: $d"
77
cd $d
88

9+
echo "----------streaming asr----------"
10+
911
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
1012
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
1113
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
@@ -31,6 +33,8 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
3133
node ./test_asr_streaming_paraformer.js
3234
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
3335

36+
echo "----------non-streaming asr----------"
37+
3438
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
3539
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
3640
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
@@ -58,3 +62,35 @@ rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
5862

5963
node ./test_asr_non_streaming_paraformer.js
6064
rm -rf sherpa-onnx-paraformer-zh-2023-03-28
65+
66+
echo "----------tts----------"
67+
68+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
69+
tar xvf vits-piper-en_GB-cori-medium.tar.bz2
70+
rm vits-piper-en_GB-cori-medium.tar.bz2
71+
72+
node ./test_tts_non_streaming_vits_piper_en.js
73+
rm -rf vits-piper-en_GB-cori-medium
74+
75+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
76+
tar xvf vits-coqui-de-css10.tar.bz2
77+
rm vits-coqui-de-css10.tar.bz2
78+
79+
node ./test_tts_non_streaming_vits_coqui_de.js
80+
rm -rf vits-coqui-de-css10
81+
82+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
83+
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
84+
rm sherpa-onnx-vits-zh-ll.tar.bz2
85+
86+
node ./test_tts_non_streaming_vits_zh_ll.js
87+
rm -rf sherpa-onnx-vits-zh-ll
88+
89+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
90+
tar xvf vits-icefall-zh-aishell3.tar.bz2
91+
rm vits-icefall-zh-aishell3.tar.bz2
92+
93+
node ./test_tts_non_streaming_vits_zh_aishell3.js
94+
rm -rf vits-icefall-zh-aishell3
95+
96+
ls -lh

.github/workflows/npm-addon-linux-aarch64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ jobs:
9494
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
9595
..
9696
97-
make -j
97+
make -j2
9898
make install
9999
cd ..
100100

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,4 @@ sherpa-onnx-ced-*
105105
node_modules
106106
package-lock.json
107107
sherpa-onnx-nemo-*
108+
sherpa-onnx-vits-*

nodejs-addon-examples/README.md

+40
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,43 @@ node ./test_asr_non_streaming_paraformer.js
143143
npm install naudiodon2
144144
node ./test_vad_asr_non_streaming_paraformer_microphone.js
145145
```
146+
147+
## Text-to-speech with piper VITS models (TTS)
148+
149+
```bash
150+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-cori-medium.tar.bz2
151+
tar xvf vits-piper-en_GB-cori-medium.tar.bz2
152+
rm vits-piper-en_GB-cori-medium.tar.bz2
153+
154+
node ./test_tts_non_streaming_vits_piper_en.js
155+
```
156+
157+
## Text-to-speech with Coqui-ai/TTS VITS models (TTS)
158+
159+
```bash
160+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
161+
tar xvf vits-coqui-de-css10.tar.bz2
162+
rm vits-coqui-de-css10.tar.bz2
163+
164+
node ./test_tts_non_streaming_vits_coqui_de.js
165+
```
166+
167+
## Text-to-speech with vits Chinese models (1/2)
168+
169+
```bash
170+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
171+
tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
172+
rm sherpa-onnx-vits-zh-ll.tar.bz2
173+
174+
node ./test_tts_non_streaming_vits_zh_ll.js
175+
```
176+
177+
## Text-to-speech with vits Chinese models (2/2)
178+
179+
```bash
180+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
181+
tar xvf vits-icefall-zh-aishell3.tar.bz2
182+
rm vits-icefall-zh-aishell3.tar.bz2
183+
184+
node ./test_tts_non_streaming_vits_zh_aishell3.js
185+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
const sherpa_onnx = require('sherpa-onnx-node');
3+
const performance = require('perf_hooks').performance;
4+
5+
// please download model files from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
7+
function createOfflineTts() {
8+
const config = {
9+
model: {
10+
vits: {
11+
model: './vits-coqui-de-css10/model.onnx',
12+
tokens: './vits-coqui-de-css10/tokens.txt',
13+
},
14+
debug: true,
15+
numThreads: 1,
16+
provider: 'cpu',
17+
},
18+
maxNumStences: 1,
19+
};
20+
return new sherpa_onnx.OfflineTts(config);
21+
}
22+
23+
const tts = createOfflineTts();

const text = 'Alles hat ein Ende, nur die Wurst hat zwei.';

// Only the generate() call is timed, so model loading is excluded from the
// real-time factor reported below.
const start = performance.now();
const audio = tts.generate({text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-coqui-de.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
const sherpa_onnx = require('sherpa-onnx-node');
3+
const performance = require('perf_hooks').performance;
4+
5+
// please download model files from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
7+
function createOfflineTts() {
8+
const config = {
9+
model: {
10+
vits: {
11+
model: './vits-piper-en_GB-cori-medium/en_GB-cori-medium.onnx',
12+
tokens: './vits-piper-en_GB-cori-medium/tokens.txt',
13+
dataDir: './vits-piper-en_GB-cori-medium/espeak-ng-data',
14+
},
15+
debug: true,
16+
numThreads: 1,
17+
provider: 'cpu',
18+
},
19+
maxNumStences: 1,
20+
};
21+
return new sherpa_onnx.OfflineTts(config);
22+
}
23+
24+
const tts = createOfflineTts();

const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

// Only the generate() call is timed, so model loading is excluded from the
// real-time factor reported below.
const start = performance.now();
const audio = tts.generate({text, sid: 0, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-piper-en.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
const sherpa_onnx = require('sherpa-onnx-node');
3+
const performance = require('perf_hooks').performance;
4+
5+
// please download model files from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
7+
function createOfflineTts() {
8+
const config = {
9+
model: {
10+
vits: {
11+
model: './vits-icefall-zh-aishell3/model.onnx',
12+
tokens: './vits-icefall-zh-aishell3/tokens.txt',
13+
lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
14+
},
15+
debug: true,
16+
numThreads: 1,
17+
provider: 'cpu',
18+
},
19+
maxNumStences: 1,
20+
ruleFsts:
21+
'./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
22+
ruleFars: './vits-icefall-zh-aishell3/rule.far',
23+
};
24+
return new sherpa_onnx.OfflineTts(config);
25+
}
26+
27+
const tts = createOfflineTts();

const text =
    '他在长沙出生,长白山长大,去过长江,现在他是一个银行的行长,主管行政工作。有困难,请拨110,或者13020240513。今天是2024年5月13号, 他上个月的工资是12345块钱。';

// Only the generate() call is timed, so model loading is excluded from the
// real-time factor reported below.
const start = performance.now();
const audio = tts.generate({text, sid: 88, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-aishell3.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
const sherpa_onnx = require('sherpa-onnx-node');
3+
const performance = require('perf_hooks').performance;
4+
5+
// please download model files from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
7+
function createOfflineTts() {
8+
const config = {
9+
model: {
10+
vits: {
11+
model: './sherpa-onnx-vits-zh-ll/model.onnx',
12+
tokens: './sherpa-onnx-vits-zh-ll/tokens.txt',
13+
lexicon: './sherpa-onnx-vits-zh-ll/lexicon.txt',
14+
dictDir: './sherpa-onnx-vits-zh-ll/dict',
15+
},
16+
debug: true,
17+
numThreads: 1,
18+
provider: 'cpu',
19+
},
20+
maxNumStences: 1,
21+
ruleFsts:
22+
'./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/number.fst',
23+
};
24+
return new sherpa_onnx.OfflineTts(config);
25+
}
26+
27+
const tts = createOfflineTts();

const text =
    '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月13号,拨打110或者18920240513。123456块钱。';

// Only the generate() call is timed, so model loading is excluded from the
// real-time factor reported below.
const start = performance.now();
const audio = tts.generate({text, sid: 2, speed: 1.0});
const stop = performance.now();

// performance.now() reports milliseconds; convert to seconds.
const elapsed_seconds = (stop - start) / 1000;
const duration = audio.samples.length / audio.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));

const filename = 'test-zh-ll.wav';
sherpa_onnx.writeWave(
    filename, {samples: audio.samples, sampleRate: audio.sampleRate});

console.log(`Saved to ${filename}`);

nodejs-addon-examples/test_vad_asr_non_streaming_nemo_ctc_microphone.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ ai.on('data', data => {
9999
.split(' ')[0]}.wav`;
100100
sherpa_onnx.writeWave(
101101
filename,
102-
{samples: segment.samples, sampleRate: vad.config.sampleRate})
102+
{samples: segment.samples, sampleRate: vad.config.sampleRate});
103103

104104
index += 1;
105105
}

nodejs-addon-examples/test_vad_asr_non_streaming_paraformer_microphone.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ ai.on('data', data => {
9797
.split(' ')[0]}.wav`;
9898
sherpa_onnx.writeWave(
9999
filename,
100-
{samples: segment.samples, sampleRate: vad.config.sampleRate})
100+
{samples: segment.samples, sampleRate: vad.config.sampleRate});
101101

102102
index += 1;
103103
}

nodejs-addon-examples/test_vad_asr_non_streaming_transducer_microphone.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ ai.on('data', data => {
102102
.split(' ')[0]}.wav`;
103103
sherpa_onnx.writeWave(
104104
filename,
105-
{samples: segment.samples, sampleRate: vad.config.sampleRate})
105+
{samples: segment.samples, sampleRate: vad.config.sampleRate});
106106

107107
index += 1;
108108
}

nodejs-addon-examples/test_vad_asr_non_streaming_whisper_microphone.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ ai.on('data', data => {
9898
.split(' ')[0]}.wav`;
9999
sherpa_onnx.writeWave(
100100
filename,
101-
{samples: segment.samples, sampleRate: vad.config.sampleRate})
101+
{samples: segment.samples, sampleRate: vad.config.sampleRate});
102102

103103
index += 1;
104104
}

nodejs-addon-examples/test_vad_microphone.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ ai.on('data', data => {
7171
.split(' ')[0]}.wav`;
7272
sherpa_onnx.writeWave(
7373
filename,
74-
{samples: segment.samples, sampleRate: vad.config.sampleRate})
74+
{samples: segment.samples, sampleRate: vad.config.sampleRate});
7575
const duration = segment.samples.length / vad.config.sampleRate;
7676
console.log(`${index} End of speech. Duration: ${duration} seconds`);
7777
console.log(`Saved to ${filename}`);

scripts/node-addon-api/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ include_directories(${CMAKE_JS_INC})
1919

2020
set(srcs
2121
src/non-streaming-asr.cc
22+
src/non-streaming-tts.cc
2223
src/sherpa-onnx-node-addon-api.cc
2324
src/streaming-asr.cc
2425
src/vad.cc

scripts/node-addon-api/lib/addon.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ for (const p of possible_paths) {
2525
}
2626

2727
if (!found) {
28-
let msg =
29-
`Could not find sherpa-onnx. Tried\n\n ${possible_paths.join('\n ')}\n`
28+
let msg = `Could not find sherpa-onnx-node. Tried\n\n ${
29+
possible_paths.join('\n ')}\n`
3030
if (os.platform() == 'darwin' && process.env.DYLD_LIBRARY_PATH &&
3131
!process.env.DYLD_LIBRARY_PATH.includes(
3232
`node_modules/sherpa-onnx-${platform_arch}`)) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
const addon = require('./addon.js');
2+
3+
/**
 * Wrapper around the native addon's offline TTS functions.
 *
 * Instances hold the handle returned by `addon.createOfflineTts()` and expose
 * the speaker count and sample rate queried from that handle.
 */
class OfflineTts {
  /**
   * @param {Object} config Passed unchanged to `addon.createOfflineTts()` and
   *     kept on the instance as `this.config`.
   */
  constructor(config) {
    this.handle = addon.createOfflineTts(config);
    this.config = config;

    this.numSpeakers = addon.getOfflineTtsNumSpeakers(this.handle);
    this.sampleRate = addon.getOfflineTtsSampleRate(this.handle);
  }

  /**
   * Generate audio for the given request.
   *
   * @param {Object} obj Request of the form
   *     `{text: "xxxx", sid: 0, speed: 1.0}`, where text is a string, sid is
   *     an int32, and speed is a float.
   * @returns {Object} an object `{samples: Float32Array, sampleRate: <a number>}`
   */
  generate(obj) {
    return addon.offlineTtsGenerate(this.handle, obj);
  }
}
22+
23+
module.exports = {
24+
OfflineTts,
25+
}

0 commit comments

Comments
 (0)