Skip to content

Commit 960eb75

Browse files
authored
Add C++ runtime for MeloTTS (#1138)
1 parent 9548541 commit 960eb75

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+693
-156
lines changed

.github/workflows/export-melo-tts-to-onnx.yaml

+10
Original file line number | Diff line number | Diff line change
@@ -63,10 +63,16 @@ jobs:
6363
echo "pwd: $PWD"
6464
ls -lh ../scripts/melo-tts
6565
66+
rm -rf ./
67+
6668
cp -v ../scripts/melo-tts/*.onnx .
6769
cp -v ../scripts/melo-tts/lexicon.txt .
6870
cp -v ../scripts/melo-tts/tokens.txt .
71+
cp -v ../scripts/melo-tts/README.md .
72+
73+
curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE
6974
75+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/new_heteronym.fst
7076
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
7177
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
7278
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
@@ -77,6 +83,10 @@ jobs:
7783
git lfs track "*.onnx"
7884
git add .
7985
86+
ls -lh
87+
88+
git status
89+
8090
git commit -m "add models"
8191
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en main || true
8292

.github/workflows/windows-x64-jni.yaml

+7-3
Original file line number | Diff line number | Diff line change
@@ -39,10 +39,14 @@ jobs:
3939
cd build
4040
cmake \
4141
-A x64 \
42-
-D CMAKE_BUILD_TYPE=Release \
43-
-D BUILD_SHARED_LIBS=ON \
42+
-DBUILD_SHARED_LIBS=ON \
4443
-D SHERPA_ONNX_ENABLE_JNI=ON \
45-
-D CMAKE_INSTALL_PREFIX=./install \
44+
-DCMAKE_INSTALL_PREFIX=./install \
45+
-DCMAKE_BUILD_TYPE=Release \
46+
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
47+
-DBUILD_ESPEAK_NG_EXE=OFF \
48+
-DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
49+
-DSHERPA_ONNX_ENABLE_BINARY=ON \
4650
..
4751
4852
- name: Build sherpa-onnx for windows

CHANGELOG.md

+4
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
## 1.10.16
2+
3+
* Support zh-en TTS model from MeloTTS.
4+
15
## 1.10.15
26

37
* Downgrade onnxruntime from v1.18.1 to v1.17.1

CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ project(sherpa-onnx)
1111
# ./nodejs-addon-examples
1212
# ./dart-api-examples/
1313
# ./CHANGELOG.md
14-
set(SHERPA_ONNX_VERSION "1.10.15")
14+
set(SHERPA_ONNX_VERSION "1.10.16")
1515

1616
# Disable warning about
1717
#

dart-api-examples/non-streaming-asr/pubspec.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ environment:
1010

1111
# Add regular dependencies here.
1212
dependencies:
13-
sherpa_onnx: ^1.10.15
13+
sherpa_onnx: ^1.10.16
1414
path: ^1.9.0
1515
args: ^2.5.0
1616

dart-api-examples/streaming-asr/pubspec.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ environment:
1111

1212
# Add regular dependencies here.
1313
dependencies:
14-
sherpa_onnx: ^1.10.15
14+
sherpa_onnx: ^1.10.16
1515
path: ^1.9.0
1616
args: ^2.5.0
1717

dart-api-examples/tts/pubspec.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ environment:
88

99
# Add regular dependencies here.
1010
dependencies:
11-
sherpa_onnx: ^1.10.15
11+
sherpa_onnx: ^1.10.16
1212
path: ^1.9.0
1313
args: ^2.5.0
1414

dart-api-examples/vad/pubspec.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ environment:
99
sdk: ^3.4.0
1010

1111
dependencies:
12-
sherpa_onnx: ^1.10.15
12+
sherpa_onnx: ^1.10.16
1313
path: ^1.9.0
1414
args: ^2.5.0
1515

flutter-examples/streaming_asr/pubspec.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ description: >
55
66
publish_to: 'none'
77

8-
version: 1.10.14
8+
version: 1.10.16
99

1010
topics:
1111
- speech-recognition
@@ -30,7 +30,7 @@ dependencies:
3030
record: ^5.1.0
3131
url_launcher: ^6.2.6
3232

33-
sherpa_onnx: ^1.10.15
33+
sherpa_onnx: ^1.10.16
3434
# sherpa_onnx:
3535
# path: ../../flutter/sherpa_onnx
3636

flutter-examples/tts/pubspec.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ description: >
55
66
publish_to: 'none' # Remove this line if you wish to publish to pub.dev
77

8-
version: 1.0.0
8+
version: 1.10.16
99

1010
environment:
1111
sdk: '>=3.4.0 <4.0.0'
@@ -17,7 +17,7 @@ dependencies:
1717
cupertino_icons: ^1.0.6
1818
path_provider: ^2.1.3
1919
path: ^1.9.0
20-
sherpa_onnx: ^1.10.15
20+
sherpa_onnx: ^1.10.16
2121
url_launcher: ^6.2.6
2222
audioplayers: ^5.0.0
2323

flutter/sherpa_onnx/pubspec.yaml

+6-6
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ topics:
1717
- voice-activity-detection
1818

1919
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20-
version: 1.10.15
20+
version: 1.10.16
2121

2222
homepage: https://github.com/k2-fsa/sherpa-onnx
2323

@@ -30,19 +30,19 @@ dependencies:
3030
flutter:
3131
sdk: flutter
3232

33-
sherpa_onnx_android: ^1.10.15
33+
sherpa_onnx_android: ^1.10.16
3434
# path: ../sherpa_onnx_android
3535

36-
sherpa_onnx_macos: ^1.10.15
36+
sherpa_onnx_macos: ^1.10.16
3737
# path: ../sherpa_onnx_macos
3838

39-
sherpa_onnx_linux: ^1.10.15
39+
sherpa_onnx_linux: ^1.10.16
4040
# path: ../sherpa_onnx_linux
4141
#
42-
sherpa_onnx_windows: ^1.10.15
42+
sherpa_onnx_windows: ^1.10.16
4343
# path: ../sherpa_onnx_windows
4444

45-
sherpa_onnx_ios: ^1.10.15
45+
sherpa_onnx_ios: ^1.10.16
4646
# sherpa_onnx_ios:
4747
# path: ../sherpa_onnx_ios
4848

flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec

+1-1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
88
Pod::Spec.new do |s|
99
s.name = 'sherpa_onnx_ios'
10-
s.version = '1.10.15'
10+
s.version = '1.10.16'
1111
s.summary = 'A new Flutter FFI plugin project.'
1212
s.description = <<-DESC
1313
A new Flutter FFI plugin project.

flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
#
55
Pod::Spec.new do |s|
66
s.name = 'sherpa_onnx_macos'
7-
s.version = '1.10.15'
7+
s.version = '1.10.16'
88
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
99
s.description = <<-DESC
1010
sherpa-onnx Flutter FFI plugin project.

nodejs-addon-examples/package.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
{
22
"dependencies": {
3-
"sherpa-onnx-node": "^1.10.15"
3+
"sherpa-onnx-node": "^1.10.16"
44
}
55
}

scripts/apk/build-apk-tts-engine.sh.in

+4
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,10 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
7878
git diff
7979
popd
8080

81+
if [[ $model_dir == vits-melo-tts-zh_en ]]; then
82+
lang=zh_en
83+
fi
84+
8185
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
8286
log "------------------------------------------------------------"
8387
log "build tts apk for $arch"

scripts/apk/build-apk-tts.sh.in

+4
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,10 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
7676
git diff
7777
popd
7878

79+
if [[ $model_dir == vits-melo-tts-zh_en ]]; then
80+
lang=zh_en
81+
fi
82+
7983
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
8084
log "------------------------------------------------------------"
8185
log "build tts apk for $arch"

scripts/apk/generate-tts-apk-script.py

+13-5
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,11 @@ def get_vits_models() -> List[TtsModel]:
312312
model_name="vits-zh-hf-fanchen-wnj.onnx",
313313
lang="zh",
314314
),
315+
TtsModel(
316+
model_dir="vits-melo-tts-zh_en",
317+
model_name="model.onnx",
318+
lang="zh",
319+
),
315320
TtsModel(
316321
model_dir="vits-zh-hf-fanchen-C",
317322
model_name="vits-zh-hf-fanchen-C.onnx",
@@ -339,18 +344,21 @@ def get_vits_models() -> List[TtsModel]:
339344
),
340345
]
341346

342-
rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
347+
rule_fsts = ["phone.fst", "date.fst", "number.fst"]
343348
for m in chinese_models:
344349
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
345-
if "vits-zh-hf" in m.model_dir or "sherpa-onnx-vits-zh-ll" == m.model_dir:
350+
if (
351+
"vits-zh-hf" in m.model_dir
352+
or "sherpa-onnx-vits-zh-ll" == m.model_dir
353+
or "melo-tts" in m.model_dir
354+
):
346355
s = s[:-1]
347356
m.dict_dir = m.model_dir + "/dict"
357+
else:
358+
m.rule_fars = f"{m.model_dir}/rule.far"
348359

349360
m.rule_fsts = ",".join(s)
350361

351-
if "vits-zh-hf" not in m.model_dir and "zh-ll" not in m.model_dir:
352-
m.rule_fars = f"{m.model_dir}/rule.far"
353-
354362
all_models = chinese_models + [
355363
TtsModel(
356364
model_dir="vits-cantonese-hf-xiaomaiiwn",

scripts/dart/sherpa-onnx-pubspec.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ topics:
1717
- voice-activity-detection
1818

1919
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20-
version: 1.10.15
20+
version: 1.10.16
2121

2222
homepage: https://github.com/k2-fsa/sherpa-onnx
2323

scripts/flutter/generate-tts.py

+18-17
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,6 @@
66

77
import jinja2
88

9-
# pip install iso639-lang
10-
from iso639 import Lang
11-
129

1310
def get_args():
1411
parser = argparse.ArgumentParser()
@@ -37,13 +34,6 @@ class TtsModel:
3734
data_dir: Optional[str] = None
3835
dict_dir: Optional[str] = None
3936
is_char: bool = False
40-
lang_iso_639_3: str = ""
41-
42-
43-
def convert_lang_to_iso_639_3(models: List[TtsModel]):
44-
for m in models:
45-
if m.lang_iso_639_3 == "":
46-
m.lang_iso_639_3 = Lang(m.lang).pt3
4737

4838

4939
def get_coqui_models() -> List[TtsModel]:
@@ -312,6 +302,11 @@ def get_vits_models() -> List[TtsModel]:
312302
model_name="vits-zh-hf-fanchen-wnj.onnx",
313303
lang="zh",
314304
),
305+
TtsModel(
306+
model_dir="vits-melo-tts-zh_en",
307+
model_name="model.onnx",
308+
lang="zh_en",
309+
),
315310
TtsModel(
316311
model_dir="vits-zh-hf-fanchen-C",
317312
model_name="vits-zh-hf-fanchen-C.onnx",
@@ -332,26 +327,33 @@ def get_vits_models() -> List[TtsModel]:
332327
model_name="vits-zh-hf-fanchen-unity.onnx",
333328
lang="zh",
334329
),
330+
TtsModel(
331+
model_dir="sherpa-onnx-vits-zh-ll",
332+
model_name="model.onnx",
333+
lang="zh",
334+
),
335335
]
336336

337-
rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
337+
rule_fsts = ["phone.fst", "date.fst", "number.fst"]
338338
for m in chinese_models:
339339
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
340-
if "vits-zh-hf" in m.model_dir:
340+
if (
341+
"vits-zh-hf" in m.model_dir
342+
or "sherpa-onnx-vits-zh-ll" == m.model_dir
343+
or "melo-tts" in m.model_dir
344+
):
341345
s = s[:-1]
342346
m.dict_dir = m.model_dir + "/dict"
347+
else:
348+
m.rule_fars = f"{m.model_dir}/rule.far"
343349

344350
m.rule_fsts = ",".join(s)
345351

346-
if "vits-zh-hf" not in m.model_dir:
347-
m.rule_fars = f"{m.model_dir}/rule.far"
348-
349352
all_models = chinese_models + [
350353
TtsModel(
351354
model_dir="vits-cantonese-hf-xiaomaiiwn",
352355
model_name="vits-cantonese-hf-xiaomaiiwn.onnx",
353356
lang="cantonese",
354-
lang_iso_639_3="yue",
355357
rule_fsts="vits-cantonese-hf-xiaomaiiwn/rule.fst",
356358
),
357359
# English (US)
@@ -374,7 +376,6 @@ def main():
374376
all_model_list += get_piper_models()
375377
all_model_list += get_mimic3_models()
376378
all_model_list += get_coqui_models()
377-
convert_lang_to_iso_639_3(all_model_list)
378379

379380
num_models = len(all_model_list)
380381

scripts/melo-tts/README.md

+6
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
# Introduction
2+
3+
Models in this directory are converted from
4+
https://github.com/myshell-ai/MeloTTS
5+
6+
Note that there is only a single female speaker in the model.

scripts/melo-tts/export-onnx.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from melo.text.chinese import pinyin_to_symbol_map
99
from melo.text.english import eng_dict, refine_syllables
1010
from pypinyin import Style, lazy_pinyin, phrases_dict, pinyin_dict
11-
from melo.text.symbols import language_tone_start_map
1211

1312
for k, v in pinyin_to_symbol_map.items():
1413
if isinstance(v, list):
@@ -82,6 +81,7 @@ def generate_tokens(symbol_list):
8281
def generate_lexicon():
8382
word_dict = pinyin_dict.pinyin_dict
8483
phrases = phrases_dict.phrases_dict
84+
eng_dict["kaldi"] = [["K", "AH0"], ["L", "D", "IH0"]]
8585
with open("lexicon.txt", "w", encoding="utf-8") as f:
8686
for word in eng_dict:
8787
phones, tones = refine_syllables(eng_dict[word])
@@ -237,9 +237,11 @@ def main():
237237
meta_data = {
238238
"model_type": "melo-vits",
239239
"comment": "melo",
240+
"version": 2,
240241
"language": "Chinese + English",
241242
"add_blank": int(model.hps.data.add_blank),
242243
"n_speakers": 1,
244+
"jieba": 1,
243245
"sample_rate": model.hps.data.sampling_rate,
244246
"bert_dim": 1024,
245247
"ja_bert_dim": 768,

scripts/melo-tts/run.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ function install() {
1212
cd MeloTTS
1313
pip install -r ./requirements.txt
1414

15-
pip install soundfile onnx onnxruntime
15+
pip install soundfile onnx==1.15.0 onnxruntime==1.16.3
1616

1717
python3 -m unidic download
1818
popd

0 commit comments

Comments
 (0)