Skip to content

Commit ac5e0cd

Browse files
committed
Add CXX API for MatchaTTS models
1 parent 9aa4897 commit ac5e0cd

12 files changed

+403
-8
lines changed

.github/workflows/c-api.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,10 @@ jobs:
101101
102102
./matcha-tts-zh-c-api
103103
104+
rm ./matcha-tts-zh-c-api
105+
rm -rf matcha-icefall-*
106+
rm hifigan_v2.onnx
107+
104108
- name: Test Matcha TTS (en)
105109
shell: bash
106110
run: |
@@ -121,6 +125,10 @@ jobs:
121125
122126
./matcha-tts-en-c-api
123127
128+
rm ./matcha-tts-en-c-api
129+
rm -rf matcha-icefall-*
130+
rm hifigan_v2.onnx
131+
124132
- uses: actions/upload-artifact@v4
125133
with:
126134
name: matcha-tts-${{ matrix.os }}

.github/workflows/cxx-api.yaml

+55
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,61 @@ jobs:
8383
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
8484
fi
8585
86+
- name: Test Matcha TTS (zh)
87+
shell: bash
88+
run: |
89+
g++ -std=c++17 -o matcha-tts-zh-cxx-api ./cxx-api-examples/matcha-tts-zh-cxx-api.cc \
90+
-I ./build/install/include \
91+
-L ./build/install/lib/ \
92+
-l sherpa-onnx-cxx-api \
93+
-l sherpa-onnx-c-api \
94+
-l onnxruntime
95+
96+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
97+
tar xvf matcha-icefall-zh-baker.tar.bz2
98+
rm matcha-icefall-zh-baker.tar.bz2
99+
100+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
101+
102+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
103+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
104+
105+
./matcha-tts-zh-cxx-api
106+
107+
rm -rf matcha-icefall-*
108+
rm hifigan_v2.onnx
109+
rm matcha-tts-zh-cxx-api
110+
111+
- name: Test Matcha TTS (en)
112+
shell: bash
113+
run: |
114+
g++ -std=c++17 -o matcha-tts-en-cxx-api ./cxx-api-examples/matcha-tts-en-cxx-api.cc \
115+
-I ./build/install/include \
116+
-L ./build/install/lib/ \
117+
-l sherpa-onnx-cxx-api \
118+
-l sherpa-onnx-c-api \
119+
-l onnxruntime
120+
121+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
122+
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
123+
rm matcha-icefall-en_US-ljspeech.tar.bz2
124+
125+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
126+
127+
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
128+
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
129+
130+
./matcha-tts-en-cxx-api
131+
132+
rm matcha-tts-en-cxx-api
133+
rm -rf matcha-icefall-*
134+
rm hifigan_v2.onnx
135+
136+
- uses: actions/upload-artifact@v4
137+
with:
138+
name: matcha-tts-${{ matrix.os }}
139+
path: ./generated-matcha-*.wav
140+
86141
- name: Test Moonshine tiny
87142
shell: bash
88143
run: |

c-api-examples/matcha-tts-en-c-api.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ int32_t main(int32_t argc, char *argv[]) {
6060
"Friends fell out often because life was changing so fast. The easiest "
6161
"thing in the world was to lose touch with someone.";
6262

63-
SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
63+
const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
6464
int32_t sid = 0;
6565
float speed = 1.0; // larger -> faster in speech speed
6666

c-api-examples/matcha-tts-zh-c-api.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ int32_t main(int32_t argc, char *argv[]) {
6060
"某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
6161
"经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";
6262

63-
SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
63+
const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
6464
int32_t sid = 0;
6565
float speed = 1.0; // larger -> faster in speech speed
6666

c-api-examples/offline-tts-c-api.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ int32_t main(int32_t argc, char *argv[]) {
229229
ShowUsage();
230230
}
231231

232-
SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
232+
const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
233233

234234
const SherpaOnnxGeneratedAudio *audio =
235235
SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);

cxx-api-examples/CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,11 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
1414

1515
add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
1616
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
17+
18+
if(SHERPA_ONNX_ENABLE_TTS)
19+
add_executable(matcha-tts-zh-cxx-api ./matcha-tts-zh-cxx-api.cc)
20+
target_link_libraries(matcha-tts-zh-cxx-api sherpa-onnx-cxx-api)
21+
22+
add_executable(matcha-tts-en-cxx-api ./matcha-tts-en-cxx-api.cc)
23+
target_link_libraries(matcha-tts-en-cxx-api sherpa-onnx-cxx-api)
24+
endif()
+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// cxx-api-examples/matcha-tts-en-cxx-api.cc
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
5+
// This file shows how to use sherpa-onnx CXX API
6+
// for English TTS with MatchaTTS.
7+
//
8+
// clang-format off
9+
/*
10+
Usage
11+
12+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
13+
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
14+
rm matcha-icefall-en_US-ljspeech.tar.bz2
15+
16+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
17+
18+
./matcha-tts-en-cxx-api
19+
20+
*/
21+
// clang-format on
22+
23+
#include <string>
24+
25+
#include "sherpa-onnx/c-api/cxx-api.h"
26+
27+
// Progress callback handed to OfflineTts::Generate() in main().
//
// It only reports progress on stderr and never asks the generator to stop.
//
// @param samples      Sample buffer supplied by the caller; not read here.
// @param num_samples  Sample count supplied by the caller; not read here.
// @param progress     Progress value; printed multiplied by 100 as a
//                     percentage (presumably in [0, 1] -- confirm against
//                     the API header).
// @param arg          Opaque pointer supplied by the caller; not read here.
// @return 1 to continue generating, 0 to stop generating. This
//         implementation always returns 1.
static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  // The example does not consume the audio chunk or the user pointer;
  // the casts silence unused-parameter warnings.
  (void)samples;
  (void)num_samples;
  (void)arg;

  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);

  return 1;
}
34+
35+
int32_t main(int32_t argc, char *argv[]) {
36+
using namespace sherpa_onnx::cxx; // NOLINT
37+
OfflineTtsConfig config;
38+
39+
config.model.matcha.acoustic_model =
40+
"./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
41+
42+
config.model.matcha.vocoder = "./hifigan_v2.onnx";
43+
44+
config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
45+
46+
config.model.matcha.data_dir =
47+
"./matcha-icefall-en_US-ljspeech/espeak-ng-data";
48+
49+
config.model.num_threads = 1;
50+
51+
// If you don't want to see debug messages, please set it to 0
52+
config.model.debug = 1;
53+
54+
std::string filename = "./generated-matcha-en-cxx.wav";
55+
std::string text =
56+
"Today as always, men fall into two groups: slaves and free men. Whoever "
57+
"does not have two-thirds of his day for himself, is a slave, whatever "
58+
"he may be: a statesman, a businessman, an official, or a scholar. "
59+
"Friends fell out often because life was changing so fast. The easiest "
60+
"thing in the world was to lose touch with someone.";
61+
62+
auto tts = OfflineTts::Create(config);
63+
int32_t sid = 0;
64+
float speed = 1.0; // larger -> faster in speech speed
65+
66+
#if 0
67+
// If you don't want to use a callback, then please enable this branch
68+
GeneratedAudio audio = tts.Generate(text, sid, speed);
69+
#else
70+
GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
71+
#endif
72+
73+
WriteWave(filename, {audio.samples, audio.sample_rate});
74+
75+
fprintf(stderr, "Input text is: %s\n", text.c_str());
76+
fprintf(stderr, "Speaker ID is is: %d\n", sid);
77+
fprintf(stderr, "Saved to: %s\n", filename.c_str());
78+
79+
return 0;
80+
}
+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// cxx-api-examples/matcha-tts-zh-cxx-api.cc
2+
//
3+
// Copyright (c) 2025 Xiaomi Corporation
4+
5+
// This file shows how to use sherpa-onnx CXX API
6+
// for Chinese TTS with MatchaTTS.
7+
//
8+
// clang-format off
9+
/*
10+
Usage
11+
12+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
13+
tar xvf matcha-icefall-zh-baker.tar.bz2
14+
rm matcha-icefall-zh-baker.tar.bz2
15+
16+
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
17+
18+
./matcha-tts-zh-cxx-api
19+
20+
*/
21+
// clang-format on
22+
23+
#include <string>
24+
25+
#include "sherpa-onnx/c-api/cxx-api.h"
26+
27+
// Progress callback handed to OfflineTts::Generate() in main().
//
// It only reports progress on stderr and never asks the generator to stop.
//
// @param samples      Sample buffer supplied by the caller; not read here.
// @param num_samples  Sample count supplied by the caller; not read here.
// @param progress     Progress value; printed multiplied by 100 as a
//                     percentage (presumably in [0, 1] -- confirm against
//                     the API header).
// @param arg          Opaque pointer supplied by the caller; not read here.
// @return 1 to continue generating, 0 to stop generating. This
//         implementation always returns 1.
static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress, void *arg) {
  // The example does not consume the audio chunk or the user pointer;
  // the casts silence unused-parameter warnings.
  (void)samples;
  (void)num_samples;
  (void)arg;

  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);

  return 1;
}
34+
35+
int32_t main(int32_t argc, char *argv[]) {
36+
using namespace sherpa_onnx::cxx; // NOLINT
37+
OfflineTtsConfig config;
38+
config.model.matcha.acoustic_model =
39+
"./matcha-icefall-zh-baker/model-steps-3.onnx";
40+
config.model.matcha.vocoder = "./hifigan_v2.onnx";
41+
config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
42+
config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
43+
config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";
44+
config.model.num_threads = 1;
45+
46+
// If you don't want to see debug messages, please set it to 0
47+
config.model.debug = 1;
48+
49+
// clang-format off
50+
config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"; // NOLINT
51+
// clang-format on
52+
53+
std::string filename = "./generated-matcha-zh-cxx.wav";
54+
std::string text =
55+
"当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
56+
"涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
57+
"受着生命的奇迹与温柔."
58+
"某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
59+
"经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";
60+
61+
auto tts = OfflineTts::Create(config);
62+
int32_t sid = 0;
63+
float speed = 1.0; // larger -> faster in speech speed
64+
65+
#if 0
66+
// If you don't want to use a callback, then please enable this branch
67+
GeneratedAudio audio = tts.Generate(text, sid, speed);
68+
#else
69+
GeneratedAudio audio = tts.Generate(text, sid, speed, ProgressCallback);
70+
#endif
71+
72+
WriteWave(filename, {audio.samples, audio.sample_rate});
73+
74+
fprintf(stderr, "Input text is: %s\n", text.c_str());
75+
fprintf(stderr, "Speaker ID is is: %d\n", sid);
76+
fprintf(stderr, "Saved to: %s\n", filename.c_str());
77+
78+
return 0;
79+
}

sherpa-onnx/c-api/c-api.cc

+4-2
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,7 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
11141114
return tts_config;
11151115
}
11161116

1117-
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
1117+
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
11181118
const SherpaOnnxOfflineTtsConfig *config) {
11191119
auto tts_config = GetOfflineTtsConfig(config);
11201120

@@ -1130,7 +1130,9 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
11301130
return tts;
11311131
}
11321132

1133-
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }
1133+
void SherpaOnnxDestroyOfflineTts(const SherpaOnnxOfflineTts *tts) {
1134+
delete tts;
1135+
}
11341136

11351137
int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
11361138
return tts->impl->SampleRate();

sherpa-onnx/c-api/c-api.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -950,11 +950,12 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
950950

951951
// Create an instance of offline TTS. The user has to use DestroyOfflineTts()
952952
// to free the returned pointer to avoid memory leak.
953-
SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
953+
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
954954
const SherpaOnnxOfflineTtsConfig *config);
955955

956956
// Free the pointer returned by SherpaOnnxCreateOfflineTts()
957-
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
957+
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(
958+
const SherpaOnnxOfflineTts *tts);
958959

959960
// Return the sample rate of the current TTS object
960961
SHERPA_ONNX_API int32_t
@@ -984,7 +985,6 @@ SHERPA_ONNX_API
984985
const SherpaOnnxGeneratedAudio *
985986
SherpaOnnxOfflineTtsGenerateWithProgressCallback(
986987
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
987-
988988
SherpaOnnxGeneratedAudioProgressCallback callback);
989989

990990
SHERPA_ONNX_API

0 commit comments

Comments
 (0)