Skip to content

Commit b2ba12e

Browse files
committed
Add Java API for Matcha-TTS models.
1 parent f457bae commit b2ba12e

11 files changed

+359
-2
lines changed

.github/workflows/run-java-test.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,13 @@ jobs:
235235
shell: bash
236236
run: |
237237
cd ./java-api-examples
238+
239+
./run-non-streaming-tts-matcha-zh.sh
240+
./run-non-streaming-tts-matcha-en.sh
241+
242+
rm -rf matcha-icefall-*
243+
rm hifigan_v2.onnx
244+
238245
./run-non-streaming-tts-piper-en.sh
239246
rm -rf vits-piper-*
240247
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
// This file shows how to use a matcha English model
4+
// to convert text to speech
5+
import com.k2fsa.sherpa.onnx.*;
6+
7+
public class NonStreamingTtsMatchaEn {
8+
public static void main(String[] args) {
9+
// please visit
10+
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
11+
// to download model files
12+
String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
13+
String vocoder = "./hifigan_v2.onnx";
14+
String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
15+
String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data";
16+
String text =
17+
"Today as always, men fall into two groups: slaves and free men. Whoever does not have"
18+
+ " two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a"
19+
+ " businessman, an official, or a scholar.";
20+
21+
OfflineTtsMatchaModelConfig matchaModelConfig =
22+
OfflineTtsMatchaModelConfig.builder()
23+
.setAcousticModel(acousticModel)
24+
.setVocoder(vocoder)
25+
.setTokens(tokens)
26+
.setDataDir(dataDir)
27+
.build();
28+
29+
OfflineTtsModelConfig modelConfig =
30+
OfflineTtsModelConfig.builder()
31+
.setMatcha(matchaModelConfig)
32+
.setNumThreads(1)
33+
.setDebug(true)
34+
.build();
35+
36+
OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
37+
OfflineTts tts = new OfflineTts(config);
38+
39+
int sid = 0;
40+
float speed = 1.0f;
41+
long start = System.currentTimeMillis();
42+
GeneratedAudio audio = tts.generate(text, sid, speed);
43+
long stop = System.currentTimeMillis();
44+
45+
float timeElapsedSeconds = (stop - start) / 1000.0f;
46+
47+
float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
48+
float real_time_factor = timeElapsedSeconds / audioDuration;
49+
50+
String waveFilename = "tts-matcha-en.wav";
51+
audio.save(waveFilename);
52+
System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
53+
System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds);
54+
System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor);
55+
System.out.printf("-- text: %s\n", text);
56+
System.out.printf("-- Saved to %s\n", waveFilename);
57+
58+
tts.release();
59+
}
60+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
// This file shows how to use a matcha Chinese TTS model
4+
// to convert text to speech
5+
import com.k2fsa.sherpa.onnx.*;
6+
7+
public class NonStreamingTtsMatchaZh {
8+
public static void main(String[] args) {
9+
// please visit
10+
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
11+
// to download model files
12+
String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
13+
String vocoder = "./hifigan_v2.onnx";
14+
String tokens = "./matcha-icefall-zh-baker/tokens.txt";
15+
String lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
16+
String dictDir = "./matcha-icefall-zh-baker/dict";
17+
String ruleFsts =
18+
"./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
19+
String text =
20+
"某某银行的副行长和一些行政领导表示,他们去过长江"
21+
+ "和长白山; 经济不断增长。"
22+
+ "2024年12月31号,拨打110或者18920240511。"
23+
+ "123456块钱。";
24+
25+
OfflineTtsMatchaModelConfig matchaModelConfig =
26+
OfflineTtsMatchaModelConfig.builder()
27+
.setAcousticModel(acousticModel)
28+
.setVocoder(vocoder)
29+
.setTokens(tokens)
30+
.setLexicon(lexicon)
31+
.setDictDir(dictDir)
32+
.build();
33+
34+
OfflineTtsModelConfig modelConfig =
35+
OfflineTtsModelConfig.builder()
36+
.setMatcha(matchaModelConfig)
37+
.setNumThreads(1)
38+
.setDebug(true)
39+
.build();
40+
41+
OfflineTtsConfig config =
42+
OfflineTtsConfig.builder().setModel(modelConfig).setRuleFsts(ruleFsts).build();
43+
OfflineTts tts = new OfflineTts(config);
44+
45+
int sid = 0;
46+
float speed = 1.0f;
47+
long start = System.currentTimeMillis();
48+
GeneratedAudio audio = tts.generate(text, sid, speed);
49+
long stop = System.currentTimeMillis();
50+
51+
float timeElapsedSeconds = (stop - start) / 1000.0f;
52+
53+
float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
54+
float real_time_factor = timeElapsedSeconds / audioDuration;
55+
56+
String waveFilename = "tts-matcha-zh.wav";
57+
audio.save(waveFilename);
58+
System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
59+
System.out.printf("-- audio duration: %.3f seconds\n", timeElapsedSeconds);
60+
System.out.printf("-- real-time factor (RTF): %.3f\n", real_time_factor);
61+
System.out.printf("-- text: %s\n", text);
62+
System.out.printf("-- Saved to %s\n", waveFilename);
63+
64+
tts.release();
65+
}
66+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/Users/fangjun/open-source/icefall-models/matcha-icefall-en_US-ljspeech
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/usr/bin/env bash
#
# Runs NonStreamingTtsMatchaEn.java.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build/lib
#   2. build ../sherpa-onnx/java-api/build/sherpa-onnx.jar
#   3. download the English matcha model and the vocoder
# Must be run from the directory that contains this script, since all paths
# are relative to the current working directory.

set -ex

# Build the native library only if neither the macOS (.dylib) nor the
# Linux (.so) artifact is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# $PWD is quoted so a checkout path containing spaces is not word-split
# into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsMatchaEn.java
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
#
# Runs NonStreamingTtsMatchaZh.java.
#
# Steps, each skipped when its output already exists:
#   1. build the JNI shared library under ../build/lib
#   2. build ../sherpa-onnx/java-api/build/sherpa-onnx.jar
#   3. download the Chinese matcha model and the vocoder
# Must be run from the directory that contains this script, since all paths
# are relative to the current working directory.

set -ex

# Build the native library only if neither the macOS (.dylib) nor the
# Linux (.so) artifact is present.
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  mkdir -p ../build
  pushd ../build
  cmake \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    -DSHERPA_ONNX_ENABLE_JNI=ON \
    ..

  make -j4
  ls -lh lib
  popd
fi

if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  pushd ../sherpa-onnx/java-api
  make
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# $PWD is quoted so a checkout path containing spaces is not word-split
# into several arguments.
java \
  -Djava.library.path="$PWD/../build/lib" \
  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  NonStreamingTtsMatchaZh.java

sherpa-onnx/csrc/piper-phonemize-lexicon.cc

+5
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,11 @@ template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
366366
#endif
367367

368368
#if __OHOS__
369+
template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
370+
NativeResourceManager *mgr, const std::string &tokens,
371+
const std::string &data_dir,
372+
const OfflineTtsVitsModelMetaData &vits_meta_data);
373+
369374
template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
370375
NativeResourceManager *mgr, const std::string &tokens,
371376
const std::string &data_dir,

sherpa-onnx/java-api/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ java_files += OfflineRecognizerResult.java
3535
java_files += OfflineStream.java
3636
java_files += OfflineRecognizer.java
3737

38+
java_files += OfflineTtsMatchaModelConfig.java
3839
java_files += OfflineTtsVitsModelConfig.java
3940
java_files += OfflineTtsModelConfig.java
4041
java_files += OfflineTtsConfig.java
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Copyright 2025 Xiaomi Corporation
2+
3+
package com.k2fsa.sherpa.onnx;
4+
5+
/**
 * Immutable set of options for a Matcha TTS model.
 *
 * <p>Instances are created through {@link #builder()}. Every string option defaults to the empty
 * string and both scale options default to {@code 1.0f}.
 */
public class OfflineTtsMatchaModelConfig {
  // NOTE(review): these field names may be looked up by name from outside this class (for
  // example from native code) -- confirm before renaming any of them.
  private final String acousticModel;
  private final String vocoder;
  private final String lexicon;
  private final String tokens;
  private final String dataDir;
  private final String dictDir;
  private final float noiseScale;
  private final float lengthScale;

  /** Returns a new builder pre-populated with the default values. */
  public static Builder builder() {
    return new Builder();
  }

  /** Copies a snapshot of every option out of the given builder. */
  private OfflineTtsMatchaModelConfig(Builder b) {
    acousticModel = b.acousticModel;
    vocoder = b.vocoder;
    lexicon = b.lexicon;
    tokens = b.tokens;
    dataDir = b.dataDir;
    dictDir = b.dictDir;
    noiseScale = b.noiseScale;
    lengthScale = b.lengthScale;
  }

  /** Returns the configured acoustic model value. */
  public String getAcousticModel() {
    return acousticModel;
  }

  /** Returns the configured vocoder value. */
  public String getVocoder() {
    return vocoder;
  }

  /** Returns the configured lexicon value. */
  public String getLexicon() {
    return lexicon;
  }

  /** Returns the configured tokens value. */
  public String getTokens() {
    return tokens;
  }

  /** Returns the configured data directory value. */
  public String getDataDir() {
    return dataDir;
  }

  /** Returns the configured dictionary directory value. */
  public String getDictDir() {
    return dictDir;
  }

  /** Returns the configured length scale. */
  public float getLengthScale() {
    return lengthScale;
  }

  /** Returns the configured noise scale. */
  public float getNoiseScale() {
    return noiseScale;
  }

  /**
   * Fluent builder for {@link OfflineTtsMatchaModelConfig}.
   *
   * <p>Each setter stores its argument as-is and returns this builder so calls can be chained.
   */
  public static class Builder {
    private String acousticModel = "";
    private String vocoder = "";
    private String lexicon = "";
    private String tokens = "";
    private String dataDir = "";
    private String dictDir = "";
    private float noiseScale = 1.0f;
    private float lengthScale = 1.0f;

    /** Sets the acoustic model value. */
    public Builder setAcousticModel(String value) {
      acousticModel = value;
      return this;
    }

    /** Sets the vocoder value. */
    public Builder setVocoder(String value) {
      vocoder = value;
      return this;
    }

    /** Sets the tokens value. */
    public Builder setTokens(String value) {
      tokens = value;
      return this;
    }

    /** Sets the lexicon value. */
    public Builder setLexicon(String value) {
      lexicon = value;
      return this;
    }

    /** Sets the data directory value. */
    public Builder setDataDir(String value) {
      dataDir = value;
      return this;
    }

    /** Sets the dictionary directory value. */
    public Builder setDictDir(String value) {
      dictDir = value;
      return this;
    }

    /** Sets the noise scale. */
    public Builder setNoiseScale(float value) {
      noiseScale = value;
      return this;
    }

    /** Sets the length scale. */
    public Builder setLengthScale(float value) {
      lengthScale = value;
      return this;
    }

    /** Creates a config holding the values currently stored in this builder. */
    public OfflineTtsMatchaModelConfig build() {
      return new OfflineTtsMatchaModelConfig(this);
    }
  }
}

0 commit comments

Comments
 (0)