Skip to content

Commit 4335e2a

Browse files
authored
Add Dart API for Kokoro TTS models (#1723)
1 parent 2086f8c commit 4335e2a

File tree

5 files changed

+162
-1
lines changed

5 files changed

+162
-1
lines changed

.github/scripts/test-dart.sh

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ cd dart-api-examples
77
pushd tts
88

99
# Kokoro gets its own banner so its output is not attributed to matcha in the
# CI log.
echo '----------kokoro tts----------'
./run-kokoro-en.sh

echo '----------matcha tts----------'
./run-matcha-zh.sh
./run-matcha-en.sh
ls -lh *.wav
+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright (c) 2025 Xiaomi Corporation
2+
import 'dart:io';
3+
4+
import 'package:args/args.dart';
5+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
6+
7+
import './init.dart';
8+
9+
/// Command-line example that synthesizes speech with a Kokoro TTS model.
///
/// Reads the model paths and synthesis options from [arguments], generates
/// audio for `--text` and writes it to the file named by `--output-wav`.
/// Prints the usage text and exits with status 1 when `--model`, `--voices`,
/// `--tokens`, `--output-wav` or `--text` is missing.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the onnx model')
    ..addOption('voices', help: 'Path to the voices.bin')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption(
      'data-dir',
      help: 'Path to espeak-ng-data directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);

  // NOTE(review): 'data-dir' is declared with `defaultsTo: ''`, so its null
  // check is not expected to fire; it is kept so the set of checked options
  // stays unchanged.
  if (res['model'] == null ||
      res['voices'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final voices = res['voices'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;

  // Unparsable numbers fall back to the option defaults.
  var speed = double.tryParse(res['speed'] as String) ?? 1.0;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // `1 / speed` is used as the length scale below. A zero, negative or
  // non-finite speed would turn it into infinity, a negative number or NaN,
  // so fall back to normal speed in all of those cases.
  if (!speed.isFinite || speed <= 0) {
    speed = 1.0;
  }

  final kokoro = sherpa_onnx.OfflineTtsKokoroModelConfig(
    model: model,
    voices: voices,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    kokoro: kokoro,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final audio = tts.generate(text: text, sid: sid, speed: speed);
  // The synthesizer is released as soon as the audio has been generated.
  tts.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
#
# Fetches the kokoro-en-v0_19 model when it is not already present and runs
# the Dart Kokoro TTS example on a sample sentence.

set -ex

dart pub get

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
# to download more models
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

# The last argument carries no trailing backslash, so the command ends here
# regardless of what follows it.
dart run \
  ./bin/kokoro-en.dart \
  --model ./kokoro-en-v0_19/model.onnx \
  --voices ./kokoro-en-v0_19/voices.bin \
  --tokens ./kokoro-en-v0_19/tokens.txt \
  --data-dir ./kokoro-en-v0_19/espeak-ng-data \
  --sid 9 \
  --speed 1.0 \
  --output-wav kokoro-en-9.wav \
  --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

ls -lh *.wav

flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart

+11
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,16 @@ final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct {
147147
external Pointer<Utf8> dictDir;
148148
}
149149

150+
/// FFI struct holding the Kokoro TTS model configuration.
///
/// The declaration order of the fields defines the native memory layout, so
/// it must not be rearranged.
/// NOTE(review): presumably mirrors the C struct of the same name — confirm
/// against the C API header.
final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
  /// Native UTF-8 string for the model setting.
  external Pointer<Utf8> model;

  /// Native UTF-8 string for the voices setting.
  external Pointer<Utf8> voices;

  /// Native UTF-8 string for the tokens setting.
  external Pointer<Utf8> tokens;

  /// Native UTF-8 string for the data directory setting.
  external Pointer<Utf8> dataDir;

  /// Length scale, stored as a native `float`.
  @Float() external double lengthScale;
}
159+
150160
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
151161
external SherpaOnnxOfflineTtsVitsModelConfig vits;
152162
@Int32()
@@ -157,6 +167,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct {
157167

158168
external Pointer<Utf8> provider;
159169
external SherpaOnnxOfflineTtsMatchaModelConfig matcha;
170+
external SherpaOnnxOfflineTtsKokoroModelConfig kokoro;
160171
}
161172

162173
final class SherpaOnnxOfflineTtsConfig extends Struct {

flutter/sherpa_onnx/lib/src/tts.dart

+37-1
Original file line numberDiff line numberDiff line change
@@ -60,22 +60,45 @@ class OfflineTtsMatchaModelConfig {
6060
final String dictDir;
6161
}
6262

63+
/// Immutable settings for a Kokoro text-to-speech model.
///
/// All string fields default to the empty string and [lengthScale] defaults
/// to `1.0`.
class OfflineTtsKokoroModelConfig {
  const OfflineTtsKokoroModelConfig({
    this.model = '',
    this.voices = '',
    this.tokens = '',
    this.dataDir = '',
    this.lengthScale = 1.0,
  });

  /// The model setting; empty by default.
  final String model;

  /// The voices setting; empty by default.
  final String voices;

  /// The tokens setting; empty by default.
  final String tokens;

  /// The data directory setting; empty by default.
  final String dataDir;

  /// The length scale; `1.0` by default.
  final double lengthScale;

  /// Returns a single-line description listing every field and its value.
  @override
  String toString() =>
      'OfflineTtsKokoroModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)';
}
83+
6384
class OfflineTtsModelConfig {
6485
const OfflineTtsModelConfig({
6586
this.vits = const OfflineTtsVitsModelConfig(),
6687
this.matcha = const OfflineTtsMatchaModelConfig(),
88+
this.kokoro = const OfflineTtsKokoroModelConfig(),
6789
this.numThreads = 1,
6890
this.debug = true,
6991
this.provider = 'cpu',
7092
});
7193

7294
@override
7395
String toString() {
74-
return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, numThreads: $numThreads, debug: $debug, provider: $provider)';
96+
return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, numThreads: $numThreads, debug: $debug, provider: $provider)';
7597
}
7698

7799
final OfflineTtsVitsModelConfig vits;
78100
final OfflineTtsMatchaModelConfig matcha;
101+
final OfflineTtsKokoroModelConfig kokoro;
79102
final int numThreads;
80103
final bool debug;
81104
final String provider;
@@ -138,6 +161,12 @@ class OfflineTts {
138161
c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale;
139162
c.ref.model.matcha.dictDir = config.model.matcha.dictDir.toNativeUtf8();
140163

164+
c.ref.model.kokoro.model = config.model.kokoro.model.toNativeUtf8();
165+
c.ref.model.kokoro.voices = config.model.kokoro.voices.toNativeUtf8();
166+
c.ref.model.kokoro.tokens = config.model.kokoro.tokens.toNativeUtf8();
167+
c.ref.model.kokoro.dataDir = config.model.kokoro.dataDir.toNativeUtf8();
168+
c.ref.model.kokoro.lengthScale = config.model.kokoro.lengthScale;
169+
141170
c.ref.model.numThreads = config.model.numThreads;
142171
c.ref.model.debug = config.model.debug ? 1 : 0;
143172
c.ref.model.provider = config.model.provider.toNativeUtf8();
@@ -151,12 +180,19 @@ class OfflineTts {
151180
calloc.free(c.ref.ruleFars);
152181
calloc.free(c.ref.ruleFsts);
153182
calloc.free(c.ref.model.provider);
183+
184+
calloc.free(c.ref.model.kokoro.dataDir);
185+
calloc.free(c.ref.model.kokoro.tokens);
186+
calloc.free(c.ref.model.kokoro.voices);
187+
calloc.free(c.ref.model.kokoro.model);
188+
154189
calloc.free(c.ref.model.matcha.dictDir);
155190
calloc.free(c.ref.model.matcha.dataDir);
156191
calloc.free(c.ref.model.matcha.tokens);
157192
calloc.free(c.ref.model.matcha.lexicon);
158193
calloc.free(c.ref.model.matcha.vocoder);
159194
calloc.free(c.ref.model.matcha.acousticModel);
195+
160196
calloc.free(c.ref.model.vits.dictDir);
161197
calloc.free(c.ref.model.vits.dataDir);
162198
calloc.free(c.ref.model.vits.tokens);

0 commit comments

Comments
 (0)