Skip to content

Commit e52d32b

Browse files
authored
Add TTS API and examples for Dart (#1010)
1 parent e307767 commit e52d32b

20 files changed

+874
-0
lines changed

.github/scripts/test-dart.sh

+16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,22 @@ set -ex
44

55
cd dart-api-examples
66

7+
pushd tts
8+
9+
echo '----------piper tts----------'
10+
./run-piper.sh
11+
rm -rf vits-piper-*
12+
13+
echo '----------coqui tts----------'
14+
./run-coqui.sh
15+
rm -rf vits-coqui-*
16+
17+
echo '----------zh tts----------'
18+
./run-zh.sh
19+
rm -rf sherpa-onnx-*
20+
21+
popd # tts
22+
723
pushd streaming-asr
824

925
echo '----------streaming zipformer ctc HLG----------'

.github/workflows/test-dart.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -92,5 +92,6 @@ jobs:
9292
cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
9393
cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
9494
cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
95+
cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml
9596
9697
.github/scripts/test-dart.sh

dart-api-examples/streaming-asr/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Introduction
22

33
This folder contains examples for streaming ASR with Dart API.
4+
45
| File | Description|
56
|------|------------|
67
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|

dart-api-examples/tts/.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# https://dart.dev/guides/libraries/private-files
2+
# Created by `dart pub`
3+
.dart_tool/

dart-api-examples/tts/CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## 1.0.0
2+
3+
- Initial version.

dart-api-examples/tts/README.md

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Introduction
2+
3+
This folder contains examples for text-to-speech with the Dart API.
4+
5+
| File | Description|
6+
|------|------------|
7+
|[./bin/piper.dart](./bin/piper.dart)| Use a Piper TTS model for text to speech. See [./run-piper.sh](./run-piper.sh)|
8+
|[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui TTS model for text to speech. See [./run-coqui.sh](./run-coqui.sh)|
9+
|[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS TTS model for text to speech. See [./run-zh.sh](./run-zh.sh)|
10+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# This file configures the static analysis results for your project (errors,
2+
# warnings, and lints).
3+
#
4+
# This enables the 'recommended' set of lints from `package:lints`.
5+
# This set helps identify many issues that may lead to problems when running
6+
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
7+
# style and format.
8+
#
9+
# If you want a smaller set of lints you can change this to specify
10+
# 'package:lints/core.yaml'. These are just the most critical lints
11+
# (the recommended set includes the core lints).
12+
# The core lints are also what is used by pub.dev for scoring packages.
13+
14+
include: package:lints/recommended.yaml
15+
16+
# Uncomment the following section to specify additional rules.
17+
18+
# linter:
19+
# rules:
20+
# - camel_case_types
21+
22+
# analyzer:
23+
# exclude:
24+
# - path/to/excluded/files/**
25+
26+
# For more information about the core and recommended set of lints, see
27+
# https://dart.dev/go/core-lints
28+
29+
# For additional information about configuring this file, see
30+
# https://dart.dev/guides/language/analysis-options

dart-api-examples/tts/bin/coqui.dart

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
import 'dart:io';
3+
import 'dart:typed_data';
4+
5+
import 'package:args/args.dart';
6+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
7+
8+
import './init.dart';
9+
10+
void main(List<String> arguments) async {
11+
await initSherpaOnnx();
12+
13+
final parser = ArgParser()
14+
..addOption('model', help: 'Path to the ONNX model')
15+
..addOption('tokens', help: 'Path to tokens.txt')
16+
..addOption('text', help: 'Text to generate TTS for')
17+
..addOption('output-wav', help: 'Filename to save the generated audio')
18+
..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
19+
..addOption(
20+
'sid',
21+
help: 'Speaker ID to select. Used only for multi-speaker TTS',
22+
defaultsTo: '0',
23+
);
24+
final res = parser.parse(arguments);
25+
if (res['model'] == null ||
26+
res['tokens'] == null ||
27+
res['output-wav'] == null ||
28+
res['text'] == null) {
29+
print(parser.usage);
30+
exit(1);
31+
}
32+
final model = res['model'] as String;
33+
final tokens = res['tokens'] as String;
34+
final text = res['text'] as String;
35+
final outputWav = res['output-wav'] as String;
36+
var speed = double.tryParse(res['speed'] as String) ?? 1.0;
37+
final sid = int.tryParse(res['sid'] as String) ?? 0;
38+
39+
if (speed == 0) {
40+
speed = 1.0;
41+
}
42+
43+
final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
44+
model: model,
45+
tokens: tokens,
46+
lengthScale: 1 / speed,
47+
);
48+
49+
final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
50+
vits: vits,
51+
numThreads: 1,
52+
debug: true,
53+
);
54+
final config = sherpa_onnx.OfflineTtsConfig(
55+
model: modelConfig,
56+
maxNumSenetences: 1,
57+
);
58+
59+
final tts = sherpa_onnx.OfflineTts(config);
60+
final audio = tts.generate(text: text, sid: sid, speed: speed);
61+
tts.free();
62+
63+
sherpa_onnx.writeWave(
64+
filename: outputWav,
65+
samples: audio.samples,
66+
sampleRate: audio.sampleRate,
67+
);
68+
print('Saved to ${outputWav}');
69+
}

dart-api-examples/tts/bin/init.dart

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../vad/bin/init.dart

dart-api-examples/tts/bin/piper.dart

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
import 'dart:io';
3+
import 'dart:typed_data';
4+
5+
import 'package:args/args.dart';
6+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
7+
8+
import './init.dart';
9+
10+
void main(List<String> arguments) async {
11+
await initSherpaOnnx();
12+
13+
final parser = ArgParser()
14+
..addOption('model', help: 'Path to the ONNX model')
15+
..addOption('tokens', help: 'Path to tokens.txt')
16+
..addOption('data-dir', help: 'Path to espeak-ng-data directory')
17+
..addOption('text', help: 'Text to generate TTS for')
18+
..addOption('output-wav', help: 'Filename to save the generated audio')
19+
..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
20+
..addOption(
21+
'sid',
22+
help: 'Speaker ID to select. Used only for multi-speaker TTS',
23+
defaultsTo: '0',
24+
);
25+
final res = parser.parse(arguments);
26+
if (res['model'] == null ||
27+
res['tokens'] == null ||
28+
res['data-dir'] == null ||
29+
res['output-wav'] == null ||
30+
res['text'] == null) {
31+
print(parser.usage);
32+
exit(1);
33+
}
34+
final model = res['model'] as String;
35+
final tokens = res['tokens'] as String;
36+
final dataDir = res['data-dir'] as String;
37+
final text = res['text'] as String;
38+
final outputWav = res['output-wav'] as String;
39+
var speed = double.tryParse(res['speed'] as String) ?? 1.0;
40+
final sid = int.tryParse(res['sid'] as String) ?? 0;
41+
42+
if (speed == 0) {
43+
speed = 1.0;
44+
}
45+
46+
final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
47+
model: model,
48+
tokens: tokens,
49+
dataDir: dataDir,
50+
lengthScale: 1 / speed,
51+
);
52+
53+
final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
54+
vits: vits,
55+
numThreads: 1,
56+
debug: true,
57+
);
58+
final config = sherpa_onnx.OfflineTtsConfig(
59+
model: modelConfig,
60+
maxNumSenetences: 1,
61+
);
62+
63+
final tts = sherpa_onnx.OfflineTts(config);
64+
final audio = tts.generateWithCallback(
65+
text: text,
66+
sid: sid,
67+
speed: speed,
68+
callback: (Float32List samples) {
69+
print('${samples.length} samples received');
70+
// You can play samples in a separate thread/isolate
71+
});
72+
tts.free();
73+
74+
sherpa_onnx.writeWave(
75+
filename: outputWav,
76+
samples: audio.samples,
77+
sampleRate: audio.sampleRate,
78+
);
79+
print('Saved to ${outputWav}');
80+
}

dart-api-examples/tts/bin/zh.dart

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
import 'dart:io';
3+
import 'dart:typed_data';
4+
5+
import 'package:args/args.dart';
6+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
7+
8+
import './init.dart';
9+
10+
void main(List<String> arguments) async {
11+
await initSherpaOnnx();
12+
13+
final parser = ArgParser()
14+
..addOption('model', help: 'Path to the ONNX model')
15+
..addOption('tokens', help: 'Path to tokens.txt')
16+
..addOption('lexicon', help: 'Path to lexicon.txt')
17+
..addOption(
18+
'dict-dir',
19+
help: 'Path to jieba dict directory',
20+
defaultsTo: '',
21+
)
22+
..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
23+
..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
24+
..addOption('text', help: 'Text to generate TTS for')
25+
..addOption('output-wav', help: 'Filename to save the generated audio')
26+
..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
27+
..addOption(
28+
'sid',
29+
help: 'Speaker ID to select. Used only for multi-speaker TTS',
30+
defaultsTo: '0',
31+
);
32+
final res = parser.parse(arguments);
33+
if (res['model'] == null ||
34+
res['lexicon'] == null ||
35+
res['tokens'] == null ||
36+
res['output-wav'] == null ||
37+
res['text'] == null) {
38+
print(parser.usage);
39+
exit(1);
40+
}
41+
final model = res['model'] as String;
42+
final lexicon = res['lexicon'] as String;
43+
final tokens = res['tokens'] as String;
44+
final dictDir = res['dict-dir'] as String;
45+
final ruleFsts = res['rule-fsts'] as String;
46+
final ruleFars = res['rule-fars'] as String;
47+
final text = res['text'] as String;
48+
final outputWav = res['output-wav'] as String;
49+
var speed = double.tryParse(res['speed'] as String) ?? 1.0;
50+
final sid = int.tryParse(res['sid'] as String) ?? 0;
51+
52+
if (speed == 0) {
53+
speed = 1.0;
54+
}
55+
56+
final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
57+
model: model,
58+
lexicon: lexicon,
59+
tokens: tokens,
60+
dictDir: dictDir,
61+
lengthScale: 1 / speed,
62+
);
63+
64+
final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
65+
vits: vits,
66+
numThreads: 1,
67+
debug: true,
68+
);
69+
final config = sherpa_onnx.OfflineTtsConfig(
70+
model: modelConfig,
71+
maxNumSenetences: 1,
72+
ruleFsts: ruleFsts,
73+
ruleFars: ruleFars,
74+
);
75+
76+
final tts = sherpa_onnx.OfflineTts(config);
77+
final audio = tts.generate(text: text, sid: sid, speed: speed);
78+
tts.free();
79+
80+
sherpa_onnx.writeWave(
81+
filename: outputWav,
82+
samples: audio.samples,
83+
sampleRate: audio.sampleRate,
84+
);
85+
print('Saved to ${outputWav}');
86+
}

0 commit comments

Comments
 (0)