Skip to content

Commit 36ed364

Browse files
authored
Add Dart API for Moonshine models. (k2-fsa#1481)
1 parent 5f71c63 commit 36ed364

File tree

7 files changed

+302
-1
lines changed

7 files changed

+302
-1
lines changed

.github/scripts/test-dart.sh

+8
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ echo "----zipformer transducer----"
3636
./run-zipformer-transducer.sh
3737
rm -rf sherpa-onnx-*
3838

39+
echo "----moonshine----"
40+
./run-moonshine.sh
41+
rm -rf sherpa-onnx-*
42+
3943
echo "----whisper----"
4044
./run-whisper.sh
4145
rm -rf sherpa-onnx-*
@@ -77,6 +81,10 @@ echo '----------TeleSpeech CTC----------'
7781
./run-telespeech-ctc.sh
7882
rm -rf sherpa-onnx-*
7983

84+
echo '----------moonshine----------'
85+
./run-moonshine.sh
86+
rm -rf sherpa-onnx-*
87+
8088
echo '----------whisper----------'
8189
./run-whisper.sh
8290
rm -rf sherpa-onnx-*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
import 'dart:io';
3+
4+
import 'package:args/args.dart';
5+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
6+
7+
import './init.dart';
8+
9+
/// Transcribes one wave file with a Moonshine model and prints the text.
///
/// Every command-line option is mandatory. If any of them is missing, the
/// parser usage is printed and the process exits with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser();
  parser.addOption('preprocessor',
      help: 'Path to the moonshine preprocessor model');
  parser.addOption('encoder', help: 'Path to the moonshine encoder model');
  parser.addOption('uncached-decoder',
      help: 'Path to moonshine uncached decoder model');
  parser.addOption('cached-decoder',
      help: 'Path to moonshine cached decoder model');
  parser.addOption('tokens', help: 'Path to tokens.txt');
  parser.addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);

  // All options are required; bail out with the usage text otherwise.
  const requiredOptions = [
    'preprocessor',
    'encoder',
    'uncached-decoder',
    'cached-decoder',
    'tokens',
    'input-wav',
  ];
  if (requiredOptions.any((name) => res[name] == null)) {
    print(parser.usage);
    exit(1);
  }

  // Build the recognizer from the nested model configuration.
  final recognizer = sherpa_onnx.OfflineRecognizer(
    sherpa_onnx.OfflineRecognizerConfig(
      model: sherpa_onnx.OfflineModelConfig(
        moonshine: sherpa_onnx.OfflineMoonshineModelConfig(
          preprocessor: res['preprocessor'] as String,
          encoder: res['encoder'] as String,
          uncachedDecoder: res['uncached-decoder'] as String,
          cachedDecoder: res['cached-decoder'] as String,
        ),
        tokens: res['tokens'] as String,
        debug: false,
        numThreads: 1,
      ),
    ),
  );

  // Feed the whole file to a single stream and decode it in one go.
  final wave = sherpa_onnx.readWave(res['input-wav'] as String);
  final stream = recognizer.createStream();
  stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
  recognizer.decode(stream);

  print(recognizer.getResult(stream).text);

  // Release the stream before the recognizer that created it.
  stream.free();
  recognizer.free();
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env bash
#
# Fetches the Moonshine tiny English int8 model (unless it is already
# unpacked in the current directory) and transcribes its bundled test wave
# with ./bin/moonshine.dart.

set -ex

dart pub get

# tokens.txt is used as the marker that the model archive has been unpacked.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  # -f: exit non-zero on an HTTP error instead of saving the server's error
  # page as the archive, so a failed download stops the script right here.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

dart run \
  ./bin/moonshine.dart \
  --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --input-wav ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
import 'dart:io';
3+
import 'dart:typed_data';
4+
5+
import 'package:args/args.dart';
6+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
7+
8+
import './init.dart';
9+
10+
/// Splits a wave file into speech segments with a Silero VAD and transcribes
/// each segment with a Moonshine model.
///
/// Every command-line option is mandatory. If any of them is missing, the
/// parser usage is printed and the process exits with status 1. The process
/// also exits with status 1 when the input wave is not sampled at 16000 Hz.
///
/// One line is printed per detected segment, in the form
/// `<start seconds> -- <end seconds> : <recognized text>`.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
    ..addOption('preprocessor',
        help: 'Path to the moonshine preprocessor model')
    ..addOption('encoder', help: 'Path to the moonshine encoder model')
    ..addOption('uncached-decoder',
        help: 'Path to moonshine uncached decoder model')
    ..addOption('cached-decoder',
        help: 'Path to moonshine cached decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['silero-vad'] == null ||
      res['preprocessor'] == null ||
      res['encoder'] == null ||
      res['uncached-decoder'] == null ||
      res['cached-decoder'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  // create VAD
  final sileroVad = res['silero-vad'] as String;

  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
    model: sileroVad,
    minSilenceDuration: 0.25,
    minSpeechDuration: 0.5,
    maxSpeechDuration: 5.0,
  );

  final vadConfig = sherpa_onnx.VadModelConfig(
    sileroVad: sileroVadConfig,
    numThreads: 1,
    debug: true,
  );

  final vad = sherpa_onnx.VoiceActivityDetector(
      config: vadConfig, bufferSizeInSeconds: 10);

  // create moonshine recognizer
  final preprocessor = res['preprocessor'] as String;
  final encoder = res['encoder'] as String;
  final uncachedDecoder = res['uncached-decoder'] as String;
  final cachedDecoder = res['cached-decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
    preprocessor: preprocessor,
    encoder: encoder,
    uncachedDecoder: uncachedDecoder,
    cachedDecoder: cachedDecoder,
  );
  final modelConfig = sherpa_onnx.OfflineModelConfig(
    moonshine: moonshine,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final waveData = sherpa_onnx.readWave(inputWav);
  if (waveData.sampleRate != 16000) {
    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
    exit(1);
  }

  // Transcribes and prints every segment currently queued in the VAD,
  // popping each one once it has been handled. Shared by the streaming loop
  // and the final drain after flush().
  void decodeQueuedSegments() {
    while (!vad.isEmpty()) {
      // Fetch the front segment once and reuse it for samples and start.
      final segment = vad.front();
      final samples = segment.samples;
      final startTime = segment.start.toDouble() / waveData.sampleRate;
      final endTime =
          startTime + samples.length.toDouble() / waveData.sampleRate;

      // Each segment is decoded on its own short-lived stream.
      final stream = recognizer.createStream();
      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      stream.free();
      print(
          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

      vad.pop();
    }
  }

  // Feed the audio to the VAD one full window at a time. Integer division
  // means a trailing partial window is not passed to acceptWaveform().
  final windowSize = vadConfig.sileroVad.windowSize;
  final numIter = waveData.samples.length ~/ windowSize;

  for (var i = 0; i != numIter; ++i) {
    final start = i * windowSize;
    vad.acceptWaveform(Float32List.sublistView(
        waveData.samples, start, start + windowSize));

    decodeQueuedSegments();
  }

  // Flush the VAD, then handle whatever is queued afterwards.
  vad.flush();
  decodeQueuedSegments();

  vad.free();

  recognizer.free();
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env bash
#
# Fetches the Moonshine tiny English int8 model, the Obama.wav sample and the
# Silero VAD model (each only if missing from the current directory), then
# runs ./bin/moonshine.dart on the sample with VAD-based segmentation.

set -ex

dart pub get

# tokens.txt is used as the marker that the model archive has been unpacked.
if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
  # -f: exit non-zero on an HTTP error instead of saving the server's error
  # page as the archive.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# For the two plain-file downloads below, -f matters even more: without it a
# saved error page would satisfy the existence check on the next run.
if [ ! -f ./Obama.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
fi

if [ ! -f ./silero_vad.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

dart run \
  ./bin/moonshine.dart \
  --silero-vad ./silero_vad.onnx \
  --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --input-wav ./Obama.wav

flutter/sherpa_onnx/lib/src/offline_recognizer.dart

+34-1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,24 @@ class OfflineWhisperModelConfig {
6868
final int tailPaddings;
6969
}
7070

71+
/// File paths of the models that make up a Moonshine offline recognizer.
///
/// Every path defaults to the empty string.
class OfflineMoonshineModelConfig {
  const OfflineMoonshineModelConfig({
    this.preprocessor = '',
    this.encoder = '',
    this.uncachedDecoder = '',
    this.cachedDecoder = '',
  });

  /// Path to the preprocessor model.
  final String preprocessor;

  /// Path to the encoder model.
  final String encoder;

  /// Path to the uncached decoder model.
  final String uncachedDecoder;

  /// Path to the cached decoder model.
  final String cachedDecoder;

  @override
  String toString() => 'OfflineMoonshineModelConfig('
      'preprocessor: $preprocessor, '
      'encoder: $encoder, '
      'uncachedDecoder: $uncachedDecoder, '
      'cachedDecoder: $cachedDecoder)';
}
88+
7189
class OfflineTdnnModelConfig {
7290
const OfflineTdnnModelConfig({this.model = ''});
7391

@@ -116,6 +134,7 @@ class OfflineModelConfig {
116134
this.whisper = const OfflineWhisperModelConfig(),
117135
this.tdnn = const OfflineTdnnModelConfig(),
118136
this.senseVoice = const OfflineSenseVoiceModelConfig(),
137+
this.moonshine = const OfflineMoonshineModelConfig(),
119138
required this.tokens,
120139
this.numThreads = 1,
121140
this.debug = true,
@@ -128,7 +147,7 @@ class OfflineModelConfig {
128147

129148
@override
130149
String toString() {
131-
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
150+
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
132151
}
133152

134153
final OfflineTransducerModelConfig transducer;
@@ -137,6 +156,7 @@ class OfflineModelConfig {
137156
final OfflineWhisperModelConfig whisper;
138157
final OfflineTdnnModelConfig tdnn;
139158
final OfflineSenseVoiceModelConfig senseVoice;
159+
final OfflineMoonshineModelConfig moonshine;
140160

141161
final String tokens;
142162
final int numThreads;
@@ -257,6 +277,15 @@ class OfflineRecognizer {
257277
c.ref.model.senseVoice.useInverseTextNormalization =
258278
config.model.senseVoice.useInverseTextNormalization ? 1 : 0;
259279

280+
c.ref.model.moonshine.preprocessor =
281+
config.model.moonshine.preprocessor.toNativeUtf8();
282+
c.ref.model.moonshine.encoder =
283+
config.model.moonshine.encoder.toNativeUtf8();
284+
c.ref.model.moonshine.uncachedDecoder =
285+
config.model.moonshine.uncachedDecoder.toNativeUtf8();
286+
c.ref.model.moonshine.cachedDecoder =
287+
config.model.moonshine.cachedDecoder.toNativeUtf8();
288+
260289
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
261290

262291
c.ref.model.numThreads = config.model.numThreads;
@@ -294,6 +323,10 @@ class OfflineRecognizer {
294323
calloc.free(c.ref.model.modelType);
295324
calloc.free(c.ref.model.provider);
296325
calloc.free(c.ref.model.tokens);
326+
calloc.free(c.ref.model.moonshine.cachedDecoder);
327+
calloc.free(c.ref.model.moonshine.uncachedDecoder);
328+
calloc.free(c.ref.model.moonshine.encoder);
329+
calloc.free(c.ref.model.moonshine.preprocessor);
297330
calloc.free(c.ref.model.senseVoice.language);
298331
calloc.free(c.ref.model.senseVoice.model);
299332
calloc.free(c.ref.model.tdnn.model);

flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart

+8
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,13 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
194194
external int tailPaddings;
195195
}
196196

197+
/// FFI struct carrying the four Moonshine model paths as pointers to
/// native UTF-8 strings.
///
/// NOTE(review): the field order presumably has to match the corresponding
/// native struct declaration; confirm against the C API header before
/// reordering or inserting fields.
final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
198+
external Pointer<Utf8> preprocessor;
199+
external Pointer<Utf8> encoder;
200+
external Pointer<Utf8> uncachedDecoder;
201+
external Pointer<Utf8> cachedDecoder;
202+
}
203+
197204
final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
198205
external Pointer<Utf8> model;
199206
}
@@ -236,6 +243,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
236243
external Pointer<Utf8> telespeechCtc;
237244

238245
external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
246+
external SherpaOnnxOfflineMoonshineModelConfig moonshine;
239247
}
240248

241249
final class SherpaOnnxOfflineRecognizerConfig extends Struct {

0 commit comments

Comments
 (0)