Skip to content

Commit 03c3c84

Browse files
authored
Add streaming ASR examples for Dart API (k2-fsa#1009)
1 parent 56ad7fc commit 03c3c84

30 files changed

+1021
-2
lines changed

.github/scripts/test-dart.sh

+25-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,30 @@ set -ex
44

55
cd dart-api-examples
66

7+
pushd streaming-asr
8+
9+
echo '----------streaming zipformer ctc HLG----------'
10+
./run-zipformer-ctc-hlg.sh
11+
rm -rf sherpa-onnx-*
12+
13+
echo '----------streaming zipformer ctc----------'
14+
./run-zipformer-ctc.sh
15+
rm -rf sherpa-onnx-*
16+
17+
echo '----------streaming zipformer transducer----------'
18+
./run-zipformer-transducer.sh
19+
rm -rf sherpa-onnx-*
20+
21+
echo '----------streaming NeMo transducer----------'
22+
./run-nemo-transducer.sh
23+
rm -rf sherpa-onnx-*
24+
25+
echo '----------streaming paraformer----------'
26+
./run-paraformer.sh
27+
rm -rf sherpa-onnx-*
28+
29+
popd # streaming-asr
30+
731
pushd non-streaming-asr
832

933
echo '----------VAD with paraformer----------'
@@ -34,7 +58,7 @@ echo '----------zipformer transducer----------'
3458
./run-zipformer-transducer.sh
3559
rm -rf sherpa-onnx-*
3660

37-
popd
61+
popd # non-streaming-asr
3862

3963
pushd vad
4064
./run.sh

.github/workflows/test-dart.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ on:
66
- master
77
paths:
88
- '.github/workflows/test-dart.yaml'
9+
- '.github/scripts/test-dart.sh'
910
- 'dart-api-examples/**'
1011
pull_request:
1112
branches:
1213
- master
1314
paths:
1415
- '.github/workflows/test-dart.yaml'
16+
- '.github/scripts/test-dart.sh'
1517
- 'dart-api-examples/**'
1618

1719
workflow_dispatch:
@@ -89,5 +91,6 @@ jobs:
8991
run: |
9092
cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
9193
cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
94+
cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
9295
9396
.github/scripts/test-dart.sh

dart-api-examples/non-streaming-asr/bin/nemo-ctc.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

dart-api-examples/non-streaming-asr/bin/nemo-transducer.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

dart-api-examples/non-streaming-asr/bin/paraformer.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

dart-api-examples/non-streaming-asr/bin/telespeech-ctc.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

dart-api-examples/non-streaming-asr/bin/whisper.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

dart-api-examples/non-streaming-asr/bin/zipformer-transducer.dart

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
12
import 'dart:io';
23
import 'dart:typed_data';
34

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# https://dart.dev/guides/libraries/private-files
2+
# Created by `dart pub`
3+
.dart_tool/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## 1.0.0
2+
3+
- Initial version.
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Introduction
2+
3+
This folder contains examples for streaming ASR with Dart API.

| File | Description |
5+
|------|------------|
6+
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
7+
|[./bin/paraformer.dart](./bin/paraformer.dart)| Use a Paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
8+
|[./bin/zipformer-ctc-hlg.dart](./bin/zipformer-ctc-hlg.dart)| Use a Zipformer CTC model with HLG graph for speech recognition. See [./run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|
9+
|[./bin/zipformer-ctc.dart](./bin/zipformer-ctc.dart)| Use a Zipformer CTC model for speech recognition. See [./run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|
10+
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a Zipformer transducer model for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|
11+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# This file configures the static analysis results for your project (errors,
2+
# warnings, and lints).
3+
#
4+
# This enables the 'recommended' set of lints from `package:lints`.
5+
# This set helps identify many issues that may lead to problems when running
6+
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
7+
# style and format.
8+
#
9+
# If you want a smaller set of lints you can change this to specify
10+
# 'package:lints/core.yaml'. These are just the most critical lints
11+
# (the recommended set includes the core lints).
12+
# The core lints are also what is used by pub.dev for scoring packages.
13+
14+
include: package:lints/recommended.yaml
15+
16+
# Uncomment the following section to specify additional rules.
17+
18+
# linter:
19+
# rules:
20+
# - camel_case_types
21+
22+
# analyzer:
23+
# exclude:
24+
# - path/to/excluded/files/**
25+
26+
# For more information about the core and recommended set of lints, see
27+
# https://dart.dev/go/core-lints
28+
29+
# For additional information about configuring this file, see
30+
# https://dart.dev/guides/language/analysis-options
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../vad/bin/init.dart
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
zipformer-transducer.dart
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

// Streaming speech recognition with an online Paraformer model.
//
// The input wave file is fed to the recognizer in small chunks to
// simulate a real-time audio stream; every changed partial result is
// printed as it becomes available.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the encoder model')
    ..addOption('decoder', help: 'Path to decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final options = parser.parse(arguments);

  // All four options are mandatory; show the usage string and quit otherwise.
  final anyMissing = ['encoder', 'decoder', 'tokens', 'input-wav']
      .any((name) => options[name] == null);
  if (anyMissing) {
    print(parser.usage);
    exit(1);
  }

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    paraformer: sherpa_onnx.OnlineParaformerModelConfig(
      encoder: options['encoder'] as String,
      decoder: options['decoder'] as String,
    ),
    tokens: options['tokens'] as String,
    debug: true,
    numThreads: 1,
  );
  final recognizer = sherpa_onnx.OnlineRecognizer(
    sherpa_onnx.OnlineRecognizerConfig(model: modelConfig),
  );

  final wave = sherpa_onnx.readWave(options['input-wav'] as String);
  final stream = recognizer.createStream();

  // Simulate streaming. Any chunk size works — even a single sample.
  const samplesPerChunk = 1600; // 0.1 second for 16kHz
  final totalChunks = wave.samples.length ~/ samplesPerChunk;

  var previous = '';
  for (var chunk = 0; chunk != totalChunks; ++chunk) {
    final begin = chunk * samplesPerChunk;
    stream.acceptWaveform(
      samples: Float32List.sublistView(
          wave.samples, begin, begin + samplesPerChunk),
      sampleRate: wave.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final partial = recognizer.getResult(stream);
    // Print only when the partial transcript actually changed.
    if (partial.text != previous && partial.text != '') {
      previous = partial.text;
      print(previous);
    }
  }

  // Flush the model with 0.5 seconds of silence (assumes 16kHz audio).
  stream.acceptWaveform(
    samples: Float32List(8000),
    sampleRate: wave.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final lastResult = recognizer.getResult(stream);

  if (lastResult.text != '') {
    print(lastResult.text);
  }

  stream.free();
  recognizer.free();
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

// Streaming speech recognition with an online Zipformer2 CTC model
// decoded against an HLG graph.
//
// The input wave file is fed to the recognizer in small chunks to
// simulate a real-time audio stream; every changed partial result is
// printed as it becomes available.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the model')
    ..addOption('hlg', help: 'Path to HLG.fst')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final options = parser.parse(arguments);

  // All four options are mandatory; show the usage string and quit otherwise.
  final anyMissing = ['model', 'hlg', 'tokens', 'input-wav']
      .any((name) => options[name] == null);
  if (anyMissing) {
    print(parser.usage);
    exit(1);
  }

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    zipformer2Ctc: sherpa_onnx.OnlineZipformer2CtcModelConfig(
      model: options['model'] as String,
    ),
    tokens: options['tokens'] as String,
    debug: true,
    numThreads: 1,
  );
  final recognizer = sherpa_onnx.OnlineRecognizer(
    sherpa_onnx.OnlineRecognizerConfig(
      model: modelConfig,
      // The HLG graph drives CTC FST decoding.
      ctcFstDecoderConfig: sherpa_onnx.OnlineCtcFstDecoderConfig(
        graph: options['hlg'] as String,
      ),
    ),
  );

  final wave = sherpa_onnx.readWave(options['input-wav'] as String);
  final stream = recognizer.createStream();

  // Simulate streaming. Any chunk size works — even a single sample.
  const samplesPerChunk = 1600; // 0.1 second for 16kHz
  final totalChunks = wave.samples.length ~/ samplesPerChunk;

  var previous = '';
  for (var chunk = 0; chunk != totalChunks; ++chunk) {
    final begin = chunk * samplesPerChunk;
    stream.acceptWaveform(
      samples: Float32List.sublistView(
          wave.samples, begin, begin + samplesPerChunk),
      sampleRate: wave.sampleRate,
    );
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }
    final partial = recognizer.getResult(stream);
    // Print only when the partial transcript actually changed.
    if (partial.text != previous && partial.text != '') {
      previous = partial.text;
      print(previous);
    }
  }

  // Flush the model with 0.5 seconds of silence (assumes 16kHz audio).
  stream.acceptWaveform(
    samples: Float32List(8000),
    sampleRate: wave.sampleRate,
  );

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  final lastResult = recognizer.getResult(stream);

  if (lastResult.text != '') {
    print(lastResult.text);
  }

  stream.free();
  recognizer.free();
}

0 commit comments

Comments
 (0)