Skip to content

Commit 76ff8c1

Browse files
authored
Dart API for speaker diarization (k2-fsa#1418)
1 parent c8cba83 commit 76ff8c1

21 files changed

+733
-17
lines changed

.github/scripts/test-dart.sh

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ set -ex
44

55
cd dart-api-examples
66

7+
pushd speaker-diarization
8+
echo '----------speaker diarization----------'
9+
./run.sh
10+
popd
11+
712
pushd speaker-identification
813
echo '----------3d speaker----------'
914
./run-3d-speaker.sh

.github/workflows/test-dart.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ jobs:
114114
cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
115115
cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
116116
cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
117+
cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
117118
118119
cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
119120

dart-api-examples/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
99

1010
| Directory | Description |
1111
|-----------|-------------|
12+
| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
1213
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
1314
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
1415
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting|
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# https://dart.dev/guides/libraries/private-files
2+
# Created by `dart pub`
3+
.dart_tool/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## 1.0.0
2+
3+
- Initial version.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Introduction
2+
3+
This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
4+
5+
# Usage
6+
7+
Please see [./run.sh](./run.sh)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# This file configures the static analysis results for your project (errors,
2+
# warnings, and lints).
3+
#
4+
# This enables the 'recommended' set of lints from `package:lints`.
5+
# This set helps identify many issues that may lead to problems when running
6+
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
7+
# style and format.
8+
#
9+
# If you want a smaller set of lints you can change this to specify
10+
# 'package:lints/core.yaml'. These are just the most critical lints
11+
# (the recommended set includes the core lints).
12+
# The core lints are also what is used by pub.dev for scoring packages.
13+
14+
include: package:lints/recommended.yaml
15+
16+
# Uncomment the following section to specify additional rules.
17+
18+
# linter:
19+
# rules:
20+
# - camel_case_types
21+
22+
# analyzer:
23+
# exclude:
24+
# - path/to/excluded/files/**
25+
26+
# For more information about the core and recommended set of lints, see
27+
# https://dart.dev/go/core-lints
28+
29+
# For additional information about configuring this file, see
30+
# https://dart.dev/guides/language/analysis-options
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../vad/bin/init.dart
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
import 'dart:io';
3+
import 'dart:typed_data';
4+
import 'dart:ffi';
5+
6+
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
7+
import './init.dart';
8+
9+
/// Runs offline speaker diarization on `./0-four-speakers-zh.wav` and prints
/// every resulting segment as `<start> -- <end> speaker_<id>`.
///
/// The model and wave file paths are hard-coded below; [arguments] is unused.
///
/// If a required file is missing, the diarization object cannot be created,
/// or the sample rate of the wave file differs from the expected one, an
/// error is written to stderr and the process exit code is set to 1 so that
/// callers such as `./run.sh` (which uses `set -e`) notice the failure.
Future<void> main(List<String> arguments) async {
  await initSherpaOnnx();

  /* Please use the following commands to download files used in this file

  Step 1: Download a speaker segmentation model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

  Step 2: Download a speaker embedding extractor model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

  Step 3: Download test wave files

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available test wave files. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

  Step 4: Run it

    dart run ./bin/speaker-diarization.dart
  */

  const segmentationModel =
      './sherpa-onnx-pyannote-segmentation-3-0/model.onnx';

  const embeddingModel =
      './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';

  const waveFilename = './0-four-speakers-zh.wav';

  // Fail early with a clear message instead of handing a non-existent path
  // to the native library.
  for (final path in [segmentationModel, embeddingModel, waveFilename]) {
    if (!File(path).existsSync()) {
      stderr.writeln(
          '$path does not exist. Please see ./run.sh for how to download it.');
      exitCode = 1;
      return;
    }
  }

  final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
    pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
        model: segmentationModel),
  );

  final embeddingConfig =
      sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);

  // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
  // numClusters to 4. If you don't know the exact number, please set it to -1.
  // In that case, you have to set threshold. A larger threshold leads to
  // fewer clusters, i.e., fewer speakers.
  final clusteringConfig =
      sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);

  final config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: segmentationConfig,
      embedding: embeddingConfig,
      clustering: clusteringConfig,
      minDurationOn: 0.2,
      minDurationOff: 0.5);

  final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
  if (sd.ptr == nullptr) {
    // A null native pointer means the object could not be created. Report it
    // rather than exiting silently with status 0.
    stderr.writeln(
        'Failed to create the speaker diarization object. Please check the '
        'model files:\n  $segmentationModel\n  $embeddingModel');
    exitCode = 1;
    return;
  }

  final waveData = sherpa_onnx.readWave(waveFilename);
  if (sd.sampleRate != waveData.sampleRate) {
    stderr.writeln(
        'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
    exitCode = 1;
    return;
  }

  print('started');

  // Use the following statement if you don't want to use a callback
  // final segments = sd.process(samples: waveData.samples);

  final segments = sd.processWithCallback(
      samples: waveData.samples,
      callback: (int numProcessedChunk, int numTotalChunks) {
        final progress = 100.0 * numProcessedChunk / numTotalChunks;

        print('Progress ${progress.toStringAsFixed(2)}%');

        // NOTE(review): the meaning of the returned value is defined by the
        // sherpa_onnx API; this example always returns 0 - confirm there.
        return 0;
      });

  for (int i = 0; i < segments.length; ++i) {
    final segment = segments[i];
    final start = segment.start.toStringAsFixed(3);
    final end = segment.end.toStringAsFixed(3);
    print('$start -- $end speaker_${segment.speaker}');
  }

  // NOTE(review): `sd` wraps a native pointer (see `sd.ptr` above). Check
  // whether the API offers an explicit release method that should be called
  // here once the results have been printed.
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name: speaker_diarization
2+
description: >
3+
This example demonstrates how to use the Dart API for speaker diarization.
4+
5+
version: 1.0.0
6+
7+
environment:
8+
sdk: ">=3.0.0 <4.0.0"
9+
10+
dependencies:
11+
sherpa_onnx: ^1.10.27
12+
# sherpa_onnx:
13+
# path: ../../flutter/sherpa_onnx
14+
path: ^1.9.0
15+
16+
dev_dependencies:
17+
lints: ^3.0.0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env bash
#
# Downloads the speaker segmentation model, the speaker embedding extractor
# model and a test wave file (each only if it is not already present), then
# runs the speaker diarization example.

set -ex

dart pub get

# Note on curl flags: -f makes curl exit with an error on HTTP failures
# instead of saving the server's error page under the target filename. Without
# it, a failed download would leave a bogus file behind and the "-f" file
# checks below would skip the download on the next run.

if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dart run ./bin/speaker-diarization.dart

flutter/sherpa_onnx/example/example.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
| Functions | URL | Supported Platforms|
1313
|---|---|---|
14+
|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux|
1415
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux|
1516
|Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
1617
|Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|

flutter/sherpa_onnx/lib/sherpa_onnx.dart

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export 'src/audio_tagging.dart';
66
export 'src/feature_config.dart';
77
export 'src/keyword_spotter.dart';
88
export 'src/offline_recognizer.dart';
9+
export 'src/offline_speaker_diarization.dart';
910
export 'src/offline_stream.dart';
1011
export 'src/online_recognizer.dart';
1112
export 'src/online_stream.dart';

0 commit comments

Comments
 (0)