Skip to content

Commit c2cc9de

Browse files
authored
Add Flush to VAD so that the last segment can be detected. (#1099)
1 parent 3e4307e commit c2cc9de

File tree

35 files changed

+237
-29
lines changed

35 files changed

+237
-29
lines changed

.github/workflows/dot-net.yaml

+8-6
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,6 @@ jobs:
5252
cmake --build . --target install --config Release
5353
rm -rf install/pkgconfig
5454
55-
- uses: actions/upload-artifact@v4
56-
with:
57-
name: windows-${{ matrix.arch }}
58-
path: ./build/install/lib/
59-
6055
- name: Create tar file
6156
shell: bash
6257
run: |
@@ -72,6 +67,11 @@ jobs:
7267
ls -lh *.tar.bz2
7368
mv *.tar.bz2 ../
7469
70+
- uses: actions/upload-artifact@v4
71+
with:
72+
name: windows-${{ matrix.arch }}
73+
path: ./*.tar.bz2
74+
7575
# https://huggingface.co/docs/hub/spaces-github-actions
7676
- name: Publish to huggingface
7777
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
@@ -88,7 +88,9 @@ jobs:
8888
8989
rm -rf huggingface
9090
export GIT_CLONE_PROTECTION_ACTIVE=false
91-
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
91+
export GIT_LFS_SKIP_SMUDGE=1
92+
93+
git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
9294
9395
cd huggingface
9496
mkdir -p windows-for-dotnet

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
## 1.10.12
2+
3+
* Add Flush to VAD so that the last speech segment can be detected. See also
4+
https://github.com/k2-fsa/sherpa-onnx/discussions/1077#discussioncomment-9979740
5+
16
## 1.10.11
27

38
* Support the iOS platform for Flutter.

CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ project(sherpa-onnx)
1010
# Remember to update
1111
# ./nodejs-addon-examples
1212
# ./dart-api-examples/
13-
# ./sherpa-onnx/flutter/CHANGELOG.md
14-
set(SHERPA_ONNX_VERSION "1.10.11")
13+
# ./CHANGELOG.md
14+
set(SHERPA_ONNX_VERSION "1.10.12")
1515

1616
# Disable warning about
1717
#

dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart

+22
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,28 @@ void main(List<String> arguments) async {
9393
}
9494
}
9595

96+
vad.flush();
97+
while (!vad.isEmpty()) {
98+
final stream = recognizer.createStream();
99+
final segment = vad.front();
100+
stream.acceptWaveform(
101+
samples: segment.samples, sampleRate: waveData.sampleRate);
102+
recognizer.decode(stream);
103+
104+
final result = recognizer.getResult(stream);
105+
106+
final startTime = segment.start * 1.0 / waveData.sampleRate;
107+
final duration = segment.samples.length * 1.0 / waveData.sampleRate;
108+
final stopTime = startTime + duration;
109+
if (result.text != '') {
110+
print(
111+
'${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
112+
}
113+
114+
stream.free();
115+
vad.pop();
116+
}
117+
96118
vad.free();
97119
recognizer.free();
98120
}

dart-api-examples/non-streaming-asr/pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ environment:
1010

1111
# Add regular dependencies here.
1212
dependencies:
13-
sherpa_onnx: ^1.10.11
13+
sherpa_onnx: ^1.10.12
1414
path: ^1.9.0
1515
args: ^2.5.0
1616

dart-api-examples/streaming-asr/pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ environment:
1111

1212
# Add regular dependencies here.
1313
dependencies:
14-
sherpa_onnx: ^1.10.11
14+
sherpa_onnx: ^1.10.12
1515
path: ^1.9.0
1616
args: ^2.5.0
1717

dart-api-examples/tts/pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ environment:
88

99
# Add regular dependencies here.
1010
dependencies:
11-
sherpa_onnx: ^1.10.11
11+
sherpa_onnx: ^1.10.12
1212
path: ^1.9.0
1313
args: ^2.5.0
1414

dart-api-examples/vad/bin/vad.dart

+6
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ void main(List<String> arguments) async {
6565
}
6666
}
6767

68+
vad.flush();
69+
while (!vad.isEmpty()) {
70+
allSamples.add(vad.front().samples);
71+
vad.pop();
72+
}
73+
6874
vad.free();
6975

7076
final s = Float32List.fromList(allSamples.expand((x) => x).toList());

dart-api-examples/vad/pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ environment:
99
sdk: ^3.4.0
1010

1111
dependencies:
12-
sherpa_onnx: ^1.10.11
12+
sherpa_onnx: ^1.10.12
1313
path: ^1.9.0
1414
args: ^2.5.0
1515

dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs

+20
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,26 @@ static void Main(string[] args)
5757
}
5858
}
5959
}
60+
61+
vad.Flush();
62+
63+
while (!vad.IsEmpty()) {
64+
SpeechSegment segment = vad.Front();
65+
float startTime = segment.Start / (float)sampleRate;
66+
float duration = segment.Samples.Length / (float)sampleRate;
67+
68+
OfflineStream stream = recognizer.CreateStream();
69+
stream.AcceptWaveform(sampleRate, segment.Samples);
70+
recognizer.Decode(stream);
71+
String text = stream.Result.Text;
72+
73+
if (!String.IsNullOrEmpty(text)) {
74+
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
75+
String.Format("{0:0.00}", startTime+duration), text);
76+
}
77+
78+
vad.Pop();
79+
}
6080
}
6181
}
6282

flutter-examples/streaming_asr/pubspec.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ description: >
55
66
publish_to: 'none'
77

8-
version: 1.10.11
8+
version: 1.10.12
99

1010
topics:
1111
- speech-recognition
@@ -30,7 +30,7 @@ dependencies:
3030
record: ^5.1.0
3131
url_launcher: ^6.2.6
3232

33-
sherpa_onnx: ^1.10.11
33+
sherpa_onnx: ^1.10.12
3434
# sherpa_onnx:
3535
# path: ../../flutter/sherpa_onnx
3636

flutter-examples/tts/pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ dependencies:
1717
cupertino_icons: ^1.0.6
1818
path_provider: ^2.1.3
1919
path: ^1.9.0
20-
sherpa_onnx: ^1.10.11
20+
sherpa_onnx: ^1.10.12
2121
url_launcher: ^6.2.6
2222
audioplayers: ^5.0.0
2323

flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart

+13
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,12 @@ typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
491491
typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
492492
Pointer<SherpaOnnxVoiceActivityDetector>);
493493

494+
typedef SherpaOnnxVoiceActivityDetectorFlushNative = Void Function(
495+
Pointer<SherpaOnnxVoiceActivityDetector>);
496+
497+
typedef SherpaOnnxVoiceActivityDetectorFlush = void Function(
498+
Pointer<SherpaOnnxVoiceActivityDetector>);
499+
494500
typedef SherpaOnnxVoiceActivityDetectorFrontNative
495501
= Pointer<SherpaOnnxSpeechSegment> Function(
496502
Pointer<SherpaOnnxVoiceActivityDetector>);
@@ -779,6 +785,8 @@ class SherpaOnnxBindings {
779785

780786
static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
781787

788+
static SherpaOnnxVoiceActivityDetectorFlush? voiceActivityDetectorFlush;
789+
782790
static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
783791

784792
static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
@@ -1036,6 +1044,11 @@ class SherpaOnnxBindings {
10361044
'SherpaOnnxVoiceActivityDetectorReset')
10371045
.asFunction();
10381046

1047+
voiceActivityDetectorFlush ??= dynamicLibrary
1048+
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFlushNative>>(
1049+
'SherpaOnnxVoiceActivityDetectorFlush')
1050+
.asFunction();
1051+
10391052
createCircularBuffer ??= dynamicLibrary
10401053
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
10411054
'SherpaOnnxCreateCircularBuffer')

flutter/sherpa_onnx/lib/src/vad.dart

+4
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ class VoiceActivityDetector {
207207
SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
208208
}
209209

210+
void flush() {
211+
SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);
212+
}
213+
210214
Pointer<SherpaOnnxVoiceActivityDetector> ptr;
211215
final VadModelConfig config;
212216
}

flutter/sherpa_onnx/pubspec.yaml

+6-6
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ topics:
1717
- voice-activity-detection
1818

1919
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20-
version: 1.10.11
20+
version: 1.10.12
2121

2222
homepage: https://github.com/k2-fsa/sherpa-onnx
2323

@@ -30,19 +30,19 @@ dependencies:
3030
flutter:
3131
sdk: flutter
3232

33-
sherpa_onnx_android: ^1.10.11
33+
sherpa_onnx_android: ^1.10.12
3434
# path: ../sherpa_onnx_android
3535

36-
sherpa_onnx_macos: ^1.10.11
36+
sherpa_onnx_macos: ^1.10.12
3737
# path: ../sherpa_onnx_macos
3838

39-
sherpa_onnx_linux: ^1.10.11
39+
sherpa_onnx_linux: ^1.10.12
4040
# path: ../sherpa_onnx_linux
4141
#
42-
sherpa_onnx_windows: ^1.10.11
42+
sherpa_onnx_windows: ^1.10.12
4343
# path: ../sherpa_onnx_windows
4444

45-
sherpa_onnx_ios: ^1.10.11
45+
sherpa_onnx_ios: ^1.10.12
4646
# sherpa_onnx_ios:
4747
# path: ../sherpa_onnx_ios
4848

flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
88
Pod::Spec.new do |s|
99
s.name = 'sherpa_onnx_ios'
10-
s.version = '1.10.11'
10+
s.version = '1.10.12'
1111
s.summary = 'A new Flutter FFI plugin project.'
1212
s.description = <<-DESC
1313
A new Flutter FFI plugin project.

flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
Pod::Spec.new do |s|
66
s.name = 'sherpa_onnx_macos'
7-
s.version = '1.10.11'
7+
s.version = '1.10.12'
88
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
99
s.description = <<-DESC
1010
sherpa-onnx Flutter FFI plugin project.

java-api-examples/VadNonStreamingParaformer.java

+19
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,25 @@ public static void main(String[] args) {
9898
}
9999
}
100100

101+
vad.flush();
102+
while (!vad.empty()) {
103+
SpeechSegment segment = vad.front();
104+
float startTime = segment.getStart() / 16000.0f;
105+
float duration = segment.getSamples().length / 16000.0f;
106+
107+
OfflineStream stream = recognizer.createStream();
108+
stream.acceptWaveform(segment.getSamples(), 16000);
109+
recognizer.decode(stream);
110+
String text = recognizer.getResult(stream).getText();
111+
stream.release();
112+
113+
if (!text.isEmpty()) {
114+
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
115+
}
116+
117+
vad.pop();
118+
}
119+
101120
vad.release();
102121
recognizer.release();
103122
}

java-api-examples/VadRemoveSilence.java

+10
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,16 @@ public static void main(String[] args) {
5959
}
6060
}
6161

62+
vad.flush();
63+
while (!vad.empty()) {
64+
65+
// if you want to get the starting time of this segment, you can use
66+
/* float startTime = vad.front().getStart() / 16000.0f; */
67+
68+
segments.add(vad.front().getSamples());
69+
vad.pop();
70+
}
71+
6272
// get total number of samples
6373
int n = 0;
6474
for (float[] s : segments) {

nodejs-addon-examples/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"dependencies": {
3-
"sherpa-onnx-node": "^1.10.6"
3+
"sherpa-onnx-node": "^1.10.12"
44
}
55
}

python-api-examples/vad-remove-non-speech-segments-from-file.py

+6
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ def main():
105105
speech_samples.extend(vad.front.samples)
106106
vad.pop()
107107

108+
vad.flush()
109+
110+
while not vad.empty():
111+
speech_samples.extend(vad.front.samples)
112+
vad.pop()
113+
108114
speech_samples = np.array(speech_samples, dtype=np.float32)
109115

110116
sf.write(args.output, speech_samples, samplerate=sample_rate)

scripts/dart/sherpa-onnx-pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ topics:
1717
- voice-activity-detection
1818

1919
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
20-
version: 1.10.6
20+
version: 1.10.12
2121

2222
homepage: https://github.com/k2-fsa/sherpa-onnx
2323

scripts/dotnet/VoiceActivityDetector.cs

+7
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ public void Reset()
5353
SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
5454
}
5555

56+
public void Flush()
57+
{
58+
SherpaOnnxVoiceActivityDetectorFlush(_handle.Handle);
59+
}
60+
5661
public void Dispose()
5762
{
5863
Cleanup();
@@ -106,5 +111,7 @@ private void Cleanup()
106111
[DllImport(Dll.Filename)]
107112
private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
108113

114+
[DllImport(Dll.Filename)]
115+
private static extern void SherpaOnnxVoiceActivityDetectorFlush(IntPtr handle);
109116
}
110117
}

scripts/go/sherpa_onnx.go

+4
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,10 @@ func (vad *VoiceActivityDetector) Reset() {
856856
C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
857857
}
858858

859+
func (vad *VoiceActivityDetector) Flush() {
860+
C.SherpaOnnxVoiceActivityDetectorFlush(vad.impl)
861+
}
862+
859863
// Spoken language identification
860864

861865
type SpokenLanguageIdentificationWhisperConfig struct {

0 commit comments

Comments
 (0)