Skip to content

Commit 0196f1d

Browse files
authored
Wrap VAD APIs to C# (#946)
1 parent a99c7cb commit 0196f1d

17 files changed

+474
-10
lines changed

.github/scripts/test-dot-net.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
cd dotnet-examples/
44

5-
cd offline-punctuation
5+
cd vad-non-streaming-asr-paraformer
6+
./run.sh
7+
8+
cd ../offline-punctuation
69
./run.sh
710

811
cd ../speaker-identification

.github/workflows/test-dot-net.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ jobs:
6767
-DCMAKE_BUILD_TYPE=Release \
6868
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
6969
-DBUILD_ESPEAK_NG_EXE=OFF \
70-
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
70+
-DSHERPA_ONNX_ENABLE_BINARY=ON \
7171
..
7272
7373
cmake --build . --target install --config Release
@@ -197,6 +197,7 @@ jobs:
197197
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
198198
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
199199
cp -v scripts/dotnet/examples/offline-punctuation.csproj dotnet-examples/offline-punctuation
200+
cp -v scripts/dotnet/examples/vad-non-streaming-asr-paraformer.csproj dotnet-examples/vad-non-streaming-asr-paraformer
200201
201202
ls -lh /tmp
202203

dotnet-examples/offline-punctuation/Program.cs

-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
class OfflinePunctuationDemo
1919
{
20-
2120
static void Main(string[] args)
2221
{
2322
var config = new OfflinePunctuationConfig();
@@ -42,4 +41,3 @@ static void Main(string[] args)
4241
}
4342
}
4443
}
45-

dotnet-examples/sherpa-onnx.sln

+6
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "s
2121
EndProject
2222
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
2323
EndProject
24+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "vad-non-streaming-asr-paraformer", "vad-non-streaming-asr-paraformer\vad-non-streaming-asr-paraformer.csproj", "{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}"
25+
EndProject
2426
Global
2527
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2628
Debug|Any CPU = Debug|Any CPU
@@ -66,5 +68,9 @@ Global
6668
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
6769
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
6870
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
71+
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
72+
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Debug|Any CPU.Build.0 = Debug|Any CPU
73+
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.ActiveCfg = Release|Any CPU
74+
{8CD6B7E5-F59F-47B3-BB87-2B2E3678924D}.Release|Any CPU.Build.0 = Release|Any CPU
6975
EndGlobalSection
7076
EndGlobal
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
//
3+
// This file shows how to use a silero_vad model with a non-streaming Paraformer
4+
// for speech recognition.
5+
using SherpaOnnx;
6+
using System.Collections.Generic;
7+
using System;
8+
9+
/// <summary>
/// Example program: runs a silero_vad voice activity detector over a wave file
/// and decodes every detected speech segment with a non-streaming Paraformer
/// recognizer, printing "start--end: text" for each non-empty result.
/// </summary>
class VadNonStreamingAsrParaformer
{
    static void Main(string[] args)
    {
        // please download model files from
        // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        config.ModelConfig.Paraformer.Model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
        config.ModelConfig.Tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
        config.ModelConfig.Debug = 0;
        OfflineRecognizer recognizer = new OfflineRecognizer(config);

        VadModelConfig vadModelConfig = new VadModelConfig();
        vadModelConfig.SileroVad.Model = "./silero_vad.onnx";
        vadModelConfig.Debug = 0;

        VoiceActivityDetector vad = new VoiceActivityDetector(vadModelConfig, 60);

        string testWaveFilename = "./lei-jun-test.wav";
        WaveReader reader = new WaveReader(testWaveFilename);

        int numSamples = reader.Samples.Length;
        int windowSize = vadModelConfig.SileroVad.WindowSize;
        int sampleRate = vadModelConfig.SampleRate;

        // Integer division: only whole windows are fed to the detector, so the
        // trailing (numSamples % windowSize) samples are not processed.
        int numIter = numSamples / windowSize;

        for (int i = 0; i != numIter; ++i)
        {
            float[] samples = new float[windowSize];
            Array.Copy(reader.Samples, i * windowSize, samples, 0, windowSize);
            vad.AcceptWaveform(samples);

            // Drain the queue after every window. Draining only while
            // IsSpeechDetected() is true could leave already-queued segments
            // undecoded once the loop ends (e.g. the last utterance of the file).
            DecodeQueuedSegments(vad, recognizer, sampleRate);
        }
    }

    /// <summary>
    /// Decodes and prints every segment currently queued in <paramref name="vad"/>,
    /// popping each one after it has been handled.
    /// </summary>
    /// <param name="vad">Detector whose queued segments are consumed.</param>
    /// <param name="recognizer">Recognizer used to decode each segment.</param>
    /// <param name="sampleRate">Sample rate used to convert sample counts to seconds.</param>
    private static void DecodeQueuedSegments(VoiceActivityDetector vad, OfflineRecognizer recognizer, int sampleRate)
    {
        while (!vad.IsEmpty())
        {
            SpeechSegment segment = vad.Front();
            float startTime = segment.Start / (float)sampleRate;
            float duration = segment.Samples.Length / (float)sampleRate;

            OfflineStream stream = recognizer.CreateStream();
            stream.AcceptWaveform(sampleRate, segment.Samples);
            recognizer.Decode(stream);
            String text = stream.Result.Text;

            if (!String.IsNullOrEmpty(text))
            {
                Console.WriteLine("{0:0.00}--{1:0.00}: {2}", startTime, startTime + duration, text);
            }

            vad.Pop();
        }
    }
}
62+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../online-decode-files/WaveReader.cs
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env bash
#
# Downloads the VAD model, the test wave and the Paraformer model if they are
# not already present in the current directory, then runs the example.

set -ex

# curl is invoked with -f so that an HTTP error makes curl exit non-zero and
# `set -e` aborts the script, instead of saving the server's error page under
# the expected file name (which would also defeat the "-f file" checks on the
# next run).

if [ ! -f ./silero_vad.onnx ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi

if [ ! -f ./lei-jun-test.wav ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi

if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2

  tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi

dotnet run
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
13+
</ItemGroup>
14+
15+
</Project>

java-api-examples/VadNonStreamingParaformer.java

-4
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,6 @@ public static OfflineRecognizer createOfflineRecognizer() {
3939
String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
4040
String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
4141

42-
String waveFilename = "./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav";
43-
44-
WaveReader reader = new WaveReader(waveFilename);
45-
4642
OfflineParaformerModelConfig paraformer =
4743
OfflineParaformerModelConfig.builder().setModel(model).build();
4844

scripts/dotnet/CircularBuffer.cs

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Linq;
4+
using System.Collections.Generic;
5+
using System.Runtime.InteropServices;
6+
using System.Text;
7+
using System;
8+
9+
namespace SherpaOnnx
10+
{
11+
/// <summary>
/// Managed wrapper around the native sherpa-onnx circular buffer of floats.
/// The native object is created in the constructor and destroyed by
/// <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public class CircularBuffer : IDisposable
{
    /// <summary>Creates the native buffer with the given capacity.</summary>
    /// <param name="capacity">Capacity forwarded to the native constructor.</param>
    public CircularBuffer(int capacity)
    {
        IntPtr h = SherpaOnnxCreateCircularBuffer(capacity);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Pushes all elements of <paramref name="data"/> into the buffer.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public void Push(float[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        SherpaOnnxCircularBufferPush(_handle.Handle, data, data.Length);
    }

    /// <summary>
    /// Returns a managed copy of <paramref name="n"/> elements obtained from the
    /// native buffer starting at <paramref name="startIndex"/>.
    /// </summary>
    public float[] Get(int startIndex, int n)
    {
        IntPtr p = SherpaOnnxCircularBufferGet(_handle.Handle, startIndex, n);

        try
        {
            float[] ans = new float[n];
            Marshal.Copy(p, ans, 0, n);
            return ans;
        }
        finally
        {
            // Release the native copy even when the allocation or the copy
            // above throws, so the unmanaged memory is not leaked.
            if (p != IntPtr.Zero)
            {
                SherpaOnnxCircularBufferFree(p);
            }
        }
    }

    /// <summary>Forwards a pop of <paramref name="n"/> elements to the native buffer.</summary>
    public void Pop(int n)
    {
        SherpaOnnxCircularBufferPop(_handle.Handle, n);
    }

    /// <summary>Value reported by the native SherpaOnnxCircularBufferSize call.</summary>
    public int Size
    {
        get
        {
            return SherpaOnnxCircularBufferSize(_handle.Handle);
        }
    }

    /// <summary>Value reported by the native SherpaOnnxCircularBufferHead call.</summary>
    public int Head
    {
        get
        {
            return SherpaOnnxCircularBufferHead(_handle.Handle);
        }
    }

    /// <summary>Forwards a reset request to the native buffer.</summary>
    public void Reset()
    {
        SherpaOnnxCircularBufferReset(_handle.Handle);
    }

    /// <summary>Destroys the native buffer. Safe to call more than once.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~CircularBuffer()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Nothing to destroy if creation failed or Cleanup already ran;
        // this keeps a repeated Dispose() from reaching native code with a
        // null handle.
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroyCircularBuffer(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateCircularBuffer(int capacity);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyCircularBuffer(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferPush(IntPtr handle, float[] p, int n);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCircularBufferGet(IntPtr handle, int startIndex, int n);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferFree(IntPtr p);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferPop(IntPtr handle, int n);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxCircularBufferSize(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxCircularBufferHead(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxCircularBufferReset(IntPtr handle);
}
112+
}
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Linq;
4+
using System.Collections.Generic;
5+
using System.Runtime.InteropServices;
6+
using System.Text;
7+
using System;
8+
9+
namespace SherpaOnnx
10+
{
11+
/// <summary>
/// Configuration for the silero_vad model, passed to native code by value.
/// The layout is sequential, so the declaration order of the fields below
/// must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SileroVadModelConfig
{
    /// <summary>
    /// Initializes every field to its default: an empty model path,
    /// a threshold of 0.5, a minimum silence duration of 0.5, a minimum
    /// speech duration of 0.25 and a window size of 512.
    /// </summary>
    public SileroVadModelConfig()
    {
        WindowSize = 512;
        MinSpeechDuration = 0.25f;
        MinSilenceDuration = 0.5f;
        Threshold = 0.5f;
        Model = "";
    }

    /// <summary>Model path; marshalled as <see cref="UnmanagedType.LPStr"/>.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;

    /// <summary>Detection threshold (default 0.5).</summary>
    public float Threshold;

    /// <summary>Minimum silence duration (default 0.5; presumably seconds — TODO confirm).</summary>
    public float MinSilenceDuration;

    /// <summary>Minimum speech duration (default 0.25; presumably seconds — TODO confirm).</summary>
    public float MinSpeechDuration;

    /// <summary>Window size (default 512; presumably in samples — TODO confirm).</summary>
    public int WindowSize;
}
}

scripts/dotnet/SpeechSegment.cs

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Linq;
4+
using System.Collections.Generic;
5+
using System.Runtime.InteropServices;
6+
using System.Text;
7+
using System;
8+
9+
namespace SherpaOnnx
10+
{
11+
/// <summary>
/// Managed snapshot of a native speech segment: its start value and a copy
/// of its samples. The constructor copies everything it needs, so the
/// instance does not keep a reference to the native memory.
/// </summary>
public class SpeechSegment
{
    /// <summary>
    /// Reads the native segment structure at <paramref name="handle"/> and copies
    /// its samples into a managed array.
    /// </summary>
    /// <param name="handle">
    /// Pointer to a native structure laid out as <c>Impl</c>
    /// (int start, float* samples, int count).
    /// </param>
    public SpeechSegment(IntPtr handle)
    {
        Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

        _start = impl.Start;
        _samples = new float[impl.Count];

        // Marshal.Copy replaces a hand-written unsafe loop: it needs no
        // /unsafe compilation and reports a null source pointer as a managed
        // exception. It is skipped for empty segments, whose sample pointer
        // may legitimately be null.
        if (impl.Count > 0)
        {
            Marshal.Copy(impl.Samples, _samples, 0, impl.Count);
        }
    }

    // NOTE(review): this backing field is public, unlike _samples. It is kept
    // public here so that existing callers reading it keep compiling; prefer
    // the Start property.
    public int _start;

    /// <summary>Start value read from the native segment (presumably a sample index — TODO confirm).</summary>
    public int Start => _start;

    private float[] _samples;

    /// <summary>Managed copy of the segment's samples.</summary>
    public float[] Samples => _samples;

    // Mirror of the native segment structure; field order must match the native side.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public int Start;
        public IntPtr Samples;
        public int Count;
    }
}
47+
}

0 commit comments

Comments
 (0)