Skip to content

Commit d3e27d5

Browse files
authored
Add C# API for speech enhancement GTCRN models (#1990)
1 parent c12d1d8 commit d3e27d5

10 files changed

+301
-1
lines changed

.github/scripts/test-dot-net.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22

33
cd dotnet-examples/
44

5-
cd ./kokoro-tts
5+
cd ./speech-enhancement-gtcrn
6+
./run.sh
7+
ls -lh
8+
9+
cd ../kokoro-tts
610
./run-kokoro.sh
711
ls -lh
812

dotnet-examples/sherpa-onnx.sln

+6
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts", "kokoro-tts\ko
3535
EndProject
3636
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "kokoro-tts-play", "kokoro-tts-play\kokoro-tts-play.csproj", "{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}"
3737
EndProject
38+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", "speech-enhancement-gtcrn\speech-enhancement-gtcrn.csproj", "{DF2569C6-6011-4716-9538-F9E9069E00EB}"
39+
EndProject
3840
Global
3941
GlobalSection(SolutionConfigurationPlatforms) = preSolution
4042
Debug|Any CPU = Debug|Any CPU
@@ -105,6 +107,10 @@ Global
105107
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Debug|Any CPU.Build.0 = Debug|Any CPU
106108
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.ActiveCfg = Release|Any CPU
107109
{EC0BCEAB-1B4E-4129-82CE-9880426AFA0B}.Release|Any CPU.Build.0 = Release|Any CPU
110+
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
111+
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Debug|Any CPU.Build.0 = Debug|Any CPU
112+
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.ActiveCfg = Release|Any CPU
113+
{DF2569C6-6011-4716-9538-F9E9069E00EB}.Release|Any CPU.Build.0 = Release|Any CPU
108114
EndGlobalSection
109115
GlobalSection(SolutionProperties) = preSolution
110116
HideSolutionNode = FALSE
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright (c) 2025 Xiaomi Corporation
2+
//
3+
// This file shows how to use the speech enhancement API with GTCRN models.
4+
//
5+
// 1. Download a model from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
7+
//
8+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
9+
//
10+
// 2. Download a test file
11+
//
12+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
13+
//
14+
// 3. Now run it
15+
//
16+
// dotnet run
17+
18+
using SherpaOnnx;
19+
20+
/// <summary>
/// Example program: runs a GTCRN speech enhancement model over
/// ./inp_16k.wav and writes the result to ./enhanced-16k.wav.
/// </summary>
class OfflineSpeechEnhancementDemo
{
    /// <summary>
    /// Entry point. Sets <see cref="Environment.ExitCode"/> to 1 when the
    /// enhanced audio cannot be written, so calling scripts can detect the
    /// failure.
    /// </summary>
    /// <param name="args">Command line arguments (unused).</param>
    static void Main(string[] args)
    {
        var config = new OfflineSpeechDenoiserConfig();
        config.Model.Gtcrn.Model = "./gtcrn_simple.onnx";
        config.Model.Debug = 1;
        config.Model.NumThreads = 1;

        // The denoiser owns a native handle; release it deterministically
        // instead of waiting for the finalizer.
        using var sd = new OfflineSpeechDenoiser(config);

        WaveReader waveReader = new WaveReader("./inp_16k.wav");
        var denoisedAudio = sd.Run(waveReader.Samples, waveReader.SampleRate);

        try
        {
            var outputFilename = "./enhanced-16k.wav";
            var ok = denoisedAudio.SaveToWaveFile(outputFilename);

            if (ok)
            {
                Console.WriteLine($"Wrote to {outputFilename} succeeded!");
            }
            else
            {
                Console.WriteLine($"Failed to write {outputFilename}");

                // Report the failure through the process exit code as well.
                Environment.ExitCode = 1;
            }
        }
        finally
        {
            // The denoised audio also wraps native memory; free it even if
            // saving throws.
            denoisedAudio.Dispose();
        }
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/usr/bin/env bash
# Downloads the GTCRN model and the test wave file when they are not already
# present in the current directory, then runs the example with `dotnet run`.
set -ex

base_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models

for f in gtcrn_simple.onnx inp_16k.wav; do
  if [ ! -f "./$f" ]; then
    # -f makes curl fail on HTTP errors instead of saving the server's error
    # page under the asset's name; such a bogus file would otherwise satisfy
    # the existence check above on every later run.
    curl -fSL -O "$base_url/$f"
  fi
done

dotnet run
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net8.0</TargetFramework>
6+
<RootNamespace>speech_enhancement_gtcrn</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<ProjectReference Include="..\Common\Common.csproj" />
13+
</ItemGroup>
14+
15+
</Project>

scripts/dotnet/DenoisedAudio.cs

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
2+
using System;
3+
using System.Runtime.InteropServices;
4+
using System.Text;
5+
6+
namespace SherpaOnnx
7+
{
8+
/// <summary>
/// Managed wrapper around a native denoised-audio result. The wrapped
/// pointer is read as an <c>Impl</c> struct (sample pointer, sample count,
/// sample rate) and is released with SherpaOnnxDestroyDenoisedAudio.
/// </summary>
public class DenoisedAudio : IDisposable
{
    /// <summary>Wraps the native pointer <paramref name="p"/>.</summary>
    /// <param name="p">Pointer to the native denoised-audio struct.</param>
    public DenoisedAudio(IntPtr p)
    {
        _handle = new HandleRef(this, p);
    }

    /// <summary>
    /// Writes the samples to a wave file via the native SherpaOnnxWriteWave.
    /// </summary>
    /// <param name="filename">Output path; passed to native code as a
    /// null-terminated UTF-8 byte string.</param>
    /// <returns>true when the native call returns 1; otherwise false.</returns>
    public bool SaveToWaveFile(String filename)
    {
        Impl impl = ReadImpl();

        // A new byte[] is zero-filled, so allocating one extra byte already
        // provides the null terminator expected by the native side.
        byte[] utf8FilenameWithNull = new byte[Encoding.UTF8.GetByteCount(filename) + 1];
        Encoding.UTF8.GetBytes(filename, 0, filename.Length, utf8FilenameWithNull, 0);

        int status = SherpaOnnxWriteWave(impl.Samples, impl.NumSamples, impl.SampleRate, utf8FilenameWithNull);

        // Only raw pointers are handed to native code, so keep this object
        // (and thus its finalizer) from running until the call has returned.
        GC.KeepAlive(this);
        return status == 1;
    }

    ~DenoisedAudio()
    {
        Cleanup();
    }

    /// <summary>
    /// Releases the native audio. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    private void Cleanup()
    {
        IntPtr h = Handle;
        if (h == IntPtr.Zero)
        {
            // Already released (or never valid): do not hand a null pointer
            // to the native destroy function.
            return;
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);

        SherpaOnnxDestroyDenoisedAudio(h);
    }

    // Managed view of the struct behind Handle.
    // NOTE(review): field order presumably mirrors the native struct; confirm
    // against the C API header.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Samples;
        public int NumSamples;
        public int SampleRate;
    }

    // Copies the native struct behind Handle into a managed Impl value.
    private Impl ReadImpl()
    {
        Impl impl = (Impl)Marshal.PtrToStructure(Handle, typeof(Impl));
        GC.KeepAlive(this);
        return impl;
    }

    private HandleRef _handle;

    /// <summary>The wrapped native pointer; IntPtr.Zero after disposal.</summary>
    public IntPtr Handle => _handle.Handle;

    /// <summary>Number of samples reported by the native struct.</summary>
    public int NumSamples => ReadImpl().NumSamples;

    /// <summary>Sample rate reported by the native struct.</summary>
    public int SampleRate => ReadImpl().SampleRate;

    /// <summary>
    /// A managed copy of the samples. A new array is allocated and filled
    /// on every access.
    /// </summary>
    public float[] Samples
    {
        get
        {
            Impl impl = ReadImpl();

            float[] samples = new float[impl.NumSamples];
            Marshal.Copy(impl.Samples, samples, 0, impl.NumSamples);

            // The copy reads native memory owned by this object; keep the
            // object alive until the copy has finished.
            GC.KeepAlive(this);
            return samples;
        }
    }

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyDenoisedAudio(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxWriteWave(IntPtr samples, int n, int sample_rate, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Filename);
}
94+
}
+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Runtime.InteropServices;
4+
5+
namespace SherpaOnnx
6+
{
7+
/// <summary>
/// Managed wrapper around the native offline speech denoiser. The native
/// object is created in the constructor and destroyed by
/// <see cref="Dispose"/> or the finalizer.
/// </summary>
public class OfflineSpeechDenoiser: IDisposable
{
    /// <summary>Creates the native denoiser from <paramref name="config"/>.</summary>
    /// <param name="config">Configuration passed by reference to the native
    /// create function.</param>
    public OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflineSpeechDenoiser(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Runs the denoiser over <paramref name="samples"/>.</summary>
    /// <param name="samples">Input audio samples.</param>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <returns>A <see cref="DenoisedAudio"/> wrapping the pointer returned
    /// by the native run function.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public DenoisedAudio Run(float[] samples, int sampleRate)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        IntPtr p = SherpaOnnxOfflineSpeechDenoiserRun(_handle.Handle, samples, samples.Length, sampleRate);

        // Only the raw pointer is passed to native code, so keep this object
        // (and thus its finalizer) from running until the call has returned.
        System.GC.KeepAlive(this);
        return new DenoisedAudio(p);
    }

    /// <summary>
    /// Releases the native denoiser. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflineSpeechDenoiser()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            // Already released (or creation returned a null handle): do not
            // hand a null pointer to the native destroy function.
            return;
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);

        SherpaOnnxDestroyOfflineSpeechDenoiser(h);
    }

    private HandleRef _handle;

    /// <summary>Sample rate reported by the native denoiser.</summary>
    public int SampleRate
    {
        get
        {
            int rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(_handle.Handle);
            System.GC.KeepAlive(this);
            return rate;
        }
    }

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflineSpeechDenoiser(ref OfflineSpeechDenoiserConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflineSpeechDenoiser(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern int SherpaOnnxOfflineSpeechDenoiserGetSampleRate(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxOfflineSpeechDenoiserRun(IntPtr handle, float[] samples, int n, int sampleRate);
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Runtime.InteropServices;
4+
5+
namespace SherpaOnnx
6+
{
7+
/// <summary>
/// Top-level configuration for the offline speech denoiser. Laid out
/// sequentially because it is marshalled to native code.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeechDenoiserConfig
{
    /// <summary>Model settings.</summary>
    public OfflineSpeechDenoiserModelConfig Model;

    /// <summary>
    /// Initializes <see cref="Model"/> with its own defaults.
    /// </summary>
    public OfflineSpeechDenoiserConfig() => Model = new OfflineSpeechDenoiserModelConfig();
}
16+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Runtime.InteropServices;
4+
5+
namespace SherpaOnnx
6+
{
7+
/// <summary>
/// GTCRN-specific model settings. Laid out sequentially because it is
/// marshalled to native code.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeechDenoiserGtcrnModelConfig
{
    /// <summary>
    /// Path to the GTCRN model file; marshalled as an ANSI (LPStr) string.
    /// Defaults to the empty string.
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;

    /// <summary>Initializes <see cref="Model"/> to the empty string.</summary>
    public OfflineSpeechDenoiserGtcrnModelConfig() => Model = "";
}
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Runtime.InteropServices;
4+
5+
namespace SherpaOnnx
6+
{
7+
/// <summary>
/// Model settings for the offline speech denoiser. Laid out sequentially
/// because it is marshalled to native code; the field order below is part
/// of that layout and must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineSpeechDenoiserModelConfig
{
    /// <summary>GTCRN model settings.</summary>
    public OfflineSpeechDenoiserGtcrnModelConfig Gtcrn;

    /// <summary>Number of threads; defaults to 1.</summary>
    public int NumThreads;

    /// <summary>Debug flag; defaults to 0.</summary>
    public int Debug;

    /// <summary>
    /// Execution provider name, marshalled as an ANSI (LPStr) string;
    /// defaults to "cpu".
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;

    /// <summary>Populates every field with its default value.</summary>
    public OfflineSpeechDenoiserModelConfig()
    {
        Provider = "cpu";
        Debug = 0;
        NumThreads = 1;
        Gtcrn = new OfflineSpeechDenoiserGtcrnModelConfig();
    }
}
27+
}

0 commit comments

Comments
 (0)