Skip to content

Commit 9b0c683

Browse files
authored
Wrap punctuation APIs to C#. (k2-fsa#945)
1 parent 0337b93 commit 9b0c683

31 files changed

+263
-55
lines changed

.github/scripts/test-dot-net.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
cd dotnet-examples/
44

5-
cd speaker-identification
5+
cd offline-punctuation
6+
./run.sh
7+
8+
cd ../speaker-identification
69
./run.sh
710

811
cd ../streaming-hlg-decoding/

.github/workflows/test-dot-net.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ jobs:
196196
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
197197
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
198198
cp -v scripts/dotnet/examples/speaker-identification.csproj dotnet-examples/speaker-identification
199+
cp -v scripts/dotnet/examples/offline-punctuation.csproj dotnet-examples/offline-punctuation
199200
200201
ls -lh /tmp
201202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright (c) 2024 Xiaomi Corporation
2+
//
3+
// This file shows how to add punctuation to text.
4+
//
5+
// 1. Download a model from
6+
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
7+
//
8+
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
9+
//
10+
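// 2. Extract it
//
// tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
//
// 3. Now run it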
11+
//
12+
// dotnet run
13+
14+
using SherpaOnnx;
15+
using System.Collections.Generic;
16+
using System;
17+
18+
/// <summary>
/// Console demo: feeds a few unpunctuated sentences to
/// <c>OfflinePunctuation</c> and prints each input next to its output.
/// </summary>
class OfflinePunctuationDemo
{
    /// <summary>
    /// Entry point. Expects the model directory to exist in the current
    /// working directory (see the path assigned to <c>CtTransformer</c>).
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new OfflinePunctuationConfig();
        config.Model.CtTransformer = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
        config.Model.Debug = 1;
        config.Model.NumThreads = 1;

        string[] textList = new string[] {
            "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
            "我们都是木头人不会说话不会动",
            "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
        };

        // OfflinePunctuation implements IDisposable; dispose it
        // deterministically instead of leaving the release of the native
        // handle to the finalizer.
        using (var punct = new OfflinePunctuation(config))
        {
            Console.WriteLine("---------");
            foreach (string text in textList)
            {
                string textWithPunct = punct.AddPunct(text);
                Console.WriteLine("Input text: {0}", text);
                Console.WriteLine("Output text: {0}", textWithPunct);
                Console.WriteLine("---------");
            }
        }
    }
}
45+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<RootNamespace>offline_punctuation</RootNamespace>
7+
<ImplicitUsings>enable</ImplicitUsings>
8+
<Nullable>enable</Nullable>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
13+
</ItemGroup>
14+
15+
</Project>
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env bash
# Downloads and unpacks the punctuation model if model.onnx is not already
# present in the current directory, then runs the demo with `dotnet run`.

set -ex

if [ ! -e ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
  # -f: exit non-zero on an HTTP error instead of saving the server's error
  # page under the archive name, which would only fail later inside tar.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
  # The archive is no longer needed once extracted.
  rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
fi

dotnet run

dotnet-examples/sherpa-onnx.sln

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "s
1919
EndProject
2020
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speaker-identification", "speaker-identification\speaker-identification.csproj", "{2B1B140E-A92F-426B-B0DF-5D916B67304F}"
2121
EndProject
22+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-punctuation", "offline-punctuation\offline-punctuation.csproj", "{42D85582-BB63-4259-A4EA-837D66AC078B}"
23+
EndProject
2224
Global
2325
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2426
Debug|Any CPU = Debug|Any CPU
@@ -60,5 +62,9 @@ Global
6062
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Debug|Any CPU.Build.0 = Debug|Any CPU
6163
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.ActiveCfg = Release|Any CPU
6264
{2B1B140E-A92F-426B-B0DF-5D916B67304F}.Release|Any CPU.Build.0 = Release|Any CPU
65+
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
66+
{42D85582-BB63-4259-A4EA-837D66AC078B}.Debug|Any CPU.Build.0 = Debug|Any CPU
67+
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.ActiveCfg = Release|Any CPU
68+
{42D85582-BB63-4259-A4EA-837D66AC078B}.Release|Any CPU.Build.0 = Release|Any CPU
6369
EndGlobalSection
6470
EndGlobal

scripts/dotnet/FeatureConfig.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ public FeatureConfig()
2929
public int FeatureDim;
3030
}
3131

32-
}
32+
}

scripts/dotnet/OfflineLMConfig.cs

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
namespace SherpaOnnx
1010
{
11-
1211
[StructLayout(LayoutKind.Sequential)]
1312
public struct OfflineLMConfig
1413
{
@@ -22,5 +21,4 @@ public OfflineLMConfig()
2221

2322
public float Scale;
2423
}
25-
26-
}
24+
}

scripts/dotnet/OfflineModelConfig.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
namespace SherpaOnnx
1010
{
11-
1211
[StructLayout(LayoutKind.Sequential)]
1312
public struct OfflineModelConfig
1413
{
@@ -44,6 +43,4 @@ public OfflineModelConfig()
4443
[MarshalAs(UnmanagedType.LPStr)]
4544
public string ModelType;
4645
}
47-
48-
49-
}
46+
}

scripts/dotnet/OfflineNemoEncDecCtcModelConfig.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
namespace SherpaOnnx
1010
{
11-
1211
[StructLayout(LayoutKind.Sequential)]
1312
public struct OfflineNemoEncDecCtcModelConfig
1413
{
@@ -19,4 +18,4 @@ public OfflineNemoEncDecCtcModelConfig()
1918
[MarshalAs(UnmanagedType.LPStr)]
2019
public string Model;
2120
}
22-
}
21+
}

scripts/dotnet/OfflineParaformerModelConfig.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,4 @@ public OfflineParaformerModelConfig()
1818
[MarshalAs(UnmanagedType.LPStr)]
1919
public string Model;
2020
}
21-
22-
}
21+
}

scripts/dotnet/OfflinePunctuation.cs

+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Linq;
4+
using System.Collections.Generic;
5+
using System.Runtime.InteropServices;
6+
using System.Text;
7+
using System;
8+
9+
namespace SherpaOnnx
10+
{
11+
/// <summary>
/// Managed wrapper around a native offline-punctuation object created by
/// <c>SherpaOnnxCreateOfflinePunctuation</c>. The native object is released
/// by <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public class OfflinePunctuation : IDisposable
{
    /// <summary>
    /// Creates the native punctuation object from <paramref name="config"/>.
    /// </summary>
    /// <param name="config">Configuration passed by reference to the native constructor.</param>
    public OfflinePunctuation(OfflinePunctuationConfig config)
    {
        IntPtr h = SherpaOnnxCreateOfflinePunctuation(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>
    /// Runs the native punctuation model on <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Input text; must not be null.</param>
    /// <returns>
    /// The text returned by the native library decoded as UTF-8, or an empty
    /// string when the native call returns a null pointer or an empty string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The native handle is zero (the object was disposed or creation failed).
    /// </exception>
    public String AddPunct(String text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        if (_handle.Handle == IntPtr.Zero)
        {
            throw new InvalidOperationException(
                "The native punctuation handle is not valid (the object was disposed or creation failed).");
        }

        // Encode explicitly as NUL-terminated UTF-8. Marshalling a string as
        // LPStr uses the ANSI code page on Windows, which corrupts non-ASCII
        // input; the result below is decoded as UTF-8, so the input must
        // use the same encoding.
        byte[] utf8 = Encoding.UTF8.GetBytes(text);
        byte[] utf8WithNul = new byte[utf8.Length + 1]; // last byte stays 0
        Array.Copy(utf8, utf8WithNul, utf8.Length);

        IntPtr p = SherpaOfflinePunctuationAddPunct(_handle.Handle, utf8WithNul);

        // Only the raw IntPtr was handed to native code; keep this object
        // (and therefore its finalizer) from running until the call returned.
        GC.KeepAlive(this);

        if (p == IntPtr.Zero)
        {
            return "";
        }

        try
        {
            // Measure the NUL-terminated result without requiring 'unsafe'.
            int length = 0;
            while (Marshal.ReadByte(p, length) != 0)
            {
                ++length;
            }

            if (length == 0)
            {
                return "";
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(p, stringBuffer, 0, length);
            return Encoding.UTF8.GetString(stringBuffer);
        }
        finally
        {
            // Always hand the buffer back to the native side, even if
            // decoding throws.
            SherpaOfflinePunctuationFreeText(p);
        }
    }

    /// <summary>
    /// Releases the native object. Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~OfflinePunctuation()
    {
        Cleanup();
    }

    /// <summary>
    /// Destroys the native object if it is still alive and clears the handle.
    /// </summary>
    private void Cleanup()
    {
        IntPtr h = _handle.Handle;
        if (h == IntPtr.Zero)
        {
            // Already released (or never created): nothing to destroy.
            return;
        }

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);

        SherpaOnnxDestroyOfflinePunctuation(h);
    }

    private HandleRef _handle;


    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateOfflinePunctuation(ref OfflinePunctuationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroyOfflinePunctuation(IntPtr handle);

    // 'text' is a NUL-terminated UTF-8 byte buffer; a byte[] is passed to
    // native code as a pointer to its first element for the duration of the call.
    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOfflinePunctuationAddPunct(IntPtr handle, byte[] text);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOfflinePunctuationFreeText(IntPtr p);
}
87+
}
88+
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Linq;
4+
using System.Collections.Generic;
5+
using System.Runtime.InteropServices;
6+
using System.Text;
7+
using System;
8+
9+
namespace SherpaOnnx
10+
{
11+
/// <summary>
/// Top-level configuration for offline punctuation. Laid out sequentially
/// so it can be passed by reference to native code.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflinePunctuationConfig
{
    /// <summary>Model settings; the only member of this struct.</summary>
    public OfflinePunctuationModelConfig Model;

    /// <summary>
    /// Initializes <see cref="Model"/> with a default-constructed model configuration.
    /// </summary>
    public OfflinePunctuationConfig() => Model = new OfflinePunctuationModelConfig();
}
20+
}
21+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
2+
3+
using System.Linq;
4+
using System.Collections.Generic;
5+
using System.Runtime.InteropServices;
6+
using System.Text;
7+
using System;
8+
9+
namespace SherpaOnnx
10+
{
11+
/// <summary>
/// Model-level settings for offline punctuation. The layout is sequential,
/// so the declaration order of the fields below must not be changed.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct OfflinePunctuationModelConfig
{
    /// <summary>CT-transformer model setting; defaults to an empty string.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string CtTransformer;

    /// <summary>Thread count; defaults to 1.</summary>
    public int NumThreads;

    /// <summary>Debug flag; defaults to 0.</summary>
    public int Debug;

    /// <summary>Execution provider name; defaults to "cpu".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;

    /// <summary>
    /// Sets every field to its default value.
    /// </summary>
    public OfflinePunctuationModelConfig()
    {
        Provider = "cpu";
        Debug = 0;
        NumThreads = 1;
        CtTransformer = string.Empty;
    }
}
32+
}

scripts/dotnet/OfflineRecognizer.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,4 @@ private void Cleanup()
7272
[DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")]
7373
private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);
7474
}
75-
76-
}
75+
}

scripts/dotnet/OfflineRecognizerConfig.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
namespace SherpaOnnx
1010
{
11-
1211
[StructLayout(LayoutKind.Sequential)]
1312
public struct OfflineRecognizerConfig
1413
{
@@ -38,6 +37,4 @@ public OfflineRecognizerConfig()
3837

3938
public float HotwordsScore;
4039
}
41-
42-
43-
}
40+
}

scripts/dotnet/OfflineRecognizerResult.cs

+1-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
namespace SherpaOnnx
1010
{
11-
1211
public class OfflineRecognizerResult
1312
{
1413
public OfflineRecognizerResult(IntPtr handle)
@@ -44,6 +43,4 @@ struct Impl
4443
private String _text;
4544
public String Text => _text;
4645
}
47-
48-
49-
}
46+
}

scripts/dotnet/OfflineStream.cs

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
namespace SherpaOnnx
1010
{
11-
1211
public class OfflineStream : IDisposable
1312
{
1413
public OfflineStream(IntPtr p)
@@ -68,5 +67,4 @@ private void Cleanup()
6867
[DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")]
6968
private static extern void DestroyResult(IntPtr handle);
7069
}
71-
72-
}
70+
}

scripts/dotnet/OfflineTdnnModelConfig.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,4 @@ public OfflineTdnnModelConfig()
1818
[MarshalAs(UnmanagedType.LPStr)]
1919
public string Model;
2020
}
21-
22-
}
21+
}

scripts/dotnet/OfflineTransducerModelConfig.cs

+1-2
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,4 @@ public OfflineTransducerModelConfig()
2626
[MarshalAs(UnmanagedType.LPStr)]
2727
public string Joiner;
2828
}
29-
30-
}
29+
}

0 commit comments

Comments
 (0)