Skip to content

Commit df319fa

Browse files
authored
Swift API for keyword spotting. (k2-fsa#1027)
1 parent f8b717e commit df319fa

File tree

5 files changed

+230
-0
lines changed

5 files changed

+230
-0
lines changed

.github/scripts/test-swift.sh

+4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
77
cd swift-api-examples
88
ls -lh
99

10+
./run-keyword-spotting-from-file.sh
11+
rm ./keyword-spotting-from-file
12+
rm -rf sherpa-onnx-kws-*
13+
1014
./run-streaming-hlg-decode-file.sh
1115
rm ./streaming-hlg-decode-file
1216
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

swift-api-examples/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ sherpa-onnx-paraformer-zh-2023-09-14
88
!*.sh
99
*.bak
1010
streaming-hlg-decode-file
11+
keyword-spotting-from-file

swift-api-examples/SherpaOnnx.swift

+108
Original file line numberDiff line numberDiff line change
@@ -832,3 +832,111 @@ class SherpaOnnxSpokenLanguageIdentificationWrapper {
832832
return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: result)
833833
}
834834
}
835+
836+
// keyword spotting
837+
838+
/// Owns a `SherpaOnnxKeywordResult` obtained from the C API and exposes its
/// fields as Swift values.
///
/// The wrapped pointer is released with `DestroyKeywordResult` when the wrapper
/// is deallocated, so every accessor copies data out of the C struct instead of
/// handing out raw pointers. All accessors tolerate a `nil` result pointer and
/// return an empty value in that case.
class SherpaOnnxKeywordResultWrapper {
  /// A pointer to the underlying counterpart in C. May be `nil`.
  let result: UnsafePointer<SherpaOnnxKeywordResult>!

  /// The `keyword` field of the C result, copied into a Swift string.
  ///
  /// Returns an empty string when the result pointer or its `keyword` field is
  /// `nil`.
  var keyword: String {
    guard let result, let cKeyword = result.pointee.keyword else {
      return ""
    }
    return String(cString: cKeyword)
  }

  /// The `count` field of the C result, or `0` when there is no result.
  var count: Int32 {
    guard let result else {
      return 0
    }
    return result.pointee.count
  }

  /// The first `count` entries of `tokens_arr`, copied into Swift strings.
  ///
  /// `nil` entries are skipped. Returns an empty array when the result pointer
  /// or `tokens_arr` is `nil`, or when `count` is not positive.
  var tokens: [String] {
    guard let result, let tokensPointer = result.pointee.tokens_arr else {
      return []
    }

    // A negative count would make `0..<count` trap, so treat it as empty.
    let tokenCount = Int(result.pointee.count)
    guard tokenCount > 0 else {
      return []
    }

    return (0..<tokenCount).compactMap { index in
      tokensPointer[index].map { String(cString: $0) }
    }
  }

  /// Takes ownership of `result`; it is destroyed in `deinit`.
  init(result: UnsafePointer<SherpaOnnxKeywordResult>!) {
    self.result = result
  }

  deinit {
    if let result {
      DestroyKeywordResult(result)
    }
  }
}
876+
877+
/// Assembles the C `SherpaOnnxKeywordSpotterConfig` struct from Swift values.
///
/// - Parameters:
///   - featConfig: Stored as `feat_config`.
///   - modelConfig: Stored as `model_config`.
///   - keywordsFile: Converted with `toCPointer` and stored as `keywords_file`.
///   - maxActivePaths: Stored as `max_active_paths` (converted to `Int32`). Defaults to 4.
///   - numTrailingBlanks: Stored as `num_trailing_blanks` (converted to `Int32`). Defaults to 1.
///   - keywordsScore: Stored as `keywords_score`. Defaults to 1.0.
///   - keywordsThreshold: Stored as `keywords_threshold`. Defaults to 0.25.
/// - Returns: The populated configuration struct.
func sherpaOnnxKeywordSpotterConfig(
  featConfig: SherpaOnnxFeatureConfig,
  modelConfig: SherpaOnnxOnlineModelConfig,
  keywordsFile: String,
  maxActivePaths: Int = 4,
  numTrailingBlanks: Int = 1,
  keywordsScore: Float = 1.0,
  keywordsThreshold: Float = 0.25
) -> SherpaOnnxKeywordSpotterConfig {
  // The C struct uses 32-bit integers; narrow the Swift `Int`s up front.
  let activePaths = Int32(maxActivePaths)
  let trailingBlanks = Int32(numTrailingBlanks)

  let spotterConfig = SherpaOnnxKeywordSpotterConfig(
    feat_config: featConfig,
    model_config: modelConfig,
    max_active_paths: activePaths,
    num_trailing_blanks: trailingBlanks,
    keywords_score: keywordsScore,
    keywords_threshold: keywordsThreshold,
    keywords_file: toCPointer(keywordsFile)
  )
  return spotterConfig
}
896+
897+
/// Swift wrapper around a C keyword spotter and the single stream it decodes.
///
/// Both handles are created in `init` and destroyed in `deinit`.
class SherpaOnnxKeywordSpotterWrapper {
  /// A pointer to the underlying counterpart in C
  let spotter: OpaquePointer!

  /// The stream created from `spotter` that receives audio and is decoded.
  var stream: OpaquePointer!

  /// Creates the spotter from `config`, then creates its stream.
  init(
    config: UnsafePointer<SherpaOnnxKeywordSpotterConfig>!
  ) {
    let spotterHandle = CreateKeywordSpotter(config)
    spotter = spotterHandle
    stream = CreateKeywordStream(spotterHandle)
  }

  deinit {
    // The stream is released before the spotter it was created from.
    if let stream = stream {
      DestroyOnlineStream(stream)
    }

    if let spotter = spotter {
      DestroyKeywordSpotter(spotter)
    }
  }

  /// Passes `samples` and `sampleRate` to the stream via `AcceptWaveform`.
  ///
  /// - Parameters:
  ///   - samples: Audio samples to append.
  ///   - sampleRate: Sample rate of `samples`. Defaults to 16000.
  func acceptWaveform(samples: [Float], sampleRate: Int = 16000) {
    let rate = Int32(sampleRate)
    let sampleCount = Int32(samples.count)
    AcceptWaveform(stream, rate, samples, sampleCount)
  }

  /// Returns `true` when `IsKeywordStreamReady` reports 1 for the stream.
  func isReady() -> Bool {
    return IsKeywordStreamReady(spotter, stream) == 1
  }

  /// Runs `DecodeKeywordStream` on the stream.
  func decode() {
    DecodeKeywordStream(spotter, stream)
  }

  /// Fetches the current result from `GetKeywordResult`.
  ///
  /// - Returns: A wrapper that owns the returned C result and destroys it when
  ///   deallocated.
  func getResult() -> SherpaOnnxKeywordResultWrapper {
    let rawResult: UnsafePointer<SherpaOnnxKeywordResult>? = GetKeywordResult(spotter, stream)
    return SherpaOnnxKeywordResultWrapper(result: rawResult)
  }

  /// Signals that no more audio samples will be provided.
  /// After this call, acceptWaveform() must not be called again.
  func inputFinished() {
    InputFinished(stream)
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import AVFoundation
2+
3+
extension AudioBuffer {
  /// Copies the buffer's contents into a `[Float]`.
  ///
  /// The memory is viewed through `UnsafeBufferPointer<Float>`; the caller is
  /// expected to know the buffer really holds 32-bit float samples.
  func array() -> [Float] {
    let samples = UnsafeBufferPointer<Float>(self)
    return [Float](samples)
  }
}
7+
}
8+
9+
extension AVAudioPCMBuffer {
  /// Copies the samples of `audioBufferList.pointee.mBuffers` into a `[Float]`.
  ///
  /// Only that one `AudioBuffer` is read, so this is meant for single-channel
  /// float data.
  func array() -> [Float] {
    let firstBuffer = audioBufferList.pointee.mBuffers
    return firstBuffer.array()
  }
}
13+
}
14+
15+
/// Runs keyword spotting on a test wave file and prints each detected keyword.
///
/// Steps: build the spotter configuration from the model files, read the whole
/// wave file into memory, feed it (plus a short run of trailing zeros) to the
/// spotter, then decode until the spotter reports it is no longer ready.
///
/// All paths are relative to the current working directory. Any failure to
/// open or read the audio file, or an unexpected audio format, terminates the
/// program with a descriptive message.
func run() {
  let filePath = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
  let encoder =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  let decoder =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  let joiner =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
  let tokens =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
  let keywordsFile =
    "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/test_keywords.txt"

  let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner
  )

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: transducerConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  // `var` because the wrapper takes the configuration by pointer (`&config`).
  var config = sherpaOnnxKeywordSpotterConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    keywordsFile: keywordsFile
  )

  let spotter = SherpaOnnxKeywordSpotterWrapper(config: &config)

  let fileURL = URL(fileURLWithPath: filePath)
  let audioFile: AVAudioFile
  do {
    audioFile = try AVAudioFile(forReading: fileURL)
  } catch {
    fatalError("Failed to open \(filePath): \(error)")
  }

  // The samples are handed to the spotter as 16 kHz mono Float32, so verify
  // the format in every build configuration (`assert` is removed by -O).
  let audioFormat = audioFile.processingFormat
  precondition(audioFormat.sampleRate == 16000, "Expected a 16 kHz wave file")
  precondition(audioFormat.channelCount == 1, "Expected a single-channel wave file")
  precondition(
    audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32,
    "Expected Float32 samples")

  let audioFrameCount = UInt32(audioFile.length)
  guard
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate an audio buffer for \(audioFrameCount) frames")
  }

  do {
    try audioFile.read(into: audioFileBuffer)
  } catch {
    fatalError("Failed to read \(filePath): \(error)")
  }

  spotter.acceptWaveform(samples: audioFileBuffer.array())

  // Append 3200 zero samples (0.2 s at 16 kHz) after the real audio.
  let tailPadding = [Float](repeating: 0.0, count: 3200)
  spotter.acceptWaveform(samples: tailPadding)

  spotter.inputFinished()
  while spotter.isReady() {
    spotter.decode()
    let keyword = spotter.getResult().keyword
    if !keyword.isEmpty {
      print("Detected: \(keyword)")
    }
  }
}
77+
78+
/// Program entry point; it only forwards to `run()`.
@main
struct App {
  static func main() { run() }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env bash
#
# Builds (when the binary does not exist yet) and runs the Swift
# keyword-spotting example.
#
# Must be run from the directory containing this script: every path below is
# relative to the current working directory. Requires ../build-swift-macos to
# exist already.

set -ex

model=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
install_dir=../build-swift-macos/install

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Download and unpack the model on first use.
if [ ! -d "./$model" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page as the archive.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$model.tar.bz2"
  tar xf "$model.tar.bz2"
  rm "$model.tar.bz2"
fi

if [ ! -e ./keyword-spotting-from-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I "$install_dir/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./keyword-spotting-from-file.swift ./SherpaOnnx.swift \
    -L "$install_dir/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o keyword-spotting-from-file

  strip keyword-spotting-from-file
else
  echo "./keyword-spotting-from-file exists - skip building"
fi

# Prepend the install lib directory; only add the ":" separator when
# DYLD_LIBRARY_PATH already has a value, so no empty entry is created.
export DYLD_LIBRARY_PATH="$PWD/$install_dir/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
./keyword-spotting-from-file

0 commit comments

Comments
 (0)