Skip to content

Commit 97d3ec0

Browse files
authored
Add C++ support for non-streaming NeMo fast conformer hybrid transducer ctc (the ctc branch) (k2-fsa#848)
1 parent 94993a0 commit 97d3ec0

8 files changed

+155
-33
lines changed

.github/scripts/test-offline-ctc.sh

+99-2
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,111 @@ echo "PATH: $PATH"
1313

1414
which $EXE
1515

16+
log "-----------------------------------------------------------------"
17+
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
18+
log "-----------------------------------------------------------------"
19+
20+
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
21+
name=$(basename $url)
22+
curl -SL -O $url
23+
tar xvf $name
24+
rm $name
25+
repo=$(basename -s .tar.bz2 $name)
26+
ls -lh $repo
27+
28+
log "test $repo"
29+
test_wavs=(
30+
de-german.wav
31+
es-spanish.wav
32+
hr-croatian.wav
33+
po-polish.wav
34+
uk-ukrainian.wav
35+
en-english.wav
36+
fr-french.wav
37+
it-italian.wav
38+
ru-russian.wav
39+
)
40+
for w in ${test_wavs[@]}; do
41+
time $EXE \
42+
--tokens=$repo/tokens.txt \
43+
--nemo-ctc-model=$repo/model.onnx \
44+
--debug=1 \
45+
$repo/test_wavs/$w
46+
done
47+
48+
rm -rf $repo
49+
50+
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
51+
name=$(basename $url)
52+
curl -SL -O $url
53+
tar xvf $name
54+
rm $name
55+
repo=$(basename -s .tar.bz2 $name)
56+
ls -lh $repo
57+
58+
log "Test $repo"
59+
60+
time $EXE \
61+
--tokens=$repo/tokens.txt \
62+
--nemo-ctc-model=$repo/model.onnx \
63+
--debug=1 \
64+
$repo/test_wavs/en-english.wav
65+
66+
rm -rf $repo
67+
68+
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
69+
name=$(basename $url)
70+
curl -SL -O $url
71+
tar xvf $name
72+
rm $name
73+
repo=$(basename -s .tar.bz2 $name)
74+
ls -lh $repo
75+
76+
log "test $repo"
77+
78+
time $EXE \
79+
--tokens=$repo/tokens.txt \
80+
--nemo-ctc-model=$repo/model.onnx \
81+
--debug=1 \
82+
$repo/test_wavs/es-spanish.wav
83+
84+
rm -rf $repo
85+
86+
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
87+
name=$(basename $url)
88+
curl -SL -O $url
89+
tar xvf $name
90+
rm $name
91+
repo=$(basename -s .tar.bz2 $name)
92+
ls -lh $repo
93+
94+
log "Test $repo"
95+
96+
test_wavs=(
97+
en-english.wav
98+
de-german.wav
99+
fr-french.wav
100+
es-spanish.wav
101+
)
102+
103+
for w in ${test_wavs[@]}; do
104+
time $EXE \
105+
--tokens=$repo/tokens.txt \
106+
--nemo-ctc-model=$repo/model.onnx \
107+
--debug=1 \
108+
$repo/test_wavs/$w
109+
done
110+
111+
rm -rf $repo
112+
16113
log "------------------------------------------------------------"
17114
log "Run Wenet models"
18115
log "------------------------------------------------------------"
19116
wenet_models=(
20117
sherpa-onnx-zh-wenet-aishell
21-
sherpa-onnx-zh-wenet-aishell2
118+
# sherpa-onnx-zh-wenet-aishell2
22119
# sherpa-onnx-zh-wenet-wenetspeech
23-
sherpa-onnx-zh-wenet-multi-cn
120+
# sherpa-onnx-zh-wenet-multi-cn
24121
sherpa-onnx-en-wenet-librispeech
25122
# sherpa-onnx-en-wenet-gigaspeech
26123
)

.github/scripts/test-spoken-language-identification.sh

+7-2
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ for wav in ${waves[@]}; do
6262
ls -lh *.wav
6363
done
6464

65+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
66+
tar xvf spoken-language-identification-test-wavs.tar.bz2
67+
rm spoken-language-identification-test-wavs.tar.bz2
68+
data=spoken-language-identification-test-wavs
69+
6570
for name in ${names[@]}; do
6671
log "------------------------------------------------------------"
6772
log "Run $name"
@@ -85,14 +90,14 @@ for name in ${names[@]}; do
8590
time $EXE \
8691
--whisper-encoder=$repo/${name}-encoder.onnx \
8792
--whisper-decoder=$repo/${name}-decoder.onnx \
88-
$wav
93+
$data/$wav
8994

9095
log "test int8 onnx"
9196

9297
time $EXE \
9398
--whisper-encoder=$repo/${name}-encoder.int8.onnx \
9499
--whisper-decoder=$repo/${name}-decoder.int8.onnx \
95-
$wav
100+
$data/$wav
96101
done
97102
rm -rf $repo
98103
done

.github/workflows/linux.yaml

+16-18
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,13 @@ jobs:
128128
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
129129
path: install/*
130130

131-
- name: Test offline punctuation
131+
- name: Test spoken language identification (C++ API)
132132
shell: bash
133133
run: |
134134
export PATH=$PWD/build/bin:$PATH
135-
export EXE=sherpa-onnx-offline-punctuation
135+
export EXE=sherpa-onnx-offline-language-identification
136136
137-
.github/scripts/test-offline-punctuation.sh
137+
.github/scripts/test-spoken-language-identification.sh
138138
139139
- name: Test C API
140140
shell: bash
@@ -147,13 +147,13 @@ jobs:
147147
148148
.github/scripts/test-c-api.sh
149149
150-
- name: Test Audio tagging
150+
- name: Test offline CTC
151151
shell: bash
152152
run: |
153153
export PATH=$PWD/build/bin:$PATH
154-
export EXE=sherpa-onnx-offline-audio-tagging
154+
export EXE=sherpa-onnx-offline
155155
156-
.github/scripts/test-audio-tagging.sh
156+
.github/scripts/test-offline-ctc.sh
157157
158158
- name: Test online CTC
159159
shell: bash
@@ -163,14 +163,21 @@ jobs:
163163
164164
.github/scripts/test-online-ctc.sh
165165
166+
- name: Test offline punctuation
167+
shell: bash
168+
run: |
169+
export PATH=$PWD/build/bin:$PATH
170+
export EXE=sherpa-onnx-offline-punctuation
171+
172+
.github/scripts/test-offline-punctuation.sh
166173
167-
- name: Test spoken language identification (C++ API)
174+
- name: Test Audio tagging
168175
shell: bash
169176
run: |
170177
export PATH=$PWD/build/bin:$PATH
171-
export EXE=sherpa-onnx-offline-language-identification
178+
export EXE=sherpa-onnx-offline-audio-tagging
172179
173-
.github/scripts/test-spoken-language-identification.sh
180+
.github/scripts/test-audio-tagging.sh
174181
175182
- name: Test transducer kws
176183
shell: bash
@@ -180,7 +187,6 @@ jobs:
180187
181188
.github/scripts/test-kws.sh
182189
183-
184190
- name: Test offline Whisper
185191
if: matrix.build_type != 'Debug'
186192
shell: bash
@@ -192,14 +198,6 @@ jobs:
192198
193199
.github/scripts/test-offline-whisper.sh
194200
195-
- name: Test offline CTC
196-
shell: bash
197-
run: |
198-
export PATH=$PWD/build/bin:$PATH
199-
export EXE=sherpa-onnx-offline
200-
201-
.github/scripts/test-offline-ctc.sh
202-
203201
- name: Test offline TTS
204202
if: matrix.with_tts == 'ON'
205203
shell: bash

.github/workflows/macos.yaml

+8-8
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,14 @@ jobs:
107107
otool -L build/bin/sherpa-onnx
108108
otool -l build/bin/sherpa-onnx
109109
110+
- name: Test online CTC
111+
shell: bash
112+
run: |
113+
export PATH=$PWD/build/bin:$PATH
114+
export EXE=sherpa-onnx
115+
116+
.github/scripts/test-online-ctc.sh
117+
110118
- name: Test offline punctuation
111119
shell: bash
112120
run: |
@@ -150,14 +158,6 @@ jobs:
150158
151159
.github/scripts/test-kws.sh
152160
153-
- name: Test online CTC
154-
shell: bash
155-
run: |
156-
export PATH=$PWD/build/bin:$PATH
157-
export EXE=sherpa-onnx
158-
159-
.github/scripts/test-online-ctc.sh
160-
161161
- name: Test offline TTS
162162
if: matrix.with_tts == 'ON'
163163
shell: bash

sherpa-onnx/csrc/offline-ctc-model.cc

+13-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ namespace {
2020

2121
enum class ModelType {
2222
kEncDecCTCModelBPE,
23+
kEncDecHybridRNNTCTCBPEModel,
2324
kTdnn,
2425
kZipformerCtc,
2526
kWenetCtc,
@@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
5556
"No model_type in the metadata!\n"
5657
"If you are using models from NeMo, please refer to\n"
5758
"https://huggingface.co/csukuangfj/"
58-
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
59+
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py\n"
60+
"or "
61+
"https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/"
62+
"fast-conformer-hybrid-transducer-ctc\n"
5963
"If you are using models from WeNet, please refer to\n"
6064
"https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
6165
"run.sh\n"
@@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
6670

6771
if (model_type.get() == std::string("EncDecCTCModelBPE")) {
6872
return ModelType::kEncDecCTCModelBPE;
73+
} else if (model_type.get() == std::string("EncDecHybridRNNTCTCBPEModel")) {
74+
return ModelType::kEncDecHybridRNNTCTCBPEModel;
6975
} else if (model_type.get() == std::string("tdnn")) {
7076
return ModelType::kTdnn;
7177
} else if (model_type.get() == std::string("zipformer2_ctc")) {
@@ -106,6 +112,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
106112
case ModelType::kEncDecCTCModelBPE:
107113
return std::make_unique<OfflineNemoEncDecCtcModel>(config);
108114
break;
115+
case ModelType::kEncDecHybridRNNTCTCBPEModel:
116+
return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(config);
117+
break;
109118
case ModelType::kTdnn:
110119
return std::make_unique<OfflineTdnnCtcModel>(config);
111120
break;
@@ -153,6 +162,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
153162
case ModelType::kEncDecCTCModelBPE:
154163
return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
155164
break;
165+
case ModelType::kEncDecHybridRNNTCTCBPEModel:
166+
// Forward mgr as well, matching the sibling cases of this overload; the alias type shares OfflineNemoEncDecCtcModel's (mgr, config) constructor.
return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(mgr, config);
167+
break;
156168
case ModelType::kTdnn:
157169
return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
158170
break;

sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h

+2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
8181
std::unique_ptr<Impl> impl_;
8282
};
8383

84+
using OfflineNemoEncDecHybridRNNTCTCBPEModel = OfflineNemoEncDecCtcModel;
85+
8486
} // namespace sherpa_onnx
8587

8688
#endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_

sherpa-onnx/csrc/offline-recognizer-impl.cc

+6-2
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
122122
return std::make_unique<OfflineRecognizerParaformerImpl>(config);
123123
}
124124

125-
if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" ||
125+
if (model_type == "EncDecCTCModelBPE" ||
126+
model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
126127
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
127128
return std::make_unique<OfflineRecognizerCtcImpl>(config);
128129
}
@@ -137,6 +138,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
137138
" - Non-streaming transducer models from icefall\n"
138139
" - Non-streaming Paraformer models from FunASR\n"
139140
" - EncDecCTCModelBPE models from NeMo\n"
141+
" - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
140142
" - Whisper models\n"
141143
" - Tdnn models\n"
142144
" - Zipformer CTC models\n"
@@ -252,7 +254,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
252254
return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
253255
}
254256

255-
if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" ||
257+
if (model_type == "EncDecCTCModelBPE" ||
258+
model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
256259
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
257260
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
258261
}
@@ -267,6 +270,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
267270
" - Non-streaming transducer models from icefall\n"
268271
" - Non-streaming Paraformer models from FunASR\n"
269272
" - EncDecCTCModelBPE models from NeMo\n"
273+
" - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
270274
" - Whisper models\n"
271275
" - Tdnn models\n"
272276
" - Zipformer CTC models\n"

sherpa-onnx/csrc/symbol-table.cc

+4
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) {
6767
// the following check.
6868
//
6969
// Note: Only id2sym_ matters as we use it to convert ID to symbols.
70+
#if 0
71+
// we disable the test here since for some multi-lingual BPE models
72+
// from NeMo, the same symbol can appear multiple times with different IDs.
7073
if (sym != " ") {
7174
assert(sym2id_.count(sym) == 0);
7275
}
76+
#endif
7377

7478
assert(id2sym_.count(id) == 0);
7579

0 commit comments

Comments
 (0)