Skip to content

Commit 08d7713

Browse files
authored
Add a byte-level BPE Chinese+English non-streaming zipformer model (#1645)
1 parent fe3265a commit 08d7713

File tree

5 files changed: +127 -0 lines changed

.github/scripts/test-python.sh

+21
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,27 @@ log() {
88
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
99
}
1010

11+
log "test offline zipformer (byte-level bpe, Chinese+English)"
12+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
13+
tar xvf sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
14+
rm sherpa-onnx-zipformer-zh-en-2023-11-22.tar.bz2
15+
16+
repo=sherpa-onnx-zipformer-zh-en-2023-11-22
17+
18+
./python-api-examples/offline-decode-files.py \
19+
--tokens=$repo/tokens.txt \
20+
--encoder=$repo/encoder-epoch-34-avg-19.int8.onnx \
21+
--decoder=$repo/decoder-epoch-34-avg-19.onnx \
22+
--joiner=$repo/joiner-epoch-34-avg-19.int8.onnx \
23+
--num-threads=2 \
24+
--decoding-method=greedy_search \
25+
--debug=true \
26+
$repo/test_wavs/0.wav \
27+
$repo/test_wavs/1.wav \
28+
$repo/test_wavs/2.wav
29+
30+
rm -rf sherpa-onnx-zipformer-zh-en-2023-11-22
31+
1132
log "test offline Moonshine"
1233

1334
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+61
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
1+
name: add-new-asr-models
2+
3+
on:
4+
# push:
5+
# branches:
6+
# - new-asr-models
7+
workflow_dispatch:
8+
9+
concurrency:
10+
group: add-new-asr-models-${{ github.ref }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
add-new-asr-models:
15+
runs-on: ${{ matrix.os }}
16+
name: New asr models
17+
strategy:
18+
fail-fast: false
19+
matrix:
20+
os: [ubuntu-latest]
21+
22+
steps:
23+
- uses: actions/checkout@v4
24+
with:
25+
fetch-depth: 0
26+
27+
- name: Download icefall-asr-zipformer-multi-zh-en-2023-11-22
28+
shell: bash
29+
run: |
30+
d=sherpa-onnx-zipformer-zh-en-2023-11-22
31+
mkdir $d
32+
pushd $d
33+
34+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/tokens.txt
35+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/data/lang_bbpe_2000/bbpe.model
36+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/decoder-epoch-34-avg-19.onnx
37+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.int8.onnx
38+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/encoder-epoch-34-avg-19.onnx
39+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.int8.onnx
40+
wget -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/exp/joiner-epoch-34-avg-19.onnx
41+
42+
mkdir test_wavs
43+
cd test_wavs
44+
wget -O 0.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav
45+
wget -O 1.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav
46+
47+
wget -O 2.wav -q https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/resolve/main/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
48+
popd
49+
tar cvjf $d.tar.bz2 $d
50+
ls -lh $d
51+
rm -rf $d
52+
53+
- name: Release
54+
uses: svenstaro/upload-release-action@v2
55+
with:
56+
file_glob: true
57+
file: ./*.tar.bz2
58+
overwrite: true
59+
repo_name: k2-fsa/sherpa-onnx
60+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
61+
tag: asr-models

harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets

+12
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,18 @@ export function getOfflineModelConfig(type: number): OfflineModelConfig {
229229

230230
break;
231231
}
232+
233+
case 23: {
234+
const modelDir = "sherpa-onnx-zipformer-zh-en-2023-11-22";
235+
c.transducer.encoder = `${modelDir}/encoder-epoch-34-avg-19.int8.onnx`;
236+
c.transducer.decoder = `${modelDir}/decoder-epoch-34-avg-19.onnx`;
237+
c.transducer.joiner = `${modelDir}/joiner-epoch-34-avg-19.int8.onnx`;
238+
c.tokens = `${modelDir}/tokens.txt`;
239+
c.modelType = "transducer";
240+
241+
break;
242+
}
243+
232244
default: {
233245
console.log(`Please specify a supported type. Given type ${type}`);
234246
}

scripts/apk/generate-vad-asr-apk-script.py

+20
Original file line number | Diff line number | Diff line change
@@ -420,6 +420,26 @@ def get_models():
420420
421421
ls -lh
422422
423+
popd
424+
""",
425+
),
426+
Model(
427+
model_name="sherpa-onnx-zipformer-zh-en-2023-11-22",
428+
idx=23,
429+
lang="zh_en",
430+
lang2="Chinese,English",
431+
short_name="zipformer",
432+
cmd="""
433+
pushd $model_name
434+
435+
rm -rfv test_wavs
436+
437+
rm -fv encoder-epoch-34-avg-19.onnx
438+
rm -fv joiner-epoch-34-avg-19.onnx
439+
rm -fv bbpe.model
440+
441+
ls -lh
442+
423443
popd
424444
""",
425445
),

sherpa-onnx/kotlin-api/OfflineRecognizer.kt

+13
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
451451
tokens = "$modelDir/tokens.txt",
452452
)
453453
}
454+
455+
23 -> {
456+
val modelDir = "sherpa-onnx-zipformer-zh-en-2023-11-22"
457+
return OfflineModelConfig(
458+
transducer = OfflineTransducerModelConfig(
459+
encoder = "$modelDir/encoder-epoch-34-avg-19.int8.onnx",
460+
decoder = "$modelDir/decoder-epoch-34-avg-19.onnx",
461+
joiner = "$modelDir/joiner-epoch-34-avg-19.int8.onnx",
462+
),
463+
tokens = "$modelDir/tokens.txt",
464+
modelType = "transducer",
465+
)
466+
}
454467
}
455468
return null
456469
}

0 commit comments

Comments
 (0)