-
Notifications
You must be signed in to change notification settings - Fork 321
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support computing RNN-T loss with torchaudio (#316)
- Loading branch information
1 parent
021c798
commit fce7f3c
Showing
37 changed files
with
2,536 additions
and
680 deletions.
There are no files selected for viewing
47 changes: 47 additions & 0 deletions
47
.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the LibriSpeech ASR recipe directory. | ||
cd egs/librispeech/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./pruned_transducer_stateless/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./pruned_transducer_stateless/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done |
47 changes: 47 additions & 0 deletions
47
.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the LibriSpeech ASR recipe directory. | ||
cd egs/librispeech/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./transducer_stateless2/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless2/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the LibriSpeech ASR recipe directory. | ||
cd egs/librispeech/ASR | ||
|
||
||
repo_url=https://github.com/csukuangfj/icefall-asr-conformer-ctc-bpe-500 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
# Clone exactly once, from $repo_url. $repo is only assigned on the next line, | ||
# so cloning "$repo" before this point would run a bare "git clone" and fail. | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
# The test audio in this repo is FLAC rather than WAV. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.flac | ||
ls -lh $repo/test_wavs/*.flac | ||
|
||
||
log "CTC decoding" | ||
|
||
||
# First pass: --method ctc-decoding needs only the checkpoint and the BPE model. | ||
./conformer_ctc/pretrained.py \ | ||
--method ctc-decoding \ | ||
--num-classes 500 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.flac \ | ||
$repo/test_wavs/1221-135766-0001.flac \ | ||
$repo/test_wavs/1221-135766-0002.flac | ||
|
||
||
log "HLG decoding" | ||
|
||
||
# Second pass: --method 1best additionally takes the word table and HLG.pt. | ||
./conformer_ctc/pretrained.py \ | ||
--method 1best \ | ||
--num-classes 500 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
--words-file $repo/data/lang_bpe_500/words.txt \ | ||
--HLG $repo/data/lang_bpe_500/HLG.pt \ | ||
$repo/test_wavs/1089-134686-0001.flac \ | ||
$repo/test_wavs/1221-135766-0001.flac \ | ||
$repo/test_wavs/1221-135766-0002.flac |
47 changes: 47 additions & 0 deletions
47
.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the LibriSpeech ASR recipe directory. | ||
cd egs/librispeech/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./transducer_stateless_multi_datasets/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless_multi_datasets/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done |
47 changes: 47 additions & 0 deletions
47
.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the LibriSpeech ASR recipe directory. | ||
cd egs/librispeech/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./transducer_stateless_multi_datasets/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless_multi_datasets/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done |
47 changes: 47 additions & 0 deletions
47
.github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the AIShell ASR recipe directory. | ||
cd egs/aishell/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
# This recipe is given a --lang-dir (lang_char) instead of a BPE model. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./transducer_stateless_modified-2/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--lang-dir $repo/data/lang_char \ | ||
$repo/test_wavs/BAC009S0764W0121.wav \ | ||
$repo/test_wavs/BAC009S0764W0122.wav \ | ||
$repo/test_wavs/BAC009S0764W0123.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless_modified-2/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--lang-dir $repo/data/lang_char \ | ||
$repo/test_wavs/BAC009S0764W0121.wav \ | ||
$repo/test_wavs/BAC009S0764W0122.wav \ | ||
$repo/test_wavs/BAC009S0764W0123.wav | ||
done |
47 changes: 47 additions & 0 deletions
47
.github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the AIShell ASR recipe directory. | ||
cd egs/aishell/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
# This recipe is given a --lang-dir (lang_char) instead of a BPE model. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./transducer_stateless_modified/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--lang-dir $repo/data/lang_char \ | ||
$repo/test_wavs/BAC009S0764W0121.wav \ | ||
$repo/test_wavs/BAC009S0764W0122.wav \ | ||
$repo/test_wavs/BAC009S0764W0123.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless_modified/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--lang-dir $repo/data/lang_char \ | ||
$repo/test_wavs/BAC009S0764W0121.wav \ | ||
$repo/test_wavs/BAC009S0764W0122.wav \ | ||
$repo/test_wavs/BAC009S0764W0123.wav | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Print one timestamped log line to stdout, tagged with the caller's location. | ||
# Arguments: the message words; they are joined with "$*". | ||
log() { | ||
# This function is from espnet | ||
# BASH_SOURCE[1] is the file that called log(); "##*/" strips its directory part. | ||
local fname=${BASH_SOURCE[1]##*/} | ||
# Output: "<date> <time> (<file>:<call line>:<calling function>) <message>". | ||
# echo -e also expands backslash escapes contained in the message. | ||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
} | ||
|
||
# Every relative path below is resolved from the LibriSpeech ASR recipe directory. | ||
cd egs/librispeech/ASR | ||
|
||
||
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07 | ||
|
||
||
log "Downloading pre-trained model from $repo_url" | ||
# Set up Git LFS before cloning so LFS-tracked files are fetched with the repo. | ||
git lfs install | ||
git clone $repo_url | ||
# The clone lands in a directory named after the last URL component; keep it in $repo. | ||
repo=$(basename $repo_url) | ||
|
||
||
log "Display test files" | ||
# Show what was downloaded: directory tree, audio header info, and file sizes. | ||
tree $repo/ | ||
soxi $repo/test_wavs/*.wav | ||
ls -lh $repo/test_wavs/*.wav | ||
|
||
||
# Greedy search: decode the three test wavs once per --max-sym-per-frame value. | ||
for sym in 1 2 3; do | ||
log "Greedy search with --max-sym-per-frame $sym" | ||
|
||
||
./transducer_stateless/pretrained.py \ | ||
--method greedy_search \ | ||
--max-sym-per-frame $sym \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done | ||
|
||
||
# Beam-search variants: same checkpoint and wavs, with --beam-size 4. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done | ||
|
||
# Runs both beam-search methods again with --beam-size 4 on the same checkpoint and wavs. | ||
# NOTE(review): this repeats the preceding beam-search loop but invokes | ||
# ./transducer_stateless_multi_datasets/pretrained.py, while $repo holds the | ||
# transducer-stateless-bpe-500 model. It looks like a copy-paste leftover from the | ||
# multi-datasets script; confirm whether it is intended or should be removed. | ||
for method in modified_beam_search beam_search; do | ||
log "$method" | ||
|
||
||
./transducer_stateless_multi_datasets/pretrained.py \ | ||
--method $method \ | ||
--beam-size 4 \ | ||
--checkpoint $repo/exp/pretrained.pt \ | ||
--bpe-model $repo/data/lang_bpe_500/bpe.model \ | ||
$repo/test_wavs/1089-134686-0001.wav \ | ||
$repo/test_wavs/1221-135766-0001.wav \ | ||
$repo/test_wavs/1221-135766-0002.wav | ||
done |
Oops, something went wrong.