Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run decode.py in GitHub actions. #356

Merged
merged 1 commit into from
May 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash

# This script computes fbank features for the test-clean and test-other datasets.
# The computed features are saved to ~/tmp/fbank-libri and are
# cached for later runs

export PYTHONPATH=$PWD:$PYTHONPATH
echo $PYTHONPATH

mkdir ~/tmp/fbank-libri
cd egs/librispeech/ASR
mkdir -p data
cd data
[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
cd ..
./local/compute_fbank_librispeech.py
ls -lh data/fbank/
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# This script downloads the test-clean and test-other datasets
# of LibriSpeech and unzip them to the folder ~/tmp/download,
# which is cached by GitHub actions for later runs.
#
# You will find directories ~/tmp/download/LibriSpeech after running
# this script.

mkdir ~/tmp/download
cd egs/librispeech/ASR
ln -s ~/tmp/download .
cd download
wget -q --no-check-certificate https://www.openslr.org/resources/12/test-clean.tar.gz
tar xf test-clean.tar.gz
rm test-clean.tar.gz

wget -q --no-check-certificate https://www.openslr.org/resources/12/test-other.tar.gz
tar xf test-other.tar.gz
rm test-other.tar.gz
pwd
ls -lh
ls -lh LibriSpeech
13 changes: 13 additions & 0 deletions .github/scripts/install-kaldifeat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash

# This script installs kaldifeat into the directory ~/tmp/kaldifeat
# which is cached by GitHub actions for later runs.

mkdir -p ~/tmp
cd ~/tmp
git clone https://github.com/csukuangfj/kaldifeat
cd kaldifeat
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j2 _kaldifeat
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# This script assumes that test-clean and test-other are downloaded
# to egs/librispeech/ASR/download/LibriSpeech and generates manifest
# files in egs/librispeech/ASR/data/manifests

cd egs/librispeech/ASR
[ ! -e download ] && ln -s ~/tmp/download .
mkdir -p data/manifests
lhotse prepare librispeech -j 2 -p test-clean -p test-other ./download/LibriSpeech data/manifests
ls -lh data/manifests
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,31 @@ for method in modified_beam_search beam_search; do
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
mkdir -p pruned_transducer_stateless/exp
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/

ls -lh data
ls -lh pruned_transducer_stateless/exp

log "Decoding test-clean and test-other"

# use a small value for decoding with CPU
max_duration=50

for method in greedy_search fast_beam_search; do
log "Decoding with $method"

./pruned_transducer_stateless/decode.py \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--max-duration $max_duration \
--exp-dir pruned_transducer_stateless/exp
done

rm pruned_transducer_stateless/exp/*.pt
fi
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,31 @@ for method in modified_beam_search beam_search fast_beam_search; do
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
mkdir -p pruned_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/

ls -lh data
ls -lh pruned_transducer_stateless2/exp

log "Decoding test-clean and test-other"

# use a small value for decoding with CPU
max_duration=50

for method in greedy_search fast_beam_search; do
log "Decoding with $method"

./pruned_transducer_stateless2/decode.py \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--max-duration $max_duration \
--exp-dir pruned_transducer_stateless2/exp
done

rm pruned_transducer_stateless2/exp/*.pt
fi
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,31 @@ for method in modified_beam_search beam_search fast_beam_search; do
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
mkdir -p pruned_transducer_stateless3/exp
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/

ls -lh data
ls -lh pruned_transducer_stateless3/exp

log "Decoding test-clean and test-other"

# use a small value for decoding with CPU
max_duration=50

for method in greedy_search fast_beam_search; do
log "Decoding with $method"

./pruned_transducer_stateless3/decode.py \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--max-duration $max_duration \
--exp-dir pruned_transducer_stateless3/exp
done

rm pruned_transducer_stateless3/exp/*.pt
fi
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,31 @@ for method in modified_beam_search beam_search; do
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
mkdir -p transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/

ls -lh data
ls -lh transducer_stateless2/exp

log "Decoding test-clean and test-other"

# use a small value for decoding with CPU
max_duration=50

for method in greedy_search modified_beam_search; do
log "Decoding with $method"

./transducer_stateless2/decode.py \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--max-duration $max_duration \
--exp-dir transducer_stateless2/exp
done

rm transducer_stateless2/exp/*.pt
fi
85 changes: 76 additions & 9 deletions .github/workflows/run-librispeech-2022-03-12.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,18 @@ on:
pull_request:
types: [labeled]

schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"

jobs:
run_librispeech_2022_03_12:
if: github.event.label.name == 'ready' || github.event_name == 'push'
if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down Expand Up @@ -63,20 +72,78 @@ jobs:
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
mkdir -p ~/tmp
cd ~/tmp
git clone https://github.com/csukuangfj/kaldifeat
cd kaldifeat
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j2 _kaldifeat
.github/scripts/install-kaldifeat.sh

- name: Cache LibriSpeech test-clean and test-other datasets
id: libri-test-clean-and-test-other-data
uses: actions/cache@v2
with:
path: |
~/tmp/download
key: cache-libri-test-clean-and-test-other

- name: Download LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh

- name: Prepare manifests for LibriSpeech test-clean and test-other
shell: bash
run: |
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh

- name: Cache LibriSpeech test-clean and test-other fbank features
id: libri-test-clean-and-test-other-fbank
uses: actions/cache@v2
with:
path: |
~/tmp/fbank-libri
key: cache-libri-fbank-test-clean-and-test-other

- name: Compute fbank for LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh


- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
run: |
mkdir -p egs/librispeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*

sudo apt-get -qq install git-lfs tree sox
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh

- name: Display decoding results
if: github.event_name == 'schedule'
shell: bash
run: |
cd egs/librispeech/ASR/
tree ./pruned_transducer_stateless/exp

cd pruned_transducer_stateless
echo "results for pruned_transducer_stateless"
echo "===greedy search==="
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

echo "===fast_beam_search==="
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2

- name: Upload decoding results for pruned_transducer_stateless
uses: actions/upload-artifact@v2
if: github.event_name == 'schedule'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless-2022-03-12
path: egs/librispeech/ASR/pruned_transducer_stateless/exp/
Loading