-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path12.reduce-tmx.sh
executable file
·58 lines (49 loc) · 1.49 KB
/
12.reduce-tmx.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
# Schedule the 12.reduce-tmx and 12.reduce-tmx-deferred jobs for one language.
#
# Usage: 12.reduce-tmx.sh LANG [COLLECTION...]
#
# Two outputs are considered: <output_base>.tmx.gz and
# <output_base>.deferred.tmx.gz. An output is (re)scheduled when RETRY is
# false, or when RETRY is true and the file does not exist yet. The path of
# every output that needs work is printed to stderr before `confirm` is
# called, so `confirm` is invoked at most once.
#
# NOTE(review): DATA_CLEANING, TARGET_LANG, RETRY, SLURM_LOGS, SCRIPTS,
# BICLEANER_THRESHOLD, join_by, bicleaner_model, confirm and schedule are not
# defined in this file; presumably they come from the files sourced below.
set -euo pipefail
. ./env/init.sh
. ./config.sh
. ./functions.sh

# `?` (not `:?`) only rejects a missing argument, so an explicitly empty
# LANG is still accepted as before; a missing one now yields a usage message
# instead of a bare "unbound variable" error.
lang=${1?usage: $0 LANG [COLLECTION...]}
shift

# Identifier for the selected collections: one name per line, sorted, then
# piped into `join_by -`. "$@" is expanded quoted so that collection names are
# never word-split or glob-expanded.
collection_hash=$(printf '%s\n' "$@" | sort | join_by -)

# Load the bicleaner model as we need to know the BICLEANER_THRESHOLD
bicleaner_model "${lang%~*}"

# ${var%~*} drops a trailing "~..." suffix; the directory name keeps the full
# language values while the file name prefix uses the stripped ones.
output_base="${DATA_CLEANING}/${TARGET_LANG}-${lang}/${TARGET_LANG%~*}-${lang%~*}.${collection_hash}"

# Lots of work to determine which files need to be (re)generated while only
# calling `confirm` once.
needs_tmx=false
needs_deferred=false

# RETRY holds the name of a command (true/false) and is executed as such.
if ! $RETRY || [[ ! -f "${output_base}.tmx.gz" ]]; then
  printf '%s\n' "${output_base}.tmx.gz" >&2
  needs_tmx=true
fi

if ! $RETRY || [[ ! -f "${output_base}.deferred.tmx.gz" ]]; then
  printf '%s\n' "${output_base}.deferred.tmx.gz" >&2
  needs_deferred=true
fi

# Only ask for confirmation when there is something to schedule.
if { $needs_tmx || $needs_deferred; } && confirm; then
  if $needs_tmx; then
    # ${BICLEANER_THRESHOLD/./} removes the first "." from the threshold
    # before it is embedded in the filtered file name.
    schedule \
      -J "reduce-tmx-${lang%~*}" \
      --time 36:00:00 \
      --cpus-per-task 4 \
      -e "${SLURM_LOGS}/12.reduce-tmx-%A.err" \
      -o "${SLURM_LOGS}/12.reduce-tmx-%A.out" \
      "${SCRIPTS}/12.reduce-tmx" "${lang%~*}" \
      "${output_base}.tmx.gz" \
      "${output_base}.txt.gz" \
      "${output_base}.filtered${BICLEANER_THRESHOLD/./}.gz"
  fi

  if $needs_deferred; then
    # Uses the same log file name pattern as the non-deferred job.
    schedule \
      -J "reduce-tmx-deferred-${lang%~*}" \
      --time 36:00:00 \
      --cpus-per-task 4 \
      -e "${SLURM_LOGS}/12.reduce-tmx-%A.err" \
      -o "${SLURM_LOGS}/12.reduce-tmx-%A.out" \
      "${SCRIPTS}/12.reduce-tmx-deferred" "${lang%~*}" \
      "${output_base}.deferred.tmx.gz" \
      "${output_base}.filtered${BICLEANER_THRESHOLD/./}.gz"
  fi
fi