Skip to content

Commit 5207195

Browse files
committed
checks the reference genome and creates indices if necessary
1 parent 5faa1bd commit 5207195

7 files changed

+66740
-2
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@ report.html*
66
timeline.html*
77
trace.txt*
88
dag.dot*
9-
*.swp
9+
*.swp
10+
/test_data/ucsc.hg19.minimal.without_indices.dict
11+
/test_data/ucsc.hg19.minimal.without_indices.fasta.fai

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ test:
1717
bash tests/test_08.sh
1818
bash tests/test_09.sh
1919
bash tests/test_10.sh
20+
bash tests/test_11.sh

main.nf

+24
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ include { MARK_DUPLICATES; SPLIT_CIGAR_N_READS } from './modules/02_mark_duplica
77
include { METRICS; HS_METRICS; COVERAGE_ANALYSIS; FLAGSTAT } from './modules/03_metrics'
88
include { REALIGNMENT_AROUND_INDELS } from './modules/04_realignment_around_indels'
99
include { BQSR; CREATE_OUTPUT } from './modules/05_bqsr'
10+
include { CREATE_FAIDX; CREATE_DICT } from './modules/00_reference_indices'
1011

1112
params.help= false
1213
params.input_files = false
@@ -82,9 +83,32 @@ else if (params.input_files) {
8283
.set { input_files }
8384
}
8485

86+
workflow CHECK_REFERENCE {
87+
take:
88+
reference
89+
90+
main:
91+
// checks the reference and its indices; creates any index that is missing
92+
reference_file = file(reference)
93+
if (reference_file.isEmpty()) {
94+
log.error "--reference points to a non existing file"
95+
exit 1
96+
}
97+
faidx = file("${reference}.fai")
98+
if (faidx.isEmpty()) {
99+
CREATE_FAIDX(reference)
100+
}
101+
dict = file("${reference_file.getParent() }/${reference_file.baseName }*.dict")
102+
if (dict.isEmpty()) {
103+
CREATE_DICT(reference)
104+
}
105+
}
106+
85107

86108
workflow {
87109

110+
CHECK_REFERENCE(params.reference)
111+
88112
PREPARE_BAM(input_files, params.reference)
89113

90114
if (!params.skip_deduplication) {

modules/00_reference_indices.nf

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
2+
process CREATE_FAIDX {
3+
cpus "1"
4+
memory "4g"
5+
tag "${name}"
6+
7+
conda (params.enable_conda ? "bioconda::samtools=1.12" : null)
8+
9+
input:
10+
val(reference)
11+
12+
"""
13+
samtools faidx ${reference}
14+
"""
15+
}
16+
17+
process CREATE_DICT {
18+
cpus "1"
19+
memory "4g"
20+
tag "${name}"
21+
22+
conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)
23+
24+
input:
25+
val(reference)
26+
27+
"""
28+
gatk CreateSequenceDictionary --REFERENCE ${reference}
29+
"""
30+
}

nextflow.config

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']
4444

4545
cleanup = true
4646

47-
VERSION = '2.0.1'
47+
VERSION = '2.1.0'
4848
DOI = 'https://zenodo.org/badge/latestdoi/358400957'
4949

5050
manifest {

test_data/ucsc.hg19.minimal.without_indices.fasta

+66,672
Large diffs are not rendered by default.

tests/test_11.sh

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
3+
4+
source tests/assert.sh
5+
output=output/test11
6+
nextflow main.nf -profile test,conda --output $output --reference `pwd`/test_data/ucsc.hg19.minimal.without_indices.fasta
7+
8+
test -s `pwd`/test_data/ucsc.hg19.minimal.without_indices.fasta.fai || { echo "Missing output FAI index!"; exit 1; }
9+
test -s `pwd`/test_data/ucsc.hg19.minimal.without_indices.dict || { echo "Missing output dict index!"; exit 1; }

0 commit comments

Comments
 (0)