-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSTAR_and_QC.txt
88 lines (68 loc) · 2.78 KB
/
STAR_and_QC.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash/
# This script takes a fastq file of RNA-seq data, runs FastQC, STAR, Qualimap and Salmon.
# USAGE: sh rnaseq_analysis_on_input_file.sh <name of fastq file>
# initialize a variable with an intuitive name to store the name of the input fastq file
fq=$1 ## Giving the $1 input a new name that makes more sense
# grab base of filename for naming outputs
base=`basename $fq .fq` ##use backticks to enclose the entire command for the "base" variable
echo "Sample name is $base" ## echo prints whatever you have written-- INSERT -- 1,1 Top
# specify the number of cores to use
cores=12
# directory with the genome and transcriptome index files + name of the gene annotation file
genome=/n/groups/churchman/djm46/Genomes/GRCm38/STARindex
transcriptome=/n/groups/churchman/djm46/Genomes/GRCm38/salmon_index
gtf=/n/groups/churchman/djm46/Genomes/GRCm38/gencode.vM21.annotation.gtf
# make all of the output directories
# The -p option means mkdir will create the whole path if it
# does not exist and refrain from complaining if it does exist
mkdir -p ../results/fastqc/
mkdir -p ../results/STAR/
mkdir -p ../results/qualimap/
mkdir -p ../results/salmon/
# set up output filenames and locations
fastqc_out= ../results/fastqc/
align_out= ../results/STAR/${base}_
align_out_bam= ../results/STAR/${base}_Aligned.sortedByCoord.out.bam
qualimap_out= ../results/qualimap/${base}.qualimap
salmon_out= ../results/salmon/${base}.salmon
salmon_mappings= ../results/salmon/${base}_salmon.out
echo "output filenames created"
# set up the software environment
echo "loading modules and setting" ## using echo can help troubl shoot where the script doesn't work
module load fastqc/0.11.3
module load gcc/6.2.0
module load star/2.5.4a
module load samtools/1.3.1 ##use salmon -v or qualimap -v to check version and load the module
unset DISPLAY
export PATH=/n/app/bcbio/dev/anaconda/bin:$PATH
echo "Processing file $fq"
# Run FastQC and move output to the appropriate folder
fastqc $fq1 $fq2
# Run STAR
STAR --runThreadN $cores \
--genomeDir $genome \
--readFilesIn $fq1 $fq2 \
--outFileNamePrefix $align_out \
--outSAMtype BAM SortedByCoordinate \
--outSAMunmapped Within \
--outSAMattributes Standard
# Run Qualimap
qualimap rnaseq \
-outdir $qualimap_out \
-a proportional \
-bam $align_out_bam \
-p strand-specific-reverse \
-gtf $gtf \
--paired \
--java-mem-size=32G
$salmon_version=`salmon -v`
echo "Salmon version is $salmon_version" ##can do same thing for Qualimap
# Run salmon
salmon quant -i $transcriptome \
-l A \
-r $fq \
-o $salmon_out \
--seqBias \
--useVBOpt \
--writeMappings=$salmon_mappings
##commented out since the large file created here is not required