Skip to content

Commit 7938af2

Browse files
committed
reduce size of raw vcf chunks processed in R
1 parent 201a942 commit 7938af2

File tree

5 files changed

+25
-21
lines changed

5 files changed

+25
-21
lines changed

GBSapp

+6-2
Original file line number | Diff line number | Diff line change
@@ -439,13 +439,17 @@ mv unixformat.sh config.sh
439439
awk '{ sub("\r$",""); print}' config.sh > unixformat.sh
440440
mv unixformat.sh config.sh
441441

442-
string="$(awk '/^ref/&&/=/&&/.fa/ {print}' config.sh | wc -l)"
443-
string_hap="$(awk '/^hap_ref/&&/=/&&/.fa/ {print}' config.sh | wc -l)"
442+
string="$(awk '/^ref/&&/=/&&/.f/ {print}' config.sh | wc -l)"
443+
string_hap="$(awk '/^hap_ref/&&/=/&&/.f/ {print}' config.sh | wc -l)"
444444
if [[ "$string" -gt 0 ]] && [[ "$string_hap" -gt 0 ]]; then
445445
echo -e "${magenta}\n- You cannot specify both haploid and haplotype-resolved reference genome. ${white}\n"
446446
exit 1
447447
fi
448448

449+
if [[ "$string" -eq 0 ]] && [[ "$(ls ./refgenomes/*.f* | wc -l)" -eq 0 ]]; then
450+
echo -e "${magenta}- specify reference genome fasta file(s) in config file and provide the file(s) in refgenomes folder ${white}\n"
451+
sleep 5; exit 0
452+
fi
449453
if [[ "$string" -eq 0 ]]; then
450454
string=1
451455
fi

scripts/install.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ fi
228228

229229
# main9: prints a banner, installs the R package "reshape2" (with its
# dependencies) into the current directory (lib="./"), then changes
# directory to $R_dir.
# Reading note: this is a rendered diff. The line following "231-" is the
# removed version of the install command and the line following "231+" is
# its replacement; the bare numbers are gutter line numbers, not code.
main9 () {
230230
echo -e "${blue}\n############################################## \n- installing R-package: reshape2 ${blue}\n##############################################${white}"
231-
R -e 'install.packages("reshape2", dependencies = TRUE, repos="http://cran.r-project.org", lib="./")'
231+
# NOTE(review): the replacement drops the explicit repos= argument, so the
# mirror presumably comes from R's configured "repos" option -- confirm a
# CRAN mirror is set for non-interactive `R -e` runs.
R -e 'install.packages("reshape2", dependencies = TRUE, lib="./")'
232232
# NOTE(review): $R_dir is expanded unquoted and is defined outside this view.
cd $R_dir
233233
}
234234
dirtool=./reshape2

scripts/subgenome_ref_files_1.sh

+6-6
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ main () {
134134
export ramg=20
135135
export Xmxg=-Xmx${ramg}G
136136
export gN=$(($ram2/$ramg))
137+
if [[ "$gN" -lt 1 ]]; then export gN=1; fi
137138
export gthreads=$(($threads /$gN ))
138139
if [[ "$gN" -gt "$((threads/4))" ]]; then
139140
gN=$((threads/4))
@@ -669,7 +670,6 @@ main () {
669670
for i in samples_list_node_*.txt; do
670671
:> ${i%.txt}_hold.txt
671672
while IFS="" read -r line; do
672-
sleep $((RANDOM % 2))
673673
ls -l ./samples/$line | awk '{print $5"\t"$9}' >> ${i%.txt}_hold.txt
674674
done < <(grep -v '_tmp.fa' $i | grep -v _R2.f | grep -v _uniq.fasta | grep -v _uniq_R1.fasta | grep -v _uniq_R2.fasta | grep -v _uniq.hold.fasta | grep -v _uniq_R1.hold.fasta | grep -v _uniq_R2.hold.fasta | grep -v fq.gz)
675675
sort -nr -k1 ${i%.txt}_hold.txt | awk '{gsub(/.\/samples\//,""); print $2}' > $i
@@ -1700,7 +1700,7 @@ if [[ "${file1xG}" -lt 1 ]]; then
17001700
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
17011701
wait
17021702
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
1703-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"1x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
1703+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"1x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
17041704
wait $PID
17051705
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
17061706
wait
@@ -1745,7 +1745,7 @@ if [[ "${file2xG}" -lt 1 ]]; then
17451745
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
17461746
wait
17471747
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
1748-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"2x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
1748+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"2x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
17491749
wait $PID
17501750
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
17511751
wait
@@ -1790,7 +1790,7 @@ if [[ "${file4xG}" -lt 1 ]]; then
17901790
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
17911791
wait
17921792
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
1793-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"4x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
1793+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"4x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
17941794
wait $PID
17951795
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
17961796
wait
@@ -1835,7 +1835,7 @@ if [[ "${file6xG}" -lt 1 ]]; then
18351835
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
18361836
wait
18371837
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
1838-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"6x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
1838+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"6x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
18391839
wait $PID
18401840
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
18411841
wait
@@ -1880,7 +1880,7 @@ if [[ "${file8xG}" -lt 1 ]]; then
18801880
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
18811881
wait
18821882
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
1883-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"8x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
1883+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"8x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
18841884
wait $PID
18851885
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
18861886
wait

scripts/subgenome_ref_files_2.sh

+6-6
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,7 @@ main () {
127127
export ramg=20
128128
export Xmxg=-Xmx${ramg}G
129129
export gN=$(($ram2/$ramg))
130+
if [[ "$gN" -lt 1 ]]; then export gN=1; fi
130131
export gthreads=$(($threads /$gN ))
131132
if [[ "$gN" -gt "$((threads/4))" ]]; then
132133
gN=$((threads/4))
@@ -653,7 +654,6 @@ main () {
653654
for i in samples_list_node_*.txt; do
654655
:> ${i%.txt}_hold.txt
655656
while IFS="" read -r line; do
656-
sleep $((RANDOM % 2))
657657
ls -l ./samples/$line | awk '{print $5"\t"$9}' >> ${i%.txt}_hold.txt
658658
done < <(grep -v '_tmp.fa' $i | grep -v _R2.f | grep -v _uniq.fasta | grep -v _uniq_R1.fasta | grep -v _uniq_R2.fasta | grep -v _uniq.hold.fasta | grep -v _uniq_R1.hold.fasta | grep -v _uniq_R2.hold.fasta | grep -v fq.gz)
659659
sort -nr -k1 ${i%.txt}_hold.txt | awk '{gsub(/.\/samples\//,""); print $2}' > $i
@@ -2289,7 +2289,7 @@ if [[ "${file1xG}" -lt 1 ]]; then
22892289
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
22902290
wait
22912291
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
2292-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"1x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
2292+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"1x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
22932293
wait $PID
22942294
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
22952295
wait
@@ -2334,7 +2334,7 @@ if [[ "${file2xG}" -lt 1 ]]; then
23342334
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
23352335
wait
23362336
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
2337-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"2x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
2337+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"2x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
23382338
wait $PID
23392339
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
23402340
wait
@@ -2379,7 +2379,7 @@ if [[ "${file4xG}" -lt 1 ]]; then
23792379
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
23802380
wait
23812381
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
2382-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"4x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
2382+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"4x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
23832383
wait $PID
23842384
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
23852385
wait
@@ -2424,7 +2424,7 @@ if [[ "${file6xG}" -lt 1 ]]; then
24242424
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
24252425
wait
24262426
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
2427-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"6x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
2427+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"6x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
24282428
wait $PID
24292429
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
24302430
wait
@@ -2469,7 +2469,7 @@ if [[ "${file8xG}" -lt 1 ]]; then
24692469
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
24702470
wait
24712471
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
2472-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"8x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
2472+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"8x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
24732473
wait $PID
24742474
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
24752475
wait

scripts/subgenome_ref_files_3.sh

+6-6
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,7 @@ main () {
127127
export ramg=20
128128
export Xmxg=-Xmx${ramg}G
129129
export gN=$(($ram2/$ramg))
130+
if [[ "$gN" -lt 1 ]]; then export gN=1; fi
130131
export gthreads=$(($threads /$gN ))
131132
if [[ "$gN" -gt "$((threads/4))" ]]; then
132133
gN=$((threads/4))
@@ -662,7 +663,6 @@ main () {
662663
for i in samples_list_node_*.txt; do
663664
:> ${i%.txt}_hold.txt
664665
while IFS="" read -r line; do
665-
sleep $((RANDOM % 2))
666666
ls -l ./samples/$line | awk '{print $5"\t"$9}' >> ${i%.txt}_hold.txt
667667
done < <(grep -v '_tmp.fa' $i | grep -v _R2.f | grep -v _uniq.fasta | grep -v _uniq_R1.fasta | grep -v _uniq_R2.fasta | grep -v _uniq.hold.fasta | grep -v _uniq_R1.hold.fasta | grep -v _uniq_R2.hold.fasta | grep -v fq.gz)
668668
sort -nr -k1 ${i%.txt}_hold.txt | awk '{gsub(/.\/samples\//,""); print $2}' > $i
@@ -3677,7 +3677,7 @@ main () {
36773677
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
36783678
wait
36793679
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
3680-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"1x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
3680+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"1x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
36813681
wait $PID
36823682
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
36833683
wait
@@ -3722,7 +3722,7 @@ main () {
37223722
$GATK --java-options "$Xmxg -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$gthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
37233723
wait
37243724
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
3725-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"2x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
3725+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"2x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
37263726
wait $PID
37273727
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
37283728
wait
@@ -3767,7 +3767,7 @@ main () {
37673767
$GATK --java-options "$Xmx1 -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$loopthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
37683768
wait
37693769
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
3770-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"4x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
3770+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"4x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
37713771
wait $PID
37723772
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
37733773
wait
@@ -3812,7 +3812,7 @@ main () {
38123812
$GATK --java-options "$Xmx1 -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$loopthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
38133813
wait
38143814
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
3815-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"6x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
3815+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"6x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
38163816
wait $PID
38173817
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
38183818
wait
@@ -3857,7 +3857,7 @@ main () {
38573857
$GATK --java-options "$Xmx1 -Djava.io.tmpdir=${projdir}/snpcall/tmp -XX:+UseParallelGC -XX:ParallelGCThreads=$loopthreads" LeftAlignAndTrimVariants -R ${projdir}/refgenomes/$refg -V $i -O ${i%.vcf}0.vcf --split-multi-allelics --dont-trim-alleles --keep-original-ac &&
38583858
wait
38593859
awk '!/^##/' ${i%.vcf}0.vcf | awk '{gsub(/^#/,""); print $0}' > ${i%.vcf}trim.vcf &&
3860-
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%100000==2{x=file"8x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
3860+
awk -v file=${i%.vcf} 'BEGIN{getline f;}NR%10000==2{x=file"8x_rawSPLIT"++i".vcf";a[i]=x;print f>x;}{print > x}' ${i%.vcf}trim.vcf & PID=$!
38613861
wait $PID
38623862
rm "${i%.vcf}"0.vcf* "${i%.vcf}"trim.vcf* &&
38633863
wait

0 commit comments

Comments
 (0)