@@ -88,27 +88,31 @@ def run_intersect():
88
88
# Merge matched variants on sorted files
89
89
logger .info ("Joining & outputting matched variants -> matched_variants.txt" )
90
90
n_matched = 0
91
+ n_PCA_ELIGIBLE = 0
91
92
with open ('matched_variants.txt' , 'w' ) as csvfile :
92
93
for vmatch in sorted_join_variants ('reference_variants.txt' , 'target_variants.txt' ):
93
94
n_matched += 1
94
95
vmatch ['SAME_REF' ] = vmatch ['REF_REF' ] == vmatch ['REF_REF' ]
95
96
96
97
# Define variant's eligibility for PCA
97
98
# From original implementation: ((IS_MA_REF == FALSE) && (IS_MA_TARGET == FALSE)) && (((IS_INDEL == FALSE) && (STRANDAMB == FALSE)) || ((IS_INDEL == TRUE) && (SAME_REF == TRUE)))
98
- PCA_ELIGIBLE = ((vmatch ['IS_MA_REF' ] is False ) and (vmatch ['IS_MA_TARGET' ] is False )) and \
99
- (((vmatch ['IS_INDEL' ] is False ) and (vmatch ['STRANDAMB' ] is False )) or ((vmatch ['IS_INDEL' ] is True ) and (vmatch ['SAME_REF' ] is True )))
99
+ PCA_ELIGIBLE = ((vmatch ['IS_MA_REF' ] == ' False' ) and (vmatch ['IS_MA_TARGET' ] == ' False' )) and \
100
+ (((vmatch ['IS_INDEL' ] == ' False' ) and (vmatch ['STRANDAMB' ] == ' False' )) or ((vmatch ['IS_INDEL' ] == ' True' ) and (vmatch ['SAME_REF' ] == ' True' )))
100
101
101
- PCA_ELIGIBLE = PCA_ELIGIBLE and (vmatch ['MAF' ] > args .maf_filter ) and (vmatch ['F_MISS_DOSAGE' ] < args .maf_filter )
102
+ PCA_ELIGIBLE = PCA_ELIGIBLE and (float ( vmatch ['MAF' ]) > args .maf_filter ) and (float ( vmatch ['F_MISS_DOSAGE' ]) < args .maf_filter )
102
103
vmatch ['PCA_ELIGIBLE' ] = PCA_ELIGIBLE
104
+ if PCA_ELIGIBLE is True :
105
+ n_PCA_ELIGIBLE += 1
103
106
104
107
if n_matched == 1 :
105
108
writer = csv .DictWriter (csvfile , fieldnames = vmatch .keys (), delimiter = '\t ' )
106
109
writer .writeheader ()
107
110
writer .writerow (vmatch )
111
+ logger .info ("{}/{} ({:.2f} variants are eligible for PCA" .format (n_PCA_ELIGIBLE , n_matched , 100 * n_PCA_ELIGIBLE / n_matched ))
108
112
109
113
# Output counts
110
114
logger .info ("Outputting variant counts -> intersect_counts_$.txt" )
111
- with open ('intersect_counts_{}.txt' .format (args .chrom ), 'w' ) as outf :
115
+ with open ('intersect_counts_{}.txt' .format (args .filter_chrom ), 'w' ) as outf :
112
116
outf .write ('\n ' .join (map (str , [n_target , n_ref , n_matched ])))
113
117
114
118
0 commit comments