@@ -32,7 +32,7 @@ def run_intersect():
32
32
STRANDAMB = (v ['REF' ] == allele_complement (ALT ))
33
33
heapq .heappush (ref_heap , ([key , v ['ID' ], v ['REF' ]],[IS_INDEL , STRANDAMB , IS_MA_REF ]))
34
34
35
- # Output the sorted variants
35
+ # Output the sorted reference variants
36
36
for i in range (len (ref_heap )):
37
37
popped = heapq .heappop (ref_heap )
38
38
outf .write ('\t ' .join ([str (x ) for x in popped [0 ] + popped [1 ]]) + '\n ' )
@@ -43,15 +43,14 @@ def run_intersect():
43
43
outf .write ('CHR:POS:A0:A1\t ID_TARGET\t REF_TARGET\t IS_MA_TARGET\t ALT_FREQ\t F_MISS_DOSAGE\n ' )
44
44
target_heap = []
45
45
for path in args .target :
46
- pvar = read_var_general (path )
46
+ pvar = read_var_general (path , chrom = None ) # essential not to filter if it is target (messes up common indexing )
47
47
48
48
loc_afreq = path .replace ('.pvar.zst' , '.afreq.gz' )
49
- afreq = read_var_general (loc_afreq )
49
+ afreq = read_var_general (loc_afreq , chrom = None ) # essential not to filter if it is target (messes up common indexing )
50
50
51
51
loc_vmiss = path .replace ('.pvar.zst' , '.vmiss.gz' )
52
- vmiss = read_var_general (loc_vmiss )
52
+ vmiss = read_var_general (loc_vmiss , chrom = None ) # essential not to filter if it is target (messes up common indexing )
53
53
54
- ## ToDo: figure out if it needs to change to bim files
55
54
for v , freq , miss in zip (pvar , afreq , vmiss ):
56
55
# if v['ID'] != freq['ID'] != miss['ID']:
57
56
# print(v)
@@ -64,8 +63,11 @@ def run_intersect():
64
63
key = '{}:{}:{}:{}' .format (v ['#CHROM' ], v ['POS' ], v ['REF' ], ALT )
65
64
else :
66
65
key = '{}:{}:{}:{}' .format (v ['#CHROM' ], v ['POS' ], ALT , v ['REF' ])
67
- heapq .heappush (target_heap , ([key , v ['ID' ], v ['REF' ]], [IS_MA_TARGET , ALT_FREQS [i ], F_MISS_DOSAGE ]))
66
+ # outf.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(key, v['ID'], v['REF'], str(IS_MA_TARGET), ALT_FREQS[i],
67
+ # F_MISS_DOSAGE))
68
+ heapq .heappush (target_heap , ([key , v ['ID' ], v ['REF' ]], [IS_MA_TARGET , ALT_FREQS [i ],F_MISS_DOSAGE ]))
68
69
70
+ # Output the sorted reference variants
69
71
for i in range (len (target_heap )):
70
72
popped = heapq .heappop (target_heap )
71
73
outf .write ('\t ' .join ([str (x ) for x in popped [0 ] + popped [1 ]]) + '\n ' )
@@ -87,6 +89,31 @@ def read_var_general(path, chrom=None):
87
89
yield row
88
90
89
91
92
+ def sorted_join (reffile , targetfile ):
93
+ with read_var_general (reffile ) as f1 , read_var_general (targetfile ) as f2 :
94
+ f1_iter = iter (f1 )
95
+ f2_iter = iter (f2 )
96
+
97
+ line1 = next (f1_iter , None )
98
+ line2 = next (f2_iter , None )
99
+
100
+ while line1 is not None and line2 is not None :
101
+ key1 = line1 ['CHR:POS:A0:A1' ]
102
+ key2 = line2 ['CHR:POS:A0:A1' ]
103
+
104
+ if key1 == key2 :
105
+ yield line1 .strip () + delimiter + line2 .strip ()
106
+ line1 = next (f1_iter , None )
107
+ line2 = next (f2_iter , None )
108
+ elif key1 < key2 :
109
+ line1 = next (f1_iter , None )
110
+ else :
111
+ line2 = next (f2_iter , None )
112
+
113
+
114
+
115
+
116
+
90
117
def allele_complement (s ):
91
118
return s .replace ("A" , "V" ).replace ("T" , "X" ).replace ("C" , "Y" ).replace ("G" , "Z" ).replace ("V" , "T" ).replace ("X" , "A" ).replace ("Y" , "G" ).replace ("Z" , "C" )
92
119
0 commit comments