Skip to content

Commit 9501eab

Browse files
committed
Handle .bim TARGET files
1 parent 486b2e5 commit 9501eab

File tree

1 file changed

+20
-8
lines changed

1 file changed

+20
-8
lines changed

pgscatalog.match/src/pgscatalog/match/cli/intersect_cli.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ def run_intersect():
4949
del ref_heap
5050

5151
# Process & sort target variants
52-
# ToDo: check if it works for bim format files?
5352
with xopen('target_variants.txt', 'wt') as outf:
5453
outf.write('CHR:POS:A0:A1\tID_TARGET\tREF_TARGET\tIS_MA_TARGET\tMAF\tF_MISS_DOSAGE\n')
5554
target_heap = []
@@ -130,15 +129,28 @@ def read_var_general(path, chrom=None):
130129
:return: row of a df as a dict
131130
"""
132131
with xopen(path, "rt") as f:
133-
# ToDo: check if this is memory inefficient
134-
reader = csv.DictReader(filter(lambda r: r[:2] != '##', f), delimiter="\t") # need to remove comments of VCF-like characters, might be fully in memory though
135-
if (chrom is None) or (chrom == 'ALL'):
136-
for row in reader:
137-
yield row
132+
if 'bim' in path:
133+
reader = csv.reader(f, delimiter="\t")
134+
# yes, A1/A2 in bim isn't ref/alt
135+
fields = ["#CHROM", "ID", "pos_cm", "POS", "REF", "ALT"]
136+
if (chrom is None) or (chrom == 'ALL'):
137+
for row in reader:
138+
yield dict(zip(fields, row, strict=True))
139+
else:
140+
for row in reader:
141+
row = dict(zip(fields, row, strict=True))
142+
if row['#CHROM'] == chrom:
143+
yield row
138144
else:
139-
for row in reader:
140-
if row['#CHROM'] == chrom:
145+
# ToDo: check if filter is memory inefficient
146+
reader = csv.DictReader(filter(lambda r: r[:2] != '##', f), delimiter="\t") # need to remove comments of VCF-like characters, might be fully in memory though
147+
if (chrom is None) or (chrom == 'ALL'):
148+
for row in reader:
141149
yield row
150+
else:
151+
for row in reader:
152+
if row['#CHROM'] == chrom:
153+
yield row
142154

143155

144156
def sorted_join_variants(path_ref, path_target):

0 commit comments

Comments
 (0)