Skip to content

Commit 4774734

Browse files
committed
add more complex target variant pvar
1 parent 08da4a4 commit 4774734

File tree

2 files changed

+2711
-1
lines changed

2 files changed

+2711
-1
lines changed

pgscatalog.core/src/pgscatalog/core/lib/targetvariants.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ class TargetVariants:
109109
Note: A1/A2 aren't guaranteed to be REF/ALT because of PLINK1 file format
110110
limitations. PGS Catalog libraries handle this internally, but you should be
111111
aware REF/ALT can be swapped by plink during VCF to bim conversion.
112+
113+
Some pvar files contain many "##" comment lines before the header row; these lines are skipped:
114+
115+
>>> pvar = TargetVariants(Config.ROOT_DIR / "tests" / "data" / "1000G.pvar")
116+
>>> for variant in pvar:
117+
... variant
118+
... break
119+
TargetVariant(chrom='1', pos=10390, ref='CCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAA', alt='C', id='1:10390:CCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAA:C')
112120
"""
113121

114122
def __init__(self, path, chrom=None):
@@ -193,7 +201,13 @@ def to_pa_recordbatch(self):
193201
def read_pvar(path):
194202
with xopen(path, "rt") as f:
195203
# pvars have a header row (after any leading "##" lines) and support arbitrary columns
196-
reader = csv.DictReader(f, delimiter="\t")
204+
for line in f:
205+
if line.startswith("##"):
206+
continue
207+
else:
208+
fieldnames = line.strip().split("\t")
209+
break
210+
reader = csv.DictReader(f, fieldnames=fieldnames, delimiter="\t")
197211
fields = {
198212
"#CHROM": "chrom",
199213
"POS": "pos",

0 commit comments

Comments
 (0)