Skip to content

Commit 80ff9d4

Browse files
committed
Process target variants and merge with frequency and missingness.
1 parent 08da4a4 commit 80ff9d4

File tree

2 files changed

+82
-0
lines changed

2 files changed

+82
-0
lines changed

pgscatalog.match/pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ pyarrow = "^15.0.0"
1717
[tool.poetry.scripts]
1818
pgscatalog-match = 'pgscatalog.match.cli.match_cli:run_match'
1919
pgscatalog-matchmerge = 'pgscatalog.match.cli.merge_cli:run_merge'
20+
pgscatalog-intersect = 'pgscatalog.match.cli.intersect_cli:run_intersect'
2021

2122
[tool.poetry.group.dev.dependencies]
2223
pytest = "^8.0.0"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import argparse
2+
import logging
3+
import sys
4+
from xopen import xopen
5+
import csv
6+
import textwrap
7+
8+
9+
from pgscatalog.core import TargetVariants
10+
11+
logger = logging.getLogger(__name__)
12+
13+
14+
def run_intersect():
15+
args = parse_args()
16+
17+
18+
# Process target variants
19+
with xopen('target_variants.txt', 'wt') as outf:
20+
outf.write('CHR:POS:A0:A1\tID_TARGET\tREF_TARGET\tIS_MA_TARGET\tALT_FREQ\tF_MISS_DOSAGE\n')
21+
for path in args.target:
22+
pvar = read_var_general(path)
23+
24+
loc_afreq = path.replace('.pvar.zst', '.afreq.gz')
25+
afreq = read_var_general(loc_afreq)
26+
27+
loc_vmiss = path.replace('.pvar.zst', '.vmiss.gz')
28+
vmiss = read_var_general(loc_vmiss)
29+
30+
for v, freq, miss in zip(pvar, afreq, vmiss):
31+
# if v['ID'] != freq['ID'] != miss['ID']:
32+
# print(v)
33+
ALTs = v['ALT'].split(',')
34+
ALT_FREQS = freq['ALT_FREQS'].split(',')
35+
F_MISS_DOSAGE = miss['F_MISS_DOSAGE']
36+
IS_MA_TARGET = len(ALTs) > 1
37+
for i, ALT in enumerate(ALTs):
38+
if v['REF'] < ALT:
39+
key = '{}:{}:{}:{}'.format(v['#CHROM'], v['POS'], v['REF'], ALT)
40+
else:
41+
key = '{}:{}:{}:{}'.format(v['#CHROM'], v['POS'], ALT, v['REF'])
42+
outf.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(key,v['ID'],v['REF'], str(IS_MA_TARGET), ALT_FREQS[i], F_MISS_DOSAGE))
43+
44+
45+
def read_var_general(path):
46+
with xopen(path, "rt") as f:
47+
# pvars do have a header column and support arbitrary columns
48+
reader = csv.DictReader(filter(lambda row: row[:2]!='##', f), delimiter="\t") # need to remove comments of VCF-like characters
49+
for row in reader:
50+
yield row
51+
def parse_args(args=None):
52+
parser = argparse.ArgumentParser(
53+
description=_description_text(),
54+
epilog=_epilog_text(),
55+
formatter_class=argparse.RawDescriptionHelpFormatter,
56+
)
57+
parser.add_argument(
58+
"-t",
59+
"--target",
60+
dest="target",
61+
required=True,
62+
nargs="+",
63+
help="<Required> A list of paths of target genomic variants (.bim/pvar format)",
64+
)
65+
return parser.parse_args(args)
66+
67+
68+
def _description_text() -> str:
69+
return textwrap.dedent(
70+
"""\
71+
PLACEHOLDER
72+
"""
73+
)
74+
75+
76+
def _epilog_text() -> str:
77+
return textwrap.dedent(
78+
"""\
79+
PLACEHOLDER
80+
"""
81+
)

0 commit comments

Comments
 (0)