Skip to content

Commit af6e2ef

Browse files
committed
Parse reference variants
1 parent 80ff9d4 commit af6e2ef

File tree

1 file changed

+43
-3
lines changed

1 file changed

+43
-3
lines changed

pgscatalog.match/src/pgscatalog/match/cli/intersect_cli.py

+43-3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,22 @@
1414
def run_intersect():
1515
args = parse_args()
1616

17+
# Process reference variants
18+
with xopen('reference_variants.txt', 'wt') as outf:
19+
outf.write('CHR:POS:A0:A1\tID_REF\tREF_REF\tIS_INDEL\tSTRANDAMB\tIS_MA_REF\n')
20+
ref_pvar = read_var_general(args.reference, chrom=args.filter_chrom)
21+
for v in ref_pvar:
22+
ALTs = v['ALT'].split(',')
23+
IS_MA_REF = len(ALTs) > 1
24+
for i, ALT in enumerate(ALTs):
25+
if v['REF'] < ALT:
26+
key = '{}:{}:{}:{}'.format(v['#CHROM'], v['POS'], v['REF'], ALT)
27+
else:
28+
key = '{}:{}:{}:{}'.format(v['#CHROM'], v['POS'], ALT, v['REF'])
29+
30+
IS_INDEL = (len(v['REF']) > 1) | (len(ALT) > 1)
31+
STRANDAMB = (v['REF'] == allele_complement(ALT))
32+
outf.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(key,v['ID'], v['REF'], IS_INDEL, STRANDAMB, IS_MA_REF))
1733

1834
# Process target variants
1935
with xopen('target_variants.txt', 'wt') as outf:
@@ -42,18 +58,35 @@ def run_intersect():
4258
outf.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(key,v['ID'],v['REF'], str(IS_MA_TARGET), ALT_FREQS[i], F_MISS_DOSAGE))
4359

4460

45-
def read_var_general(path):
61+
def read_var_general(path, chrom=None):
4662
with xopen(path, "rt") as f:
4763
# pvars do have a header column and support arbitrary columns
4864
reader = csv.DictReader(filter(lambda row: row[:2]!='##', f), delimiter="\t") # need to remove comments of VCF-like characters
49-
for row in reader:
50-
yield row
65+
if chrom is None:
66+
for row in reader:
67+
yield row
68+
else:
69+
for row in reader:
70+
if row['#CHROM'] == chrom:
71+
yield row
72+
73+
74+
def allele_complement(s):
75+
return s.replace("A", "V").replace("T", "X").replace("C", "Y").replace("G", "Z").replace("V", "T").replace("X", "A").replace("Y", "G").replace("Z", "C")
76+
5177
def parse_args(args=None):
5278
parser = argparse.ArgumentParser(
5379
description=_description_text(),
5480
epilog=_epilog_text(),
5581
formatter_class=argparse.RawDescriptionHelpFormatter,
5682
)
83+
parser.add_argument(
84+
"-r",
85+
"--reference",
86+
dest="reference",
87+
required=True,
88+
help="path/to/REFERENCE/pvar",
89+
)
5790
parser.add_argument(
5891
"-t",
5992
"--target",
@@ -62,6 +95,13 @@ def parse_args(args=None):
6295
nargs="+",
6396
help="<Required> A list of paths of target genomic variants (.bim/pvar format)",
6497
)
98+
parser.add_argument(
99+
"-c",
100+
"--chrom",
101+
dest="filter_chrom",
102+
required=False,
103+
help="whether to limit matches to specific chromosome of the reference",
104+
)
65105
return parser.parse_args(args)
66106

67107

0 commit comments

Comments
 (0)