14
14
def run_intersect ():
15
15
args = parse_args ()
16
16
17
+ # Process reference variants
18
+ with xopen ('reference_variants.txt' , 'wt' ) as outf :
19
+ outf .write ('CHR:POS:A0:A1\t ID_REF\t REF_REF\t IS_INDEL\t STRANDAMB\t IS_MA_REF\n ' )
20
+ ref_pvar = read_var_general (args .reference , chrom = args .filter_chrom )
21
+ for v in ref_pvar :
22
+ ALTs = v ['ALT' ].split (',' )
23
+ IS_MA_REF = len (ALTs ) > 1
24
+ for i , ALT in enumerate (ALTs ):
25
+ if v ['REF' ] < ALT :
26
+ key = '{}:{}:{}:{}' .format (v ['#CHROM' ], v ['POS' ], v ['REF' ], ALT )
27
+ else :
28
+ key = '{}:{}:{}:{}' .format (v ['#CHROM' ], v ['POS' ], ALT , v ['REF' ])
29
+
30
+ IS_INDEL = (len (v ['REF' ]) > 1 ) | (len (ALT ) > 1 )
31
+ STRANDAMB = (v ['REF' ] == allele_complement (ALT ))
32
+ outf .write ('{}\t {}\t {}\t {}\t {}\t {}\n ' .format (key ,v ['ID' ], v ['REF' ], IS_INDEL , STRANDAMB , IS_MA_REF ))
17
33
18
34
# Process target variants
19
35
with xopen ('target_variants.txt' , 'wt' ) as outf :
@@ -42,18 +58,35 @@ def run_intersect():
42
58
outf .write ('{}\t {}\t {}\t {}\t {}\t {}\n ' .format (key ,v ['ID' ],v ['REF' ], str (IS_MA_TARGET ), ALT_FREQS [i ], F_MISS_DOSAGE ))
43
59
44
60
45
def read_var_general(path, chrom=None):
    """Yield the records of a tab-delimited variant file one at a time.

    Lines beginning with '##' are dropped before parsing; the first
    remaining line is used by ``csv.DictReader`` as the column header, and
    each following line is yielded as a mapping keyed by that header.

    Args:
        path: Location of the variant file, opened in text mode via ``xopen``.
        chrom: When not ``None``, only records whose '#CHROM' column equals
            this value are yielded. The comparison is plain equality against
            the text read from the file.

    Yields:
        One mapping per retained record.
    """
    with xopen(path, "rt") as handle:
        # pvars do have a header column and support arbitrary columns, so the
        # VCF-like '##' lines have to be stripped before the header is read.
        data_lines = (line for line in handle if not line.startswith('##'))
        for record in csv.DictReader(data_lines, delimiter="\t"):
            # Short-circuit keeps '#CHROM' untouched when no filter is set.
            if chrom is None or record['#CHROM'] == chrom:
                yield record
72
+
73
+
74
# Single-pass lookup table: A<->T and C<->G; anything else maps to itself.
_ALLELE_COMPLEMENT_TABLE = str.maketrans("ATCG", "TAGC")


def allele_complement(s):
    """Return the base-wise complement of the allele string *s*.

    Upper-case A/T and C/G are swapped character by character; the string
    is not reversed. Every other character (lower-case letters, 'N', '*',
    symbolic alleles such as '<INV>', ...) is returned unchanged.

    A translation table is used instead of chained ``str.replace`` calls
    with temporary placeholder letters, because placeholders such as
    'V', 'X', 'Y' and 'Z' would be rewritten if they were already present
    in the input.

    Args:
        s: Allele string to complement.

    Returns:
        A new string of the same length with A, T, C and G complemented.
    """
    return s.translate(_ALLELE_COMPLEMENT_TABLE)
76
+
51
77
def parse_args (args = None ):
52
78
parser = argparse .ArgumentParser (
53
79
description = _description_text (),
54
80
epilog = _epilog_text (),
55
81
formatter_class = argparse .RawDescriptionHelpFormatter ,
56
82
)
83
+ parser .add_argument (
84
+ "-r" ,
85
+ "--reference" ,
86
+ dest = "reference" ,
87
+ required = True ,
88
+ help = "path/to/REFERENCE/pvar" ,
89
+ )
57
90
parser .add_argument (
58
91
"-t" ,
59
92
"--target" ,
@@ -62,6 +95,13 @@ def parse_args(args=None):
62
95
nargs = "+" ,
63
96
help = "<Required> A list of paths of target genomic variants (.bim/pvar format)" ,
64
97
)
98
+ parser .add_argument (
99
+ "-c" ,
100
+ "--chrom" ,
101
+ dest = "filter_chrom" ,
102
+ required = False ,
103
+ help = "whether to limit matches to specific chromosome of the reference" ,
104
+ )
65
105
return parser .parse_args (args )
66
106
67
107
0 commit comments