9
9
import pyliftover
10
10
11
11
from .genomebuild import GenomeBuild
12
- from .scorevariant import EffectType , ScoreVariant , EffectAllele
12
+ from .scorevariant import ScoreVariant , Allele
13
13
from .pgsexceptions import LiftoverError
14
14
15
15
logger = logging .getLogger (__name__ )
@@ -45,8 +45,6 @@ def normalise(
45
45
if drop_missing :
46
46
variants = drop_hla (variants )
47
47
48
- variants = assign_effect_type (variants )
49
- variants = check_effect_weight (variants )
50
48
variants = assign_other_allele (variants )
51
49
variants = check_effect_allele (variants , drop_missing )
52
50
variants = detect_complex (variants )
@@ -75,19 +73,11 @@ def check_duplicates(variants):
75
73
seen_ids = {}
76
74
current_accession = accession
77
75
78
- # None other allele -> empty string
79
- variant_id : str = ":" .join (
80
- [
81
- str (getattr (variant , k ) or "" )
82
- for k in ["chr_name" , "chr_position" , "effect_allele" , "other_allele" ]
83
- ]
84
- )
85
-
86
- if variant_id in seen_ids :
76
+ if variant .variant_id in seen_ids :
87
77
variant .is_duplicated = True
88
78
n_duplicates += 1
89
79
90
- seen_ids [variant_id ] = True
80
+ seen_ids [variant . variant_id ] = True
91
81
92
82
yield variant
93
83
n_variants += 1
@@ -110,9 +100,12 @@ def drop_hla(variants):
110
100
[]
111
101
"""
112
102
n_dropped = 0
103
+ p = Allele (allele = "P" )
104
+ n = Allele (allele = "N" )
105
+
113
106
for variant in variants :
114
107
match variant :
115
- case _ if variant .effect_allele in (EffectAllele ( "P" ), EffectAllele ( "N" ) ):
108
+ case _ if variant .effect_allele in (p , n ):
116
109
n_dropped += 1
117
110
continue
118
111
case _:
@@ -121,39 +114,15 @@ def drop_hla(variants):
121
114
logger .warning (f"{ n_dropped } HLA alleles detected and dropped" )
122
115
123
116
124
- def check_effect_weight (variants ):
125
- """Check that effect weights are valid floats. Effect weights are intentionally
126
- left as strings during processing.
127
-
128
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0})
129
- >>> list(check_effect_weight([variant])) # doctest: +ELLIPSIS
130
- [ScoreVariant(effect_allele='A',effect_weight=5,...)]
131
-
132
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": "potato", "accession": "test", "row_nr": 0})
133
- >>> list(check_effect_weight([variant])) # doctest: +ELLIPSIS
134
- Traceback (most recent call last):
135
- ...
136
- ValueError
137
- """
138
- for variant in variants :
139
- try :
140
- float (variant .effect_weight )
141
- except ValueError as e :
142
- logger .critical (f"{ variant } has bad effect weight" )
143
- raise ValueError from e
144
- else :
145
- yield variant
146
-
147
-
148
117
def assign_other_allele (variants ):
149
118
"""Check if there's more than one possible other allele, remove if true
150
119
151
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0, "other_allele": "A"})
120
+ >>> variant = ScoreVariant(**{"chr_position": 1, "rsID": None, "chr_name": "1", "effect_allele": "A", "effect_weight": 5, "other_allele": "A"})
152
121
>>> list(assign_other_allele([variant])) # doctest: +ELLIPSIS
153
- [ScoreVariant(effect_allele='A',..., other_allele='A',...)]
154
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0, "other_allele": "A/C"})
122
+ [ScoreVariant(..., effect_allele='A', other_allele='A', ...)]
123
+ >>> variant = ScoreVariant(**{"chr_position": 1, "rsID": None, "chr_name": "1", "effect_allele": "A", "effect_weight": 5, "other_allele": "A/C"})
155
124
>>> list(assign_other_allele([variant])) # doctest: +ELLIPSIS
156
- [ScoreVariant(effect_allele='A',..., other_allele=None,...)]
125
+ [ScoreVariant(..., effect_allele='A', other_allele=None,...)]
157
126
"""
158
127
n_dropped = 0
159
128
for variant in variants :
@@ -171,51 +140,16 @@ def assign_other_allele(variants):
171
140
logger .warning ("Other allele for these variants is set to missing" )
172
141
173
142
174
- def assign_effect_type (variants ):
175
- """Convert PGS Catalog effect type columns to EffectType enums
176
-
177
- The most common type of effect type is additive:
178
-
179
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0, "is_recessive": "False", "is_dominant": "False"})
180
- >>> list(assign_effect_type([variant])) # doctest: +ELLIPSIS
181
- [ScoreVariant(...,effect_type=EffectType.ADDITIVE,...)]
182
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0, "is_recessive": "True", "is_dominant": "False"})
183
- >>> list(assign_effect_type([variant])) # doctest: +ELLIPSIS
184
- [ScoreVariant(...,effect_type=EffectType.RECESSIVE,...)]
185
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0, "is_recessive": "False", "is_dominant": "True"})
186
- >>> list(assign_effect_type([variant])) # doctest: +ELLIPSIS
187
- [ScoreVariant(...,effect_type=EffectType.DOMINANT,...)]
188
-
189
- is_recessive and is_dominant fields are parsed from strings to bools during __init__.
190
- """
191
- for variant in variants :
192
- match (variant .is_recessive , variant .is_dominant ):
193
- case (None , None ) | (False , False ) | (None , False ) | (False , None ):
194
- # none is OK because is_recessive or is_dominant column may be missing
195
- # default value is already set to additive, so just yield the variant
196
- pass
197
- case (False , True ) | (None , True ):
198
- # none is OK because is_recessive column may be missing
199
- variant .effect_type = EffectType .DOMINANT
200
- case (True , False ) | (True , None ):
201
- # none is OK because is_dominant column may be missing
202
- variant .effect_type = EffectType .RECESSIVE
203
- case _:
204
- logger .critical (f"Bad effect type setting: { variant } " )
205
- raise Exception
206
- yield variant
207
-
208
-
209
143
def remap_harmonised (variants , harmonised , target_build ):
210
144
"""
211
145
Overwrite key attributes with harmonised data, if available.
212
146
213
147
In this case chr_name, chr_position, and other allele are missing.
214
148
Perhaps authors submitted rsID and effect allele originally:
215
149
216
- >>> variant = ScoreVariant(**{"effect_allele": "A", "effect_weight": 5, "accession": "test", "row_nr": 0, "hm_chr": 1, "hm_pos": 100, "hm_inferOtherAllele": "A"})
150
+ >>> variant = ScoreVariant(**{"chr_position": 1, "rsID": None, "chr_name": "2", "effect_allele": "A", "effect_weight": 5, "accession": "test", "hm_chr": 1, "hm_pos": 100, "hm_inferOtherAllele": "A"})
217
151
>>> list(remap_harmonised([variant], harmonised=True, target_build=GenomeBuild.GRCh38)) # doctest: +ELLIPSIS
218
- [ScoreVariant(..., chr_name=1,chr_position=100,...other_allele='A'...)]
152
+ [ScoreVariant(chr_name=1,chr_position=100,...other_allele='A'...)]
219
153
"""
220
154
if harmonised :
221
155
for variant in variants :
0 commit comments