@@ -49,9 +49,13 @@ def filter_structural_variants(
49
49
Filter structural variants to remove non-high quality events unless they are matched/annotated or
50
50
they involve a gene that is a known fusion partner
51
51
"""
52
- matched_svs = {match ["variant" ] for match in kb_matches if match ["variantType" ] == "sv" }
52
+ matched_svs = {
53
+ match ["variant" ] for match in kb_matches if match ["variantType" ] == "sv"
54
+ }
53
55
fusion_genes = {
54
- gene ["name" ] for gene in gene_annotations if gene .get ("knownFusionPartner" , False )
56
+ gene ["name" ]
57
+ for gene in gene_annotations
58
+ if gene .get ("knownFusionPartner" , False )
55
59
}
56
60
57
61
result = []
@@ -89,7 +93,9 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str, str]
89
93
90
94
# Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName
91
95
ipr_source_rid = graphkb_conn .get_source ("ipr" )["@rid" ]
92
- ipr_evidence_levels = filter (lambda d : d .get ("source" ) == ipr_source_rid , evidence_levels )
96
+ ipr_evidence_levels = filter (
97
+ lambda d : d .get ("source" ) == ipr_source_rid , evidence_levels
98
+ )
93
99
cross_references_mapping : Dict [str , str ] = dict ()
94
100
ipr_rids_to_displayname : Dict [str , str ] = dict ()
95
101
for level in ipr_evidence_levels :
@@ -138,7 +144,9 @@ def convert_statements_to_alterations(
138
144
"""
139
145
disease_matches = {
140
146
r ["@rid" ]
141
- for r in gkb_vocab .get_term_tree (graphkb_conn , disease_name , ontology_class = "Disease" )
147
+ for r in gkb_vocab .get_term_tree (
148
+ graphkb_conn , disease_name , ontology_class = "Disease"
149
+ )
142
150
}
143
151
144
152
if not disease_matches :
@@ -151,7 +159,9 @@ def convert_statements_to_alterations(
151
159
152
160
# get the recruitment status for any trial associated with a statement
153
161
clinical_trials = [
154
- s ["subject" ]["@rid" ] for s in statements if s ["subject" ]["@class" ] == "ClinicalTrial"
162
+ s ["subject" ]["@rid" ]
163
+ for s in statements
164
+ if s ["subject" ]["@class" ] == "ClinicalTrial"
155
165
]
156
166
recruitment_statuses = {}
157
167
if clinical_trials :
@@ -168,7 +178,9 @@ def convert_statements_to_alterations(
168
178
169
179
for statement in statements :
170
180
variants = [
171
- cast (Variant , c ) for c in statement ["conditions" ] if c ["@class" ] in VARIANT_CLASSES
181
+ cast (Variant , c )
182
+ for c in statement ["conditions" ]
183
+ if c ["@class" ] in VARIANT_CLASSES
172
184
]
173
185
diseases = [c for c in statement ["conditions" ] if c ["@class" ] == "Disease" ]
174
186
disease_match = len (diseases ) == 1 and diseases [0 ]["@rid" ] in disease_matches
@@ -189,8 +201,12 @@ def convert_statements_to_alterations(
189
201
190
202
evidence_level_str = display_evidence_levels (statement )
191
203
evidence_levels = statement .get ("evidenceLevel" ) or []
192
- ipr_evidence_levels = [ev_map [el .get ("@rid" , "" )] for el in evidence_levels if el ]
193
- ipr_evidence_levels_str = ";" .join (sorted (set ([el for el in ipr_evidence_levels ])))
204
+ ipr_evidence_levels = [
205
+ ev_map [el .get ("@rid" , "" )] for el in evidence_levels if el
206
+ ]
207
+ ipr_evidence_levels_str = ";" .join (
208
+ sorted (set ([el for el in ipr_evidence_levels ]))
209
+ )
194
210
195
211
for variant in variants :
196
212
if variant ["@rid" ] not in variant_matches :
@@ -200,10 +216,16 @@ def convert_statements_to_alterations(
200
216
"approvedTherapy" : approved_therapy or False ,
201
217
"category" : ipr_section or "unknown" ,
202
218
"context" : (
203
- statement ["subject" ]["displayName" ] if statement ["subject" ] else ""
219
+ statement ["subject" ]["displayName" ]
220
+ if statement ["subject" ]
221
+ else ""
222
+ ),
223
+ "kbContextId" : (
224
+ statement ["subject" ]["@rid" ] if statement ["subject" ] else ""
225
+ ),
226
+ "disease" : ";" .join (
227
+ sorted (d .get ("displayName" , "" ) for d in diseases )
204
228
),
205
- "kbContextId" : (statement ["subject" ]["@rid" ] if statement ["subject" ] else "" ),
206
- "disease" : ";" .join (sorted (d .get ("displayName" , "" ) for d in diseases )),
207
229
"evidenceLevel" : evidence_level_str or "" ,
208
230
"iprEvidenceLevel" : ipr_evidence_levels_str or "" ,
209
231
"kbStatementId" : statement ["@rid" ],
@@ -327,7 +349,9 @@ def select_expression_plots(
327
349
gene = str (variant .get ("gene" , "" ))
328
350
hist = str (variant .get ("histogramImage" , "" ))
329
351
if hist :
330
- images_by_gene [gene ] = ImageDefinition ({"key" : f"expDensity.{ gene } " , "path" : hist })
352
+ images_by_gene [gene ] = ImageDefinition (
353
+ {"key" : f"expDensity.{ gene } " , "path" : hist }
354
+ )
331
355
return [images_by_gene [gene ] for gene in selected_genes if gene in images_by_gene ]
332
356
333
357
@@ -370,7 +394,9 @@ def create_key_alterations(
370
394
counts [type_mapping [variant_type ]].add (variant_key )
371
395
372
396
if variant_type == "exp" :
373
- alterations .append (f'{ variant .get ("gene" ,"" )} ({ variant .get ("expressionState" )} )' )
397
+ alterations .append (
398
+ f'{ variant .get ("gene" ,"" )} ({ variant .get ("expressionState" )} )'
399
+ )
374
400
elif variant_type == "cnv" :
375
401
alterations .append (f'{ variant .get ("gene" ,"" )} ({ variant .get ("cnvState" )} )' )
376
402
# only show germline if relevant
@@ -447,15 +473,19 @@ def germline_kb_matches(
447
473
# Remove any matches to germline events
448
474
for alt in somatic_alts :
449
475
var_list = [v for v in all_variants if v ["key" ] == alt ["variant" ]]
450
- somatic_var_list = [v for v in var_list if not v .get ("germline" , not assume_somatic )]
476
+ somatic_var_list = [
477
+ v for v in var_list if not v .get ("germline" , not assume_somatic )
478
+ ]
451
479
if var_list and not somatic_var_list :
452
480
logger .debug (
453
481
f"Dropping germline match to somatic statement kbStatementId:{ alt ['kbStatementId' ]} : { alt ['kbVariant' ]} { alt ['category' ]} "
454
482
)
455
483
elif somatic_var_list :
456
484
ret_list .append (alt ) # match to somatic variant
457
485
else :
458
- ret_list .append (alt ) # alteration not in any specific keys matches to check.
486
+ ret_list .append (
487
+ alt
488
+ ) # alteration not in any specific keys matches to check.
459
489
460
490
return ret_list
461
491
@@ -530,11 +560,15 @@ def multi_variant_filtering(
530
560
531
561
# Filtering out incompleted matches of gkb_matches
532
562
return [
533
- match for match in gkb_matches if match ["kbStatementId" ] in complete_matching_statements
563
+ match
564
+ for match in gkb_matches
565
+ if match ["kbStatementId" ] in complete_matching_statements
534
566
]
535
567
536
568
537
- def get_kb_variants (gkb_matches : List [KbMatch ] | List [Hashabledict ]) -> List [KbVariantMatch ]:
569
+ def get_kb_variants (
570
+ gkb_matches : List [KbMatch ] | List [Hashabledict ],
571
+ ) -> List [KbVariantMatch ]:
538
572
"""Extracts the set of distinct kb variant records from the input
539
573
list of gkb_matches records, which combine statement and variant matches.
540
574
@@ -572,17 +606,12 @@ def get_kb_matched_statements(
572
606
kbMatchedStatements = {}
573
607
kbs_keys = KbMatchedStatement .__annotations__ .keys ()
574
608
for item in gkb_matches :
575
- x = copy (item )
576
- x ['requiredKbMatches' ].sort ()
577
- kbs = KbMatchedStatement ({key : val for (key , val ) in x .items () if key in kbs_keys })
578
- # import pdb; pdb.set_trace()
579
- # kbs = {key: val for (key, val) in kbs.items() if key in kbs_keys}
580
- # import pdb; pdb.set_trace()
581
- # sort list to ensure there are no extra string representations of kbs
582
- # TODO add test for this
583
- # kbs['requiredKbMatches'].sort()
609
+ stmt = copy (item )
610
+ stmt ["requiredKbMatches" ].sort ()
611
+ kbs = KbMatchedStatement (
612
+ {key : val for (key , val ) in stmt .items () if key in kbs_keys }
613
+ )
584
614
dict_key = str (kbs )
585
- # kbs = cast(KbMatchedStatement, kbs)
586
615
kbMatchedStatements [dict_key ] = kbs
587
616
return [* kbMatchedStatements .values ()]
588
617
@@ -627,7 +656,11 @@ def get_kb_statement_matched_conditions(
627
656
kbMatchedStatementConditions = {}
628
657
629
658
for kbStmt in kbMatchedStatements :
630
- stmts = [item for item in gkb_matches if item ["kbStatementId" ] == kbStmt ["kbStatementId" ]]
659
+ stmts = [
660
+ item
661
+ for item in gkb_matches
662
+ if item ["kbStatementId" ] == kbStmt ["kbStatementId" ]
663
+ ]
631
664
requirements = {}
632
665
for requirement in stmts [0 ]["requiredKbMatches" ]:
633
666
if not requirements .get (requirement , False ):
@@ -645,7 +678,7 @@ def get_kb_statement_matched_conditions(
645
678
for item in gkb_matches
646
679
if (
647
680
item ["kbVariantId" ] == requirement
648
- and item [' kbStatementId' ] == kbStmt [' kbStatementId' ]
681
+ and item [" kbStatementId" ] == kbStmt [" kbStatementId" ]
649
682
)
650
683
]
651
684
requirements [requirement ] = reqlist
@@ -661,7 +694,9 @@ def get_kb_statement_matched_conditions(
661
694
]
662
695
for conditionSet in conditionSets :
663
696
# remove Nones
664
- observedVariantKeys = [item for item in conditionSet ["observedVariantKeys" ] if item ]
697
+ observedVariantKeys = [
698
+ item for item in conditionSet ["observedVariantKeys" ] if item
699
+ ]
665
700
observedVariantKeys .sort ()
666
701
kbmc = KbMatchedStatementConditionSet (
667
702
{
0 commit comments