@@ -44,9 +44,13 @@ def filter_structural_variants(
44
44
Filter structural variants to remove non-high quality events unless they are matched/annotated or
45
45
they involve a gene that is a known fusion partner
46
46
"""
47
- matched_svs = {match ["variant" ] for match in kb_matches if match ["variantType" ] == "sv" }
47
+ matched_svs = {
48
+ match ["variant" ] for match in kb_matches if match ["variantType" ] == "sv"
49
+ }
48
50
fusion_genes = {
49
- gene ["name" ] for gene in gene_annotations if gene .get ("knownFusionPartner" , False )
51
+ gene ["name" ]
52
+ for gene in gene_annotations
53
+ if gene .get ("knownFusionPartner" , False )
50
54
}
51
55
52
56
result = []
@@ -84,7 +88,9 @@ def get_evidencelevel_mapping(graphkb_conn: GraphKBConnection) -> Dict[str, str]
84
88
85
89
# Filter IPR EvidenceLevel and map each outgoing CrossReferenceOf to displayName
86
90
ipr_source_rid = graphkb_conn .get_source ("ipr" )["@rid" ]
87
- ipr_evidence_levels = filter (lambda d : d .get ("source" ) == ipr_source_rid , evidence_levels )
91
+ ipr_evidence_levels = filter (
92
+ lambda d : d .get ("source" ) == ipr_source_rid , evidence_levels
93
+ )
88
94
cross_references_mapping : Dict [str , str ] = dict ()
89
95
ipr_rids_to_displayname : Dict [str , str ] = dict ()
90
96
for level in ipr_evidence_levels :
@@ -132,7 +138,9 @@ def convert_statements_to_alterations(
132
138
"""
133
139
disease_matches = {
134
140
r ["@rid" ]
135
- for r in gkb_vocab .get_term_tree (graphkb_conn , disease_name , ontology_class = "Disease" )
141
+ for r in gkb_vocab .get_term_tree (
142
+ graphkb_conn , disease_name , ontology_class = "Disease"
143
+ )
136
144
}
137
145
138
146
if not disease_matches :
@@ -145,7 +153,9 @@ def convert_statements_to_alterations(
145
153
146
154
# get the recruitment status for any trial associated with a statement
147
155
clinical_trials = [
148
- s ["subject" ]["@rid" ] for s in statements if s ["subject" ]["@class" ] == "ClinicalTrial"
156
+ s ["subject" ]["@rid" ]
157
+ for s in statements
158
+ if s ["subject" ]["@class" ] == "ClinicalTrial"
149
159
]
150
160
recruitment_statuses = {}
151
161
if clinical_trials :
@@ -162,7 +172,9 @@ def convert_statements_to_alterations(
162
172
163
173
for statement in statements :
164
174
variants = [
165
- cast (Variant , c ) for c in statement ["conditions" ] if c ["@class" ] in VARIANT_CLASSES
175
+ cast (Variant , c )
176
+ for c in statement ["conditions" ]
177
+ if c ["@class" ] in VARIANT_CLASSES
166
178
]
167
179
diseases = [c for c in statement ["conditions" ] if c ["@class" ] == "Disease" ]
168
180
disease_match = len (diseases ) == 1 and diseases [0 ]["@rid" ] in disease_matches
@@ -183,8 +195,12 @@ def convert_statements_to_alterations(
183
195
184
196
evidence_level_str = display_evidence_levels (statement )
185
197
evidence_levels = statement .get ("evidenceLevel" ) or []
186
- ipr_evidence_levels = [ev_map [el .get ("@rid" , "" )] for el in evidence_levels if el ]
187
- ipr_evidence_levels_str = ";" .join (sorted (set ([el for el in ipr_evidence_levels ])))
198
+ ipr_evidence_levels = [
199
+ ev_map [el .get ("@rid" , "" )] for el in evidence_levels if el
200
+ ]
201
+ ipr_evidence_levels_str = ";" .join (
202
+ sorted (set ([el for el in ipr_evidence_levels ]))
203
+ )
188
204
189
205
for variant in variants :
190
206
if variant ["@rid" ] not in variant_matches :
@@ -194,10 +210,16 @@ def convert_statements_to_alterations(
194
210
"approvedTherapy" : approved_therapy or False ,
195
211
"category" : ipr_section or "unknown" ,
196
212
"context" : (
197
- statement ["subject" ]["displayName" ] if statement ["subject" ] else ""
213
+ statement ["subject" ]["displayName" ]
214
+ if statement ["subject" ]
215
+ else ""
216
+ ),
217
+ "kbContextId" : (
218
+ statement ["subject" ]["@rid" ] if statement ["subject" ] else ""
219
+ ),
220
+ "disease" : ";" .join (
221
+ sorted (d .get ("displayName" , "" ) for d in diseases )
198
222
),
199
- "kbContextId" : (statement ["subject" ]["@rid" ] if statement ["subject" ] else "" ),
200
- "disease" : ";" .join (sorted (d .get ("displayName" , "" ) for d in diseases )),
201
223
"evidenceLevel" : evidence_level_str or "" ,
202
224
"iprEvidenceLevel" : ipr_evidence_levels_str or "" ,
203
225
"kbStatementId" : statement ["@rid" ],
@@ -259,7 +281,9 @@ def select_expression_plots(
259
281
gene = str (variant .get ("gene" , "" ))
260
282
hist = str (variant .get ("histogramImage" , "" ))
261
283
if hist :
262
- images_by_gene [gene ] = ImageDefinition ({"key" : f"expDensity.{ gene } " , "path" : hist })
284
+ images_by_gene [gene ] = ImageDefinition (
285
+ {"key" : f"expDensity.{ gene } " , "path" : hist }
286
+ )
263
287
return [images_by_gene [gene ] for gene in selected_genes if gene in images_by_gene ]
264
288
265
289
@@ -302,7 +326,9 @@ def create_key_alterations(
302
326
counts [type_mapping [variant_type ]].add (variant_key )
303
327
304
328
if variant_type == "exp" :
305
- alterations .append (f'{ variant .get ("gene" ,"" )} ({ variant .get ("expressionState" )} )' )
329
+ alterations .append (
330
+ f'{ variant .get ("gene" ,"" )} ({ variant .get ("expressionState" )} )'
331
+ )
306
332
elif variant_type == "cnv" :
307
333
alterations .append (f'{ variant .get ("gene" ,"" )} ({ variant .get ("cnvState" )} )' )
308
334
# only show germline if relevant
@@ -326,7 +352,9 @@ def create_key_alterations(
326
352
327
353
328
354
def germline_kb_matches (
329
- kb_matches : List [Hashabledict ], all_variants : Sequence [IprVariant ], assume_somatic : bool = True
355
+ kb_matches : List [Hashabledict ],
356
+ all_variants : Sequence [IprVariant ],
357
+ assume_somatic : bool = True ,
330
358
) -> List [Hashabledict ]:
331
359
"""Filter kb_matches for matching to germline or somatic events using the 'germline' optional property.
332
360
@@ -377,23 +405,27 @@ def germline_kb_matches(
377
405
# Remove any matches to germline events
378
406
for alt in somatic_alts :
379
407
var_list = [v for v in all_variants if v ["key" ] == alt ["variant" ]]
380
- somatic_var_list = [v for v in var_list if not v .get ("germline" , not assume_somatic )]
408
+ somatic_var_list = [
409
+ v for v in var_list if not v .get ("germline" , not assume_somatic )
410
+ ]
381
411
if var_list and not somatic_var_list :
382
412
logger .debug (
383
413
f"Dropping germline match to somatic statement kbStatementId:{ alt ['kbStatementId' ]} : { alt ['kbVariant' ]} { alt ['category' ]} "
384
414
)
385
415
elif somatic_var_list :
386
416
ret_list .append (alt ) # match to somatic variant
387
417
else :
388
- ret_list .append (alt ) # alteration not in any specific keys matches to check.
418
+ ret_list .append (
419
+ alt
420
+ ) # alteration not in any specific keys matches to check.
389
421
390
422
return ret_list
391
423
392
424
393
425
def multi_variant_filtering (
394
426
graphkb_conn : GraphKBConnection ,
395
427
gkb_matches : List [KbMatch ],
396
- excludedTypes : List [str ] = [' wildtype' ],
428
+ excludedTypes : List [str ] = [" wildtype" ],
397
429
) -> List [KbMatch ]:
398
430
"""Filters out GraphKB matches that doesn't match to all required variants on multi-variant statements
399
431
@@ -413,8 +445,8 @@ def multi_variant_filtering(
413
445
filtered list of KbMatch statements
414
446
"""
415
447
# All matching statements & variants (GKB RIDs)
416
- matching_statement_rids = {match [' kbStatementId' ] for match in gkb_matches }
417
- matching_variant_rids = {match [' kbVariantId' ] for match in gkb_matches }
448
+ matching_statement_rids = {match [" kbStatementId" ] for match in gkb_matches }
449
+ matching_variant_rids = {match [" kbVariantId" ] for match in gkb_matches }
418
450
419
451
# Get conditions detail on all matching statements
420
452
res = graphkb_conn .post (
@@ -423,7 +455,7 @@ def multi_variant_filtering(
423
455
"target" : "Statement" ,
424
456
"filters" : {
425
457
"@rid" : list (matching_statement_rids ),
426
- "operator" : 'IN' ,
458
+ "operator" : "IN" ,
427
459
},
428
460
"history" : True ,
429
461
"returnProperties" : [
@@ -434,21 +466,21 @@ def multi_variant_filtering(
434
466
],
435
467
},
436
468
)
437
- statements = res [' result' ]
469
+ statements = res [" result" ]
438
470
439
471
# Get set of excluded Vocabulary RIDs for variant types
440
472
excluded = {}
441
- if len (excludedTypes ) != 0 and excludedTypes [0 ] != '' :
473
+ if len (excludedTypes ) != 0 and excludedTypes [0 ] != "" :
442
474
excluded = gkb_vocab .get_terms_set (graphkb_conn , excludedTypes )
443
475
444
476
# Mapping statements to their conditional variants
445
477
# (discarding non-variant conditions & variant conditions from excluded types)
446
478
statement_to_variants = {}
447
479
for statement in statements :
448
- statement_to_variants [statement [' @rid' ]] = {
449
- el [' @rid' ]
450
- for el in statement [' conditions' ]
451
- if (el [' @class' ] in VARIANT_CLASSES and el .get (' type' , '' ) not in excluded )
480
+ statement_to_variants [statement [" @rid" ]] = {
481
+ el [" @rid" ]
482
+ for el in statement [" conditions" ]
483
+ if (el [" @class" ] in VARIANT_CLASSES and el .get (" type" , "" ) not in excluded )
452
484
}
453
485
454
486
# Set of statements with complete matching
@@ -460,56 +492,54 @@ def multi_variant_filtering(
460
492
461
493
# Filtering out incompleted matches of gkb_matches
462
494
return [
463
- match for match in gkb_matches if match ['kbStatementId' ] in complete_matching_statements
495
+ match
496
+ for match in gkb_matches
497
+ if match ["kbStatementId" ] in complete_matching_statements
464
498
]
465
499
# Values written to kbData["kbmatchTag"] by assign_kb_match_tables.
THERAPEUTIC = "therapeutic"
BEST_THERAPEUTIC = "best_therapeutic"
PCP = "pcp"
DIAGNOSTIC = "diagnostic"
PROGNOSTIC = "prognostic"
BIOLOGICAL = "biological"
OTHER = "other"


def assign_kb_match_tables(gkb_matches: "List[KbMatch]") -> "List[KbMatch]":
    """
    Adds property kbmatchTag to kbData for kb statements. This property is used to
    determine which table in the kbmatches section the statement will be displayed in.

    NB the value of approvedTherapy is ignored if the category is not therapeutic.

    Tag assignment, by category:
        therapeutic: BEST_THERAPEUTIC when approvedTherapy and matchedCancer are
            truthy and iprEvidenceLevel is exactly "IPR-A" or "IPR-B";
            otherwise THERAPEUTIC
        pharmacogenomic / cancer-predisposition: PCP when germline is truthy,
            otherwise OTHER
        diagnostic, prognostic, biological: the tag of the same name
        anything else (e.g. 'unknown' or 'novel'): OTHER

    Params:
        gkb_matches: KbMatch statements to be tagged; each item is modified in place
            and must already carry a 'kbData' mapping
    Returns:
        the same list of KbMatch statements, with kbData['kbmatchTag'] set on each
    Raises:
        KeyError: if an item lacks 'category', 'kbData', or a key needed by its branch
    """
    for item in gkb_matches:
        category = item["category"]
        if category == "therapeutic":
            if (
                item["approvedTherapy"]
                and item["matchedCancer"]
                and item["iprEvidenceLevel"] in ("IPR-A", "IPR-B")
            ):
                kbmatch_tag = BEST_THERAPEUTIC
            else:
                # the else is required: without it the best-therapeutic tag
                # would be overwritten by the plain therapeutic tag
                kbmatch_tag = THERAPEUTIC
        elif category in ("pharmacogenomic", "cancer-predisposition"):
            # only germline events belong in the PCP table
            kbmatch_tag = PCP if item["germline"] else OTHER
        elif category == "diagnostic":
            kbmatch_tag = DIAGNOSTIC
        elif category == "prognostic":
            kbmatch_tag = PROGNOSTIC
        elif category == "biological":
            kbmatch_tag = BIOLOGICAL
        else:  # category == 'unknown' or 'novel'
            kbmatch_tag = OTHER
        item["kbData"]["kbmatchTag"] = kbmatch_tag
    return gkb_matches
0 commit comments