35
35
create_key_alterations ,
36
36
filter_structural_variants ,
37
37
germline_kb_matches ,
38
+ multi_variant_filtering ,
38
39
select_expression_plots ,
39
40
)
40
41
from .summary import auto_analyst_comments
@@ -246,6 +247,7 @@ def ipr_report(
246
247
custom_kb_match_filter = None ,
247
248
async_upload : bool = False ,
248
249
mins_to_wait : int = 5 ,
250
+ multi_variant_filter : bool = True ,
249
251
) -> Dict :
250
252
"""Run the matching and create the report JSON for upload to IPR.
251
253
@@ -269,6 +271,7 @@ def ipr_report(
269
271
custom_kb_match_filter: function(List[kbMatch]) -> List[kbMatch]
270
272
async_upload: use report_async endpoint to upload reports
271
273
mins_to_wait: if using report_async, number of minutes to wait for success before an exception is raised
274
+ multi_variant_filter: filter out matches that do not match all required variants of a multi-variant statement
272
275
273
276
Returns:
274
277
ipr_conn.upload_report return dictionary
@@ -300,10 +303,11 @@ def ipr_report(
300
303
small_mutations , expression_variants , copy_variants , structural_variants
301
304
)
302
305
303
- # Setup connections
306
+ # IPR CONNECTION
304
307
ipr_conn = IprConnection (username , password , ipr_url )
305
308
ipr_spec = ipr_conn .get_spec ()
306
309
310
+ # GKB CONNECTION
307
311
if graphkb_url :
308
312
logger .info (f"connecting to graphkb: { graphkb_url } " )
309
313
graphkb_conn = GraphKBConnection (graphkb_url )
@@ -315,9 +319,10 @@ def ipr_report(
315
319
316
320
graphkb_conn .login (gkb_user , gkb_pass )
317
321
322
+ # GKB MATCHING
318
323
gkb_matches : List [Hashabledict ] = []
319
324
320
- # Signature category variants
325
+ # MATCHING TMB
321
326
tmb_variant : IprVariant = {} # type: ignore
322
327
tmb_matches = []
323
328
if "tmburMutationBurden" in content .keys ():
@@ -351,6 +356,7 @@ def ipr_report(
351
356
gkb_matches .extend ([Hashabledict (tmb_statement ) for tmb_statement in tmb_matches ])
352
357
logger .debug (f"\t gkb_matches: { len (gkb_matches )} " )
353
358
359
+ # MATCHING MSI
354
360
msi = content .get ("msi" , [])
355
361
msi_matches = []
356
362
msi_variant : IprVariant = {} # type: ignore
@@ -374,6 +380,7 @@ def ipr_report(
374
380
gkb_matches .extend ([Hashabledict (msi ) for msi in msi_matches ])
375
381
logger .debug (f"\t gkb_matches: { len (gkb_matches )} " )
376
382
383
+ # MATCHING SMALL MUTATIONS
377
384
logger .info (f"annotating { len (small_mutations )} small mutations" )
378
385
gkb_matches .extend (
379
386
annotate_positional_variants (
@@ -382,6 +389,7 @@ def ipr_report(
382
389
)
383
390
logger .debug (f"\t gkb_matches: { len (gkb_matches )} " )
384
391
392
+ # MATCHING STRUCTURAL VARIANTS
385
393
logger .info (f"annotating { len (structural_variants )} structural variants" )
386
394
gkb_matches .extend (
387
395
annotate_positional_variants (
@@ -390,6 +398,7 @@ def ipr_report(
390
398
)
391
399
logger .debug (f"\t gkb_matches: { len (gkb_matches )} " )
392
400
401
+ # MATCHING COPY VARIANTS
393
402
logger .info (f"annotating { len (copy_variants )} copy variants" )
394
403
gkb_matches .extend (
395
404
[
@@ -401,6 +410,7 @@ def ipr_report(
401
410
)
402
411
logger .debug (f"\t gkb_matches: { len (gkb_matches )} " )
403
412
413
+ # MATCHING EXPRESSION VARIANTS
404
414
logger .info (f"annotating { len (expression_variants )} expression variants" )
405
415
gkb_matches .extend (
406
416
[
@@ -412,13 +422,15 @@ def ipr_report(
412
422
)
413
423
logger .debug (f"\t gkb_matches: { len (gkb_matches )} " )
414
424
425
+ # ALL VARIANTS
415
426
all_variants : Sequence [IprVariant ]
416
427
all_variants = expression_variants + copy_variants + structural_variants + small_mutations # type: ignore
417
428
if msi_matches :
418
429
all_variants .append (msi_variant ) # type: ignore
419
430
if tmb_matches :
420
431
all_variants .append (tmb_variant ) # type: ignore
421
432
433
+ # GKB_MATCHES FILTERING
422
434
if match_germline :
423
435
# verify germline kb statements matched germline observed variants, not somatic variants
424
436
org_len = len (gkb_matches )
@@ -434,17 +446,28 @@ def ipr_report(
434
446
gkb_matches = [Hashabledict (match ) for match in custom_kb_match_filter (gkb_matches )]
435
447
logger .info (f"\t custom_kb_match_filter left { len (gkb_matches )} variants" )
436
448
449
+ if multi_variant_filter :
450
+ logger .info (
451
+ f"Filtering out incomplete matches on multi-variant statements for { len (gkb_matches )} matches"
452
+ )
453
+ gkb_matches = multi_variant_filtering (graphkb_conn , gkb_matches )
454
+ logger .info (f"multi_variant_filtering left { len (gkb_matches )} matches" )
455
+
456
+ # KEY ALTERATIONS
437
457
key_alterations , variant_counts = create_key_alterations (gkb_matches , all_variants )
438
458
459
+ # GENE INFORMATION
439
460
logger .info ("fetching gene annotations" )
440
461
gene_information = get_gene_information (graphkb_conn , sorted (genes_with_variants ))
441
462
463
+ # THERAPEUTIC OPTIONS
442
464
if generate_therapeutics :
443
465
logger .info ("generating therapeutic options" )
444
466
targets = create_therapeutic_options (graphkb_conn , gkb_matches , all_variants )
445
467
else :
446
468
targets = []
447
469
470
+ # ANALYST COMMENTS
448
471
logger .info ("generating analyst comments" )
449
472
if generate_comments :
450
473
comments = {
@@ -455,6 +478,7 @@ def ipr_report(
455
478
else :
456
479
comments = {"comments" : "" }
457
480
481
+ # OUTPUT CONTENT
458
482
# thread safe deep-copy the original content
459
483
output = json .loads (json .dumps (content ))
460
484
output .update (
@@ -491,6 +515,7 @@ def ipr_report(
491
515
ipr_result = None
492
516
upload_error = None
493
517
518
+ # UPLOAD TO IPR
494
519
if ipr_upload :
495
520
try :
496
521
logger .info (f"Uploading to IPR { ipr_conn .url } " )
@@ -500,11 +525,14 @@ def ipr_report(
500
525
except Exception as err :
501
526
upload_error = err
502
527
logger .error (f"ipr_conn.upload_report failed: { err } " , exc_info = True )
528
+
529
+ # SAVE TO JSON FILE
503
530
if output_json_path :
504
531
if always_write_output_json or not ipr_result :
505
532
logger .info (f"Writing IPR upload json to: { output_json_path } " )
506
533
with open (output_json_path , "w" ) as fh :
507
534
fh .write (json .dumps (output ))
535
+
508
536
logger .info (f"made { graphkb_conn .request_count } requests to graphkb" )
509
537
logger .info (f"average load { int (graphkb_conn .load or 0 )} req/s" )
510
538
if upload_error :
0 commit comments