@@ -71,45 +71,36 @@ def run():
71
71
n_finished = 0
72
72
for scorefile in tqdm (scoring_files , total = len (scoring_files )):
73
73
logger .info (f"Processing { scorefile .pgs_id } " )
74
- normalised_score : Optional [list [ScoreVariant ]] = None
74
+ dumped_variants : Optional [list [dict ]] = None
75
75
is_compatible = True
76
76
try :
77
- normalised_score = list (
78
- scorefile .normalise (
79
- drop_missing = args .drop_missing ,
80
- ** liftover_kwargs ,
81
- target_build = target_build ,
82
- )
77
+ normalised_score = scorefile .normalise (
78
+ drop_missing = args .drop_missing ,
79
+ ** liftover_kwargs ,
80
+ target_build = target_build ,
81
+ )
82
+ fields : set [str ] = set (ScoreVariant .output_fields ).union (
83
+ {"accession" , "row_nr" , "hm_source" }
84
+ )
85
+ # it's important to create the list here to raise EffectTypeErrors
86
+ # list is more efficient than using itertools.tee
87
+ # and we're materialising a small subset of fields only
88
+ dumped_variants = list (
89
+ x .model_dump (include = fields ) for x in normalised_score
83
90
)
84
91
except EffectTypeError :
85
92
logger .warning (
86
93
f"Unsupported non-additive effect types in { scorefile = } , skipping"
87
94
)
88
95
is_compatible = False
89
96
else :
90
- # TODO: go back to parallel execution + write to multiple files
91
97
writer = TextFileWriter (compress = compress_output , filename = out_path )
92
-
93
- # model_dump returns a dict with a subset of keys
94
- dumped_variants = (
95
- x .model_dump (include = set (ScoreVariant .output_fields ))
96
- for x in normalised_score
97
- )
98
98
writer .write (dumped_variants )
99
99
n_finished += 1
100
100
finally :
101
- # grab essential information only for the score log
102
- if normalised_score is not None :
103
- log_variants = (
104
- x .model_dump (include = {"accession" , "row_nr" , "hm_source" })
105
- for x in normalised_score
106
- )
107
- else :
108
- log_variants = None
109
-
110
101
log = ScoreLog (
111
102
header = scorefile .header ,
112
- variant_sources = log_variants ,
103
+ variant_sources = dumped_variants ,
113
104
compatible_effect_type = is_compatible ,
114
105
)
115
106
if log .variants_are_missing :
0 commit comments