Skip to content

Commit 9446c30

Browse files
committed
fix writing out
1 parent 3bcd8ec commit 9446c30

File tree

3 files changed

+30
-31
lines changed

3 files changed

+30
-31
lines changed

pgscatalog.core/src/pgscatalog/core/cli/_combine.py

+13-6
Original file line number | Diff line number | Diff line change
@@ -48,10 +48,17 @@ def write(self, batch):
4848
mode = "at" if os.path.exists(self.filename) else "wt"
4949
with self.open_function(self.filename, mode) as f:
5050
writer = csv.DictWriter(
51-
f, delimiter="\t", lineterminator="\n", fieldnames=self.fieldnames
51+
f,
52+
delimiter="\t",
53+
lineterminator="\n",
54+
fieldnames=self.fieldnames,
55+
extrasaction="ignore",
5256
)
53-
if mode == "wt":
54-
writer.writeheader()
55-
writer.writerows(batch)
56-
57-
writer.writerows(batch)
57+
match mode:
58+
case "wt":
59+
writer.writeheader()
60+
writer.writerows(batch)
61+
case "at":
62+
writer.writerows(batch)
63+
case _:
64+
raise ValueError(f"Invalid {mode=}")

pgscatalog.core/src/pgscatalog/core/cli/combine_cli.py

+15-24
Original file line number | Diff line number | Diff line change
@@ -71,45 +71,36 @@ def run():
7171
n_finished = 0
7272
for scorefile in tqdm(scoring_files, total=len(scoring_files)):
7373
logger.info(f"Processing {scorefile.pgs_id}")
74-
normalised_score: Optional[list[ScoreVariant]] = None
74+
dumped_variants: Optional[list[dict]] = None
7575
is_compatible = True
7676
try:
77-
normalised_score = list(
78-
scorefile.normalise(
79-
drop_missing=args.drop_missing,
80-
**liftover_kwargs,
81-
target_build=target_build,
82-
)
77+
normalised_score = scorefile.normalise(
78+
drop_missing=args.drop_missing,
79+
**liftover_kwargs,
80+
target_build=target_build,
81+
)
82+
fields: set[str] = set(ScoreVariant.output_fields).union(
83+
{"accession", "row_nr", "hm_source"}
84+
)
85+
# it's important to create the list here to raise EffectTypeErrors
86+
# list is more efficient than using itertools.tee
87+
# and we're materialising a small subset of fields only
88+
dumped_variants = list(
89+
x.model_dump(include=fields) for x in normalised_score
8390
)
8491
except EffectTypeError:
8592
logger.warning(
8693
f"Unsupported non-additive effect types in {scorefile=}, skipping"
8794
)
8895
is_compatible = False
8996
else:
90-
# TODO: go back to parallel execution + write to multiple files
9197
writer = TextFileWriter(compress=compress_output, filename=out_path)
92-
93-
# model_dump returns a dict with a subset of keys
94-
dumped_variants = (
95-
x.model_dump(include=set(ScoreVariant.output_fields))
96-
for x in normalised_score
97-
)
9898
writer.write(dumped_variants)
9999
n_finished += 1
100100
finally:
101-
# grab essential information only for the score log
102-
if normalised_score is not None:
103-
log_variants = (
104-
x.model_dump(include={"accession", "row_nr", "hm_source"})
105-
for x in normalised_score
106-
)
107-
else:
108-
log_variants = None
109-
110101
log = ScoreLog(
111102
header=scorefile.header,
112-
variant_sources=log_variants,
103+
variant_sources=dumped_variants,
113104
compatible_effect_type=is_compatible,
114105
)
115106
if log.variants_are_missing:

pgscatalog.core/src/pgscatalog/core/lib/models.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@
1212
import pathlib
1313
from datetime import date
1414
from functools import cached_property
15-
from typing import ClassVar, Optional, Union, Any, Self, Literal
15+
from typing import ClassVar, Optional, Union, Any, Literal
16+
from typing_extensions import Self
1617

1718
from pydantic import (
1819
BaseModel,

0 commit comments

Comments
 (0)