Skip to content

Commit 5e3bdf9

Browse files
committed
set up type checks for pgscatalog.core
1 parent 7d330d8 commit 5e3bdf9

File tree

7 files changed

+114
-22
lines changed

7 files changed

+114
-22
lines changed

pgscatalog.core/poetry.lock

+61-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pgscatalog.core/pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pydantic = "^2.9.0"
2828
pytest = "^7.4.4"
2929
sphinx-autoapi = "^3.0.0"
3030
pytest-cov = "^4.1.0"
31+
mypy = "^1.11.2"
3132

3233
[build-system]
3334
requires = ["poetry-core"]
@@ -39,4 +40,4 @@ addopts = "-ra -q --doctest-modules"
3940
filterwarnings = ["error"]
4041

4142
[tool.coverage.run]
42-
source = ['src/pgscatalog/core']
43+
source = ['src/pgscatalog/core']

pgscatalog.core/src/pgscatalog/core/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
""" Public interface to the Polygenic Score Catalog core package """
1+
"""Public interface to the Polygenic Score Catalog core package"""
2+
23
import logging
34
import importlib.metadata
45

pgscatalog.core/src/pgscatalog/core/lib/_read.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import logging
55
import pathlib
6+
from typing import Generator, Iterator
67

78
from xopen import xopen
89

@@ -12,8 +13,13 @@
1213

1314

1415
def read_rows_lazy(
15-
*, csv_reader, fields: list[str], name: str, wide: bool, row_nr: int
16-
):
16+
*,
17+
csv_reader: Iterator[list[str]],
18+
fields: list[str],
19+
name: str,
20+
wide: bool,
21+
row_nr: int,
22+
) -> Generator[ScoreVariant, None, None]:
1723
"""Read rows from an open scoring file and instantiate them as ScoreVariants"""
1824
for row in csv_reader:
1925
variant = dict(zip(fields, row))
@@ -81,7 +87,7 @@ def detect_wide(cols: list[str]) -> bool:
8187
return False
8288

8389

84-
def read_header(path: pathlib.Path):
90+
def read_header(path: pathlib.Path) -> dict:
8591
"""Parses the header of a PGS Catalog format scoring file into a dictionary"""
8692
header = {}
8793

pgscatalog.core/src/pgscatalog/core/lib/_sortpaths.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
""" This module assumes you're working with paths that follow the format:
1+
"""This module assumes you're working with paths that follow the format:
22
33
{sampleset}_{chrom}_{effect_type}_{n}
44
"""
5+
56
from natsort import natsort_keygen, ns
67

78

pgscatalog.core/src/pgscatalog/core/lib/models.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" PGS Catalog pydantic models for data validation
1+
"""PGS Catalog pydantic models for data validation
22
33
Best way to reuse:
44
@@ -7,9 +7,10 @@
77
* `import pgscatalog.core` and use fully qualified name: `pgscatalog.core.models.CatalogScoreVariant`)
88
99
"""
10+
1011
from functools import cached_property
1112
from typing import ClassVar, Optional
12-
from typing_extensions import Self
13+
from typing_extensions import Self, Literal
1314

1415
from pydantic import (
1516
BaseModel,
@@ -46,7 +47,7 @@ class Allele(BaseModel):
4647
allele: str
4748
_valid_snp_bases: ClassVar[frozenset[str]] = frozenset({"A", "C", "T", "G"})
4849

49-
@computed_field
50+
@computed_field # type: ignore
5051
@cached_property
5152
def is_snp(self) -> bool:
5253
"""SNPs are the most common type of effect allele in PGS Catalog scoring
@@ -242,23 +243,35 @@ class CatalogScoreVariant(BaseModel):
242243
)
243244

244245
# helpful class attributes (not used by pydantic to instantiate a class)
245-
harmonised_columns: ClassVar[tuple[str]] = (
246+
harmonised_columns: ClassVar[
247+
tuple[Literal["hm_rsID"], Literal["hm_chr"], Literal["hm_pos"]]
248+
] = (
246249
"hm_rsID",
247250
"hm_chr",
248251
"hm_pos",
249252
) # it's OK if ("hm_source", "hm_inferOtherAllele", "hm_match_chr", "hm_match_pos") are missing
250-
complex_columns: ClassVar[tuple[str]] = (
253+
complex_columns: ClassVar[
254+
tuple[
255+
Literal["is_haplotype"], Literal["is_diplotype"], Literal["is_interaction"]
256+
]
257+
] = (
251258
"is_haplotype",
252259
"is_diplotype",
253260
"is_interaction",
254261
)
255-
non_additive_columns: ClassVar[tuple[str]] = (
262+
non_additive_columns: ClassVar[
263+
tuple[
264+
Literal["dosage_0_weight"],
265+
Literal["dosage_1_weight"],
266+
Literal["dosage_2_weight"],
267+
]
268+
] = (
256269
"dosage_0_weight",
257270
"dosage_1_weight",
258271
"dosage_2_weight",
259272
)
260273

261-
@computed_field
274+
@computed_field # type: ignore
262275
@cached_property
263276
def variant_id(self) -> str:
264277
"""ID = chr:pos:effect_allele:other_allele"""
@@ -269,7 +282,7 @@ def variant_id(self) -> str:
269282
]
270283
)
271284

272-
@computed_field
285+
@computed_field # type: ignore
273286
@cached_property
274287
def is_harmonised(self) -> bool:
275288
# simple check: do any of the harmonised columns have data?
@@ -278,7 +291,7 @@ def is_harmonised(self) -> bool:
278291
return True
279292
return False
280293

281-
@computed_field
294+
@computed_field # type: ignore
282295
@cached_property
283296
def is_complex(self) -> bool:
284297
# checking flag fields here, which are defaulted to False
@@ -287,7 +300,7 @@ def is_complex(self) -> bool:
287300
return True
288301
return False
289302

290-
@computed_field
303+
@computed_field # type: ignore
291304
@cached_property
292305
def is_non_additive(self) -> bool:
293306
# simple check: do any of the weight dosage columns have data?
@@ -296,7 +309,7 @@ def is_non_additive(self) -> bool:
296309
return True
297310
return False
298311

299-
@computed_field
312+
@computed_field # type: ignore
300313
@cached_property
301314
def effect_type(self) -> EffectType:
302315
match (self.is_recessive, self.is_dominant, self.is_non_additive):

pgscatalog.core/src/pgscatalog/core/lib/scorevariant.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Optional, ClassVar
1+
from typing import Optional, ClassVar, Literal
22
from pydantic import (
33
Field,
44
field_serializer,
@@ -62,7 +62,19 @@ class ScoreVariant(CatalogScoreVariant):
6262
)
6363

6464
# column names for output are used by __iter__ and when writing out
65-
output_fields: ClassVar[tuple[str]] = (
65+
output_fields: ClassVar[
66+
tuple[
67+
Literal["chr_name"],
68+
Literal["chr_position"],
69+
Literal["effect_allele"],
70+
Literal["other_allele"],
71+
Literal["effect_weight"],
72+
Literal["effect_type"],
73+
Literal["is_duplicated"],
74+
Literal["accession"],
75+
Literal["row_nr"],
76+
]
77+
] = (
6678
"chr_name",
6779
"chr_position",
6880
"effect_allele",

0 commit comments

Comments
 (0)