|
8 | 8 |
|
9 | 9 | """
|
10 | 10 | import enum
|
| 11 | +import itertools |
11 | 12 | import pathlib
|
12 | 13 | from datetime import date
|
13 | 14 | from functools import cached_property
|
14 |
| -from typing import ClassVar, Optional, Union |
15 |
| -from typing_extensions import Self, Literal |
| 15 | +from typing import ClassVar, Optional, Union, Any, Self, Literal |
16 | 16 |
|
17 | 17 | from pydantic import (
|
18 | 18 | BaseModel,
|
@@ -87,8 +87,25 @@ def __hash__(self):
|
87 | 87 | class CatalogScoreVariant(BaseModel):
|
88 | 88 | """A model representing a row from a PGS Catalog scoring file, defined here:
|
89 | 89 | https://www.pgscatalog.org/downloads/#scoring_columns
|
| 90 | +
|
| 91 | + Supports dynamic ancestry-specific allele frequency information as reported by authors (e.g., the first row from PGS000662): |
| 92 | +
|
| 93 | + >>> variant_with_allelefrequency = {"chr_name": "1", "chr_position": 5743196, "effect_allele": "T", "other_allele": "C", "effect_weight": 0.102298257, "allelefrequency_effect_European": 0.067, "allelefrequency_effect_African": 0.439, "allelefrequency_effect_Asian": 0.113, "allelefrequency_effect_Hispanic": 0.157} |
| 94 | + >>> CatalogScoreVariant(**variant_with_allelefrequency) # doctest: +ELLIPSIS |
| 95 | + CatalogScoreVariant(rsID=None, chr_name='1', chr_position=5743196..., allelefrequency_effect_European=0.067, allelefrequency_effect_African=0.439, allelefrequency_effect_Asian=0.113, allelefrequency_effect_Hispanic=0.157, ...) |
| 96 | +
|
| 97 | + >>> bad_extra_fields = variant_with_allelefrequency | {"favourite_ice_cream": "vanilla"} |
| 98 | + >>> CatalogScoreVariant(**bad_extra_fields) |
| 99 | + Traceback (most recent call last): |
| 100 | + ... |
| 101 | + pydantic_core._pydantic_core.ValidationError: 1 validation error for CatalogScoreVariant |
| 102 | + Value error, Invalid extra fields detected: ['favourite_ice_cream'] ... |
90 | 103 | """
|
91 | 104 |
|
| 105 | + model_config = ConfigDict( |
| 106 | + extra="allow" |
| 107 | + ) # extra fields are checked by a model validator |
| 108 | + |
92 | 109 | # variant description
|
93 | 110 | rsID: Optional[str] = Field(
|
94 | 111 | default=None,
|
@@ -205,11 +222,6 @@ class CatalogScoreVariant(BaseModel):
|
205 | 222 | title="Effect Allele Frequency",
|
206 | 223 | description="Reported effect allele frequency, if the associated locus is a haplotype then haplotype frequency will be extracted.",
|
207 | 224 | )
|
208 |
| - allelefrequency_effect_Ancestry: Optional[float] = Field( |
209 |
| - default=None, |
210 |
| - title="Population-specific effect allele frequency", |
211 |
| - description="Reported effect allele frequency in a specific population (described by the authors).", |
212 |
| - ) |
213 | 225 |
|
214 | 226 | # harmonised files - additional columns
|
215 | 227 | hm_source: Optional[str] = Field(
|
@@ -362,12 +374,34 @@ def effect_weight_must_float(cls, weight: str) -> str:
|
362 | 374 | "effect_allele", "other_allele", "hm_inferOtherAllele", mode="before"
|
363 | 375 | )
|
364 | 376 | @classmethod
|
365 |
def alleles_must_parse(cls, value: Any) -> Allele:
    """Wrap a raw allele string in an ``Allele``.

    Raises:
        ValueError: If ``value`` is not a string.
    """
    # Guard clause: anything other than a string is rejected up front.
    if not isinstance(value, str):
        raise ValueError(f"Can't parse {value=}")
    return Allele(allele=value)
|
370 | 382 |
|
@model_validator(mode="after")
def check_extra_fields(self) -> Self:
    """Validate dynamically named extra fields and coerce their values to float.

    Only ``allelefrequency_effect_{ancestry}`` is supported as an extra field;
    ``{ancestry}`` is dynamic and set by submitters.

    Returns:
        The model instance, with every accepted extra field stored as a float.

    Raises:
        ValueError: If an extra field name does not start with
            ``allelefrequency_effect_``, or if the value of an accepted extra
            field can't be converted to a float.
    """
    prefix = "allelefrequency_effect_"
    # model_extra is documented as possibly None; treat that as "no extras"
    extra: dict[str, Any] = self.model_extra or {}

    bad_extra_fields: list[str] = [
        name for name in extra if not name.startswith(prefix)
    ]
    if bad_extra_fields:
        raise ValueError(f"Invalid extra fields detected: {bad_extra_fields}")

    # Iterate over a snapshot because the values are reassigned in the loop.
    for name, value in list(extra.items()):
        try:
            allelefrequency: float = float(value)
        except (TypeError, ValueError) as err:
            # float(None) and similar raise TypeError, which would escape
            # validation as a raw exception; re-raise as ValueError so the
            # failure is reported like any other validation error.
            raise ValueError(
                f"Can't convert {name}={value!r} to a float"
            ) from err
        setattr(self, name, allelefrequency)

    return self
| 404 | + |
371 | 405 | @model_validator(mode="after")
|
372 | 406 | def check_effect_weights(self) -> Self:
|
373 | 407 | match (
|
@@ -406,9 +440,16 @@ def check_position(self) -> Self:
|
406 | 440 | return self
|
407 | 441 |
|
@field_validator(
    "rsID",
    "chr_name",
    "chr_position",
    "hm_chr",
    "hm_pos",
    "allelefrequency_effect",
    mode="before",
)
@classmethod
def empty_string_to_none(cls, v: Any) -> Any:
    """Convert an empty or whitespace-only string to ``None``.

    Runs in "before" mode on the listed fields. Any other value, including
    non-string input, is returned unchanged, so the return type is not
    limited to ``Optional[str]``.
    """
    if isinstance(v, str) and not v.strip():
        return None
    return v
|
|
0 commit comments