Skip to content

Commit 3bcd8ec

Browse files
committed
add support for ancestry specific allele frequencies
1 parent e7294eb commit 3bcd8ec

File tree

1 file changed

+51
-10
lines changed
  • pgscatalog.core/src/pgscatalog/core/lib

1 file changed

+51
-10
lines changed

pgscatalog.core/src/pgscatalog/core/lib/models.py

+51-10
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
99
"""
1010
import enum
11+
import itertools
1112
import pathlib
1213
from datetime import date
1314
from functools import cached_property
14-
from typing import ClassVar, Optional, Union
15-
from typing_extensions import Self, Literal
15+
from typing import ClassVar, Optional, Union, Any, Self, Literal
1616

1717
from pydantic import (
1818
BaseModel,
@@ -87,8 +87,25 @@ def __hash__(self):
8787
class CatalogScoreVariant(BaseModel):
8888
"""A model representing a row from a PGS Catalog scoring file, defined here:
8989
https://www.pgscatalog.org/downloads/#scoring_columns
90+
91+
Supports dynamic, ancestry-specific allele frequency columns as reported by score authors (e.g. the first row of PGS000662):
92+
93+
>>> variant_with_allelefrequency = {"chr_name": "1", "chr_position": 5743196, "effect_allele": "T", "other_allele": "C", "effect_weight": 0.102298257, "allelefrequency_effect_European": 0.067, "allelefrequency_effect_African": 0.439, "allelefrequency_effect_Asian": 0.113, "allelefrequency_effect_Hispanic": 0.157}
94+
>>> CatalogScoreVariant(**variant_with_allelefrequency) # doctest: +ELLIPSIS
95+
CatalogScoreVariant(rsID=None, chr_name='1', chr_position=5743196..., allelefrequency_effect_European=0.067, allelefrequency_effect_African=0.439, allelefrequency_effect_Asian=0.113, allelefrequency_effect_Hispanic=0.157, ...)
96+
97+
>>> bad_extra_fields = variant_with_allelefrequency | {"favourite_ice_cream": "vanilla"}
98+
>>> CatalogScoreVariant(**bad_extra_fields)
99+
Traceback (most recent call last):
100+
...
101+
pydantic_core._pydantic_core.ValidationError: 1 validation error for CatalogScoreVariant
102+
Value error, Invalid extra fields detected: ['favourite_ice_cream'] ...
90103
"""
91104

105+
model_config = ConfigDict(
106+
extra="allow"
107+
) # extra fields are checked by a model validator
108+
92109
# variant description
93110
rsID: Optional[str] = Field(
94111
default=None,
@@ -205,11 +222,6 @@ class CatalogScoreVariant(BaseModel):
205222
title="Effect Allele Frequency",
206223
description="Reported effect allele frequency, if the associated locus is a haplotype then haplotype frequency will be extracted.",
207224
)
208-
allelefrequency_effect_Ancestry: Optional[float] = Field(
209-
default=None,
210-
title="Population-specific effect allele frequency",
211-
description="Reported effect allele frequency in a specific population (described by the authors).",
212-
)
213225

214226
# harmonised files - additional columns
215227
hm_source: Optional[str] = Field(
@@ -362,12 +374,34 @@ def effect_weight_must_float(cls, weight: str) -> str:
362374
"effect_allele", "other_allele", "hm_inferOtherAllele", mode="before"
363375
)
364376
@classmethod
365-
def alleles_must_parse(cls, value):
377+
def alleles_must_parse(cls, value: Any) -> Allele:
    """Wrap a raw allele string in an ``Allele`` model.

    Args:
        value: The unvalidated input for an allele field.

    Returns:
        ``Allele(allele=value)`` when ``value`` is a string.

    Raises:
        ValueError: If ``value`` is anything other than a string.
    """
    # Guard clause: only strings can be turned into an Allele.
    if not isinstance(value, str):
        raise ValueError(f"Can't parse {value=}")

    return Allele(allele=value)
370382

383+
@model_validator(mode="after")
384+
def check_extra_fields(self) -> Self:
385+
"""Only allelefrequency_effect_{ancestry} is supported as an extra field
386+
{ancestry} is dynamic and set by submitters"""
387+
extra: list[str] = list(self.model_extra.keys())
388+
if extra:
389+
field_match: list[bool] = [
390+
x.startswith("allelefrequency_effect_") for x in extra
391+
]
392+
if not all(field_match):
393+
bad_extra_fields: list[str] = list(
394+
itertools.compress(extra, [not x for x in field_match])
395+
)
396+
raise ValueError(f"Invalid extra fields detected: {bad_extra_fields}")
397+
else:
398+
for field in extra:
399+
# make sure allele frequency is a float or raise a value error
400+
allelefrequency: float = float(getattr(self, field))
401+
setattr(self, field, allelefrequency)
402+
403+
return self
404+
371405
@model_validator(mode="after")
372406
def check_effect_weights(self) -> Self:
373407
match (
@@ -406,9 +440,16 @@ def check_position(self) -> Self:
406440
return self
407441

408442
@field_validator(
409-
"rsID", "chr_name", "chr_position", "hm_chr", "hm_pos", mode="before"
443+
"rsID",
444+
"chr_name",
445+
"chr_position",
446+
"hm_chr",
447+
"hm_pos",
448+
"allelefrequency_effect",
449+
mode="before",
410450
)
411-
def empty_string_to_none(cls, v):
451+
@classmethod
452+
def empty_string_to_none(cls, v: Any) -> Any:
    """Normalise empty or whitespace-only strings to ``None``.

    Args:
        v: The unvalidated input value for the field.

    Returns:
        ``None`` when ``v`` is a string that is empty after stripping
        whitespace; otherwise ``v`` unchanged, whatever its type.
    """
    # Non-string input (e.g. an int position or float frequency) is passed
    # through untouched, which is why the return type is Any, not str.
    if isinstance(v, str) and not v.strip():
        return None
    return v

0 commit comments

Comments
 (0)