Commit 634b7ed

make pytest strict: fail on warnings

1 parent 3cb51f5

File tree

14 files changed: +63 -41 lines


.github/workflows/pytest.yaml (+1, -1)

@@ -34,5 +34,5 @@ jobs:
       - run: poetry install --with dev --all-extras
         working-directory: ${{ inputs.package-directory }}
 
-      - run: poetry run pytest --doctest-modules
+      - run: poetry run pytest
         working-directory: ${{ inputs.package-directory }}

(--doctest-modules drops out of the CI invocation; it moves into the addopts of each library's pyproject.toml below, so the apps run plain pytest.)

pgscatalog.calcapp/pyproject.toml (+8)

@@ -21,3 +21,11 @@ pgscatalog-aggregate = 'pgscatalog.downloadapp.cli:run'
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = [
+    "tests"
+]
+filterwarnings = ["error"]

pgscatalog.calcapp/tests/test_aggregate.py (+4, -14)

@@ -27,13 +27,8 @@ def test_split_aggregate(tmp_path_factory, scorefiles):
     outf = list(outdir.glob("*.txt.gz"))
     assert [x.name for x in outf] == ["hgdp_pgs.txt.gz"]
     outdf = pd.read_csv(outf[0], sep="\t")
-    assert list(outdf.columns) == [
-        "sampleset",
-        "IID",
-        "DENOM",
-        "PGS001229_hmPOS_GRCh38_SUM",
-    ]
-    assert outdf.shape == (929, 4)
+    assert list(outdf.columns) == ["sampleset", "IID", "DENOM", "PGS", "SUM"]
+    assert outdf.shape == (929, 5)
 
 
 def test_nosplit_aggregate(tmp_path_factory, scorefiles):
@@ -58,10 +53,5 @@ def test_nosplit_aggregate(tmp_path_factory, scorefiles):
     outf = list(outdir.glob("*.txt.gz"))
     assert [x.name for x in outf] == ["aggregated_scores.txt.gz"]
     outdf = pd.read_csv(outf[0], sep="\t")
-    assert list(outdf.columns) == [
-        "sampleset",
-        "IID",
-        "DENOM",
-        "PGS001229_hmPOS_GRCh38_SUM",
-    ]
-    assert outdf.shape == (929, 4)
+    assert list(outdf.columns) == ["sampleset", "IID", "DENOM", "PGS", "SUM"]
+    assert outdf.shape == (929, 5)

pgscatalog.calclib/pyproject.toml (+5)

@@ -24,3 +24,8 @@ pytest = "^8.0.0"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q --doctest-modules"
+filterwarnings = ["error"]

pgscatalog.calclib/src/pgscatalog/calclib/polygenicscore.py (+4, -7)

@@ -2,9 +2,6 @@
 
 import pandas as pd
 
-import reprlib
-
-
 
 class PolygenicScore:
     """Represents the output of plink2 --score written to a file
@@ -13,7 +10,7 @@ class PolygenicScore:
     >>> score1 = Config.ROOT_DIR / "tests" / "cineca_22_additive_0.sscore.zst"
     >>> pgs1 = PolygenicScore(sampleset="test", path=score1) # doctest: +ELLIPSIS
     >>> pgs1
-    PolygenicScore(sampleset='test', path=PosixPath('.../cineca_22_additive_0.sscore.zst'), df=None)
+    PolygenicScore(sampleset='test', path=PosixPath('.../cineca_22_additive_0.sscore.zst'))
     >>> pgs2 = PolygenicScore(sampleset="test", path=score1)
     >>> pgs1.read().to_dict() # doctest: +ELLIPSIS
     {'DENOM': ...}, 'PGS001229_22_SUM': {('test', 'HG00096'): 0.54502, ('test', 'HG00097'): 0.674401, ('test', 'HG00099'): 0.63727, ('test', 'HG00100'): 0.863944, ...}}
@@ -22,7 +19,7 @@ class PolygenicScore:
 
     >>> aggregated_score = pgs1 + pgs2
     >>> aggregated_score # doctest: +ELLIPSIS
-    PolygenicScore(sampleset='test', path=None, df={'DENOM': ...}, 'PGS001229_22_SUM': {('test', 'HG00096'): 1.09004, ('test', 'HG00097'): 1.348802, ('test', 'HG00099'): 1.27454, ('test', 'HG00100'): 1.727888, ...}})
+    PolygenicScore(sampleset='test', path=None)
 
     Once a score has been fully aggregated it can be helpful to recalculate an average:
 
@@ -45,7 +42,7 @@ class PolygenicScore:
     ['test_pgs.txt.gz']
     """
 
-    def __init__(self, *, sampleset, path=None, df=None):
+    def __init__(self, *, path=None, df=None, sampleset=None):
         match (path, df):
             case (None, None):
                 raise ValueError("init with path or df")
@@ -164,6 +161,7 @@ def _select_agg_cols(cols):
         if (x.endswith("_SUM") and (x != "NAMED_ALLELE_DOSAGE_SUM")) or (x in keep_cols)
     ]
 
+
 def _melt(df, value_name):
     """Melt the score dataframe from wide format to long format"""
     df = df.melt(
@@ -175,4 +173,3 @@ def _melt(df, value_name):
     # e.g. PGS000822_SUM -> PGS000822
     df["PGS"] = df["PGS"].str.replace(f"_{value_name}", "")
     return df
-
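The trimmed reprs above drop the df field (which is why reprlib goes away), and the test assertions earlier reflect _melt: aggregated output is now long format, one row per (sample, score) pair. A minimal sketch of that reshape with pandas, using a single made-up score column:

    import pandas as pd

    df = pd.DataFrame(
        {"sampleset": ["test"], "IID": ["HG00096"], "DENOM": [2],
         "PGS001229_22_SUM": [0.545]}
    )
    # Wide -> long: score columns become rows, then the _SUM suffix is
    # stripped so the PGS column holds bare accessions.
    long = df.melt(
        id_vars=["sampleset", "IID", "DENOM"], var_name="PGS", value_name="SUM"
    )
    long["PGS"] = long["PGS"].str.replace("_SUM", "")
    # Columns are now: sampleset, IID, DENOM, PGS, SUM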

pgscatalog.combineapp/pyproject.toml (+5, -3)

@@ -25,7 +25,9 @@ build-backend = "poetry.core.masonry.api"
 pgscatalog-combine = 'pgscatalog.combineapp.cli:run'
 
 [tool.pytest.ini_options]
-pythonpath = [
-    "src"
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = [
+    "tests"
 ]
-
+filterwarnings = ["error"]

pgscatalog.corelib/pyproject.toml (+5)

@@ -27,3 +27,8 @@ pyarrow = ["pyarrow"]
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q --doctest-modules"
+filterwarnings = ["error"]

pgscatalog.corelib/src/pgscatalog/corelib/scorefiles.py (+3, -4)

@@ -291,7 +291,7 @@ def _init_from_accession(self, accession, target_build):
 
     def _init_from_path(self, target_build=None):
         logger.debug(f"Instantiating ScoringFile from {self.local_path=}")
-
+
         if target_build is not None:
             raise ValueError(
                 "target_build must be None for local files. "
@@ -660,8 +660,6 @@ def __init__(self, *args, target_build=None, **kwargs):
                         f"{arg.target_build=} doesn't match {target_build=}"
                     )
                 case _ if pathlib.Path(arg).is_file() and target_build is None:
-                    scorefiles.append(ScoringFile(arg))
-                case _ if pathlib.Path(arg).is_file() and target_build is not None:
                     logger.info(f"Local path: {arg}, no target build is OK")
                     scorefiles.append(ScoringFile(arg))
                 case _ if pathlib.Path(arg).is_file() and target_build is not None:
@@ -794,7 +792,8 @@ class NormalisedScoringFile:
 
     def __init__(self, path):
         try:
-            xopen(path)
+            with xopen(path):
+                pass
         except TypeError:
             self.path = False
         else:
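Wrapping xopen in a context manager closes the probe handle immediately. That plausibly matters under the new strict config: CPython emits an "unclosed file" ResourceWarning when a forgotten handle is garbage-collected, and with filterwarnings = ["error"] that warning fails the suite. A minimal sketch of the pattern, with the builtin open standing in for xopen:

    def is_readable_path(path):
        # Hypothetical helper mirroring NormalisedScoringFile.__init__:
        # probe the path by opening it; the context manager releases the
        # handle so no ResourceWarning surfaces later at collection time.
        try:
            with open(path):
                pass
        except TypeError:
            return False
        return True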

pgscatalog.downloadapp/pyproject.toml (+5, -4)

@@ -25,8 +25,9 @@ build-backend = "poetry.core.masonry.api"
 pgscatalog-download = 'pgscatalog.downloadapp.cli:run'
 
 [tool.pytest.ini_options]
-
-pythonpath = [
-    "src"
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = [
+    "tests"
 ]
-
+filterwarnings = ["error"]

pgscatalog.matchapp/pyproject.toml (+8)

@@ -25,3 +25,11 @@ pgscatalog-matchmerge = 'pgscatalog.matchapp.merge_cli:run_merge'
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = [
+    "tests"
+]
+filterwarnings = ["error"]

pgscatalog.matchlib/pyproject.toml (+5)

@@ -21,3 +21,8 @@ pytest = "^8.0.0"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q --doctest-modules"
+filterwarnings = ["error"]

pgscatalog.matchlib/src/pgscatalog/matchlib/_match/label.py (+4, -4)

@@ -52,12 +52,12 @@ def _encode_match_priority(df: pl.LazyFrame) -> pl.LazyFrame:
     return (
         df.with_columns(
             # set false best match to not_best
-            match_priority=pl.col("best_match").apply(
+            match_priority=pl.col("best_match").map_elements(
                 lambda x: {None: 0, True: 1, False: 3}[x]
             )
         )
         .with_columns(
-            excluded_match_priority=pl.col("exclude").apply(
+            excluded_match_priority=pl.col("exclude").map_elements(
                 lambda x: {None: 0, True: 2, False: 0}[x]
            )
         )
@@ -66,7 +66,7 @@ def _encode_match_priority(df: pl.LazyFrame) -> pl.LazyFrame:
         )
         .with_columns(
             match_status=pl.col("max")
-            .apply(
+            .map_elements(
                 lambda x: {0: "unmatched", 1: "matched", 2: "excluded", 3: "not_best"}[
                     x
                 ]
@@ -140,7 +140,7 @@ def _label_duplicate_best_match(df: pl.LazyFrame) -> pl.LazyFrame:
         .otherwise(pl.lit(False))
     )
     .drop("count")
-    .with_row_count(
+    .with_row_index(
         name="temp_row_nr"
     )  # add temporary row count to get first variant
     .with_columns(
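Expr.apply and with_row_count were deprecated by polars in favour of Expr.map_elements and with_row_index; under the strict warning filter those DeprecationWarnings become hard failures. A minimal sketch of the renamed calls on made-up data, assuming a polars version where both replacements exist (roughly >= 0.20):

    import polars as pl

    df = pl.DataFrame({"best_match": [True, False, True]})
    out = df.with_columns(
        # map_elements replaces the deprecated Expr.apply; same lambda.
        match_priority=pl.col("best_match").map_elements(
            lambda x: {True: 1, False: 3}[x], return_dtype=pl.Int64
        )
    ).with_row_index(name="temp_row_nr")  # replaces with_row_count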

pgscatalog.matchlib/src/pgscatalog/matchlib/_match/log.py (+4, -2)

@@ -43,7 +43,8 @@ def make_summary_log(
             pl.col("match_status").fill_null(value="unmatched"), dataset=pl.lit(dataset)
         )  # fill in unmatched variants
         .group_by(cols)
-        .count()
+        .len()
+        .rename({"len": "count"})
         .join(filter_summary, how="left", on="accession")
         .pipe(_prettify_summary)
     )
@@ -55,7 +56,8 @@ def check_log_count(scorefile: pl.LazyFrame, summary_log: pl.LazyFrame):
 
     log_count: pl.DataFrame = (
         scorefile.group_by("accession")
-        .count()
+        .len()
+        .rename({"len": "count"})
         .join(summary_count, on="accession")
         .collect()
     )
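Newer polars renamed GroupBy.count() to GroupBy.len(), and the output column is named "len" rather than "count", so a rename keeps the downstream column name stable. A minimal sketch, assuming a polars version with GroupBy.len():

    import polars as pl

    df = pl.DataFrame({"accession": ["PGS000822", "PGS000822", "PGS001229"]})
    # .len() counts rows per group into a "len" column; rename it back
    # to "count" so joins and summaries downstream are unchanged.
    counts = df.group_by("accession").len().rename({"len": "count"})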

pgscatalog.matchlib/src/pgscatalog/matchlib/_plinkframe.py (+2, -2)

@@ -28,8 +28,8 @@ def __repr__(self):
 
     def split_pivot(self):
         """Splitting scoring files is helpful for split - apply - combine on big data"""
-        dfs = self.df.collect().partition_by("chr_name", as_dict=True)
-        return {k: v.pipe(pivot_score) for k, v in dfs.items()}
+        dfs = self.df.collect().partition_by(["chr_name"], as_dict=True)
+        return {k[0]: v.pipe(pivot_score) for k, v in dfs.items()}
 
     def pivot_wide(self):
         """Pivoting wide is important to enable parallel score calculation"""
