Skip to content

Commit fae06c0

Browse files
committed
add relabel cli to combineapp
1 parent cec4e10 commit fae06c0

9 files changed

+181
-3
lines changed

pgscatalog.combineapp/pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ requires = ["poetry-core"]
2222
build-backend = "poetry.core.masonry.api"
2323

2424
[tool.poetry.scripts]
25-
pgscatalog-combine = 'pgscatalog.combineapp.cli:run'
25+
pgscatalog-combine = 'pgscatalog.combineapp.combine_cli:run'
26+
pgscatalog-relabel = 'pgscatalog.combineapp.relabel_cli:run'
2627

2728
[tool.pytest.ini_options]
2829
minversion = "6.0"
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from pgscatalog.combineapp.cli import run
1+
from pgscatalog.combineapp.combine_cli import run
22

33
__all__ = ["run"]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import argparse
2+
import logging
3+
import pathlib
4+
5+
from pgscatalog.corelib import relabel, relabel_write, RelabelArgs
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
def _parse_args(args=None):
11+
parser = argparse.ArgumentParser(
12+
description="Relabel the column values in one file based on a pair of columns in another",
13+
formatter_class=argparse.RawDescriptionHelpFormatter,
14+
)
15+
parser.add_argument(
16+
"-d",
17+
"--dataset",
18+
dest="dataset",
19+
required=True,
20+
help="<Required> Label for target genomic dataset",
21+
)
22+
parser.add_argument(
23+
"-m",
24+
"--maps",
25+
help="mapping filenames",
26+
dest="map_files",
27+
nargs="+",
28+
required=True,
29+
)
30+
parser.add_argument(
31+
"-o", "--outdir", help="output directory", dest="outdir", required=True
32+
)
33+
parser.add_argument(
34+
"--col_from", help="column to change FROM", dest="col_from", required=True
35+
)
36+
parser.add_argument(
37+
"--col_to", help="column to change TO", dest="col_to", required=True
38+
)
39+
parser.add_argument(
40+
"--target_file", help="target file", dest="target_file", required=True
41+
)
42+
parser.add_argument(
43+
"--target_col",
44+
help="target column to revalue",
45+
dest="target_col",
46+
required=True,
47+
)
48+
parser.add_argument(
49+
"-v",
50+
"--verbose",
51+
dest="verbose",
52+
action="store_true",
53+
help="<Optional> Extra logging information",
54+
)
55+
parser.add_argument("--split", dest="split", action="store_true", required=False)
56+
parser.add_argument(
57+
"--combined", dest="combined", action="store_true", required=False
58+
)
59+
parser.add_argument("-cc", "--comment_char", dest="comment_char", default="##")
60+
args = parser.parse_args()
61+
62+
if not (args.split or args.combined):
63+
parser.error("At least one of --combined or --split is required")
64+
65+
return args
66+
67+
68+
def run():
    """Run the relabel command line program.

    Parses the command line, optionally turns on debug logging, checks that
    every mapping file and the target file exist, relabels the target file
    and writes the result to the requested output directory.

    Raises:
        FileNotFoundError: If any mapping file or the target file is missing.
    """
    cli = _parse_args()

    if cli.verbose:
        logging.getLogger("pgscatalog.corelib").setLevel(logging.DEBUG)
        logger.setLevel(logging.DEBUG)
        logger.debug("Verbose logging enabled")

    relabel_args = RelabelArgs(
        comment_char=cli.comment_char,
        dataset=cli.dataset,
        map_col_from=cli.col_from,
        map_col_to=cli.col_to,
        target_col=cli.target_col,
    )
    logger.debug(f"Relabel arguments {relabel_args}")

    mapping_paths = [pathlib.Path(name) for name in cli.map_files]
    target_path = pathlib.Path(cli.target_file)

    # Report the first missing input (maps are checked before the target file).
    missing = next(
        (path for path in (*mapping_paths, target_path) if not path.exists()),
        None,
    )
    if missing is not None:
        raise FileNotFoundError(f"{missing}")

    logger.debug("Relabelling variants")
    relabelled = relabel(
        in_path=target_path, map_paths=mapping_paths, relabel_args=relabel_args
    )

    logger.debug(f"Writing relabelled data to {cli.outdir}")
    relabel_write(
        relabelled=relabelled,
        dataset=relabel_args.dataset,
        split_output=cli.split,
        combined_output=cli.combined,
        out_dir=cli.outdir,
    )
104+
105+
106+
if __name__ == "__main__":
107+
run()

pgscatalog.combineapp/tests/test_cli.py pgscatalog.combineapp/tests/test_combine_cli.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from unittest.mock import patch
66
import pytest
77

8-
from pgscatalog.combineapp.cli import run
8+
from pgscatalog.combineapp.combine_cli import run
99
from pgscatalog.corelib import ScoringFile
1010

1111

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import csv
2+
import itertools
3+
import os
4+
from unittest.mock import patch
5+
6+
import pytest
7+
from xopen import xopen
8+
9+
from pgscatalog.combineapp.relabel_cli import run
10+
11+
12+
@pytest.fixture(scope="package")
def map_files(request):
    """Return the chr1 and chr8 relabel map test files as string paths."""
    testdata = request.path.parent / "testdata"
    names = ("relabel_map_chr1.txt.gz", "relabel_map_chr8.txt.gz")
    return [str(testdata / name) for name in names]
18+
19+
20+
@pytest.fixture(scope="package")
def relabel_scorefile(request):
    """Return the string path of the scorefile used as the relabel target."""
    testdata = request.path.parent / "testdata"
    return str(testdata / "hgdp_ALL_additive_0.scorefile")
23+
24+
25+
def test_relabel(tmp_path_factory, relabel_scorefile, map_files):
    """Relabel a scorefile through the CLI and check the files it writes."""
    out_dir = tmp_path_factory.mktemp("outdir")

    # Equivalent to invoking the installed console script with these options.
    argv = [
        "pgscatalog-relabel",
        "-m",
        *map_files,
        "--target_file",
        relabel_scorefile,
        "--target_col",
        "ID",
        "-d",
        "hgdp",
        "--col_from",
        "ID_TARGET",
        "--col_to",
        "ID_REF",
        "-o",
        str(out_dir),
        "--combined",
        "--split",
    ]

    with patch("sys.argv", argv):
        run()

    written = os.listdir(out_dir)
    expected = [
        "hgdp_1_relabelled.gz",
        "hgdp_8_relabelled.gz",
        "hgdp_ALL_relabelled.gz",
    ]
    assert sorted(written) == expected

    # Every row of every output file must expose the expected columns.
    for filename in written:
        with xopen(out_dir / filename) as handle:
            for row in csv.DictReader(handle, delimiter="\t"):
                assert "ID" in row
                assert "effect_allele" in row
                assert "PGS000802_hmPOS_GRCh38" in row
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ID effect_allele PGS000802_hmPOS_GRCh38
2+
1:11796321:G:A A 0.16
3+
8:127401060:G:T G 0.217
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)