PGScatalog · nebfield · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024 · Jan 18, 2024
diff --git a/.github/workflows/calclib-pytest.yml b/.github/workflows/calclib-pytest.yml
@@ -0,0 +1,13 @@
+on:
+  push:
+    paths:
+      - 'pgscatalog.calclib/**.py'  # run only when a .py file under pgscatalog.calclib/ changes
+  pull_request:
+    paths:
+      - 'pgscatalog.calclib/**.py'  # same path filter for pull requests
+
+jobs:
+  pytest-calclib:
+    uses: ./.github/workflows/pytest.yaml  # delegate to the repository's reusable pytest workflow
+    with:
+      package-directory: "pgscatalog.calclib"  # passed to the called workflow as its package-directory input
diff --git a/.github/workflows/combineapp-pytest.yml b/.github/workflows/combineapp-pytest.yml
@@ -0,0 +1,13 @@
+on:
+  push:
+    paths:
+      - 'pgscatalog.combineapp/**.py'  # run only when a .py file under pgscatalog.combineapp/ changes
+  pull_request:
+    paths:
+      - 'pgscatalog.combineapp/**.py'  # same path filter for pull requests
+
+jobs:
+  combineapp-pytest:
+    uses: ./.github/workflows/pytest.yaml  # delegate to the repository's reusable pytest workflow
+    with:
+      package-directory: "pgscatalog.combineapp"  # passed to the called workflow as its package-directory input
diff --git a/.github/workflows/corelib-pytest.yml b/.github/workflows/corelib-pytest.yml
@@ -7,7 +7,7 @@ on:
       - 'pgscatalog.corelib/**.py'
 
 jobs:
-  downloadapp-corelib:
+  pytest-corelib:
     uses: ./.github/workflows/pytest.yaml
     with:
       package-directory: "pgscatalog.corelib"
diff --git a/.github/workflows/matchapp-pytest.yml b/.github/workflows/matchapp-pytest.yml
@@ -0,0 +1,13 @@
+on:
+  push:
+    paths:
+      - 'pgscatalog.matchapp/**.py'  # run only when a .py file under pgscatalog.matchapp/ changes
+  pull_request:
+    paths:
+      - 'pgscatalog.matchapp/**.py'  # same path filter for pull requests
+
+jobs:
+  matchapp-pytest:
+    uses: ./.github/workflows/pytest.yaml  # delegate to the repository's reusable pytest workflow
+    with:
+      package-directory: "pgscatalog.matchapp"  # passed to the called workflow as its package-directory input
diff --git a/.github/workflows/matchlib-pytest.yml b/.github/workflows/matchlib-pytest.yml
@@ -0,0 +1,13 @@
+on:
+  push:
+    paths:
+      - 'pgscatalog.matchlib/**.py'  # run only when a .py file under pgscatalog.matchlib/ changes
+  pull_request:
+    paths:
+      - 'pgscatalog.matchlib/**.py'  # same path filter for pull requests
+
+jobs:
+  pytest-matchlib:
+    uses: ./.github/workflows/pytest.yaml  # delegate to the repository's reusable pytest workflow
+    with:
+      package-directory: "pgscatalog.matchlib"  # passed to the called workflow as its package-directory input
diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
@@ -19,20 +19,20 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ inputs.python-version }}
           cache: 'pip'
 
-      - uses: actions/cache@v3
+      - uses: actions/cache@v4
         with:
           path: ${{ inputs.package-directory }}/.venv
           key: venv-${{ hashFiles('poetry.lock') }}
 
       - run: pip install poetry
 
-      - run: poetry install --with dev
+      - run: poetry install --with dev --all-extras
         working-directory: ${{ inputs.package-directory }}
 
       - run: poetry run pytest --doctest-modules
-        working-directory: ${{ inputs.package-directory }}
+        working-directory: ${{ inputs.package-directory }}
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@
 */dist/*
 build
 _build
+docs/autoapi
 .cache
 *.so
 
@@ -19,6 +20,7 @@ pip-log.txt
 
 .DS_Store
 .idea/*
+*/.idea/*
 .python-version
 .vscode/*
 

diff --git a/README.md b/README.md
@@ -5,9 +5,9 @@ This repository contains Python applications and libraries for working with poly
 
 These CLI applications are used by the PGS Catalog Calculator workflow. 
 
-| Application           | Description                                    | Link                                                  |
-|-----------------------|------------------------------------------------|-------------------------------------------------------|
-| `pgscatalog-download` | Download scoring files from the PGS Catalog    | [README](pgscatalog.downloadapp/pgscatalog/README.md) |
+| Application           | Description                                    | Link                                                 |
+|-----------------------|------------------------------------------------|------------------------------------------------------|
+| `pgscatalog-download` | Download scoring files from the PGS Catalog    | [README](pgscatalog.downloadapp/README.md) |
 | `pgscatalog-combine`  | Combine scoring files into a consistent format |
 
 
@@ -23,6 +23,23 @@ If you write  code to work with PGS, we publish some libraries that might be hel
 | `pgscatalog-calclib`   | Ancestry estimation and normalisation                    |
 
 
+## Installation
+
+### pip
+
+If you want to use the packages in this repository, install the one you need with pip (for example, `pip install pgscatalog-download`).
+
+### Local install for developers
+
+If you want to make changes to a package or application, it's simplest to clone the repository and install packages in editable mode.
+
+```
+$ git clone https://github.com/PGScatalog/pygscatalog.git
+$ cd pygscatalog/pgscatalog.downloadapp # replace with the package you want to edit
+$ poetry add --editable ../pgscatalog.corelib # downloadapp requires corelib
+$ poetry install  
+```
+
 ## Documentation
 
 Full documentation is provided.. 

diff --git a/docs/Makefile b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_templates/autoapi/index.rst b/docs/_templates/autoapi/index.rst
@@ -0,0 +1,24 @@
+API Reference
+=============
+
+This page contains auto-generated API reference documentation, which describes
+pygscatalog libraries.
+
+The information is mostly useful for developers: people that want to write Python
+code to work with polygenic scores.
+
+
+.. toctree::
+   :titlesonly:
+
+   {% for page in pages | sort %}
+   {#
+      Add the top most levels in "pgscatalog.X" to the index file
+      This is needed because we don't have __init__.py file in pgscatalog package
+      as we use nested implicit namespace packages.
+      https://github.com/readthedocs/sphinx-autoapi/issues/298
+   #}
+   {% if (page.top_level_object or page.name.split('.') | length == 2) and page.display %}
+   {{ page.short_name }} <{{ page.include_path }}>
+   {% endif %}
+   {% endfor %}
diff --git a/docs/conf.py b/docs/conf.py
@@ -0,0 +1,56 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "pygscatalog"
+copyright = "2024, PGS Catalog"
+author = "PGS Catalog"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = ["autoapi.extension"]
+
+templates_path = ["_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "alabaster"
+html_static_path = ["_static"]
+
+# use autoapi for packages that provide APIs (libraries)
+autoapi_dirs = [
+    "../pgscatalog.corelib/src/pgscatalog",
+    "../pgscatalog.matchlib/src/pgscatalog",
+]
+# see _templates/autoapi/index.rst for autoapi fix
+autoapi_template_dir = "_templates/autoapi"
+autoapi_python_use_implicit_namespaces = True
+autoapi_keep_files = True
+
+# hide private members: "private-members" is deliberately left out of this list
+autoapi_options = [
+    "members",
+    "undoc-members",
+    "show-inheritance",
+    "show-module-summary",
+    "imported-members",
+]
+autoapi_member_order = "groupwise"
+
+
+def skip_submodules(app, what, name, obj, skip, options):  # handler for the "autoapi-skip-member" event
+    if what == "module":  # anything reported as a module is always skipped
+        skip = True
+    return skip  # for every other kind, the incoming skip decision is returned unchanged
+
+
+def setup(sphinx):  # NOTE(review): presumably the Sphinx setup hook, called with the application object - confirm
+    sphinx.connect("autoapi-skip-member", skip_submodules)  # route autoapi's skip decisions through skip_submodules
diff --git a/docs/how-to/guides/combine.rst b/docs/how-to/guides/combine.rst
@@ -0,0 +1,69 @@
+How to combine scoring files from the PGS Catalog
+=================================================
+
+``pgscatalog-combine`` is a CLI application that makes it easy to combine scoring files into a standardised output.
+
+The process involves:
+
+* extracting important fields from scoring files
+* doing some quality control checks
+* optionally lifting over variants to a consistent genome build
+* writing a long format / melted output file
+
+Input scoring files must follow PGS Catalog standards. The output file is useful for
+doing data science tasks, like matching variants across a scoring file and target
+genome.
+
+Installation
+------------
+
+::
+
+    $ pip install pgscatalog-combine
+
+Usage
+-----
+
+Combining PGS Catalog scoring files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. tip:: It's easiest to get started by downloading scoring files in the same genome build: :doc:`download`
+
+::
+
+    $ pgscatalog-combine -s PGS000001_hmPOS_GRCh38.txt.gz PGS001229_hmPOS_GRCh38.txt.gz -t GRCh38 -o combined.txt
+
+.. note:: If you're combining lots of files, you can compress the output automatically with ``-o combined.txt.gz``
+
+Lifting over scoring files
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It's possible to combine scoring files with different genome builds using liftover.
+
+.. danger:: You should only do this when combining PGS Catalog and custom scoring files, because the PGS Catalog provides harmonised data
+
+First, download chain files from UCSC:
+
+* `hg19ToHg38.over.chain.gz`_
+* `hg38ToHg19.over.chain.gz`_
+
+.. _hg19ToHg38.over.chain.gz: https://hgdownload.soe.ucsc.edu/goldenPath/hg38/liftOver/
+.. _hg38ToHg19.over.chain.gz: https://hgdownload.soe.ucsc.edu/goldenPath/hg19/liftOver/
+
+And copy them into a directory (e.g. ``my_chain_dir/``).
+
+Assume you have a custom scoring file in GRCh37 (``my_scorefile_grch37.txt.gz``) and you want to combine it with a PGS Catalog scoring file in GRCh38:
+
+::
+
+    $ pgscatalog-combine -s PGS000001_hmPOS_GRCh38.txt.gz my_scorefile_grch37.txt.gz \
+        --chain_dir my_chain_dir/ \
+        -t GRCh38 \
+        -o combined.txt
+
+Help
+----
+
+::
+
+    $ pgscatalog-combine --help
diff --git a/docs/how-to/guides/download.rst b/docs/how-to/guides/download.rst
@@ -0,0 +1,66 @@
+How to download scoring files from the PGS Catalog
+==================================================
+
+``pgscatalog-download`` is a CLI application that makes it easy to download scoring files from the
+PGS Catalog using any mixture of PGS, publication, and trait accessions. The application:
+
+* automatically retries downloads if they fail
+* validates the checksum of downloaded scoring files
+* automatically selects scoring files aligned to a requested genome build
+
+Installation
+-------------
+
+::
+
+    $ pip install pgscatalog-download
+
+Usage
+-----
+
+Downloading PGS IDs scoring files aligned to GRCh38
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    $ mkdir downloads
+    $ pgscatalog-download --pgs PGS000822 PGS001229 --build GRCh38 -o downloads
+
+.. note::
+
+    Setting ``--build`` will download scoring files harmonised by the PGS Catalog. This means scoring files have consistent fields, like genomic coordinates.
+
+Downloading all scores associated with a trait
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To download all scores associated with Alzheimer's disease:
+
+::
+
+    $ mkdir downloads
+    $ pgscatalog-download --efo MONDO_0004975 -b GRCh38 -o downloads
+
+By default scores associated with child traits, like late-onset Alzheimer's disease, are included.
+To exclude them use:
+
+::
+
+    $ mkdir downloads
+    $ pgscatalog-download --efo MONDO_0004975 -b GRCh38 -o downloads --efo_direct
+
+Downloading all scores associated with a publication
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you're interested in scores from a specific publication:
+
+::
+
+    $ mkdir downloads
+    $ pgscatalog-download --pgp PGP000517 -b GRCh38 -o downloads
+
+Help
+----
+
+::
+
+    $ pgscatalog-download --help