Skip to content

Commit

Permalink
feat: binary conversion with annonars and worker (#35)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Jun 7, 2023
1 parent 2f6deac commit d731bf5
Show file tree
Hide file tree
Showing 26 changed files with 129 additions and 1,599 deletions.
102 changes: 73 additions & 29 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@
# ``varfish-server-worker`` and is used in the backend for filtering and/or exposed to the
# user via a REST API.

from varfish_db_downloader.data_versions import DATA_VERSIONS as DV
from varfish_db_downloader.versions import DATA_VERSIONS as DV, PACKAGE_VERSIONS as PV

# The prefix to use for all shell commands: force the ``C`` locale, echo each
# command (``-x``), and abort on errors, unset variables, and failures inside
# pipelines (``-euo pipefail``).
SHELL_PREFIX = "export LC_ALL=C; set -x -euo pipefail;"
# Set up the shell prefix by default.
shell.prefix(SHELL_PREFIX)

# Regular expression for genome release; matches ``grch37`` or ``grch38``.
RE_GENOME = r"grch(37|38)"
# Regular expression for versions; matches dot-separated runs of digits such as
# ``4.4`` or ``20200327``.
RE_VERSION = r"\d+(\.\d+)*"

# ===============================================================================================
# Test Mode
Expand Down Expand Up @@ -58,6 +62,8 @@ rule help:
## all -- run all rules
rule all:
input:
# == work directory =====================================================================
#
# genes
f"work/genes/dbnsfp/{DV.dbnsfp}/genes.tsv.gz",
f"work/genes/ensembl/{DV.ensembl}/ensembl_xlink.tsv",
Expand All @@ -76,7 +82,7 @@ rule all:
f"work/download/annos/grch37/seqvars/dbnsfp/{DV.dbnsfp}c/LICENSE.txt",
f"work/download/annos/grch37/seqvars/dbscsnv/{DV.dbscsnv}/dbscSNV{DV.dbscsnv}.chr1",
f"work/download/annos/grch37/seqvars/dbsnp/{DV.dbsnp}/dbsnp.vcf.gz",
"work/annos/grch37/seqvars/helixmtdb/20200327/helixmtdb.vcf.gz",
f"work/annos/grch37/seqvars/helixmtdb/{DV.helixmtdb}/helixmtdb.vcf.gz",
f"work/annos/grch37/seqvars/gnomad_mtdna/{DV.gnomad_mtdna}/gnomad_mtdna.vcf.gz",
f"work/annos/grch37/seqvars/gnomad_exomes/{DV.gnomad_v2}/.done",
f"work/annos/grch37/seqvars/gnomad_genomes/{DV.gnomad_v2}/.done",
Expand All @@ -86,10 +92,10 @@ rule all:
# NB: dbNSFP is dual reference (for download)
# NB: dbscSNV is dual reference (for download)
f"work/download/annos/grch37/seqvars/dbsnp/{DV.dbsnp}/dbsnp.vcf.gz",
"work/annos/grch38/seqvars/helixmtdb/20200327/helixmtdb.vcf.gz",
f"work/annos/grch38/seqvars/gnomad_mtdna/{DV.gnomad_mtdna}/gnomad_mtdna.vcf.gz",
f"work/annos/grch38/seqvars/gnomad_exomes/{DV.gnomad_v2}/.done",
f"work/annos/grch38/seqvars/gnomad_genomes/{DV.gnomad_v3}/.done",
f"work/download/annos/grch38/seqvars/helixmtdb/{DV.helixmtdb}/helixmtdb.vcf.gz",
f"work/download/annos/grch38/seqvars/gnomad_mtdna/{DV.gnomad_mtdna}/gnomad_mtdna.vcf.gz",
f"work/download/annos/grch38/seqvars/gnomad_exomes/{DV.gnomad_v2}/.done",
f"work/download/annos/grch38/seqvars/gnomad_genomes/{DV.gnomad_v3}/.done",
# -- background/population structural variants and annotations thereof
# ---- GRCh37
f"work/annos/grch37/strucvars/dbvar/{DV.dbvar}/dbvar.bed.gz",
Expand Down Expand Up @@ -122,38 +128,76 @@ rule all:
f"work/annos/grch38/features/ucsc/{DV.ucsc_rmsk_38}/rmsk.bed.gz",
f"work/annos/grch38/features/ucsc/{DV.ucsc_alt_seq_liftover_38}/altSeqLiftOverPsl.bed.gz",
f"work/annos/grch38/features/ucsc/{DV.ucsc_fix_seq_liftover_38}/fixSeqLiftOverPsl.bed.gz",
#
# == output directory ===================================================================
#
# -- mehari data
# ---- frequencies (via annonars)
f"output/mehari/freqs-grch37-{DV.gnomad_v2}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
f"output/mehari/freqs-grch38-{DV.gnomad_v3}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
# # -- varfish-server-worker data
# # ---- CADD
# f"output/worker/annos/seqvars/cadd-grch37-{DV.cadd}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/cadd-grch38-{DV.cadd}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- dbSNP
# f"output/worker/annos/seqvars/dbsnp-grch37-{DV.dbsnp}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/dbsnp-grch38-{DV.dbsnp}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- dbNSFP
# f"output/worker/annos/seqvars/dbnsfp-grch37-{DV.dbnsfp}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/dbnsfp-grch38-{DV.dbnsfp}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- dbscSNV
# f"output/worker/annos/seqvars/dbscsnv-grch37-{DV.dbscsnv}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/dbscsnv-grch38-{DV.dbscsnv}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- gnomAD mtDNA
# f"output/worker/annos/seqvars/gnomad-mtdna-grch37-{DV.gnomad_mtdna}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/gnomad-mtdna-grch38-{DV.gnomad_mtdna}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- gnomAD exomes
# f"output/worker/annos/seqvars/gnomad-exomes-grch37-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/gnomad-exomes-grch38-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- gnomAD genomes
# f"output/worker/annos/seqvars/gnomad-genomes-grch37-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- HelixMtDb
# f"output/worker/annos/seqvars/helixmtdb-grch37-{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/helixmtdb-grch38-{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
# # ---- UCSC conservation
# f"output/worker/annos/seqvars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
# f"output/worker/annos/seqvars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",


# ===============================================================================================
# Modular Snakefile Includes
# ===============================================================================================


# -- work directory -----------------------------------------------------------------------------
# Gene-related rules.
include: "rules/genes/dbnsfp.smk"
include: "rules/genes/ensembl.smk"
include: "rules/genes/gnomad.smk"
include: "rules/genes/hgnc.smk"
include: "rules/genes/ncbi.smk"
include: "rules/work/genes/dbnsfp.smk"
include: "rules/work/genes/ensembl.smk"
include: "rules/work/genes/gnomad.smk"
include: "rules/work/genes/hgnc.smk"
include: "rules/work/genes/ncbi.smk"
# Reference sequence--related rules.
include: "rules/reference/human.smk"
include: "rules/work/reference/human.smk"
# Features (position and not variant specific).
include: "rules/annos/features/cons.smk"
include: "rules/annos/features/ensembl.smk"
include: "rules/annos/features/refseq.smk"
include: "rules/annos/features/tads.smk"
include: "rules/annos/features/ucsc.smk"
include: "rules/work/annos/features/cons.smk"
include: "rules/work/annos/features/ensembl.smk"
include: "rules/work/annos/features/refseq.smk"
include: "rules/work/annos/features/tads.smk"
include: "rules/work/annos/features/ucsc.smk"
# Sequence variants and annotations.
include: "rules/annos/seqvars/cadd.smk"
include: "rules/annos/seqvars/dbnsfp.smk"
include: "rules/annos/seqvars/dbscsnv.smk"
include: "rules/annos/seqvars/dbsnp.smk"
include: "rules/annos/seqvars/gnomad_mtdna.smk"
include: "rules/annos/seqvars/gnomad_nuclear.smk"
include: "rules/annos/seqvars/helix.smk"
include: "rules/work/annos/seqvars/cadd.smk"
include: "rules/work/annos/seqvars/dbnsfp.smk"
include: "rules/work/annos/seqvars/dbscsnv.smk"
include: "rules/work/annos/seqvars/dbsnp.smk"
include: "rules/work/annos/seqvars/gnomad_mtdna.smk"
include: "rules/work/annos/seqvars/gnomad_nuclear.smk"
include: "rules/work/annos/seqvars/helix.smk"
# Structural variant related.
include: "rules/annos/strucvars/dbvar.smk"
include: "rules/annos/strucvars/dgv.smk"
include: "rules/annos/strucvars/exac.smk"
include: "rules/annos/strucvars/g1k.smk"
include: "rules/annos/strucvars/gnomad.smk"
include: "rules/work/annos/strucvars/dbvar.smk"
include: "rules/work/annos/strucvars/dgv.smk"
include: "rules/work/annos/strucvars/exac.smk"
include: "rules/work/annos/strucvars/g1k.smk"
include: "rules/work/annos/strucvars/gnomad.smk"
# -- output directory ---------------------------------------------------------------------------
include: "rules/output/mehari/freqs.smk"
105 changes: 0 additions & 105 deletions rules/annos/features/cons.smk

This file was deleted.

63 changes: 0 additions & 63 deletions rules/annos/features/ensembl.smk

This file was deleted.

Loading

0 comments on commit d731bf5

Please sign in to comment.