Skip to content

Commit

Permalink
feat: adding files for varfish-server-worker (#50) (#51)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Jun 28, 2023
1 parent e899051 commit ba84443
Show file tree
Hide file tree
Showing 48 changed files with 925 additions and 156 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,4 @@ black:
.PHONY: run-snakefmt
run-snakefmt:
snakefmt --line-length 100 Snakefile
snakefmt --line-length 100 rules/*/*/*.smk rules/*/*/*.smk
snakefmt --line-length 100 rules/*/*.smk rules/*/*/*.smk rules/*/*/*/*.smk
95 changes: 68 additions & 27 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from varfish_db_downloader.versions import (
DATA_VERSIONS as DV,
PACKAGE_VERSIONS as PV,
FORCE_TODAY,
TODAY,
RUNS_IN_CI,
)
Expand Down Expand Up @@ -90,7 +91,6 @@ rule all:
f"work/genes/entrez/{DV.today}/gene_info.jsonl",
f"work/genes/gnomad/{DV.gnomad_constraints}/gnomad_constraints.tsv",
f"work/genes/hgnc/{DV.today}/hgnc_info.jsonl",
f"work/genes/mim2gene/{DV.today}/mim2gene.tsv",
# reference-specific annotations
# -- background/population sequence variants and annotations thereof
# ---- GRCh37
Expand Down Expand Up @@ -157,41 +157,67 @@ rule all:
# ----- genes
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.today}+{PV.worker}/rocksdb/IDENTITY",
# -- worker data
# ----- Genes
f"output/full/worker/genes-xlink-{DV.today}/genes-xlink.tsv",
f"output/full/worker/genes-txs-grch37-{DV.mehari_tx}/mehari-data-txs-grch37-{DV.mehari_tx}.bin.zst",
f"output/full/worker/genes-txs-grch38-{DV.mehari_tx}/mehari-data-txs-grch38-{DV.mehari_tx}.bin.zst",
f"output/full/worker/genes-regions-grch37-{DV.refseq_37}+{PV.worker}/refseq_genes.bin",
f"output/full/worker/genes-regions-grch37-{DV.ensembl_37}+{PV.worker}/ensembl_genes.bin",
f"output/full/worker/genes-regions-grch38-{DV.refseq_38}+{PV.worker}/refseq_genes.bin",
f"output/full/worker/genes-regions-grch38-{DV.ensembl_38}+{PV.worker}/ensembl_genes.bin",
f"output/full/worker/genes-xlink-{DV.today}+{PV.worker}/genes-xlink.bin",
f"output/full/worker/acmg-sf-{DV.acmg_sf}+{PV.worker}/acmg_sf.tsv",
f"output/full/worker/mim2gene-{DV.today}+{PV.worker}/mim2gene.tsv",
f"output/full/worker/masked-repeat-grch37-{DV.ucsc_rmsk_37}+{PV.worker}/masked-repeat.bin",
f"output/full/worker/masked-repeat-grch38-{DV.ucsc_rmsk_38}+{PV.worker}/masked-repeat.bin",
f"output/full/worker/masked-segdup-grch37-{DV.ucsc_genomic_super_dups_37}+{PV.worker}/masked-segdup.bin",
f"output/full/worker/masked-segdup-grch38-{DV.ucsc_genomic_super_dups_38}+{PV.worker}/masked-segdup.bin",
f"output/full/worker/bgdb-dbvar-grch37-{DV.dbvar}+{PV.worker}/bgdb-dbvar.bin",
f"output/full/worker/bgdb-dbvar-grch38-{DV.dbvar}+{PV.worker}/bgdb-dbvar.bin",
f"output/full/worker/bgdb-dgv-grch37-{DV.dgv}+{PV.worker}/bgdb-dgv.bin",
f"output/full/worker/bgdb-dgv-grch38-{DV.dgv}+{PV.worker}/bgdb-dgv.bin",
f"output/full/worker/bgdb-dgv-gs-grch37-{DV.dgv}+{PV.worker}/bgdb-dgv-gs.bin",
f"output/full/worker/bgdb-dgv-gs-grch38-{DV.dgv}+{PV.worker}/bgdb-dgv-gs.bin",
f"output/full/worker/bgdb-gnomad-grch37-{DV.gnomad_sv}+{PV.worker}/bgdb-gnomad.bin",
f"output/full/worker/bgdb-exac-grch37-{DV.exac_cnv}+{PV.worker}/bgdb-exac.bin",
f"output/full/worker/bgdb-g1k-grch37-{DV.g1k_svs}+{PV.worker}/bgdb-g1k.bin",
f"output/full/worker/clinvar-strucvars-grch37-{DV.clinvar_version}+{PV.worker}/clinvar-strucvars.bin",
f"output/full/worker/clinvar-strucvars-grch38-{DV.clinvar_version}+{PV.worker}/clinvar-strucvars.bin",
f"output/full/worker/patho-mms-grch37-{DV.patho_mms}+{PV.worker}/patho-mms.bed",
f"output/full/worker/patho-mms-grch38-{DV.patho_mms}+{PV.worker}/patho-mms.bed",
"output/full/worker/tads-grch37-dixon2015/hesc.bed",
"output/full/worker/tads-grch38-dixon2015/hesc.bed",
# -- mehari data
f"output/full/mehari/genes-xlink-{DV.today}/genes-xlink.tsv",
f"output/full/mehari/genes-txs-grch37-{DV.mehari_tx}/mehari-data-txs-grch37-{DV.mehari_tx}.bin.zst",
f"output/full/mehari/genes-txs-grch38-{DV.mehari_tx}/mehari-data-txs-grch38-{DV.mehari_tx}.bin.zst",
# ----- HPO
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/hp.obo",
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/phenotype.hpoa",
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/phenotype_to_genes.txt",
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/hpo.bin",
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/scores-fun-sim-avg-resnik-gene/IDENTITY",
# ----- background/population structural variants and annotations thereof
f"output/full/worker/annos/strucvars/dbvar-grch37-{DV.dbvar}/dbvar.bed.gz",
f"output/full/worker/annos/strucvars/dbvar-grch38-{DV.dbvar}/dbvar.bed.gz",
f"output/full/worker/annos/strucvars/dgv-grch37-{DV.dgv}/dgv.bed.gz",
f"output/full/worker/annos/strucvars/dgv-grch38-{DV.dgv}/dgv.bed.gz",
f"output/full/worker/annos/strucvars/dgv-gs-grch37-{DV.dgv_gs}/dgv-gs.bed.gz",
f"output/full/worker/annos/strucvars/dgv-gs-grch38-{DV.dgv_gs}/dgv-gs.bed.gz",
f"output/full/worker/annos/strucvars/exac-grch37-{DV.exac_cnv}/exac.bed.gz",
f"output/full/worker/annos/strucvars/g1k-grch37-{DV.g1k_svs}/g1k.bed.gz",
f"output/full/worker/annos/strucvars/gnomad-grch37-{DV.gnomad_sv}/gnomad.bed.gz",
f"output/full/tracks/track-strucvars-dbvar-grch37-{DV.dbvar}+{DV.tracks}/dbvar.bed.gz",
f"output/full/tracks/track-strucvars-dbvar-grch38-{DV.dbvar}+{DV.tracks}/dbvar.bed.gz",
f"output/full/tracks/track-strucvars-dgv-grch37-{DV.dgv}+{DV.tracks}/dgv.bed.gz",
f"output/full/tracks/track-strucvars-dgv-grch38-{DV.dgv}+{DV.tracks}/dgv.bed.gz",
f"output/full/tracks/track-strucvars-dgv-gs-grch37-{DV.dgv_gs}+{DV.tracks}/dgv-gs.bed.gz",
f"output/full/tracks/track-strucvars-dgv-gs-grch38-{DV.dgv_gs}+{DV.tracks}/dgv-gs.bed.gz",
f"output/full/tracks/track-strucvars-exac-grch37-{DV.exac_cnv}+{DV.tracks}/exac.bed.gz",
f"output/full/tracks/track-strucvars-g1k-grch37-{DV.g1k_svs}+{DV.tracks}/g1k.bed.gz",
f"output/full/tracks/track-strucvars-gnomad-grch37-{DV.gnomad_sv}+{DV.tracks}/gnomad.bed.gz",
# ----- known pathogenic MMS
f"output/full/worker/annos/strucvars/patho-mms-grch37-{DV.patho_mms}/patho-mms.bed",
f"output/full/worker/annos/strucvars/patho-mms-grch38-{DV.patho_mms}/patho-mms.bed",
f"output/full/tracks/track-strucvars-patho-mms-grch37-{DV.patho_mms}+{DV.tracks}/patho-mms.bed",
f"output/full/tracks/track-strucvars-patho-mms-grch38-{DV.patho_mms}+{DV.tracks}/patho-mms.bed",
# ----- problematic regions (rmsk, genomicSuperDups, altSeqLiftOverPsl, fixSeqLiftOverPsl)
f"output/full/worker/annos/features/ucsc-genomicsuperdups-grch37-{DV.ucsc_genomic_super_dups_37}/genomicSuperDups.bed.gz",
f"output/full/worker/annos/features/ucsc-genomicsuperdups-grch38-{DV.ucsc_genomic_super_dups_38}/genomicSuperDups.bed.gz",
f"output/full/worker/annos/features/ucsc-rmsk-grch37-{DV.ucsc_rmsk_37}/rmsk.bed.gz",
f"output/full/worker/annos/features/ucsc-rmsk-grch38-{DV.ucsc_rmsk_38}/rmsk.bed.gz",
f"output/full/worker/annos/features/ucsc-altseqliftoverpsl-grch37-{DV.ucsc_alt_seq_liftover_37}/altSeqLiftOverPsl.bed.gz",
f"output/full/worker/annos/features/ucsc-altseqliftoverpsl-grch38-{DV.ucsc_alt_seq_liftover_38}/altSeqLiftOverPsl.bed.gz",
f"output/full/worker/annos/features/ucsc-fixseqliftoverpsl-grch37-{DV.ucsc_fix_seq_liftover_37}/fixSeqLiftOverPsl.bed.gz",
f"output/full/worker/annos/features/ucsc-fixseqliftoverpsl-grch38-{DV.ucsc_fix_seq_liftover_38}/fixSeqLiftOverPsl.bed.gz",
f"output/full/tracks/track-features-ucsc-genomicsuperdups-grch37-{DV.ucsc_genomic_super_dups_37}+{DV.tracks}/genomicSuperDups.bed.gz",
f"output/full/tracks/track-features-ucsc-genomicsuperdups-grch38-{DV.ucsc_genomic_super_dups_38}+{DV.tracks}/genomicSuperDups.bed.gz",
f"output/full/tracks/track-features-ucsc-rmsk-grch37-{DV.ucsc_rmsk_37}+{DV.tracks}/rmsk.bed.gz",
f"output/full/tracks/track-features-ucsc-rmsk-grch38-{DV.ucsc_rmsk_38}+{DV.tracks}/rmsk.bed.gz",
f"output/full/tracks/track-features-ucsc-altseqliftoverpsl-grch37-{DV.ucsc_alt_seq_liftover_37}+{DV.tracks}/altSeqLiftOverPsl.bed.gz",
f"output/full/tracks/track-features-ucsc-altseqliftoverpsl-grch38-{DV.ucsc_alt_seq_liftover_38}+{DV.tracks}/altSeqLiftOverPsl.bed.gz",
f"output/full/tracks/track-features-ucsc-fixseqliftoverpsl-grch37-{DV.ucsc_fix_seq_liftover_37}+{DV.tracks}/fixSeqLiftOverPsl.bed.gz",
f"output/full/tracks/track-features-ucsc-fixseqliftoverpsl-grch38-{DV.ucsc_fix_seq_liftover_38}+{DV.tracks}/fixSeqLiftOverPsl.bed.gz",
# ----- tads
"output/full/worker/annos/strucvars/tads-grch37-dixon2015/hesc.bed",
"output/full/worker/annos/strucvars/tads-grch38-dixon2015/hesc.bed",
f"output/full/tracks/track-tads-grch37-dixon2015+{DV.tracks}/hesc.bed",
f"output/full/tracks/track-tads-grch38-dixon2015+{DV.tracks}/hesc.bed",
#
# == development (reduced data) directories =============================================
#
Expand Down Expand Up @@ -221,6 +247,9 @@ rule all:
f"output/reduced-dev/annonars/gnomad-exomes-grch38-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
f"output/reduced-dev/annonars/gnomad-genomes-grch37-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
f"output/reduced-dev/annonars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
# -- mehari
f"output/reduced-dev/mehari/freqs-grch37-{DV.gnomad_v2}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
f"output/reduced-dev/mehari/freqs-grch38-{DV.gnomad_v3}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
#
# == exomes (reduced data) directories ==================================================
#
Expand Down Expand Up @@ -250,6 +279,9 @@ rule all:
f"output/reduced-exomes/annonars/gnomad-exomes-grch38-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
f"output/reduced-exomes/annonars/gnomad-genomes-grch37-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
f"output/reduced-exomes/annonars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
# -- mehari
f"output/reduced-exomes/mehari/freqs-grch37-{DV.gnomad_v2}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
f"output/reduced-exomes/mehari/freqs-grch38-{DV.gnomad_v3}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",


# ===============================================================================================
Expand Down Expand Up @@ -289,6 +321,7 @@ include: "rules/work/annos/strucvars/dgv.smk"
include: "rules/work/annos/strucvars/exac.smk"
include: "rules/work/annos/strucvars/g1k.smk"
include: "rules/work/annos/strucvars/gnomad.smk"
include: "rules/work/annos/strucvars/clinvar.smk"
# -- output directory ---------------------------------------------------------------------------
# ---- mehari
include: "rules/output/mehari/freqs.smk"
Expand All @@ -306,10 +339,18 @@ include: "rules/output/annonars/gnomad_mtdna.smk"
include: "rules/output/annonars/helix.smk"
include: "rules/output/annonars/genes.smk"
# ---- worker
# ------ global
include: "rules/output/worker/patho_mms.smk"
include: "rules/output/worker/clinvar.smk"
include: "rules/output/worker/genes_regions.smk"
include: "rules/output/worker/hgnc.smk"
include: "rules/output/worker/acmg.smk"
include: "rules/output/worker/mim2gene.smk"
include: "rules/output/worker/masked.smk"
include: "rules/output/worker/bgdb.smk"
include: "rules/output/worker/tads.smk"
# -- reduced output directory (dev/exomes) ------------------------------------------------------
# ---- bed file
include: "rules/reduced/annonars.smk"
include: "rules/reduced/hpo.smk"
include: "rules/reduced/targets.smk"
include: "rules/reduced/mehari.smk"
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dc.format: text/tsv
dc.identifier: genes/acmg/sf:3.1
dc.identifier: varfish-server-worker/acmg_sf:3.1
dc.title: ACMG Secondary Findings (SF) Gene List (v3.1)
dc.description: >
This is version 3.1 of the ACMG gene list for reporting incidental
Expand Down
File renamed without changes.
25 changes: 0 additions & 25 deletions data/patho-mms/20220730/patho-mms-grch38.spec.yaml

This file was deleted.

9 changes: 9 additions & 0 deletions download_urls.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch38-2023-0625+0.6.3.tar.gz
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.2.2/mehari-data-txs-grch37-0.2.2.bin.zst
excerpt_strategy:
strategy: no-excerpt
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,6 @@ dependencies:
- annonars =0.12.7
- viguno =0.1.6
- mehari =0.5.7
- varfish-server-worker =0.7.0
- varfish-server-worker =0.8.0
# S3 uploads
- s5cmd =2.1.0
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/2c4af2ee68c51be6/url.txt
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/d344a91c116abfac/url.txt
Git LFS file not shown
2 changes: 1 addition & 1 deletion rules/output/annonars/genes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

rule output_annonars_genes: # -- build annonars genes RocksDB file
input:
acmg_sf="data/acmg/{v_acmg_sf}/acmg.tsv",
acmg_sf="data/acmg_sf/{v_acmg_sf}/acmg_sf.tsv",
gnomad_constraints="work/genes/gnomad/{v_gnomad_constraints}/gnomad_constraints.tsv",
dbnsfp="work/genes/dbnsfp/{v_dbnsfp}/genes.tsv.gz",
hgnc="work/genes/hgnc/{date}/hgnc_info.jsonl",
Expand Down
15 changes: 15 additions & 0 deletions rules/output/worker/acmg.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
## Rules to prepare the ACMG Secondary Findings (SF) gene list for the worker.


rule acmg_prepare_worker:  # -- copy ACMG SF gene list and its spec file to the worker output
    input:
        # Source files for the ACMG SF version selected by the `v_acmg_sf` wildcard.
        tsv="data/acmg_sf/{v_acmg_sf}/acmg_sf.tsv",
        spec="data/acmg_sf/{v_acmg_sf}/acmg_sf.spec.yaml",
    output:
        # These are f-strings: `{PV.worker}` is interpolated by Python when the rule is parsed,
        # while the doubled braces leave `{v_acmg_sf}` in place as a Snakemake wildcard.
        # NOTE(review): `PV` is not defined in this file; presumably it is provided by the
        # including Snakefile (`PACKAGE_VERSIONS as PV`) -- confirm.
        tsv=f"output/full/worker/acmg-sf-{{v_acmg_sf}}+{PV.worker}/acmg_sf.tsv",
        spec=f"output/full/worker/acmg-sf-{{v_acmg_sf}}+{PV.worker}/acmg_sf.spec.yaml",
    # Both files are copied as-is; their content is not changed.
    shell:
        r"""
        cp {input.tsv} {output.tsv}
        cp {input.spec} {output.spec}
        """
Loading

0 comments on commit ba84443

Please sign in to comment.