Joon-Klaps
diff --git a/‎.gitignore
+1 b/‎.gitignore
+1
diff --git a/‎.nf-core.yml
-4 b/‎.nf-core.yml
-4
diff --git a/‎CHANGELOG.md
+3 b/‎CHANGELOG.md
+3
diff --git a/‎README.md
+6-3 b/‎README.md
+6-3
diff --git a/‎assets/custom_table_headers.yml
+71-62 b/‎assets/custom_table_headers.yml
+71-62
diff --git a/‎assets/mqc_comment/ivar_variants_header_mqc.txt ‎assets/ivar_variants_header_mqc.txt b/‎assets/mqc_comment/ivar_variants_header_mqc.txt ‎assets/ivar_variants_header_mqc.txt
diff --git a/‎assets/methods_description_template.yml
-5 b/‎assets/methods_description_template.yml
-5
diff --git a/‎assets/mqc_comment/blast_mqc.txt
-5 b/‎assets/mqc_comment/blast_mqc.txt
-5
diff --git a/‎assets/mqc_comment/checkv_mqc.txt
-5 b/‎assets/mqc_comment/checkv_mqc.txt
-5
diff --git a/‎assets/mqc_comment/clusters_summary_mqc.txt
-18 b/‎assets/mqc_comment/clusters_summary_mqc.txt
-18
diff --git a/‎assets/mqc_comment/contig_overview_mqc.txt
-5 b/‎assets/mqc_comment/contig_overview_mqc.txt
-5
diff --git a/‎assets/mqc_comment/mapping_constrains_mqc.txt
-5 b/‎assets/mqc_comment/mapping_constrains_mqc.txt
-5
diff --git a/‎assets/mqc_comment/mapping_constrains_summary_mqc.txt
-5 b/‎assets/mqc_comment/mapping_constrains_summary_mqc.txt
-5
diff --git a/‎assets/mqc_comment/quast_mqc.txt
-4 b/‎assets/mqc_comment/quast_mqc.txt
-4
diff --git a/‎assets/mqc_comment/sample_metadata_mqc.txt
-5 b/‎assets/mqc_comment/sample_metadata_mqc.txt
-5
diff --git a/‎assets/multiqc_config.yml
+21-26 b/‎assets/multiqc_config.yml
+21-26
diff --git a/‎assets/nf-core-viralgenie_logo_light.png
-76.6 KB b/‎assets/nf-core-viralgenie_logo_light.png
-76.6 KB
diff --git a/‎assets/schemas/mapping_constrains.json
+1-1 b/‎assets/schemas/mapping_constrains.json
+1-1
@@ -8,3 +8,4 @@ testing*
 *.pyc
 .vscode/
 null/
+__pycache__/
@@ -34,10 +34,6 @@ lint:
     - CITATIONS.md
     - conf/test.config
     - conf/test_full.config
-    - lib/Utils.groovy
-    - lib/WorkflowMain.groovy
-    - lib/NfcoreTemplate.groovy
-    - lib/WorkflowViralgenie.groovy
   actions_ci: false
 template:
   name: viralgenie
 
@@ -11,13 +11,16 @@ Initial release of Joon-Klaps/viralgenie, created with the [nf-core](https://nf-
 
 - Set default umitools dedup strategy to cluster ([#126](https://github.com/Joon-Klaps/viralgenie/pull/126))
 - Include both krakenreport &nodes.dmp in taxonomy filtering ([#128](https://github.com/Joon-Klaps/viralgenie/pull/128))
+- Include coverage plot & subset contig results in mqc report ([#129](https://github.com/Joon-Klaps/viralgenie/pull/129))
 - Add Sspace indiv to each assembler seperatly ([#132](https://github.com/Joon-Klaps/viralgenie/pull/132))
 - Add read & contig decomplexification using prinseq++  ([#133](https://github.com/Joon-Klaps/viralgenie/pull/133))
 - Add option to filter contig clusters based on cumulative read coverage ([#138](https://github.com/Joon-Klaps/viralgenie/pull/138))
+- Refurbish mqc implementation ([#139](https://github.com/Joon-Klaps/viralgenie/pull/139))
 - Adding mash-screen output to result table ([#140](https://github.com/Joon-Klaps/viralgenie/pull/140))
 - Add logic to allow samples with no reference hits to be analysed ([#141](https://github.com/Joon-Klaps/viralgenie/pull/141))
 - Add visualisation for hybrid scaffold ([#143](https://github.com/Joon-Klaps/viralgenie/pull/143))
 
+
 ### `Fixed`
 
 - OOM with longer contigs for lowcov_to_reference, uses more RAM now ([#125](https://github.com/Joon-Klaps/viralgenie/pull/125))
 
@@ -127,15 +127,18 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 
 ## Citations
 
-<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
 <!-- If you use nf-core/viralgenie for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
+>[!WARNING]
+> Viralgenie is currently not yet published. Please cite as:
+> Klaps J, Lemey P, Kafetzopoulou L. Viralgenie: A metagenomics analysis pipeline for eukaryotic viruses. __GitHub__ https://github.com/Joon-Klaps/viralgenie
+
 
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](https://joon-klaps.github.io/viralgenie/latest/CITATIONS) file.
 
-You can cite the `nf-core` publication as follows:
+<!-- You can cite the `nf-core` publication as follows:
 
 > **The nf-core framework for community-curated bioinformatics pipelines.**
 >
 > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
 >
-> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
+> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). -->
@@ -1,60 +1,65 @@
-umitools_dedup:
-  - input_reads: "mapped reads"
-  - removed_reads
-  - output_reads: "deduplicated reads"
-  - positions_deduplicated
-  - max_umi_per_pos
-  - mean_umi_per_pos
-  - total_umis
-  - percent_passing_dedup: "% passing dedup"
-  - unique_umis
-picard_dups:
-  - UNPAIRED_READS_EXAMINED: "unpaired reads examined"
-  - READ_PAIR_OPTICAL_DUPLICATES: "read pair optical duplicates"
-  - UNMAPPED_READS: "unmapped reads"
-  - ESTIMATED_LIBRARY_SIZE: "estimated library size"
-  - UNPAIRED_READ_DUPLICATES: "unpaired read duplicates"
-  - SECONDARY_OR_SUPPLEMENTARY_RDS: "secondary or supplementary rds"
-  - READ_PAIRS_EXAMINED: "read pairs examined"
-  - READ_PAIR_DUPLICATES: "read pair duplicates"
-  - PERCENT_DUPLICATION: "% duplication"
-  - LIBRARY: "library"
-samtools_stats:
-  - sequences
-  - reads_paired_percent: "reads paired %"
-  - average_length
-  - is_sorted
-  - bases_mapped_(cigar)
-  - reads_QC_failed_percent: "reads QC failed %"
-  - reads_unmapped
-  - reads_unmapped_percent: "reads unmapped %"
-  - reads_properly_paired_percent: "reads properly paired %"
-  - average_quality
-  - reads_paired
-  - non-primary_alignments
-  - supplementary_alignments
-  - reads_mapped
-  - reads_mapped_percent: "reads mapped %"
-  - bases_trimmed
-  - bases_duplicated
-  - reads_properly_paired
-  - outward_oriented_pairs
-  - reads_duplicated
-  - reads_duplicated_percent: "reads duplicated %"
-  - bases_mapped
-  - insert_size_average
-  - insert_size_standard_deviation
-  - inward_oriented_pairs
-  - error_rate
-  - mismatches
-  - reads_MQ0
-  - total_length
-  - reads_QC_failed
+failed_mapped:
+  - mapped reads
+umitools:
+  - multiqc_umitools_dedup:
+      - input_reads: "mapped reads"
+      - removed_reads
+      - output_reads: "deduplicated reads"
+      - positions_deduplicated
+      - max_umi_per_pos
+      - mean_umi_per_pos
+      - total_umis
+      - percent_passing_dedup: "% passing dedup"
+      - unique_umis
+picard:
+  - multiqc_picard_dups:
+      - UNPAIRED_READS_EXAMINED: "unpaired reads examined"
+      - READ_PAIR_OPTICAL_DUPLICATES: "read pair optical duplicates"
+      - UNMAPPED_READS: "unmapped reads"
+      - ESTIMATED_LIBRARY_SIZE: "estimated library size"
+      - UNPAIRED_READ_DUPLICATES: "unpaired read duplicates"
+      - SECONDARY_OR_SUPPLEMENTARY_RDS: "secondary or supplementary rds"
+      - READ_PAIRS_EXAMINED: "read pairs examined"
+      - READ_PAIR_DUPLICATES: "read pair duplicates"
+      - PERCENT_DUPLICATION: "% duplication"
+      - LIBRARY: "library"
+samtools:
+  - multiqc_samtools_stats:
+      - sequences: "reads mapped"
+      - reads_paired_percent: "reads paired %"
+      - average_length
+      - is_sorted
+      - bases_mapped_(cigar)
+      - reads_QC_failed_percent: "reads QC failed %"
+      - reads_unmapped
+      - reads_unmapped_percent: "reads unmapped %"
+      - reads_properly_paired_percent: "reads properly paired %"
+      - average_quality
+      - reads_paired
+      - non-primary_alignments
+      - supplementary_alignments
+      - reads_mapped
+      - reads_mapped_percent: "reads mapped %"
+      - bases_trimmed
+      - bases_duplicated
+      - reads_properly_paired
+      - outward_oriented_pairs
+      - reads_duplicated
+      - reads_duplicated_percent: "reads duplicated %"
+      - bases_mapped
+      - insert_size_average
+      - insert_size_standard_deviation
+      - inward_oriented_pairs
+      - error_rate
+      - mismatches
+      - reads_MQ0
+      - total_length
+      - reads_QC_failed
 ivar_variants:
   - INS: "raw inserts"
   - SNP: "raw SNPs"
   - DEL: "raw deletions"
-bcftools_stats:
+bcftools:
   - number_of_indels: "number of indels"
   - number_of_samples: "number of samples"
   - number_of_SNPs: "number of SNPs"
@@ -83,12 +88,16 @@ bcftools_stats:
   - substitution_type_G>A: "substitution G->A"
   - substitution_type_A>G: "substitution A->G"
 general_stats:
-  - mosdepth-1_x_pc: "mosdepth 1X coverage"
-  - mosdepth-5_x_pc: "mosdepth 5X coverage"
-  - mosdepth-10_x_pc: "mosdepth 10X coverage"
-  - mosdepth-30_x_pc: "mosdepth 30X coverage"
-  - mosdepth-30_x_pc: "mosdepth 50X coverage"
-  - mosdepth-median_coverage: "mosdepth Median read depth"
-  - mosdepth-mean_coverage: "mosdepth Mean read depth"
-  - mosdepth-min_coverage: "mosdepth Min read depth"
-  - mosdepth-max_coverage: "mosdepth Max read depth"
+  - "CLUSTER: mosdepth.mean_coverage": "mosdepth Mean read depth"
+  - "CLUSTER: mosdepth.min_coverage": "mosdepth Min read depth"
+  - "CLUSTER: mosdepth.max_coverage": "mosdepth Max read depth"
+  - "CLUSTER: mosdepth.median_coverage": "mosdepth Median read depth"
+  - "CLUSTER: mosdepth.1_x_pc": "mosdepth 1X coverage"
+  - "CLUSTER: mosdepth.5_x_pc": "mosdepth 5X coverage"
+  - "CLUSTER: mosdepth.10_x_pc": "mosdepth 10X coverage"
+  - "CLUSTER: mosdepth.50_x_pc": "mosdepth 50X coverage"
+  - "CLUSTER: mosdepth.100_x_pc": "mosdepth 100X coverage"
+  - "CLUSTER: mosdepth.200_x_pc": "mosdepth 200X coverage"
+  - "CLUSTER: mosdepth.500_x_pc": "mosdepth 500X coverage"
+  - "CLUSTER: mosdepth.750_x_pc": "mosdepth 750X coverage"
+  - "CLUSTER: mosdepth.1000_x_pc": "mosdepth 1000X coverage"
@@ -3,7 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag
 section_name: "Joon-Klaps/viralgenie Methods Description"
 section_href: "https://github.com/Joon-Klaps/viralgenie"
 plot_type: "html"
-## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
 ## You inject any metadata in the Nextflow '${workflow}' object
 data: |
   <h4>Methods</h4>
@@ -13,10 +12,6 @@ data: |
   <p>${tool_citations}</p>
   <h4>References</h4>
   <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: <a href="https://doi.org/10.1038/nbt.3820">10.1038/nbt.3820</a></li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: <a href="https://doi.org/10.1038/s41587-020-0439-x">10.1038/s41587-020-0439-x</a></li>
-    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: <a href="https://doi.org/10.1038/s41592-018-0046-7">10.1038/s41592-018-0046-7</a></li>
-    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: <a href="https://doi.org/10.1093/bioinformatics/btx192">10.1093/bioinformatics/btx192</a></li>
     ${tool_bibliography}
   </ul>
   <div class="alert alert-info">
 
@@ -4,7 +4,7 @@ report_comment: >
   <a href="https://joon-klaps.github.io/viralgenie/latest/dev/usage/" target="_blank">documentation</a>.
 
 export_plots: true
-
+data_format: "yaml"
 max_table_rows: 100000
 
 report_section_order:
@@ -18,36 +18,18 @@ report_section_order:
     before: summary_contigs
   failed_mapped:
     before: summary_contigs
-  contig_overview:
+  cluster_summary:
     before: samtools_stats
   "viralgenie-methods-description":
     order: -1000
-  software_versions:
+  "Software-versions":
     order: -1001
   "Joon-Klaps-viralgenie-summary":
     order: -1002
 
 use_filename_as_sample_name:
   - fastp
 
-run_modules:
-  - custom_content
-  - fastqc
-  - fastp
-  - trimmomatic
-  - humid
-  - bbduk
-  - umitools
-  - bowtie2
-  - kaiju
-  - mosdepth
-  - kraken
-  - bracken
-  - quast
-  - samtools
-  - bcftools
-  - picard
-
 module_order:
   - "fastqc":
       name: "SAMPLE: FastQC (Raw)"
@@ -98,6 +80,8 @@ module_order:
       anchor: "quast_trinity"
       path_filters:
         - "*_trinity.tsv"
+  - "cluster-summary":
+      name: "SAMPLE: Contig clustering"
   - "picard":
       name: "CLUSTER: Picard"
   - "umitools":
@@ -110,23 +94,35 @@ module_order:
       name: "CLUSTER: Bcftools"
   - "custom_content"
 
+mosdepth_config:
+  general_stats_coverage:
+    - 1
+    - 5
+    - 10
+    - 50
+    - 100
+    - 200
+    - 500
+    - 750
+    - 1000
+
 # Summary table names
 table_columns_name:
   "SAMPLE: FastQC (raw)":
     total_sequences: "Nr. Input Reads"
-    avg_sequence_length: "Length Input Reads"
+    avg_sequence_length: "Average Length Input Reads"
     percent_gc: "% GC Input Reads"
     percent_duplicates: "% Dups Input Reads"
     percent_fails: "% Failed Input Reads"
   "SAMPLE: FastQC (post-Trimming)":
     total_sequences: "Nr. reads post Trimming"
-    avg_sequence_length: "Length reads post Trimming"
+    avg_sequence_length: "Average Length reads post Trimming"
     percent_gc: "% GC reads post Trimming"
     percent_duplicates: "% Dups reads post Trimming"
     percent_fails: "% Failed reads post Trimming"
   "SAMPLE: FastQC (post-Host-removal)":
     total_sequences: "Nr. Processed Reads"
-    avg_sequence_length: "Length Processed Reads"
+    avg_sequence_length: "Average Length Processed Reads"
     percent_gc: "% GC Processed Reads"
     percent_duplicates: "% Dups Processed Reads"
     percent_fails: "% Failed Processed Reads"
@@ -150,6 +146,7 @@ table_columns_name:
     pct_unclassified: "% non-host reads"
   "SAMPLE: Kraken2 (Diversity)":
     pct_top_n: "% reads top 5 Species (Kraken2)"
+    pct_unclassified: "% reads unclassified (Kraken2)"
   "SAMPLE: KAIJU (Diversity)":
     "% Assigned": "% Reads assigned (Kaiju)"
     assigned: "M reads assigned (Kaiju)"
@@ -205,8 +202,6 @@ table_columns_visible:
     Largest contig: True
     Total length: False
 
-skip_versions_section: true
-
 # Viral reads are not so big
 read_count_multiplier: 1
 read_count_prefix: ""
 
@@ -55,5 +55,5 @@
             "segment": ["species"]
         }
     },
-    "allOf": [{ "uniqueEntries": ["id","species", "segment"] }, { "uniqueEntries": ["id"] }]
+    "allOf": [{ "uniqueEntries": ["id", "species", "segment"] }, { "uniqueEntries": ["id"] }]
 }
Original file line number	Diff line number	Diff line change
`@@ -55,5 +55,5 @@`
`55`	`55`	`"segment": ["species"]`
`56`	`56`	`}`
`57`	`57`	`},`
`58`		`- "allOf": [{ "uniqueEntries": ["id","species", "segment"] }, { "uniqueEntries": ["id"] }]`
	`58`	`+ "allOf": [{ "uniqueEntries": ["id", "species", "segment"] }, { "uniqueEntries": ["id"] }]`
`59`	`59`	`}`