Skip to content

Commit 2fc48c0

Browse files
committed
include empty samples in multiqc sample overview
1 parent aaacf5e commit 2fc48c0

File tree

6 files changed

+27
-11
lines changed

6 files changed

+27
-11
lines changed

assets/samplesheets/samplesheet.csv

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
sample,fastq_1,fastq_2
22
SRR11140744,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/sispa/SRR11140744_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/sispa/SRR11140744_R2.fastq.gz
33
SRR11140748,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/sispa/SRR11140748_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/illumina/sispa/SRR11140748_R2.fastq.gz
4+
empty-SRR,https://raw.githubusercontent.com/Joon-Klaps/nextclade_data/master/data/fastq/empty_1.fastq.gz,https://raw.githubusercontent.com/Joon-Klaps/nextclade_data/master/data/fastq/empty_2.fastq.gz

bin/custom_multiqc.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010
from typing import Dict, List, Optional, Tuple, Union
1111

1212
import multiqc as mqc
13-
import numpy as np
1413
import pandas as pd
15-
from multiqc.plots import bargraph, table
14+
from multiqc.plots import bargraph
1615
from multiqc.types import Anchor
1716
from utils.constant_variables import CLUSTER_PCONFIG
1817
from utils.file_tools import filelist_to_df, get_module_selection, read_in_quast, write_df
@@ -178,6 +177,21 @@ def file_choices(choices, fname):
178177
)
179178
return parser.parse_args(argv)
180179

180+
def get_failed_samples(samples: List[str]) -> List[str]:
    """
    Append the keys of the "failed sample" MultiQC modules to ``samples``.

    Two modules are queried through ``get_module_data``:
        - samples_low_reads
        - samples_without_contigs

    For every module that yields a truthy result, the result is logged and
    its keys are appended to ``samples``. No de-duplication is performed, so
    a key present in both modules (or already in ``samples``) appears more
    than once.

    Args:
        samples: List to extend. It is modified in place.

    Returns:
        The same list object that was passed in.
    """
    for module_id in ("samples_low_reads", "samples_without_contigs"):
        # A falsy lookup result (nothing found / empty) contributes nothing.
        if failed := get_module_data(mqc, module_id):
            # Lazy %-formatting renders the same message as before:
            # "<module_id> <module data>".
            logger.info("%s %s", module_id, failed)
            samples.extend(failed.keys())

    return samples
181195

182196
def load_custom_data(args) -> List[pd.DataFrame]:
183197
"""
@@ -409,11 +423,12 @@ def extract_mqc_data(table_headers: Union[str, Path]) -> Optional[pd.DataFrame]:
409423
return join_df(result, data) if data else result, columns_result
410424

411425

412-
def write_results(contigs_mqc, constrains_mqc, constrains_genstats, args) -> int:
426+
def write_results(contigs_mqc: pd.DataFrame, constrains_mqc: pd.DataFrame, constrains_genstats: pd.DataFrame) -> int:
413427
"""
414428
Write the results to files.
415429
"""
416-
samples = []
430+
samples = get_failed_samples([])
431+
logger.info("samples %s", samples)
417432
if not contigs_mqc.empty:
418433
logger.info("Writing Unfiltered Denovo constructs table file: contigs_overview.tsv")
419434
samples.extend(contigs_mqc["sample"])
@@ -511,7 +526,7 @@ def main(argv=None):
511526

512527
coalesced_constrains, constrains_genstats = reformat_constrain_df(constrains_mqc, renamed_columns, args)
513528

514-
write_results(contigs_mqc, coalesced_constrains, constrains_genstats, args)
529+
write_results(contigs_mqc, coalesced_constrains, constrains_genstats)
515530
return 0
516531

517532

bin/utils/pandas_tools.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def reorder_rows(dataframe):
4242
rank_dict = {step: rank for rank, step in enumerate(ordered_list, start=1)}
4343

4444
# Sort the DataFrame by 'step' based on the ranking dictionary
45-
df["rank"] = df["step"].replace(rank_dict)
45+
df["rank"] = df["step"].map(pd.Series(rank_dict))
4646
df = df.sort_values(["sample", "cluster", "rank"])
4747

4848
return df

conf/modules.config

+2-2
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ process {
894894
withName: RENAME_FASTA_HEADER_SINGLETON {
895895
ext.prefix = { "${meta.id}_singleton" } // DON'T CHANGE
896896
publishDir = [
897-
path: { "${params.outdir}/consensus/seq/consensus/${meta.sample}"},
897+
path: { "${params.outdir}/consensus/seq/scaffold_consensus/${meta.sample}"},
898898
mode: params.publish_dir_mode,
899899
pattern: "*.fasta",
900900
saveAs: { filename -> params.prefix || params.global_prefix ? "${params.global_prefix}-$filename" : filename }
@@ -957,7 +957,7 @@ process {
957957
withName: RENAME_FASTA_HEADER_CONTIG_CONSENSUS{
958958
ext.prefix = { "${meta.id}_consensus" } // DON'T CHANGE
959959
publishDir = [
960-
path: { "${params.outdir}/consensus/seq/consensus/${meta.sample}"},
960+
path: { "${params.outdir}/consensus/seq/scaffold_consensus/${meta.sample}"},
961961
mode: params.publish_dir_mode,
962962
pattern: "*.fasta",
963963
saveAs: { filename -> params.prefix || params.global_prefix ? "${params.global_prefix}-$filename" : filename }

docs/output.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -666,9 +666,9 @@ The consensus sequences are generated by [`BCFTools`](http://samtools.github.io/
666666
???- abstract "Output files - iterations & variants"
667667

668668
- `consensus`
669-
- `seq/<it# | consensus | singleton | constrain>/ `
669+
- `seq/<it# | scaffold_consensus | variant-calling | constrain>/ `
670670
- `<sample-id>/*.fasta`: A fasta file containing the consensus sequence.
671-
- `mask/<it# | consensus | singleton | constrain>`
671+
- `mask/<it# | variant-calling | constrain>`
672672
- `<sample-id>/*.qual.txt`: A log file of the consensus run containing statistics. [`iVar` only]
673673
- `<sample-id>/*.bed`: A bed file containing the masked regions. [`BCFtools` only]
674674
- `<sample-id>/*.mpileup`: A mpileup file containing information on the depth and the quality of each aligned base.

subworkflows/local/utils_nfcore_viralgenie_pipeline/main.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ def lowReadSamplesToMultiQC(tsv_data, min_trimmed_reads) {
381381
[
382382
"id: 'samples_low_reads'",
383383
"anchor: 'WARNING: Filtered samples'",
384-
"section_name: 'Samples with to few reads'",
384+
"section_name: 'Samples with too few reads'",
385385
"format: 'tsv'",
386386
"description: 'Samples that did not have the minimum number of reads (<${min_trimmed_reads}) after trimming, complexity filtering & host removal'",
387387
"plot_type: 'table'"

0 commit comments

Comments
 (0)