Skip to content

Commit 0428fd7

Browse files
committed
add number of total 'finished' clusters in samples overview
1 parent d8266ba commit 0428fd7

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

assets/multiqc_config.yml

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ module_order:
4646
- "*_trim*fastqc.zip"
4747
- "humid":
4848
name: "SAMPLE: humid"
49+
- "prinseqplusplus":
50+
name: "SAMPLE: prinseq++"
4951
- "bbduk":
5052
name: "SAMPLE: bbduk"
5153
- "kraken":

bin/custom_multiqc.py

+26
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,31 @@ def extract_mqc_data(table_headers: Union[str, Path]) -> Optional[pd.DataFrame]:
442442

443443
return join_df(result, data) if data else result, columns_result
444444

445+
def add_n_consensus_clusters_to_mqc(dataframe: pd.DataFrame) -> Optional[int]:
    """
    Add the per-sample count of final clusters to the MultiQC general stats.

    The "step" column is ranked with an ordered categorical; the highest-ranked
    step present in the data is treated as the last iteration. Rows at that step
    are counted per "sample" and registered as a general-stats column named
    "# Final denovo clusters" through a new "Consensus Count" module, which is
    appended to ``mqc.report.modules``.

    Args:
        dataframe: Table with at least a "step" and a "sample" column.
            The input is not modified.

    Returns:
        0 when the column was added; None when there was nothing to count
        (empty input, missing columns, or no recognised step value).
    """
    required_columns = ("step", "sample")
    if dataframe.empty or any(column not in dataframe.columns for column in required_columns):
        logger.warning(
            "Cannot count final clusters: input is empty or lacks one of the columns %s",
            required_columns,
        )
        return None

    # Ranking of the steps, lowest first. Iterations are listed it100 ... it1, so
    # "it1" ranks above "it100". Step values not listed here become NaN and are
    # ignored by max() below.
    # NOTE(review): the cap of 100 iterations is hard-coded - confirm it matches
    # the pipeline's maximum.
    step_order = [
        "constraint",
        *(f"it{i}" for i in range(100, 0, -1)),
        "itvariant-calling",
        "consensus",
        "singleton",
    ]
    # Build the ranking as a standalone Series instead of copying the whole frame.
    steps = pd.Series(
        pd.Categorical(dataframe["step"], categories=step_order, ordered=True),
        index=dataframe.index,
    )

    last_iteration = steps.max()
    logger.info("Last iteration: %s", last_iteration)

    # No step matched a known category: nothing to report.
    if pd.isna(last_iteration):
        return None

    # Count how often each sample reaches the last iteration. A positional
    # boolean mask is used so duplicate index labels cannot cause misalignment.
    is_last_iteration = (steps == last_iteration).to_numpy()
    last_iteration_count = (
        dataframe.loc[is_last_iteration]
        .groupby("sample")
        .size()
        .to_frame(name="# Final denovo clusters")
    )

    if last_iteration_count.empty:
        return None

    # Register the counts as an extra column of the general stats table.
    module = mqc.BaseMultiqcModule(name="Consensus Count", anchor=Anchor("custom_data"))
    content = last_iteration_count.to_dict(orient="index")
    module.general_stats_addcols(content)
    mqc.report.modules.append(module)
    return 0
468+
469+
445470

446471
def write_results(contigs_mqc: pd.DataFrame, constraints_mqc: pd.DataFrame, constraints_genstats: pd.DataFrame) -> int:
447472

@@ -455,6 +480,7 @@ def write_results(contigs_mqc: pd.DataFrame, constraints_mqc: pd.DataFrame, cons
455480
samples.extend(contigs_mqc["sample"])
456481
write_df(contigs_mqc.sort_values(by=["sample", "cluster", "step"]), "contigs_overview-with-iterations.tsv", [])
457482
table_plot = contigs_mqc[~contigs_mqc.index.isin(generate_ignore_samples(contigs_mqc))]
483+
add_n_consensus_clusters_to_mqc(table_plot)
458484
write_df(table_plot.sort_values(by=["sample", "cluster", "step"]), "contigs_overview.tsv", [])
459485

460486
if not constraints_mqc.empty:

0 commit comments

Comments
 (0)