Commit d5f6cb2

patch inclusion of cluster info

1 parent 1904531

6 files changed (+22 -14 lines)

bin/custom_multiqc.py (+4 -3)

@@ -257,7 +257,7 @@ def load_custom_data(args) -> List[pd.DataFrame]:
         result.extend([annotation_df])
 
     # Cluster table - cluster summary of members & centroids
-    clusters_df = filelist_to_df(args.clusters_files)
+    clusters_df = filelist_to_df(args.clusters_files)
     if not clusters_df.empty:
         clusters_df = clusters_df.add_prefix("(cluster) ")
         clusters_df = clusters_df.rename(columns={"(cluster) sample": "sample", "(cluster) cluster": "cluster"})
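The removed and added lines at 260 are textually identical, so that pair is most likely a whitespace-only change. For context, the prefix-then-rename step namespaces every cluster column except the two join keys; a toy pandas round-trip (illustrative data only):

    import pandas as pd

    df = pd.DataFrame({"sample": ["s1"], "cluster": [0], "size": [42]})
    df = df.add_prefix("(cluster) ").rename(
        columns={"(cluster) sample": "sample", "(cluster) cluster": "cluster"}
    )
    print(df.columns.tolist())  # ['sample', 'cluster', '(cluster) size']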
@@ -282,8 +282,8 @@ def get_general_stats_data_mod(sample: Optional[str] = None) -> Dict:
     for row in rows:
         for key, val in row.data.items():
             if key in header:
-                namespace = header[key].get('namespace', key).replace("SAMPLE: ", "")
-                final_key = f"{namespace}. {header[key].get('title', key)}" if header[key].get('title') else key
+                namespace = header[key].get("namespace", key).replace("SAMPLE: ", "")
+                final_key = f"{namespace}. {header[key].get('title', key)}" if header[key].get("title") else key
                 data[s][final_key] = val
     if sample:
         if not data:
@@ -511,6 +511,7 @@ def main(argv=None):
 
     # 5.2 reformat the dataframe
     mqc_custom_df = reformat_custom_df(mqc_custom_df, cluster_df)
+    mqc_custom_df.to_csv("mqc_custom_df.after.tsv", sep="\t")
 
     # 5.3 split up denovo constructs and mapping (-CONSTRAIN) results
     logger.info("Splitting up denovo constructs and mapping (-CONSTRAIN) results")
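The added to_csv line dumps the reformatted table to mqc_custom_df.after.tsv, which reads like a debugging aid for checking that the cluster columns merged in correctly. Assuming the default index that to_csv writes, the dump can be inspected with:

    import pandas as pd

    df = pd.read_csv("mqc_custom_df.after.tsv", sep="\t", index_col=0)
    print(df.head())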

bin/extract_clust.py (+8 -5)
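The first hunk swaps the order of cluster_id and taxid in the tab-separated line this method emits; any downstream code that parses these lines by position needs the matching reorder.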
@@ -122,8 +122,8 @@ def _to_line(self, prefix):
         return "\t".join(
             [
                 str(prefix),
-                str(self.taxid),
                 str(self.cluster_id),
+                str(self.taxid),
                 str(self.centroid),
                 str(self.cluster_size),
                 "\t".join(map(str, rounded_depth)),
@@ -458,7 +458,10 @@ def filter_members(clusters, pattern):
             filtered_clusters.append(cluster)
     return filtered_clusters
 
-def filter_clusters_by_coverage(clusters: List[Cluster], coverages: Dict, threshold: float,keep_n_clusters: int) -> Tuple[List[Cluster], List[Cluster]]:
+
+def filter_clusters_by_coverage(
+    clusters: List[Cluster], coverages: Dict, threshold: float, keep_n_clusters: int
+) -> Tuple[List[Cluster], List[Cluster]]:
     """
     Filter clusters on coverage, only keep clusters with a coverage above the threshold. If no clusters are kept, return top 5.
     """
@@ -471,9 +474,9 @@ def filter_clusters_by_coverage(
             filtered_clusters.append(cluster)
 
     if filtered_clusters:
-        return clusters,filtered_clusters
+        return clusters, filtered_clusters
 
-    sorted_clusters = sorted(clusters, key=lambda x: sum(x.cumulative_read_depth), reverse= True)
+    sorted_clusters = sorted(clusters, key=lambda x: sum(x.cumulative_read_depth), reverse=True)
     return sorted_clusters, sorted_clusters[:keep_n_clusters]
 
 
@@ -610,7 +613,7 @@ def main(argv=None):
     # Filter clusters by coverage
     if args.coverages:
         coverages = read_coverages(args.coverages)
-        clusters,filtered_clusters = filter_clusters_by_coverage(filtered_clusters, coverages, args.perc_reads_contig, args.keep_clusters)
+        clusters, filtered_clusters = filter_clusters_by_coverage(filtered_clusters, coverages, args.perc_reads_contig, args.keep_clusters)
         logger.info("Filtered clusters by coverage, %d were removed.", len(clusters_renamed) - len(filtered_clusters))
 
     assert len(filtered_clusters) != 0, "No clusters left after filtering."
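These hunks are cosmetic (Black-style spacing and a wrapped signature); the behaviour stays: keep clusters that pass the coverage test, and if none pass, fall back to the keep_n_clusters deepest ones. Note the docstring still says "return top 5" while the code actually returns the top keep_n_clusters. A minimal sketch of that fallback, with a stand-in predicate since the real coverage test sits outside these hunks:

    def keep_passing_or_top_n(clusters, passes, depth, n):
        # Mirrors the (all, kept) return shape of filter_clusters_by_coverage.
        kept = [c for c in clusters if passes(c)]
        if kept:
            return clusters, kept
        ranked = sorted(clusters, key=depth, reverse=True)
        return ranked, ranked[:n]

    # With a predicate nothing passes, the two deepest clusters survive:
    toy = [("a", [1.0]), ("b", [3.0]), ("c", [2.0])]
    print(keep_passing_or_top_n(toy, lambda c: False, lambda c: sum(c[1]), 2)[1])
    # [('b', [3.0]), ('c', [2.0])]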

bin/utils/module_data_processing.py (+6 -4)
@@ -113,6 +113,7 @@ def parse_annotation_data(annotation_str):
         annotation_dict[key] = value
     return annotation_dict
 
+
 def reformat_custom_df(df: pd.DataFrame, cluster_df: pd.DataFrame) -> pd.DataFrame:
     """
     Reformat the custom dataframe.
@@ -125,7 +126,7 @@ def reformat_custom_df(df: pd.DataFrame, cluster_df: pd.DataFrame) -> pd.DataFrame:
     df = split_index_column(df)
 
     if not cluster_df.empty:
-        df = pd.merge(df, cluster_df, on=['sample', 'cluster'], how = "left")
+        df = pd.merge(df, cluster_df, on=["sample", "cluster"], how="left")
     df.index = df["index"]
 
     # Reorder the columns
@@ -143,21 +144,22 @@ def reformat_custom_df(df: pd.DataFrame, cluster_df: pd.DataFrame) -> pd.DataFrame:
         for column in df.columns
         if group in column
     ]
-    return reorder_columns(df.dropna(subset=['step']), final_columns)
+    return reorder_columns(df.dropna(subset=["step"]), list(dict.fromkeys(final_columns)))
 
 
-def filter_constrain(df, column, value):
+def filter_constrain(dataframe, column, value):
     """
     Filter a dataframe based on a column and a regex value.
 
     Args:
-        df (pd.DataFrame): The dataframe to be filtered.
+        dataframe (pd.DataFrame): The dataframe to be filtered.
         column (str): The column to filter on.
         regex_value (str): The regex value to filter on.
 
     Returns:
         pd.DataFrame, pd.DataFrame: The filtered dataframe with the regex value and the filtered dataframe without the regex value.
     """
+    df = dataframe.copy()
     # Find rows with the regex value
     locations = df[column].str.contains(value) | df["step"].str.contains("constrain")

conf/modules.config (+2)
@@ -892,6 +892,7 @@ process {
     }
 
     withName: RENAME_FASTA_HEADER_SINGLETON {
+        ext.prefix = { "${meta.id}_singleton" } // DON'T CHANGE
         publishDir = [
             path: { "${params.outdir}/consensus/seq/consensus/${meta.sample}"},
             mode: params.publish_dir_mode,
@@ -954,6 +955,7 @@ process {
     }
 
     withName: RENAME_FASTA_HEADER_CONTIG_CONSENSUS{
+        ext.prefix = { "${meta.id}_consensus" } // DON'T CHANGE
         publishDir = [
             path: { "${params.outdir}/consensus/seq/consensus/${meta.sample}"},
             mode: params.publish_dir_mode,

subworkflows/local/align_collapse_contigs.nf (+1 -1)
@@ -64,7 +64,7 @@ workflow ALIGN_COLLAPSE_CONTIGS {
     )
     ch_versions= ch_versions.mix(IVAR_CONTIG_CONSENSUS.out.versions.first())
 
-    RENAME_FASTA_HEADER_CONTIG_CONSENSUS( IVAR_CONTIG_CONSENSUS.out.fasta, "consensus" )
+    RENAME_FASTA_HEADER_CONTIG_CONSENSUS( IVAR_CONTIG_CONSENSUS.out.fasta, [])
     ch_versions = ch_versions.mix(RENAME_FASTA_HEADER_CONTIG_CONSENSUS.out.versions.first())
 
     // If external, there possibly regions that require patching

subworkflows/local/singleton_filtering.nf (+1 -1)
@@ -19,7 +19,7 @@ workflow SINGLETON_FILTERING {
     // Rename to avoid errors downstream
     RENAME_FASTA_HEADER_SINGLETON(
         contig,
-        "singleton.contig"
+        []
     )
     ch_versions = ch_versions.mix(RENAME_FASTA_HEADER_SINGLETON.out.versions)

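Taken together, the two subworkflow edits stop passing hard-coded prefixes ("consensus", "singleton.contig") and hand the module an empty list instead, presumably so RENAME_FASTA_HEADER falls back to the ext.prefix values now pinned in conf/modules.config; the "DON'T CHANGE" comments flag that downstream steps appear to depend on those exact "_singleton" and "_consensus" prefixes.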