From f88228b79f4a0789bbb54be0ba04095cdfff7ca4 Mon Sep 17 00:00:00 2001 From: skelly001 <97435404+skelly001@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:14:15 -0800 Subject: [PATCH] init read_FragPipe_LFQ and read_FragPipe_TMT --- DESCRIPTION | 2 +- NAMESPACE | 5 +++ R/read_FragPipe_LFQ.R | 71 ++++++++++++++++++++++++++++++++++++++++ R/read_FragPipe_TMT.R | 70 +++++++++++++++++++++++++++++++++++++++ man/complex_heatmap.Rd | 3 +- man/read_FragPipe_LFQ.Rd | 25 ++++++++++++++ man/read_FragPipe_TMT.Rd | 26 +++++++++++++++ 7 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 R/read_FragPipe_LFQ.R create mode 100644 R/read_FragPipe_TMT.R create mode 100644 man/read_FragPipe_LFQ.Rd create mode 100644 man/read_FragPipe_TMT.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f4987f6..903b984 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,4 +90,4 @@ Remotes: github::cran/FField, github::PNNL-Comp-Mass-Spec/MSnID@pnnl-master VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index d7832f8..0747b24 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,8 @@ export(readMaxQuantPeptides) export(readMaxQuantProtGroups) export(readMaxQuantSummary) export(readSkyLinePRR) +export(read_FragPipe_LFQ) +export(read_FragPipe_TMT) export(remove_batch_effect) export(remove_covariate) export(rf_modeling) @@ -106,6 +108,7 @@ importFrom(ROCR,prediction) importFrom(WGCNA,empiricalBayesLM) importFrom(car,Anova) importFrom(circlize,colorRamp2) +importFrom(data.table,fread) importFrom(data.table,rbindlist) importFrom(data.table,setDT) importFrom(doParallel,registerDoParallel) @@ -118,6 +121,7 @@ importFrom(dplyr,case_when) importFrom(dplyr,desc) importFrom(dplyr,distinct) importFrom(dplyr,do) +importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) @@ -305,6 +309,7 @@ importFrom(stringr,str_locate_all) importFrom(stringr,str_replace_all) importFrom(stringr,str_sub) 
importFrom(sva,ComBat)
+importFrom(tibble,column_to_rownames)
 importFrom(tibble,enframe)
 importFrom(tibble,rownames_to_column)
 importFrom(tidyr,fill)
diff --git a/R/read_FragPipe_LFQ.R b/R/read_FragPipe_LFQ.R
new file mode 100644
index 0000000..d5eacbc
--- /dev/null
+++ b/R/read_FragPipe_LFQ.R
@@ -0,0 +1,87 @@
+#' Reading MSFragger-generated LFQ-based MSstats from a file path as MSnSet object
+#'
+#' @description Reads the \code{MSstats.csv} file written by FragPipe for a
+#'   label-free quantification (LFQ) job and converts it into an \code{MSnSet}
+#'   whose features are unique protein/peptide pairs. Rows with missing
+#'   intensity are dropped and the remaining intensities are summed per feature
+#'   and run. Function has only been tested with label-free intensity-based
+#'   quantification data. \code{MSstats.csv} is an optional output file which
+#'   needs to be specified in FP settings.
+#'
+#' @param path character; File path to the FragPipe-generated MSstats.csv file.
+#'   Required: the \code{NULL} default only exists for backward compatibility
+#'   and results in an error.
+#'
+#' @return (MSnSet) MSnSet object of MSFragger LFQ results
+#'
+#' @importFrom MSnbase MSnSet
+#' @importFrom data.table fread
+#' @importFrom tidyr pivot_wider
+#' @importFrom tibble column_to_rownames
+#' @importFrom dplyr %>% select filter distinct relocate everything mutate
+#'
+#' @examples
+#' \dontrun{
+#' file_path <- "C:/Users/fakeusr222/Desktop/MSF_LFQ_job/MSstats.csv"
+#' msnset <- read_FragPipe_LFQ(file_path)
+#' show(msnset)
+#' }
+#'
+#' @export read_FragPipe_LFQ
+
+read_FragPipe_LFQ <- function(path = NULL) {
+  # `path` defaults to NULL, and `file.exists(NULL)` is `logical(0)`, which
+  # makes `if ()` fail with an obscure message; validate the argument first.
+  if (!is.character(path) || length(path) != 1L || is.na(path)) {
+    stop("`path` must be a single file path to the MSstats.csv file")
+  }
+
+  if (!file.exists(path)) {
+    stop(sprintf("MSstats.csv file not found in folder: %s", dirname(path)))
+  }
+
+  df <- fread(file = path, showProgress = FALSE, data.table = FALSE)
+
+  # Fail early if the file does not have the columns used below.
+  needed <- c("ProteinName", "PeptideSequence", "Run", "Intensity")
+  absent <- setdiff(needed, colnames(df))
+  if (length(absent) > 0) {
+    stop(sprintf("MSstats.csv is missing required column(s): %s",
+                 paste(absent, collapse = ", ")))
+  }
+
+  df <- df %>%
+    filter(!is.na(Intensity)) %>%
+    # May add charge col later
+    select(ProteinName, PeptideSequence, Run, Intensity) %>%
+    mutate(featureName = paste0(ProteinName, "@", PeptideSequence)) %>%
+    relocate(featureName, .before = everything())
+
+  # Will sum intensity of unique features.
+  x_data <- df %>%
+    pivot_wider(id_cols = "featureName",
+                names_from = "Run",
+                values_from = "Intensity",
+                values_fn = sum) %>%
+    as.data.frame() %>%
+    column_to_rownames(var = "featureName") %>%
+    as.matrix()
+
+  f_data <- df %>%
+    distinct(featureName, ProteinName, PeptideSequence) %>%
+    `rownames<-`(.[["featureName"]])
+
+  p_data <- df %>%
+    distinct(Run) %>%
+    `rownames<-`(.[["Run"]])
+
+  # Align the matrix with fData/pData; drop = FALSE keeps it a matrix when
+  # the file holds a single run or a single feature.
+  x_data <- x_data[rownames(f_data), rownames(p_data), drop = FALSE]
+
+  MSnSet(exprs = x_data, fData = f_data, pData = p_data)
+}
+
+utils::globalVariables(
+  c("ProteinName", "PeptideSequence", "Run", "Intensity", ".", "featureName")
+)
diff --git a/R/read_FragPipe_TMT.R b/R/read_FragPipe_TMT.R
new file mode 100644
index 0000000..03fda18
--- /dev/null
+++ b/R/read_FragPipe_TMT.R
@@ -0,0 +1,83 @@
+#' Reading MSFragger-generated tmt-report files from a file path as MSnSet object
+#'
+#' @description Reads one FragPipe tmt-report table and converts it into an
+#'   \code{MSnSet}. All columns up to and including \code{ReferenceIntensity}
+#'   become feature data; the columns after it become the expression matrix.
+#'   Function has only been tested with TMT intensity-based quantification
+#'   data. Desired tmt-report output files (e.g., "ratio_multi-site_MD.tsv")
+#'   must be properly selected in the FP settings.
+#'
+#' @param path character; File path to the desired FragPipe-generated
+#'   tmt-report file. The file name must contain "multi-site", "single-site",
+#'   "peptide", "gene" or "protein", because it determines which columns are
+#'   combined into the feature names. Required: the \code{NULL} default only
+#'   exists for backward compatibility and results in an error.
+#'
+#' @return (MSnSet) MSnSet object of MSFragger TMT results
+#'
+#' @importFrom MSnbase MSnSet
+#' @importFrom data.table fread
+#'
+#' @examples
+#' \dontrun{
+#' file_path <- "C:/Users/fakeusr222/Desktop/MSF_TMT_job/ratio_multi-site_MD.tsv"
+#' msnset <- read_FragPipe_TMT(file_path)
+#' show(msnset)
+#' }
+#'
+#' @export read_FragPipe_TMT
+
+read_FragPipe_TMT <- function(path = NULL) {
+  # `path` defaults to NULL, and `file.exists(NULL)` is `logical(0)`, which
+  # makes `if ()` fail with an obscure message; validate the argument first.
+  if (!is.character(path) || length(path) != 1L || is.na(path)) {
+    stop("`path` must be a single file path to a tmt-report file")
+  }
+
+  if (!file.exists(path)) {
+    stop(sprintf("file not found in folder: %s", dirname(path)))
+  }
+
+  df <- fread(file = path, showProgress = FALSE, data.table = FALSE)
+
+  # The report type is inferred from the file name and decides which columns
+  # are pasted together into the feature names.
+  report <- basename(path)
+  if (grepl("multi-site|single-site|peptide", report)) {
+    id_cols <- c("Gene", "ProteinID", "Peptide")
+  } else if (grepl("gene", report)) {
+    id_cols <- c("Index", "ProteinID")
+  } else if (grepl("protein", report)) {
+    id_cols <- c("Gene", "Index")
+  } else {
+    # Without this branch an unrecognised file name fell through and only
+    # failed later, when the never-created `rowname` column was used.
+    stop(sprintf("cannot infer tmt-report type from file name: %s", report))
+  }
+
+  absent <- setdiff(c(id_cols, "ReferenceIntensity"), colnames(df))
+  if (length(absent) > 0) {
+    stop(sprintf("%s is missing required column(s): %s",
+                 report, paste(absent, collapse = ", ")))
+  }
+
+  # make featureNames
+  feature_names <- do.call(paste, c(unname(as.list(df[id_cols])), sep = "|"))
+  if (anyDuplicated(feature_names) > 0) {
+    stop("feature names built from ", paste(id_cols, collapse = ", "),
+         " are not unique")
+  }
+
+  # featureName goes first so that it is part of the feature data below.
+  df <- data.frame(featureName = feature_names, df,
+                   row.names = feature_names,
+                   check.names = FALSE, stringsAsFactors = FALSE)
+
+  # Everything up to and including ReferenceIntensity is feature annotation;
+  # the remaining columns are the quantitative values.
+  n_annot <- match("ReferenceIntensity", colnames(df))
+  f_data <- df[, seq_len(n_annot), drop = FALSE]
+  x_data <- as.matrix(df[, -seq_len(n_annot), drop = FALSE])
+
+  MSnSet(exprs = x_data, fData = f_data)
+}
diff --git a/man/complex_heatmap.Rd b/man/complex_heatmap.Rd
index 678f701..c0ad1e8 100644
--- a/man/complex_heatmap.Rd
+++ b/man/complex_heatmap.Rd
@@ -20,7 +20,8 @@ complex_heatmap(
   show_row_names = FALSE,
   heatmap_title = character(0),
   heatmap_legend_title = NULL,
-  color_range = NULL,
+  color_range = c(-1, 0, 1),
+  colors = c("blue", "white", "red"),
   heatmap_args = list(),
   anno_column = NULL,
   anno_row = NULL,
diff --git a/man/read_FragPipe_LFQ.Rd b/man/read_FragPipe_LFQ.Rd
new file mode 100644
index 0000000..807f782
--- /dev/null
+++ b/man/read_FragPipe_LFQ.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_FragPipe_LFQ.R
+\name{read_FragPipe_LFQ}
+\alias{read_FragPipe_LFQ}
+\title{Reading MSFragger-generated LFQ-based MSstats from a file path as MSnSet object}
+\usage{
+read_FragPipe_LFQ(path = NULL)
+}
+\arguments{
+\item{path}{character; File path to the FragPipe-generated MSstats.csv file.
+  Required: the \code{NULL} default only exists for backward compatibility
+  and results in an error.}
+}
+\value{
+(MSnSet) MSnSet object of MSFragger LFQ results
+}
+\description{
+Reads the \code{MSstats.csv} file written by FragPipe for a
+  label-free quantification (LFQ) job and converts it into an \code{MSnSet}
+  whose features are unique protein/peptide pairs. Rows with missing
+  intensity are dropped and the remaining intensities are summed per feature
+  and run. Function has only been tested with label-free intensity-based
+  quantification data. \code{MSstats.csv} is an optional output file which
+  needs to be specified in FP settings.
+}
+\examples{
+\dontrun{
+file_path <- "C:/Users/fakeusr222/Desktop/MSF_LFQ_job/MSstats.csv"
+msnset <- read_FragPipe_LFQ(file_path)
+show(msnset)
+}
+
+}
diff --git a/man/read_FragPipe_TMT.Rd b/man/read_FragPipe_TMT.Rd
new file mode 100644
index 0000000..30e5ca3
--- /dev/null
+++ b/man/read_FragPipe_TMT.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_FragPipe_TMT.R
+\name{read_FragPipe_TMT}
+\alias{read_FragPipe_TMT}
+\title{Reading MSFragger-generated tmt-report files from a file path as MSnSet object}
+\usage{
+read_FragPipe_TMT(path = NULL)
+}
+\arguments{
+\item{path}{character; File path to the desired FragPipe-generated
+  tmt-report file. The file name must contain "multi-site", "single-site",
+  "peptide", "gene" or "protein", because it determines which columns are
+  combined into the feature names. Required: the \code{NULL} default only
+  exists for backward compatibility and results in an error.}
+}
+\value{
+(MSnSet) MSnSet object of MSFragger TMT results
+}
+\description{
+Reads one FragPipe tmt-report table and converts it into an
+  \code{MSnSet}. All columns up to and including \code{ReferenceIntensity}
+  become feature data; the columns after it become the expression matrix.
+  Function has only been tested with TMT intensity-based quantification
+  data. Desired tmt-report output files (e.g., "ratio_multi-site_MD.tsv")
+  must be properly selected in the FP settings.
+}
+\examples{
+\dontrun{
+file_path <- "C:/Users/fakeusr222/Desktop/MSF_TMT_job/ratio_multi-site_MD.tsv"
+msnset <- read_FragPipe_TMT(file_path)
+show(msnset)
+}
+
+}