Skip to content

Commit c6ab690

Browse files
Merge pull request #28 from PNNL-Comp-Mass-Spec/update/run_plexedpiper_redox
Add redox processing capability to run_plexedpiper
2 parents c5865fe + 38c98f4 commit c6ab690

17 files changed

+419
-336
lines changed

.github/workflows/R-CMD-check.yaml

+18-44
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1+
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
12
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
2-
33
on:
44
push:
5-
branches:
6-
master
5+
branches: [main, master]
76
pull_request:
8-
branches: master
7+
branches: [main, master]
98
workflow_dispatch:
9+
branches: [main, master]
1010

1111
name: R-CMD-check
1212

@@ -20,56 +20,30 @@ jobs:
2020
fail-fast: false
2121
matrix:
2222
config:
23+
- {os: macos-latest, r: 'release'}
2324
- {os: windows-latest, r: 'release'}
24-
- {os: macOS-latest, r: 'release'}
25+
- {os: ubuntu-latest, r: 'release'}
2526

2627
env:
27-
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
28-
RSPM: ${{ matrix.config.rspm }}
2928
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29+
R_KEEP_PKG_SOURCE: yes
3030

3131
steps:
32-
- name: Set up Git Repository
33-
uses: actions/checkout@v2
32+
- uses: actions/checkout@v3
33+
34+
- uses: r-lib/actions/setup-pandoc@v2
3435

35-
- name: Set up R
36-
uses: r-lib/actions/setup-r@v1
36+
- uses: r-lib/actions/setup-r@v2
3737
with:
3838
r-version: ${{ matrix.config.r }}
39+
http-user-agent: ${{ matrix.config.http-user-agent }}
40+
use-public-rspm: true
3941

40-
- name: Set up Pandoc
41-
uses: r-lib/actions/setup-pandoc@v1
42-
43-
- name: Query dependencies
44-
run: |
45-
install.packages("devtools")
46-
saveRDS(devtools::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
47-
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
48-
shell: Rscript {0}
49-
50-
- name: Restore R package cache
51-
uses: actions/cache@v2
42+
- uses: r-lib/actions/setup-r-dependencies@v2
5243
with:
53-
path: ${{ env.R_LIBS_USER }}
54-
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
55-
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
56-
57-
- name: Install dependencies
58-
run: |
59-
devtools::install_deps(dependencies = TRUE)
60-
shell: Rscript {0}
61-
62-
- name: Check
63-
env:
64-
_R_CHECK_CRAN_INCOMING_REMOTE_: false
65-
run: |
66-
options(crayon.enabled = TRUE)
67-
devtools::check(error_on = "error", vignettes = FALSE)
68-
shell: Rscript {0}
44+
extra-packages: any::rcmdcheck
45+
needs: check
6946

70-
- name: Upload check results
71-
if: failure()
72-
uses: actions/upload-artifact@main
47+
- uses: r-lib/actions/check-r-package@v2
7348
with:
74-
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
75-
path: check
49+
upload-snapshots: true

.github/workflows/pkgdown.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
22
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
33
on:
4-
push:
5-
branches: [main, master]
6-
pull_request:
7-
branches: [main, master]
84
release:
95
types: [published]
106
workflow_dispatch:

DESCRIPTION

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: PlexedPiper
22
Type: Package
33
Title: Pipeline for isobaric quantification
4-
Version: 0.4.1
5-
Date: 2023-02-07
4+
Version: 0.4.2
5+
Date: 2023-07-06
66
Author: Vladislav Petyuk vladislav.petyuk@pnnl.gov
77
Maintainer: Vladislav Petyuk <vladislav.petyuk@pnnl.gov>
88
Description: Pipeline for isobaric quantification.
@@ -11,7 +11,7 @@ Encoding: UTF-8
1111
LazyData: true
1212
RoxygenNote: 7.2.3
1313
Depends:
14-
MSnID (>= 1.18.1)
14+
MSnID (>= 1.25.2)
1515
Imports:
1616
Biostrings,
1717
data.table,
@@ -21,7 +21,6 @@ Imports:
2121
purrr,
2222
tibble,
2323
tidyr,
24-
tidyselect,
2524
readr,
2625
utils
2726
Suggests:

NAMESPACE

+3-2
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ importFrom(data.table,setnames)
6161
importFrom(dplyr,"%>%")
6262
importFrom(dplyr,across)
6363
importFrom(dplyr,all_of)
64+
importFrom(dplyr,any_of)
6465
importFrom(dplyr,arrange)
6566
importFrom(dplyr,bind_cols)
6667
importFrom(dplyr,bind_rows)
@@ -76,20 +77,20 @@ importFrom(dplyr,mutate)
7677
importFrom(dplyr,n)
7778
importFrom(dplyr,pull)
7879
importFrom(dplyr,rename)
80+
importFrom(dplyr,rowwise)
7981
importFrom(dplyr,select)
8082
importFrom(dplyr,starts_with)
8183
importFrom(dplyr,summarise)
8284
importFrom(dplyr,summarize)
8385
importFrom(dplyr,ungroup)
86+
importFrom(dplyr,where)
8487
importFrom(plyr,llply)
8588
importFrom(purrr,map)
8689
importFrom(purrr,reduce)
8790
importFrom(readr,read_tsv)
8891
importFrom(tibble,rownames_to_column)
8992
importFrom(tidyr,pivot_longer)
9093
importFrom(tidyr,pivot_wider)
91-
importFrom(tidyr,separate)
92-
importFrom(tidyselect,where)
9394
importFrom(utils,read.delim)
9495
importFrom(utils,read.table)
9596
importFrom(utils,write.table)

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# PlexedPiper 0.4.2 (2023-07-06)
2+
3+
- Update `run_plexedpiper` to process redox proteomics data.
4+
- Add more robust checks for `read_study_design` output.
5+
- Set minimum MSnID version to 1.25.2, though this does not prevent installation of an incorrect version of MSnID from Bioconductor, since the version number on Bioconductor is higher.
6+
17
# PlexedPiper 0.4.1 (2023-02-07)
28

39
- Removed duplicate GENCODE protein IDs from `run_plexedpiper` output. GENCODE IDs are currently only unique when combining the protein (ENSP) and transcript (ENST) IDs. Since there are so few duplicates, we will remove them rather than concatenating these IDs in the "protein_id" column of the output of `make_results_ratio_*` and `make_rii_peptide_*` functions.

R/filter_msfragger_data.R

+65-57
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,44 @@
1-
#' Filtering msfragger Data
1+
#' Filtering MSFragger Data
2+
#'
3+
#' Filtering MSFragger data. In this implementation, the peptide-level filter
4+
#' optimizes both ppm and one of Expectation or PeptideProphet Probability
5+
#' thresholds to achieve maximum number of peptide identifications within a
6+
#' given FDR constraint.
27
#'
3-
#' Filtering msfragger data. In this implementation, the peptide-level filter
4-
#' optimizes both ppm and one of Expectation or PeptideProphet Probability thresholds
5-
#' to achieve maximum number of peptide identifications within a given FDR constraint.
68
#' The accession-level filter optimizes based on `peptides_per_1000aa`, so
79
#' \code{\link{compute_num_peptides_per_1000aa}} must be used first.
810
#'
911
#' @md
1012
#'
11-
#' @param msnid (MSnID object) collated msfragger output
13+
#' @param msnid (MSnID object) collated MSFragger output
1214
#' @param fdr.max (numeric) Maximum acceptable FDR. Default is 0.01 (1%).
1315
#' @param level (character) Level at which to perform FDR filter. The name of a
1416
#' column in `psms(msnid)`. Currently, only `"peptide"` or `"accession"` are
15-
#' supported. The added level `SiteID` makes sense only for PTM data and
17+
#' supported. The added level `"SiteID"` makes sense only for PTM data and
1618
#' first requires mapping of the modification site using
17-
#' `MSnID::map_mod_sites`.
18-
#' @param filtering_criterion (character) One of "evalue" which is
19-
#' expectation value or "pp_prob" - peptide prophet probability. Default is
20-
#' "pp_prob".
19+
#' `MSnID::map_mod_sites`.
20+
#' @param filtering_criterion (character) One of `"evalue"` which is expectation
21+
#' value or `"pp_prob"` - PeptideProphet probability. Default is `"pp_prob"`.
2122
#' @param n.iter.grid (numeric) number of grid-distributed evaluation points.
2223
#' @param n.iter.nm (numeric) number of iterations for Nelder-Mead optimization
2324
#' algorithm.
24-
#' @param ... arguments passed to `filter_msfragger_data`.
2525
#'
26-
#' @return (MSnID object) filtered msfragger output
26+
#' @return (MSnID object) filtered MSFragger output
2727
#'
28-
#' @seealso
29-
#' \code{\link[MSnID]{MSnIDFilter}}
30-
#' \code{\link[MSnID]{optimize_filter}}
31-
#' \code{\link[MSnID]{apply_filter}}
28+
#' @seealso \code{\link[MSnID]{MSnIDFilter}}
29+
#' \code{\link[MSnID]{optimize_filter}} \code{\link[MSnID]{apply_filter}}
3230
#'
33-
#' @importFrom MSnID MSnIDFilter optimize_filter
34-
#' mass_measurement_error apply_filter
31+
#' @importFrom MSnID MSnIDFilter optimize_filter mass_measurement_error
32+
#' apply_filter
3533

3634

3735
#' @export
3836
filter_msfragger_data <- function(msnid,
39-
level,
40-
filtering_criterion = c("pp_prob","evalue"),
41-
fdr.max=0.01,
42-
n.iter.grid=500,
43-
n.iter.nm=100){
37+
level,
38+
filtering_criterion = c("pp_prob", "evalue"),
39+
fdr.max=0.01,
40+
n.iter.grid=500,
41+
n.iter.nm=100){
4442

4543
# Clean up on exit
4644
on.exit(rm(list = ls()))
@@ -49,18 +47,18 @@ filter_msfragger_data <- function(msnid,
4947
# Check input
5048
level <- match.arg(level, choices = c("peptide", "accession", "SiteID"))
5149
filtering_criterion <- match.arg(filtering_criterion)
52-
53-
if(level == "SiteID" & !("SiteID" %in% names(msnid)))
54-
stop("Column 'SiteID' is not in the MSnID object. Please map the PTMs first.")
50+
51+
if (level == "SiteID" & !("SiteID" %in% names(msnid))) {
52+
stop("Column 'SiteID' is not in the MSnID object. Please map the PTMs first.")
53+
}
5554

5655
keep_cols <- c(level, "isDecoy") # columns to calculate FDR
5756

5857
# Create MSnID of minimum size
5958
suppressMessages(msnid_small <- MSnID())
60-
59+
6160
# Setup
6261
if (level == "accession") {
63-
6462
# Add filter criteria column
6563
keep_cols <- c(keep_cols, "peptides_per_1000aa")
6664
msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
@@ -69,36 +67,41 @@ filter_msfragger_data <- function(msnid,
6967
filtObj <- MSnIDFilter(msnid_small)
7068
filtObj$peptides_per_1000aa <- list(comparison = ">", threshold = 1)
7169
method <- "SANN"
70+
7271
} else {
73-
#Choose filter object probability value
74-
if (filtering_criterion == "evalue") {
75-
msnid$msmsScore <- -log10(msnid$Expectation)
76-
}
77-
if (filtering_criterion == "pp_prob") {
78-
msnid$msmsScore <- msnid$`PeptideProphet Probability`
79-
}
80-
# Create columns for peptide filtering
81-
# Can not use data.table syntax if the msnid has been modified at all,
82-
# as it results in the "Invalid .internal.selfref" warning and
83-
# columns not being created.
84-
msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid))
85-
86-
# Add filter criteria columns
87-
keep_cols <- c(keep_cols, "msmsScore", "absParentMassErrorPPM")
88-
msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
89-
90-
# Create filter object
91-
filtObj <- MSnIDFilter(msnid_small)
92-
filtObj$absParentMassErrorPPM <- list(comparison = "<", threshold = 10)
93-
if (filtering_criterion == "evalue") {
94-
filtObj$msmsScore <- list(comparison = ">", threshold = 2)
95-
}
96-
if (filtering_criterion == "pp_prob") {
97-
filtObj$msmsScore <- list(comparison = ">", threshold = 0.99)
98-
}
99-
method <- "Nelder-Mead"
72+
# Choose the value stored in msmsScore, the score used by the filter object
73+
if (filtering_criterion == "evalue") {
74+
msnid$msmsScore <- -log10(msnid$Expectation)
75+
}
76+
77+
if (filtering_criterion == "pp_prob") {
78+
msnid$msmsScore <- msnid$`PeptideProphet Probability`
79+
}
80+
81+
# Create columns for peptide filtering
82+
# Cannot use data.table syntax if the msnid has been modified at all,
83+
# as it results in the "Invalid .internal.selfref" warning and
84+
# columns not being created.
85+
msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid))
86+
87+
# Add filter criteria columns
88+
keep_cols <- c(keep_cols, "msmsScore", "absParentMassErrorPPM")
89+
msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
90+
91+
# Create filter object
92+
filtObj <- MSnIDFilter(msnid_small)
93+
filtObj$absParentMassErrorPPM <- list(comparison = "<", threshold = 10)
94+
95+
if (filtering_criterion == "evalue") {
96+
filtObj$msmsScore <- list(comparison = ">", threshold = 2)
97+
}
98+
99+
if (filtering_criterion == "pp_prob") {
100+
filtObj$msmsScore <- list(comparison = ">", threshold = 0.99)
101+
}
102+
103+
method <- "Nelder-Mead"
100104
}
101-
102105

103106
# step 1
104107
filtObj.grid <- optimize_filter(filtObj,
@@ -107,12 +110,17 @@ filter_msfragger_data <- function(msnid,
107110
method="Grid",
108111
level=level,
109112
n.iter=n.iter.grid)
113+
110114
# step 2
111115
filtObj.nm <- optimize_filter(filtObj.grid,
112116
msnid_small,
113117
fdr.max=fdr.max,
114118
method=method,
115119
level=level,
116120
n.iter=n.iter.nm)
117-
return(apply_filter(msnid, filtObj.nm))
121+
122+
msnid <- apply_filter(msnid, filtObj.nm)
123+
124+
return(msnid)
118125
}
126+

0 commit comments

Comments
 (0)