1
- # ' Filtering msfragger Data
1
+ # ' Filtering MSFragger Data
2
+ # '
3
+ # ' Filters MSFragger data. In this implementation, the peptide-level filter
4
+ # ' optimizes both the parent mass error (ppm) threshold and either the
5
+ # ' Expectation or the PeptideProphet Probability threshold to achieve the
6
+ # ' maximum number of peptide identifications within a given FDR constraint.
2
7
# '
3
- # ' Filtering msfragger data. In this implementation, the peptide-level filter
4
- # ' optimizes both ppm and one of Expectation or PeptideProphet Probability thresholds
5
- # ' to achieve maximum number of peptide identifications within a given FDR constraint.
6
8
# ' The accession-level filter optimizes based on `peptides_per_1000aa`, so
7
9
# ' \code{\link{compute_num_peptides_per_1000aa}} must be used first.
8
10
# '
9
11
# ' @md
10
12
# '
11
- # ' @param msnid (MSnID object) collated msfragger output
13
+ # ' @param msnid (MSnID object) collated MSFragger output
12
14
# ' @param fdr.max (numeric) Maximum acceptable FDR. Default is 0.01 (1%).
13
15
# ' @param level (character) Level at which to perform FDR filter. The name of a
14
16
# ' column in `psms(msnid)`. Currently, only `"peptide"` or `"accession"` are
15
- # ' supported. The added level `SiteID` makes sense only for PTM data and
17
+ # ' supported. The added level `"SiteID"` makes sense only for PTM data and
16
18
# ' first requires mapping of the modification site using
17
- # ' `MSnID::map_mod_sites`.
18
- # ' @param filtering_criterion (character) One of "evalue" which is
19
- # ' expectation value or "pp_prob" - peptide prophet probability. Default is
20
- # ' "pp_prob".
19
+ # ' `MSnID::map_mod_sites`.
20
+ # ' @param filtering_criterion (character) One of `"evalue"` (expectation
21
+ # ' value) or `"pp_prob"` (PeptideProphet probability). Default is `"pp_prob"`.
21
22
# ' @param n.iter.grid (numeric) number of grid-distributed evaluation points.
22
23
# ' @param n.iter.nm (numeric) number of iterations for Nelder-Mead optimization
23
24
# ' algorithm.
24
- # ' @param ... arguments passed to `filter_msfragger_data`.
25
25
# '
26
- # ' @return (MSnID object) filtered msfragger output
26
+ # ' @return (MSnID object) filtered MSFragger output
27
27
# '
28
- # ' @seealso
29
- # ' \code{\link[MSnID]{MSnIDFilter}}
30
- # ' \code{\link[MSnID]{optimize_filter}}
31
- # ' \code{\link[MSnID]{apply_filter}}
28
+ # ' @seealso \code{\link[MSnID]{MSnIDFilter}}
29
+ # ' \code{\link[MSnID]{optimize_filter}} \code{\link[MSnID]{apply_filter}}
32
30
# '
33
- # ' @importFrom MSnID MSnIDFilter optimize_filter
34
- # ' mass_measurement_error apply_filter
31
+ # ' @importFrom MSnID MSnIDFilter optimize_filter mass_measurement_error
32
+ # ' apply_filter
35
33
36
34
37
35
# ' @export
38
36
filter_msfragger_data <- function (msnid ,
39
- level ,
40
- filtering_criterion = c(" pp_prob" ," evalue" ),
41
- fdr.max = 0.01 ,
42
- n.iter.grid = 500 ,
43
- n.iter.nm = 100 ){
37
+ level ,
38
+ filtering_criterion = c(" pp_prob" , " evalue" ),
39
+ fdr.max = 0.01 ,
40
+ n.iter.grid = 500 ,
41
+ n.iter.nm = 100 ){
44
42
45
43
# Clean up on exit
46
44
on.exit(rm(list = ls()))
@@ -49,18 +47,18 @@ filter_msfragger_data <- function(msnid,
49
47
# Check input
50
48
level <- match.arg(level , choices = c(" peptide" , " accession" , " SiteID" ))
51
49
filtering_criterion <- match.arg(filtering_criterion )
52
-
53
- if (level == " SiteID" & ! (" SiteID" %in% names(msnid )))
54
- stop(" Column 'SiteID' is not in the MSnID object. Please map the PTMs first." )
50
+
51
+ if (level == " SiteID" & ! (" SiteID" %in% names(msnid ))) {
52
+ stop(" Column 'SiteID' is not in the MSnID object. Please map the PTMs first." )
53
+ }
55
54
56
55
keep_cols <- c(level , " isDecoy" ) # columns to calculate FDR
57
56
58
57
# Create MSnID of minimum size
59
58
suppressMessages(msnid_small <- MSnID())
60
-
59
+
61
60
# Setup
62
61
if (level == " accession" ) {
63
-
64
62
# Add filter criteria column
65
63
keep_cols <- c(keep_cols , " peptides_per_1000aa" )
66
64
msnid_small @ psms <- unique(msnid @ psms [, keep_cols , with = FALSE ])
@@ -69,36 +67,41 @@ filter_msfragger_data <- function(msnid,
69
67
filtObj <- MSnIDFilter(msnid_small )
70
68
filtObj $ peptides_per_1000aa <- list (comparison = " >" , threshold = 1 )
71
69
method <- " SANN"
70
+
72
71
} else {
73
- # Choose filter object probability value
74
- if (filtering_criterion == " evalue" ) {
75
- msnid $ msmsScore <- - log10(msnid $ Expectation )
76
- }
77
- if (filtering_criterion == " pp_prob" ) {
78
- msnid $ msmsScore <- msnid $ `PeptideProphet Probability`
79
- }
80
- # Create columns for peptide filtering
81
- # Can not use data.table syntax if the msnid has been modified at all,
82
- # as it results in the "Invalid .internal.selfref" warning and
83
- # columns not being created.
84
- msnid $ absParentMassErrorPPM <- abs(mass_measurement_error(msnid ))
85
-
86
- # Add filter criteria columns
87
- keep_cols <- c(keep_cols , " msmsScore" , " absParentMassErrorPPM" )
88
- msnid_small @ psms <- unique(msnid @ psms [, keep_cols , with = FALSE ])
89
-
90
- # Create filter object
91
- filtObj <- MSnIDFilter(msnid_small )
92
- filtObj $ absParentMassErrorPPM <- list (comparison = " <" , threshold = 10 )
93
- if (filtering_criterion == " evalue" ) {
94
- filtObj $ msmsScore <- list (comparison = " >" , threshold = 2 )
95
- }
96
- if (filtering_criterion == " pp_prob" ) {
97
- filtObj $ msmsScore <- list (comparison = " >" , threshold = 0.99 )
98
- }
99
- method <- " Nelder-Mead"
72
+ # Choose filter object probability value
73
+ if (filtering_criterion == " evalue" ) {
74
+ msnid $ msmsScore <- - log10(msnid $ Expectation )
75
+ }
76
+
77
+ if (filtering_criterion == " pp_prob" ) {
78
+ msnid $ msmsScore <- msnid $ `PeptideProphet Probability`
79
+ }
80
+
81
+ # Create columns for peptide filtering
82
+ # Can not use data.table syntax if the msnid has been modified at all,
83
+ # as it results in the "Invalid .internal.selfref" warning and
84
+ # columns not being created.
85
+ msnid $ absParentMassErrorPPM <- abs(mass_measurement_error(msnid ))
86
+
87
+ # Add filter criteria columns
88
+ keep_cols <- c(keep_cols , " msmsScore" , " absParentMassErrorPPM" )
89
+ msnid_small @ psms <- unique(msnid @ psms [, keep_cols , with = FALSE ])
90
+
91
+ # Create filter object
92
+ filtObj <- MSnIDFilter(msnid_small )
93
+ filtObj $ absParentMassErrorPPM <- list (comparison = " <" , threshold = 10 )
94
+
95
+ if (filtering_criterion == " evalue" ) {
96
+ filtObj $ msmsScore <- list (comparison = " >" , threshold = 2 )
97
+ }
98
+
99
+ if (filtering_criterion == " pp_prob" ) {
100
+ filtObj $ msmsScore <- list (comparison = " >" , threshold = 0.99 )
101
+ }
102
+
103
+ method <- " Nelder-Mead"
100
104
}
101
-
102
105
103
106
# step 1
104
107
filtObj.grid <- optimize_filter(filtObj ,
@@ -107,12 +110,17 @@ filter_msfragger_data <- function(msnid,
107
110
method = " Grid" ,
108
111
level = level ,
109
112
n.iter = n.iter.grid )
113
+
110
114
# step 2
111
115
filtObj.nm <- optimize_filter(filtObj.grid ,
112
116
msnid_small ,
113
117
fdr.max = fdr.max ,
114
118
method = method ,
115
119
level = level ,
116
120
n.iter = n.iter.nm )
117
- return (apply_filter(msnid , filtObj.nm ))
121
+
122
+ msnid <- apply_filter(msnid , filtObj.nm )
123
+
124
+ return (msnid )
118
125
}
126
+
0 commit comments