Skip to content

Commit 66572fd

Browse files
committed
Merge branch 'fix-51-premature-na' into main
2 parents b91eb12 + 1ce1147 commit 66572fd

8 files changed

+73
-18
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: maldipickr
22
Title: Dereplicate and Cherry-Pick Mass Spectrometry Spectra
3-
Version: 1.3.2
3+
Version: 1.3.1.9000
44
Authors@R: c(
55
person("Charlie", "Pauvert", , "cpauvert@ukaachen.de", role = c("aut", "cre", "cph"),
66
comment = c(ORCID = "0000-0001-9832-2507")),

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# maldipickr (development version)
2+
3+
## Fixed
4+
5+
* Fix unwanted conversion to `NA` of sample names that match well positions such as E1 or E2 (#51, thanks for spotting this @sarah-lital)
6+
17
# maldipickr 1.3.2
28

39
## Fixed

R/read_biotyper_report.R

+14-3
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
#' @param long_format A logical indicating whether the table is in the long format (many rows) or wide format (many columns) when showing all the hits. This option has no effect when `best_hits = TRUE`.
4646
#'
4747
#' @return
48-
#' A tibble of 7 columns (`best_hits = TRUE`) or 52 columns (`best_hits = FALSE`). See Details for the description of the columns.
48+
#' A tibble of 8 columns (`best_hits = TRUE`) or 52 columns (`best_hits = FALSE`). See Details for the description of the columns.
4949
#'
5050
#' @seealso [read_many_biotyper_reports]
5151
#'
@@ -77,14 +77,25 @@ read_biotyper_report <- function(path, best_hits = TRUE, long_format = TRUE) {
7777
path,
7878
col.names = c("name", "sample_name", prep_names$col_names),
7979
sep = ";", header = FALSE,
80-
na = c("NA", "E1", "E2", "") # Added E1 identification in taxid as NA
80+
na = c("NA", "") # Added E1 identification in taxid as NA
8181
)
8282
no_peak_lgl <- breport$bruker_01_species == "no peaks found"
8383

84+
85+
# Converting E1 and E2 to NA is an issue when these are sample names (#51)
86+
# so use a more surgical replacement, because na_if cannot match several values at once
87+
sanitize_taxid <- function(vec_taxid, vec_to_convert){
88+
pattern <- paste0(vec_to_convert, collapse = "|")
89+
base::gsub(pattern, "replaceNA", vec_taxid) %>%
90+
dplyr::na_if("replaceNA") %>% as.numeric()
91+
}
8492
# Remove the spot name for which no peaks were detected, and warn the user
8593
breport <- tibble::as_tibble(breport) %>%
8694
# Empty sample_name are considered logical and this is undesirable
87-
dplyr::mutate("sample_name" = as.character(.data$sample_name)) %>%
95+
dplyr::mutate("sample_name" = as.character(.data$sample_name)) %>%
96+
dplyr::mutate(
97+
dplyr::across(tidyselect::contains("taxid"), ~ sanitize_taxid(.x, c("E1","E2")))
98+
) %>%
8899
dplyr::filter(.data$bruker_01_species != "no peaks found")
89100
if (sum(no_peak_lgl) > 0) {
90101
warning(

dev/import-data.Rmd

+27-4
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ After inflating the template
110110
#' @param long_format A logical indicating whether the table is in the long format (many rows) or wide format (many columns) when showing all the hits. This option has no effect when `best_hits = TRUE`.
111111
#'
112112
#' @return
113-
#' A tibble of 7 columns (`best_hits = TRUE`) or 52 columns (`best_hits = FALSE`). See Details for the description of the columns.
113+
#' A tibble of 8 columns (`best_hits = TRUE`) or 52 columns (`best_hits = FALSE`). See Details for the description of the columns.
114114
#'
115115
#' @seealso [read_many_biotyper_reports]
116116
#'
@@ -136,14 +136,25 @@ read_biotyper_report <- function(path, best_hits = TRUE, long_format = TRUE) {
136136
path,
137137
col.names = c("name", "sample_name", prep_names$col_names),
138138
sep = ";", header = FALSE,
139-
na = c("NA", "E1", "E2", "") # Added E1 identification in taxid as NA
139+
na = c("NA", "") # Added E1 identification in taxid as NA
140140
)
141141
no_peak_lgl <- breport$bruker_01_species == "no peaks found"
142142
143+
144+
# Converting E1 and E2 to NA is an issue when these are sample names (#51)
145+
# so use a more surgical replacement, because na_if cannot match several values at once
146+
sanitize_taxid <- function(vec_taxid, vec_to_convert){
147+
pattern <- paste0(vec_to_convert, collapse = "|")
148+
base::gsub(pattern, "replaceNA", vec_taxid) %>%
149+
dplyr::na_if("replaceNA") %>% as.numeric()
150+
}
143151
# Remove the spot name for which no peaks were detected, and warn the user
144152
breport <- tibble::as_tibble(breport) %>%
145153
# Empty sample_name are considered logical and this is undesirable
146-
dplyr::mutate("sample_name" = as.character(.data$sample_name)) %>%
154+
dplyr::mutate("sample_name" = as.character(.data$sample_name)) %>%
155+
dplyr::mutate(
156+
dplyr::across(tidyselect::contains("taxid"), ~ sanitize_taxid(.x, c("E1","E2")))
157+
) %>%
147158
dplyr::filter(.data$bruker_01_species != "no peaks found")
148159
if (sum(no_peak_lgl) > 0) {
149160
warning(
@@ -280,7 +291,7 @@ After inflating the template
280291
# Test with a correct and empty datasets in "inst/"
281292
biotyper <- system.file("biotyper.csv", package = "maldipickr")
282293
biotyper_empty <- system.file("biotyper_empty.csv", package = "maldipickr")
283-
294+
biotyper_fixNA_51 <- system.file("biotyper_fixNA_51.csv", package = "maldipickr")
284295
# Apply test on my function
285296
test_that("read_biotyper_report works properly with correct dataset and best hits", {
286297
expect_equal(
@@ -324,6 +335,18 @@ test_that("read_biotyper_report is empty when no peaks are found", {
324335
nrow(out), 0
325336
)
326337
})
338+
test_that("read_biotyper_report works properly when sample can be named E1, E2 which used to be NA values", {
339+
expect_equal(
340+
nrow(read_biotyper_report(biotyper_fixNA_51)), 2
341+
)
342+
expect_equal(
343+
ncol(read_biotyper_report(biotyper_fixNA_51)), 8
344+
)
345+
expect_equal(
346+
dplyr::pull(read_biotyper_report(biotyper_fixNA_51), bruker_taxid),
347+
c(1351, NA)
348+
)
349+
})
327350
```
328351

329352
## Importing multiple reports

inst/biotyper_fixNA_51.csv

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
E1;;+++;Enterococcus faecalis;1351;0ae85b21-2783-4fe6-9c04-96877702d15a;2.24;+++;1_Enterococcus faecalis;145093567;3d377ee2-e73f-433a-8246-9cc0ae324d03;2.21;+++;Enterococcus faecalis;1351;0bba480e-241f-4e83-b6f5-58e80a06389e;2.21;+++;Enterococcus faecalis;1351;a4ab8bfd-4128-4b50-9270-292f99bdad8d;2.13;+++;Enterococcus faecalis;1351;1cc3022d-bf5d-422d-aa5a-c1c5889e84de;2.08;+++;Enterococcus faecalis;1351;4cea4049-e540-44da-a3c7-28a257f47036;2.03;+++;Enterococcus faecalis;1351;f3aaad09-d984-4b9c-b55c-f2437ed1217d;2.00;+;Enterococcus faecalis;1351;46d48674-c407-48fe-b85b-300977b0558f;1.91;+;Enterococcus faecalis;1351;a82c73c8-b1b2-4bda-bb0d-2cae2de47f5c;1.90;+;Enterococcus faecalis;1351;ba0bb1df-5786-450a-ac91-fe4096b1a3e1;1.80
2+
E2;;-;not reliable identification;E1;3f402e73-bcef-40b1-9014-8a28878f12c5;1.62;-;not reliable identification;E1;631e1d31-81bd-4f97-b16b-195a5e43cfa9;1.40;-;not reliable identification;E1;13a7f9d9-6248-415b-bb12-ca4fe9082a8c;1.39;-;not reliable identification;E1;beca17ff-a591-440f-8f9e-7ef5b9c3d1ce;1.38;-;not reliable identification;E1;4a752505-cecf-4f86-96bf-5dc9de266913;1.37;-;not reliable identification;E1;689fb704-a4f4-46d6-92c7-0e5936a639a5;1.34;-;not reliable identification;E1;f2cfe654-ff28-4488-b0e6-d60e062cb065;1.34;-;not reliable identification;E1;bd6f9303-3d16-4cd8-8fa9-98aefe46db9f;1.33;-;not reliable identification;E1;79b6a458-3dc6-45a2-a8b6-acc0e5e8c730;1.31;-;not reliable identification;E1;28e961b0-791d-487c-8e95-76833bf55e44;1.31

man/read_biotyper_report.Rd

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

renv.lock

+9-8
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@
712712
},
713713
"knitr": {
714714
"Package": "knitr",
715-
"Version": "1.47",
715+
"Version": "1.49",
716716
"Source": "Repository",
717717
"Repository": "RSPM",
718718
"Requirements": [
@@ -724,7 +724,7 @@
724724
"xfun",
725725
"yaml"
726726
],
727-
"Hash": "7c99b2d55584b982717fcc0950378612"
727+
"Hash": "9fcb189926d93c636dea94fbe4f44480"
728728
},
729729
"later": {
730730
"Package": "later",
@@ -1138,14 +1138,14 @@
11381138
},
11391139
"rlang": {
11401140
"Package": "rlang",
1141-
"Version": "1.1.4",
1141+
"Version": "1.1.5",
11421142
"Source": "Repository",
11431143
"Repository": "RSPM",
11441144
"Requirements": [
11451145
"R",
11461146
"utils"
11471147
],
1148-
"Hash": "3eec01f8b1dee337674b2e34ab1f9bc1"
1148+
"Hash": "724dcc1490cd7071ee75ca2994a5446e"
11491149
},
11501150
"rmarkdown": {
11511151
"Package": "rmarkdown",
@@ -1172,7 +1172,7 @@
11721172
},
11731173
"roxygen2": {
11741174
"Package": "roxygen2",
1175-
"Version": "7.2.3",
1175+
"Version": "7.3.2",
11761176
"Source": "Repository",
11771177
"Repository": "RSPM",
11781178
"Requirements": [
@@ -1194,7 +1194,7 @@
11941194
"withr",
11951195
"xml2"
11961196
],
1197-
"Hash": "7b153c746193b143c14baa072bae4e27"
1197+
"Hash": "6ee25f9054a70f44d615300ed531ba8d"
11981198
},
11991199
"rprojroot": {
12001200
"Package": "rprojroot",
@@ -1611,15 +1611,16 @@
16111611
},
16121612
"xfun": {
16131613
"Package": "xfun",
1614-
"Version": "0.44",
1614+
"Version": "0.51",
16151615
"Source": "Repository",
16161616
"Repository": "RSPM",
16171617
"Requirements": [
1618+
"R",
16181619
"grDevices",
16191620
"stats",
16201621
"tools"
16211622
],
1622-
"Hash": "317a0538d32f4a009658bcedb7923f4b"
1623+
"Hash": "e1a3c06389a46d065c18bd4bbc27c64c"
16231624
},
16241625
"xml2": {
16251626
"Package": "xml2",

tests/testthat/test-read_biotyper_report.R

+13-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# Test with a correct and empty datasets in "inst/"
88
biotyper <- system.file("biotyper.csv", package = "maldipickr")
99
biotyper_empty <- system.file("biotyper_empty.csv", package = "maldipickr")
10-
10+
biotyper_fixNA_51 <- system.file("biotyper_fixNA_51.csv", package = "maldipickr")
1111
# Apply test on my function
1212
test_that("read_biotyper_report works properly with correct dataset and best hits", {
1313
expect_equal(
@@ -51,3 +51,15 @@ test_that("read_biotyper_report is empty when no peaks are found", {
5151
nrow(out), 0
5252
)
5353
})
54+
test_that("read_biotyper_report works properly when sample can be named E1, E2 which used to be NA values", {
55+
expect_equal(
56+
nrow(read_biotyper_report(biotyper_fixNA_51)), 2
57+
)
58+
expect_equal(
59+
ncol(read_biotyper_report(biotyper_fixNA_51)), 8
60+
)
61+
expect_equal(
62+
dplyr::pull(read_biotyper_report(biotyper_fixNA_51), bruker_taxid),
63+
c(1351, NA)
64+
)
65+
})

0 commit comments

Comments
 (0)