diff --git a/.Rbuildignore b/.Rbuildignore
index abbad15..d887687 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -9,3 +9,5 @@
^CRAN-RELEASE$
^cran-comments.md$
^\.github$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
index 1396e56..4ae3cb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
*.o
*.rds
CRAN-RELEASE
+.Rproj.user
+.Rproj
diff --git a/DESCRIPTION b/DESCRIPTION
index f619a64..cc2068c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,12 +1,27 @@
Package: MALDIquant
-Version: 1.20
+Version: 1.20.0.9999
Date: 2021-07-29
Title: Quantitative Analysis of Mass Spectrometry Data
-Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"),
+Authors@R: c(
+ person(
+ given = "Sebastian", family = "Gibb",
email="mail@sebastiangibb.de",
- comment=c(ORCID="0000-0001-7406-4443")), person("Korbinian",
- "Strimmer", role="ths",
- comment=c(ORCID="0000-0001-7917-2056")))
+ comment=c(ORCID="0000-0001-7406-4443"),
+ role = c("aut", "cre")
+ ),
+ person(
+ given = "Korbinian", family = "Strimmer",
+ comment=c(ORCID="0000-0001-7917-2056"),
+ role="ths"
+ ),
+ person(given = "Sigurdur", family = "Smarason", role = "ctb"),
+ person(
+ given = "Laurent", family = "Gatto",
+ email = "laurent.gatto@uclouvain.be",
+ comment = c(ORCID = "0000-0002-1520-2268"),
+ role = "ctb"
+ ),
+ person(given = "Paolo", "family = Inglese", role = "ctb"))
Depends: R (>= 4.0.0), methods
Imports: parallel
Suggests: knitr, testthat (>= 0.8)
@@ -26,4 +41,4 @@ URL: https://www.strimmerlab.org/software/maldiquant/
BugReports: https://github.com/sgibb/MALDIquant/issues/
LazyLoad: yes
VignetteBuilder: knitr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
diff --git a/NEWS b/NEWS
index 7379c90..a9c3a71 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,23 @@
RELEASE HISTORY OF THE "MALDIquant" PACKAGE
===========================================
+CHANGES IN MALDIquant VERSION 1.20.0.9999 [unreleased]:
+-------------------------------------------------------
+
+MODIFICATIONS
+
+* Reduce memory requirement for `filterPeaks`, especially for very sparse peak
+ lists by rewriting `filterPeaks` and `.as.matrix.MassObjectList` to use a
+ `list` internally instead of a `matrix`; see #71.
+ Contributed by Paolo Inglese
(@paoloinglese).
+
+INTERNAL CHANGES
+
+* Remove `is.null(getGeneric(...))` tests before setting generics for
+ S4 methods to avoid errors in package loading (especially with
+ `pkgload::load_all()`).
+
+
CHANGES IN MALDIquant VERSION 1.20 [2021-07-29]:
------------------------------------------------
diff --git a/R/AllGenerics.R b/R/AllGenerics.R
index 8958a47..1020ec6 100644
--- a/R/AllGenerics.R
+++ b/R/AllGenerics.R
@@ -1,123 +1,79 @@
## AbstractMassObject
-if (is.null(getGeneric("plotMsiSlice"))) {
- setGeneric("plotMsiSlice", function(x, ...) standardGeneric("plotMsiSlice"))
-}
-if (is.null(getGeneric(".prepareShow"))) {
- setGeneric(".prepareShow", function(object) standardGeneric(".prepareShow"))
-}
-if (is.null(getGeneric("transformIntensity"))) {
- setGeneric("transformIntensity",
- function(object, ...) standardGeneric("transformIntensity"))
-}
-if (is.null(getGeneric(".transformIntensity"))) {
- setGeneric(".transformIntensity",
- function(object, ...) standardGeneric(".transformIntensity"))
-}
-if (is.null(getGeneric("trim"))) {
- setGeneric("trim", function(object, range, ...) standardGeneric("trim"))
-}
+setGeneric("plotMsiSlice", function(x, ...) standardGeneric("plotMsiSlice"))
+setGeneric(".prepareShow", function(object) standardGeneric(".prepareShow"))
+setGeneric(
+ "transformIntensity",
+ function(object, ...) standardGeneric("transformIntensity")
+)
+setGeneric(
+ ".transformIntensity",
+ function(object, ...) standardGeneric(".transformIntensity")
+)
+setGeneric("trim", function(object, range, ...) standardGeneric("trim"))
## get/set slots
-if (is.null(getGeneric("mass"))) {
- setGeneric("mass", function(object, ...) standardGeneric("mass"))
-}
-if (is.null(getGeneric("mass<-"))) {
- setGeneric("mass<-", function(object, value) standardGeneric("mass<-"))
-}
+setGeneric("mass", function(object, ...) standardGeneric("mass"))
+setGeneric("mass<-", function(object, value) standardGeneric("mass<-"))
+
# from ProtGenerics (same as mass)
-if (is.null(getGeneric("mz"))) {
- setGeneric("mz", function(object, ...) standardGeneric("mz"))
-}
-if (is.null(getGeneric("mz<-"))) {
- setGeneric("mz<-", function(object, value) standardGeneric("mz<-"))
-}
-if (is.null(getGeneric("intensity"))) {
- setGeneric("intensity", function(object, ...) standardGeneric("intensity"))
-}
-if (is.null(getGeneric("intensity<-"))) {
- setGeneric("intensity<-",
- function(object, value) standardGeneric("intensity<-"))
-}
-if (is.null(getGeneric("isEmpty"))) {
- setGeneric("isEmpty", function(x) standardGeneric("isEmpty"))
-}
-if (is.null(getGeneric(".isEmptyWarning"))) {
- setGeneric(".isEmptyWarning", function(x) standardGeneric(".isEmptyWarning"))
-}
-if (is.null(getGeneric("metaData"))) {
- setGeneric("metaData", function(object) standardGeneric("metaData"))
-}
-if (is.null(getGeneric("metaData<-"))) {
- setGeneric("metaData<-",
- function(object, value) standardGeneric("metaData<-"))
-}
-if (is.null(getGeneric("coordinates"))) {
- setGeneric("coordinates",
- function(object, ...) standardGeneric("coordinates"))
-}
-if (is.null(getGeneric("coordinates<-"))) {
- setGeneric("coordinates<-",
- function(object, value) standardGeneric("coordinates<-"))
-}
+setGeneric("mz", function(object, ...) standardGeneric("mz"))
+setGeneric("mz<-", function(object, value) standardGeneric("mz<-"))
+setGeneric("intensity", function(object, ...) standardGeneric("intensity"))
+setGeneric(
+ "intensity<-",
+ function(object, value) standardGeneric("intensity<-")
+)
+setGeneric("isEmpty", function(x) standardGeneric("isEmpty"))
+setGeneric(".isEmptyWarning", function(x) standardGeneric(".isEmptyWarning"))
+setGeneric("metaData", function(object) standardGeneric("metaData"))
+setGeneric("metaData<-", function(object, value) standardGeneric("metaData<-"))
+setGeneric("coordinates", function(object, ...) standardGeneric("coordinates"))
+setGeneric(
+ "coordinates<-",
+ function(object, value) standardGeneric("coordinates<-")
+)
## end of AbstractMassObject
## MassSpectrum
-if (is.null(getGeneric("calibrateIntensity"))) {
- setGeneric("calibrateIntensity",
- function(object, ...) standardGeneric("calibrateIntensity"))
-}
-if (is.null(getGeneric("detectPeaks"))) {
- setGeneric("detectPeaks",
- function(object, ...) standardGeneric("detectPeaks"))
-}
-if (is.null(getGeneric("estimateBaseline"))) {
- setGeneric("estimateBaseline",
- function(object, method=c("SNIP", "ConvexHull", "Median"), ...)
- standardGeneric("estimateBaseline"))
-}
-if (is.null(getGeneric("estimateNoise"))) {
- setGeneric("estimateNoise",
- function(object, ...) standardGeneric("estimateNoise"))
-}
-if (is.null(getGeneric(".findLocalMaxima"))) {
- setGeneric(".findLocalMaxima",
- function(object, halfWindowSize=20L)
- standardGeneric(".findLocalMaxima"))
-}
-if (is.null(getGeneric(".findLocalMaximaLogical"))) {
- setGeneric(".findLocalMaximaLogical",
- function(object, halfWindowSize=20L)
- standardGeneric(".findLocalMaximaLogical"))
-}
-if (is.null(getGeneric("isRegular"))) {
- setGeneric("isRegular",
- function(object, ...) standardGeneric("isRegular"))
-}
-if (is.null(getGeneric("removeBaseline"))) {
- setGeneric("removeBaseline",
- function(object, ...) standardGeneric("removeBaseline"))
-}
-if (is.null(getGeneric("smoothIntensity"))) {
- setGeneric("smoothIntensity",
- function(object, ...)
- standardGeneric("smoothIntensity"))
-}
-if (is.null(getGeneric("totalIonCurrent"))) {
- setGeneric("totalIonCurrent",
- function(object) standardGeneric("totalIonCurrent"))
-}
+setGeneric(
+ "calibrateIntensity",
+ function(object, ...) standardGeneric("calibrateIntensity")
+)
+setGeneric("detectPeaks", function(object, ...) standardGeneric("detectPeaks"))
+setGeneric(
+ "estimateBaseline",
+ function(object, method=c("SNIP", "ConvexHull", "Median"), ...)
+ standardGeneric("estimateBaseline"))
+setGeneric(
+ "estimateNoise", function(object, ...) standardGeneric("estimateNoise")
+)
+setGeneric(
+ ".findLocalMaxima",
+ function(object, halfWindowSize=20L) standardGeneric(".findLocalMaxima")
+)
+setGeneric(
+ ".findLocalMaximaLogical",
+ function(object, halfWindowSize=20L)
+ standardGeneric(".findLocalMaximaLogical")
+)
+setGeneric("isRegular", function(object, ...) standardGeneric("isRegular"))
+setGeneric(
+ "removeBaseline", function(object, ...) standardGeneric("removeBaseline")
+)
+setGeneric(
+ "smoothIntensity", function(object, ...) standardGeneric("smoothIntensity")
+)
+setGeneric(
+ "totalIonCurrent", function(object) standardGeneric("totalIonCurrent")
+)
## end of MassSpectrum
## MassPeaks
-if (is.null(getGeneric("labelPeaks"))) {
- setGeneric("labelPeaks", function(object, ...) standardGeneric("labelPeaks"))
-}
-if (is.null(getGeneric("monoisotopicPeaks"))) {
- setGeneric("monoisotopicPeaks",
- function(object, ...) standardGeneric("monoisotopicPeaks"))
-}
-if (is.null(getGeneric("snr"))) {
- setGeneric("snr", function(object) standardGeneric("snr"))
-}
+setGeneric("labelPeaks", function(object, ...) standardGeneric("labelPeaks"))
+setGeneric(
+ "monoisotopicPeaks",
+ function(object, ...) standardGeneric("monoisotopicPeaks")
+)
+setGeneric("snr", function(object) standardGeneric("snr"))
## end of MassPeaks
diff --git a/R/as.list-functions.R b/R/as.list-functions.R
new file mode 100644
index 0000000..2f7b3f5
--- /dev/null
+++ b/R/as.list-functions.R
@@ -0,0 +1,22 @@
+## .as.occurrence.list
+## internal function to create a list of peaks occurrence
+##
+## params:
+## l: list of AbstractMassObject objects
+##
+## returns:
+## a list, where sample is the sample id, i is the index of the uniqueMass,
+## and mass is the unique mass vector
+.as.occurrence.list.MassObjectList <- function(l) {
+ .stopIfNotIsMassObjectList(l)
+
+ mass <- .unlist(lapply(l, function(x)x@mass))
+ uniqueMass <- sort.int(unique(mass))
+ n <- lengths(l)
+
+ list(
+ sample = rep.int(seq_along(l), lengths(l)),
+ i = findInterval(mass, uniqueMass),
+ mass = uniqueMass
+ )
+}
diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R
index e9472de..d9c4aef 100644
--- a/R/as.matrix-functions.R
+++ b/R/as.matrix-functions.R
@@ -8,21 +8,18 @@
## returns:
## a matrix
.as.matrix.MassObjectList <- function(l) {
- .stopIfNotIsMassObjectList(l)
+ .stopIfNotIsMassObjectList(l)
- mass <- .unlist(lapply(l, function(x)x@mass))
- intensity <- .unlist(lapply(l, function(x)x@intensity))
- uniqueMass <- sort.int(unique(mass))
- n <- lengths(l)
- r <- rep.int(seq_along(l), n)
+ intensity <- .unlist(lapply(l, function(x)x@intensity))
+ o <- .as.occurrence.list.MassObjectList(l)
- i <- findInterval(mass, uniqueMass)
-
- m <- matrix(NA_real_, nrow=length(l), ncol=length(uniqueMass),
- dimnames=list(NULL, uniqueMass))
- m[cbind(r, i)] <- intensity
- attr(m, "mass") <- uniqueMass
- m
+ m <- matrix(
+ NA_real_, nrow=length(l), ncol=length(o$mass),
+ dimnames=list(NULL, o$mass)
+ )
+ m[cbind(o$sample, o$i)] <- intensity
+ attr(m, "mass") <- o$mass
+ m
}
## .as.binary.matrix
@@ -34,10 +31,9 @@
## returns:
## a binary matrix
.as.binary.matrix <- function(m) {
- stopifnot(is.matrix(m))
- isNA <- which(is.na(m))
- m[] <- 1L
- m[isNA] <- 0L
- mode(m) <- "integer"
- m
+ if (!is.matrix(m))
+ stop("'x' has to be a matrix!")
+ m[] <- !is.na(m)
+ mode(m) <- "integer"
+ m
}
diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R
index 9595939..06137ee 100644
--- a/R/filterPeaks-functions.R
+++ b/R/filterPeaks-functions.R
@@ -49,44 +49,44 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
## recycle arguments if needed
minFrequency <- rep_len(minFrequency, nl)
minNumber <- rep_len(minNumber, nl)
- mergeWhitelists <- mergeWhitelists[1]
+ mergeWhitelists <- isTRUE(mergeWhitelists)
- ## binary peak matrix (mask)
- m <- .as.binary.matrix(.as.matrix.MassObjectList(l))
+ ## use peaks occurrence list
+ o <- .as.occurrence.list.MassObjectList(l)
- ## whitelist
- w <- matrix(0L, nrow=nrow(m), ncol=ncol(m))
-
- ## group indices by labels
+ # group indices by labels
idx <- lapply(ll, function(x)which(labels == x))
## collect whitelists
+ w <- matrix(FALSE, nrow = nl, ncol = length(o$mass))
+
for (i in seq_along(idx)) {
- wl <- .whitelist(m, idx[[i]],
- minFrequency=minFrequency[i], minNumber=minNumber[i])
+ wl <- .whitelist(
+ o, idx[[i]], minFrequency = minFrequency[i], minNumber = minNumber[i]
+ )
+
if (sum(wl)) {
if (mergeWhitelists) {
## R uses columnwise recycling
w <- t(t(w) | wl)
} else {
## R uses columnwise recycling
- w[idx[[i]], ] <- t(t(w[idx[[i]], , drop=FALSE]) | wl)
+ w[i, ] <- t(t(w[i, , drop=FALSE]) | wl)
}
} else {
warning("Empty peak whitelist for level ", sQuote(ll[i]), ".")
}
- }
- ## apply whitelist
- w <- w & m
+ }
## turn matrix back into MassPeaks objects
- for (i in seq_along(l)) {
- j <- which(as.logical(m[i, ]))
- include <- which(w[i, j])
- l[[i]]@mass <- l[[i]]@mass[include]
- l[[i]]@intensity <- l[[i]]@intensity[include]
- l[[i]]@snr <- l[[i]]@snr[include]
+ for (i in seq_along(idx)) {
+ for (j in idx[[i]]) {
+ wmask <- w[i, o$i[o$sample == j]]
+ l[[j]]@mass <- l[[j]]@mass[wmask]
+ l[[j]]@intensity <- l[[j]]@intensity[wmask]
+ l[[j]]@snr <- l[[j]]@snr[wmask]
+ }
}
l
@@ -96,7 +96,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
## helper function to create whitelists for filtering
##
## params:
-## m: matrix
+## l: list of occurrences, generated by .as.occurrence.list.MassObjectList
## rows: index of rows which should filtered
## minFrequency: double, minimal frequency of a peak to be not removed
## minNumber: double, minimal (absolute) number of peaks to be not removed
@@ -104,8 +104,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
## returns:
## a logical vector representing the whitelist
##
-.whitelist <- function(m, rows, minFrequency, minNumber) {
-
+.whitelist <- function(l, rows, minFrequency, minNumber) {
## test arguments
if (is.na(minFrequency) && is.na(minNumber)) {
stop(sQuote(minFrequency), " or ", sQuote(minNumber),
@@ -129,7 +128,8 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
}
## calculate minimal number of peaks
- minPeakNumber <- max(minFrequency * length(rows), minNumber, na.rm=TRUE)
+ minPeakNumber <-
+ max(minFrequency * length(rows), minNumber, na.rm=TRUE)
- colSums(m[rows, , drop=FALSE]) >= minPeakNumber
+ tabulate(l$i[l$sample %in% rows], nbins = length(l$mass)) >= minPeakNumber
}
diff --git a/tests/testthat/test_as.list-functions.R b/tests/testthat/test_as.list-functions.R
new file mode 100644
index 0000000..7120c00
--- /dev/null
+++ b/tests/testthat/test_as.list-functions.R
@@ -0,0 +1,21 @@
+p <- list(createMassPeaks(mass=1:4, intensity=11:14),
+ createMassPeaks(mass=2:5, intensity=22:25))
+
+l <- list(
+ sample = rep(1:2, each = 4),
+ i = c(1:4, 2:5),
+ mass = 1:5
+)
+
+test_that(".as.occurrence.list.MassObjectList throws errors", {
+ expect_error(.as.occurrence.list.MassObjectList(p[[1]]),
+ "no list of MALDIquant::AbstractMassObject objects")
+ expect_error(.as.occurrence.list.MassObjectList(list()),
+ "no list of MALDIquant::AbstractMassObject objects")
+ expect_error(.as.occurrence.list.MassObjectList(list(p, l)),
+ "no list of MALDIquant::AbstractMassObject objects")
+})
+
+test_that(".as.occurrence.list.MassObjectList", {
+ expect_identical(.as.occurrence.list.MassObjectList(p), l)
+})