diff --git a/.Rbuildignore b/.Rbuildignore index abbad15..d887687 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ ^CRAN-RELEASE$ ^cran-comments.md$ ^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 1396e56..4ae3cb1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ *.o *.rds CRAN-RELEASE +.Rproj.user +.Rproj diff --git a/DESCRIPTION b/DESCRIPTION index f619a64..cc2068c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,27 @@ Package: MALDIquant -Version: 1.20 +Version: 1.20.0.9999 Date: 2021-07-29 Title: Quantitative Analysis of Mass Spectrometry Data -Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"), +Authors@R: c( + person( + given = "Sebastian", family = "Gibb", email="mail@sebastiangibb.de", - comment=c(ORCID="0000-0001-7406-4443")), person("Korbinian", - "Strimmer", role="ths", - comment=c(ORCID="0000-0001-7917-2056"))) + comment=c(ORCID="0000-0001-7406-4443"), + role = c("aut", "cre") + ), + person( + given = "Korbinian", family = "Strimmer", + comment=c(ORCID="0000-0001-7917-2056"), + role="ths" + ), + person(given = "Sigurdur", family = "Smarason", role = "ctb"), + person( + given = "Laurent", family = "Gatto", + email = "laurent.gatto@uclouvain.be", + comment = c(ORCID = "0000-0002-1520-2268"), + role = "ctb" + ), + person(given = "Paolo", "family = Inglese", role = "ctb")) Depends: R (>= 4.0.0), methods Imports: parallel Suggests: knitr, testthat (>= 0.8) @@ -26,4 +41,4 @@ URL: https://www.strimmerlab.org/software/maldiquant/ BugReports: https://github.com/sgibb/MALDIquant/issues/ LazyLoad: yes VignetteBuilder: knitr -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 diff --git a/NEWS b/NEWS index 7379c90..a9c3a71 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,23 @@ RELEASE HISTORY OF THE "MALDIquant" PACKAGE =========================================== +CHANGES IN MALDIquant VERSION 1.20.0.9999 [unreleased]: +------------------------------------------------------- + +MODIFICATIONS + +* Reduce memory requirement for `filterPeaks`, especially for very sparse peak + lists by rewriting `filterPeaks` and `.as.matrix.MassObjectList` to use a + `list` internally instead of a `matrix`; see #71. + Contributed by Paolo Inglese (@paoloinglese). + +INTERNAL CHANGES + +* Remove `is.null(getGeneric(...))` tests before setting generics for + S4 methods to avoid errors in package loading (especially with + `pkgload::load_all()`). + + CHANGES IN MALDIquant VERSION 1.20 [2021-07-29]: ------------------------------------------------ diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 8958a47..1020ec6 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -1,123 +1,79 @@ ## AbstractMassObject -if (is.null(getGeneric("plotMsiSlice"))) { - setGeneric("plotMsiSlice", function(x, ...) standardGeneric("plotMsiSlice")) -} -if (is.null(getGeneric(".prepareShow"))) { - setGeneric(".prepareShow", function(object) standardGeneric(".prepareShow")) -} -if (is.null(getGeneric("transformIntensity"))) { - setGeneric("transformIntensity", - function(object, ...) standardGeneric("transformIntensity")) -} -if (is.null(getGeneric(".transformIntensity"))) { - setGeneric(".transformIntensity", - function(object, ...) standardGeneric(".transformIntensity")) -} -if (is.null(getGeneric("trim"))) { - setGeneric("trim", function(object, range, ...) standardGeneric("trim")) -} +setGeneric("plotMsiSlice", function(x, ...) standardGeneric("plotMsiSlice")) +setGeneric(".prepareShow", function(object) standardGeneric(".prepareShow")) +setGeneric( + "transformIntensity", + function(object, ...) standardGeneric("transformIntensity") +) +setGeneric( + ".transformIntensity", + function(object, ...) standardGeneric(".transformIntensity") +) +setGeneric("trim", function(object, range, ...) standardGeneric("trim")) ## get/set slots -if (is.null(getGeneric("mass"))) { - setGeneric("mass", function(object, ...) standardGeneric("mass")) -} -if (is.null(getGeneric("mass<-"))) { - setGeneric("mass<-", function(object, value) standardGeneric("mass<-")) -} +setGeneric("mass", function(object, ...) standardGeneric("mass")) +setGeneric("mass<-", function(object, value) standardGeneric("mass<-")) + # from ProtGenerics (same as mass) -if (is.null(getGeneric("mz"))) { - setGeneric("mz", function(object, ...) standardGeneric("mz")) -} -if (is.null(getGeneric("mz<-"))) { - setGeneric("mz<-", function(object, value) standardGeneric("mz<-")) -} -if (is.null(getGeneric("intensity"))) { - setGeneric("intensity", function(object, ...) standardGeneric("intensity")) -} -if (is.null(getGeneric("intensity<-"))) { - setGeneric("intensity<-", - function(object, value) standardGeneric("intensity<-")) -} -if (is.null(getGeneric("isEmpty"))) { - setGeneric("isEmpty", function(x) standardGeneric("isEmpty")) -} -if (is.null(getGeneric(".isEmptyWarning"))) { - setGeneric(".isEmptyWarning", function(x) standardGeneric(".isEmptyWarning")) -} -if (is.null(getGeneric("metaData"))) { - setGeneric("metaData", function(object) standardGeneric("metaData")) -} -if (is.null(getGeneric("metaData<-"))) { - setGeneric("metaData<-", - function(object, value) standardGeneric("metaData<-")) -} -if (is.null(getGeneric("coordinates"))) { - setGeneric("coordinates", - function(object, ...) standardGeneric("coordinates")) -} -if (is.null(getGeneric("coordinates<-"))) { - setGeneric("coordinates<-", - function(object, value) standardGeneric("coordinates<-")) -} +setGeneric("mz", function(object, ...) standardGeneric("mz")) +setGeneric("mz<-", function(object, value) standardGeneric("mz<-")) +setGeneric("intensity", function(object, ...) standardGeneric("intensity")) +setGeneric( + "intensity<-", + function(object, value) standardGeneric("intensity<-") +) +setGeneric("isEmpty", function(x) standardGeneric("isEmpty")) +setGeneric(".isEmptyWarning", function(x) standardGeneric(".isEmptyWarning")) +setGeneric("metaData", function(object) standardGeneric("metaData")) +setGeneric("metaData<-", function(object, value) standardGeneric("metaData<-")) +setGeneric("coordinates", function(object, ...) standardGeneric("coordinates")) +setGeneric( + "coordinates<-", + function(object, value) standardGeneric("coordinates<-") +) ## end of AbstractMassObject ## MassSpectrum -if (is.null(getGeneric("calibrateIntensity"))) { - setGeneric("calibrateIntensity", - function(object, ...) standardGeneric("calibrateIntensity")) -} -if (is.null(getGeneric("detectPeaks"))) { - setGeneric("detectPeaks", - function(object, ...) standardGeneric("detectPeaks")) -} -if (is.null(getGeneric("estimateBaseline"))) { - setGeneric("estimateBaseline", - function(object, method=c("SNIP", "ConvexHull", "Median"), ...) - standardGeneric("estimateBaseline")) -} -if (is.null(getGeneric("estimateNoise"))) { - setGeneric("estimateNoise", - function(object, ...) standardGeneric("estimateNoise")) -} -if (is.null(getGeneric(".findLocalMaxima"))) { - setGeneric(".findLocalMaxima", - function(object, halfWindowSize=20L) - standardGeneric(".findLocalMaxima")) -} -if (is.null(getGeneric(".findLocalMaximaLogical"))) { - setGeneric(".findLocalMaximaLogical", - function(object, halfWindowSize=20L) - standardGeneric(".findLocalMaximaLogical")) -} -if (is.null(getGeneric("isRegular"))) { - setGeneric("isRegular", - function(object, ...) standardGeneric("isRegular")) -} -if (is.null(getGeneric("removeBaseline"))) { - setGeneric("removeBaseline", - function(object, ...) standardGeneric("removeBaseline")) -} -if (is.null(getGeneric("smoothIntensity"))) { - setGeneric("smoothIntensity", - function(object, ...) - standardGeneric("smoothIntensity")) -} -if (is.null(getGeneric("totalIonCurrent"))) { - setGeneric("totalIonCurrent", - function(object) standardGeneric("totalIonCurrent")) -} +setGeneric( + "calibrateIntensity", + function(object, ...) standardGeneric("calibrateIntensity") +) +setGeneric("detectPeaks", function(object, ...) standardGeneric("detectPeaks")) +setGeneric( + "estimateBaseline", + function(object, method=c("SNIP", "ConvexHull", "Median"), ...) + standardGeneric("estimateBaseline")) +setGeneric( + "estimateNoise", function(object, ...) standardGeneric("estimateNoise") +) +setGeneric( + ".findLocalMaxima", + function(object, halfWindowSize=20L) standardGeneric(".findLocalMaxima") +) +setGeneric( + ".findLocalMaximaLogical", + function(object, halfWindowSize=20L) + standardGeneric(".findLocalMaximaLogical") +) +setGeneric("isRegular", function(object, ...) standardGeneric("isRegular")) +setGeneric( + "removeBaseline", function(object, ...) standardGeneric("removeBaseline") +) +setGeneric( + "smoothIntensity", function(object, ...) standardGeneric("smoothIntensity") +) +setGeneric( + "totalIonCurrent", function(object) standardGeneric("totalIonCurrent") +) ## end of MassSpectrum ## MassPeaks -if (is.null(getGeneric("labelPeaks"))) { - setGeneric("labelPeaks", function(object, ...) standardGeneric("labelPeaks")) -} -if (is.null(getGeneric("monoisotopicPeaks"))) { - setGeneric("monoisotopicPeaks", - function(object, ...) standardGeneric("monoisotopicPeaks")) -} -if (is.null(getGeneric("snr"))) { - setGeneric("snr", function(object) standardGeneric("snr")) -} +setGeneric("labelPeaks", function(object, ...) standardGeneric("labelPeaks")) +setGeneric( + "monoisotopicPeaks", + function(object, ...) standardGeneric("monoisotopicPeaks") +) +setGeneric("snr", function(object) standardGeneric("snr")) ## end of MassPeaks diff --git a/R/as.list-functions.R b/R/as.list-functions.R new file mode 100644 index 0000000..2f7b3f5 --- /dev/null +++ b/R/as.list-functions.R @@ -0,0 +1,22 @@ +## .as.occurrence.list +## internal function to create a list of peaks occurrence +## +## params: +## l: list of AbstractMassObject objects +## +## returns: +## a list, where sample is the sample id, i is the index of the uniqueMass, +## and mass is the unique mass vector +.as.occurrence.list.MassObjectList <- function(l) { + .stopIfNotIsMassObjectList(l) + + mass <- .unlist(lapply(l, function(x)x@mass)) + uniqueMass <- sort.int(unique(mass)) + n <- lengths(l) + + list( + sample = rep.int(seq_along(l), lengths(l)), + i = findInterval(mass, uniqueMass), + mass = uniqueMass + ) +} diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index e9472de..d9c4aef 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -8,21 +8,18 @@ ## returns: ## a matrix .as.matrix.MassObjectList <- function(l) { - .stopIfNotIsMassObjectList(l) + .stopIfNotIsMassObjectList(l) - mass <- .unlist(lapply(l, function(x)x@mass)) - intensity <- .unlist(lapply(l, function(x)x@intensity)) - uniqueMass <- sort.int(unique(mass)) - n <- lengths(l) - r <- rep.int(seq_along(l), n) + intensity <- .unlist(lapply(l, function(x)x@intensity)) + o <- .as.occurrence.list.MassObjectList(l) - i <- findInterval(mass, uniqueMass) - - m <- matrix(NA_real_, nrow=length(l), ncol=length(uniqueMass), - dimnames=list(NULL, uniqueMass)) - m[cbind(r, i)] <- intensity - attr(m, "mass") <- uniqueMass - m + m <- matrix( + NA_real_, nrow=length(l), ncol=length(o$mass), + dimnames=list(NULL, o$mass) + ) + m[cbind(o$sample, o$i)] <- intensity + attr(m, "mass") <- o$mass + m } ## .as.binary.matrix @@ -34,10 +31,9 @@ ## returns: ## a binary matrix .as.binary.matrix <- function(m) { - stopifnot(is.matrix(m)) - isNA <- which(is.na(m)) - m[] <- 1L - m[isNA] <- 0L - mode(m) <- "integer" - m + if (!is.matrix(m)) + stop("'x' has to be a matrix!") + m[] <- !is.na(m) + mode(m) <- "integer" + m } diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index 9595939..06137ee 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -49,44 +49,44 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## recycle arguments if needed minFrequency <- rep_len(minFrequency, nl) minNumber <- rep_len(minNumber, nl) - mergeWhitelists <- mergeWhitelists[1] + mergeWhitelists <- isTRUE(mergeWhitelists) - ## binary peak matrix (mask) - m <- .as.binary.matrix(.as.matrix.MassObjectList(l)) + ## use peaks occurrence list + o <- .as.occurrence.list.MassObjectList(l) - ## whitelist - w <- matrix(0L, nrow=nrow(m), ncol=ncol(m)) - - ## group indices by labels + # group indices by labels idx <- lapply(ll, function(x)which(labels == x)) ## collect whitelists + w <- matrix(FALSE, nrow = nl, ncol = length(o$mass)) + for (i in seq_along(idx)) { - wl <- .whitelist(m, idx[[i]], - minFrequency=minFrequency[i], minNumber=minNumber[i]) + wl <- .whitelist( + o, idx[[i]], minFrequency = minFrequency[i], minNumber = minNumber[i] + ) + if (sum(wl)) { if (mergeWhitelists) { ## R uses columnwise recycling w <- t(t(w) | wl) } else { ## R uses columnwise recycling - w[idx[[i]], ] <- t(t(w[idx[[i]], , drop=FALSE]) | wl) + w[i, ] <- t(t(w[i, , drop=FALSE]) | wl) } } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } - } - ## apply whitelist - w <- w & m + } ## turn matrix back into MassPeaks objects - for (i in seq_along(l)) { - j <- which(as.logical(m[i, ])) - include <- which(w[i, j]) - l[[i]]@mass <- l[[i]]@mass[include] - l[[i]]@intensity <- l[[i]]@intensity[include] - l[[i]]@snr <- l[[i]]@snr[include] + for (i in seq_along(idx)) { + for (j in idx[[i]]) { + wmask <- w[i, o$i[o$sample == j]] + l[[j]]@mass <- l[[j]]@mass[wmask] + l[[j]]@intensity <- l[[j]]@intensity[wmask] + l[[j]]@snr <- l[[j]]@snr[wmask] + } } l @@ -96,7 +96,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## helper function to create whitelists for filtering ## ## params: -## m: matrix +## l: list of occurrences, generated by .as.occurrence.list.MassObjectList ## rows: index of rows which should filtered ## minFrequency: double, minimal frequency of a peak to be not removed ## minNumber: double, minimal (absolute) number of peaks to be not removed @@ -104,8 +104,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## returns: ## a logical vector representing the whitelist ## -.whitelist <- function(m, rows, minFrequency, minNumber) { - +.whitelist <- function(l, rows, minFrequency, minNumber) { ## test arguments if (is.na(minFrequency) && is.na(minNumber)) { stop(sQuote(minFrequency), " or ", sQuote(minNumber), @@ -129,7 +128,8 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } ## calculate minimal number of peaks - minPeakNumber <- max(minFrequency * length(rows), minNumber, na.rm=TRUE) + minPeakNumber <- + max(minFrequency * length(rows), minNumber, na.rm=TRUE) - colSums(m[rows, , drop=FALSE]) >= minPeakNumber + tabulate(l$i[l$sample %in% rows], nbins = length(l$mass)) >= minPeakNumber } diff --git a/tests/testthat/test_as.list-functions.R b/tests/testthat/test_as.list-functions.R new file mode 100644 index 0000000..7120c00 --- /dev/null +++ b/tests/testthat/test_as.list-functions.R @@ -0,0 +1,21 @@ +p <- list(createMassPeaks(mass=1:4, intensity=11:14), + createMassPeaks(mass=2:5, intensity=22:25)) + +l <- list( + sample = rep(1:2, each = 4), + i = c(1:4, 2:5), + mass = 1:5 +) + +test_that(".as.occurrence.list.MassObjectList throws errors", { + expect_error(.as.occurrence.list.MassObjectList(p[[1]]), + "no list of MALDIquant::AbstractMassObject objects") + expect_error(.as.occurrence.list.MassObjectList(list()), + "no list of MALDIquant::AbstractMassObject objects") + expect_error(.as.occurrence.list.MassObjectList(list(p, l)), + "no list of MALDIquant::AbstractMassObject objects") +}) + +test_that(".as.occurrence.list.MassObjectList", { + expect_identical(.as.occurrence.list.MassObjectList(p), l) +})