From 75ebb0fd0c02a787dd4083223b40feb80625e74c Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 11:19:52 +0100 Subject: [PATCH 01/16] fix: remove is.null(getGeneric(...)) tests --- DESCRIPTION | 2 +- NEWS | 10 +++ R/AllGenerics.R | 178 ++++++++++++++++++------------------------------ 3 files changed, 78 insertions(+), 112 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f619a64..7a96f42 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: MALDIquant -Version: 1.20 +Version: 1.20.0.9999 Date: 2021-07-29 Title: Quantitative Analysis of Mass Spectrometry Data Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"), diff --git a/NEWS b/NEWS index 7379c90..d5df1ba 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,16 @@ RELEASE HISTORY OF THE "MALDIquant" PACKAGE =========================================== +CHANGES IN MALDIquant VERSION 1.20.0.9999 [unreleased]: +------------------------------------------------------- + +INTERNAL CHANGES + +* Remove `is.null(getGeneric(...))` tests before setting generics for + S4 methods to avoid errors in package loading (especially with + `pkgload::load_all()`). + + CHANGES IN MALDIquant VERSION 1.20 [2021-07-29]: ------------------------------------------------ diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 8958a47..1020ec6 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -1,123 +1,79 @@ ## AbstractMassObject -if (is.null(getGeneric("plotMsiSlice"))) { - setGeneric("plotMsiSlice", function(x, ...) standardGeneric("plotMsiSlice")) -} -if (is.null(getGeneric(".prepareShow"))) { - setGeneric(".prepareShow", function(object) standardGeneric(".prepareShow")) -} -if (is.null(getGeneric("transformIntensity"))) { - setGeneric("transformIntensity", - function(object, ...) standardGeneric("transformIntensity")) -} -if (is.null(getGeneric(".transformIntensity"))) { - setGeneric(".transformIntensity", - function(object, ...) standardGeneric(".transformIntensity")) -} -if (is.null(getGeneric("trim"))) { - setGeneric("trim", function(object, range, ...) standardGeneric("trim")) -} +setGeneric("plotMsiSlice", function(x, ...) standardGeneric("plotMsiSlice")) +setGeneric(".prepareShow", function(object) standardGeneric(".prepareShow")) +setGeneric( + "transformIntensity", + function(object, ...) standardGeneric("transformIntensity") +) +setGeneric( + ".transformIntensity", + function(object, ...) standardGeneric(".transformIntensity") +) +setGeneric("trim", function(object, range, ...) standardGeneric("trim")) ## get/set slots -if (is.null(getGeneric("mass"))) { - setGeneric("mass", function(object, ...) standardGeneric("mass")) -} -if (is.null(getGeneric("mass<-"))) { - setGeneric("mass<-", function(object, value) standardGeneric("mass<-")) -} +setGeneric("mass", function(object, ...) standardGeneric("mass")) +setGeneric("mass<-", function(object, value) standardGeneric("mass<-")) + # from ProtGenerics (same as mass) -if (is.null(getGeneric("mz"))) { - setGeneric("mz", function(object, ...) standardGeneric("mz")) -} -if (is.null(getGeneric("mz<-"))) { - setGeneric("mz<-", function(object, value) standardGeneric("mz<-")) -} -if (is.null(getGeneric("intensity"))) { - setGeneric("intensity", function(object, ...) standardGeneric("intensity")) -} -if (is.null(getGeneric("intensity<-"))) { - setGeneric("intensity<-", - function(object, value) standardGeneric("intensity<-")) -} -if (is.null(getGeneric("isEmpty"))) { - setGeneric("isEmpty", function(x) standardGeneric("isEmpty")) -} -if (is.null(getGeneric(".isEmptyWarning"))) { - setGeneric(".isEmptyWarning", function(x) standardGeneric(".isEmptyWarning")) -} -if (is.null(getGeneric("metaData"))) { - setGeneric("metaData", function(object) standardGeneric("metaData")) -} -if (is.null(getGeneric("metaData<-"))) { - setGeneric("metaData<-", - function(object, value) standardGeneric("metaData<-")) -} -if (is.null(getGeneric("coordinates"))) { - setGeneric("coordinates", - function(object, ...) standardGeneric("coordinates")) -} -if (is.null(getGeneric("coordinates<-"))) { - setGeneric("coordinates<-", - function(object, value) standardGeneric("coordinates<-")) -} +setGeneric("mz", function(object, ...) standardGeneric("mz")) +setGeneric("mz<-", function(object, value) standardGeneric("mz<-")) +setGeneric("intensity", function(object, ...) standardGeneric("intensity")) +setGeneric( + "intensity<-", + function(object, value) standardGeneric("intensity<-") +) +setGeneric("isEmpty", function(x) standardGeneric("isEmpty")) +setGeneric(".isEmptyWarning", function(x) standardGeneric(".isEmptyWarning")) +setGeneric("metaData", function(object) standardGeneric("metaData")) +setGeneric("metaData<-", function(object, value) standardGeneric("metaData<-")) +setGeneric("coordinates", function(object, ...) standardGeneric("coordinates")) +setGeneric( + "coordinates<-", + function(object, value) standardGeneric("coordinates<-") +) ## end of AbstractMassObject ## MassSpectrum -if (is.null(getGeneric("calibrateIntensity"))) { - setGeneric("calibrateIntensity", - function(object, ...) standardGeneric("calibrateIntensity")) -} -if (is.null(getGeneric("detectPeaks"))) { - setGeneric("detectPeaks", - function(object, ...) standardGeneric("detectPeaks")) -} -if (is.null(getGeneric("estimateBaseline"))) { - setGeneric("estimateBaseline", - function(object, method=c("SNIP", "ConvexHull", "Median"), ...) - standardGeneric("estimateBaseline")) -} -if (is.null(getGeneric("estimateNoise"))) { - setGeneric("estimateNoise", - function(object, ...) standardGeneric("estimateNoise")) -} -if (is.null(getGeneric(".findLocalMaxima"))) { - setGeneric(".findLocalMaxima", - function(object, halfWindowSize=20L) - standardGeneric(".findLocalMaxima")) -} -if (is.null(getGeneric(".findLocalMaximaLogical"))) { - setGeneric(".findLocalMaximaLogical", - function(object, halfWindowSize=20L) - standardGeneric(".findLocalMaximaLogical")) -} -if (is.null(getGeneric("isRegular"))) { - setGeneric("isRegular", - function(object, ...) standardGeneric("isRegular")) -} -if (is.null(getGeneric("removeBaseline"))) { - setGeneric("removeBaseline", - function(object, ...) standardGeneric("removeBaseline")) -} -if (is.null(getGeneric("smoothIntensity"))) { - setGeneric("smoothIntensity", - function(object, ...) - standardGeneric("smoothIntensity")) -} -if (is.null(getGeneric("totalIonCurrent"))) { - setGeneric("totalIonCurrent", - function(object) standardGeneric("totalIonCurrent")) -} +setGeneric( + "calibrateIntensity", + function(object, ...) standardGeneric("calibrateIntensity") +) +setGeneric("detectPeaks", function(object, ...) standardGeneric("detectPeaks")) +setGeneric( + "estimateBaseline", + function(object, method=c("SNIP", "ConvexHull", "Median"), ...) + standardGeneric("estimateBaseline")) +setGeneric( + "estimateNoise", function(object, ...) standardGeneric("estimateNoise") +) +setGeneric( + ".findLocalMaxima", + function(object, halfWindowSize=20L) standardGeneric(".findLocalMaxima") +) +setGeneric( + ".findLocalMaximaLogical", + function(object, halfWindowSize=20L) + standardGeneric(".findLocalMaximaLogical") +) +setGeneric("isRegular", function(object, ...) standardGeneric("isRegular")) +setGeneric( + "removeBaseline", function(object, ...) standardGeneric("removeBaseline") +) +setGeneric( + "smoothIntensity", function(object, ...) standardGeneric("smoothIntensity") +) +setGeneric( + "totalIonCurrent", function(object) standardGeneric("totalIonCurrent") +) ## end of MassSpectrum ## MassPeaks -if (is.null(getGeneric("labelPeaks"))) { - setGeneric("labelPeaks", function(object, ...) standardGeneric("labelPeaks")) -} -if (is.null(getGeneric("monoisotopicPeaks"))) { - setGeneric("monoisotopicPeaks", - function(object, ...) standardGeneric("monoisotopicPeaks")) -} -if (is.null(getGeneric("snr"))) { - setGeneric("snr", function(object) standardGeneric("snr")) -} +setGeneric("labelPeaks", function(object, ...) standardGeneric("labelPeaks")) +setGeneric( + "monoisotopicPeaks", + function(object, ...) standardGeneric("monoisotopicPeaks") +) +setGeneric("snr", function(object) standardGeneric("snr")) ## end of MassPeaks From 877ee1483ea1f2146139c8c50c098a75a18f0344 Mon Sep 17 00:00:00 2001 From: pi514 Date: Fri, 22 Oct 2021 12:01:16 +0100 Subject: [PATCH 02/16] using occurrence list --- R/as.matrix-functions.R | 23 ++++++++++++ R/filterPeaks-functions.R | 79 +++++++++++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index e9472de..f7a5115 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -25,6 +25,7 @@ m } + ## .as.binary.matrix ## internal function to convert a matrix with NA to a binary one ## @@ -41,3 +42,25 @@ mode(m) <- "integer" m } + +## .as.occurrence.list +## internal function to create a list of peaks occurrence +## +## params: +## l: list of AbstractMassObject objects +## +## returns: +## a list +.as.occurrence.list <- function(l) { + + .stopIfNotIsMassObjectList(l) + + mass <- .unlist(lapply(l, function(x)x@mass)) + uniqueMass <- sort.int(unique(mass)) + n <- lengths(l) + r <- rep.int(seq_along(l), n) + + i <- findInterval(mass, uniqueMass) + + return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) +} diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index 9595939..ab1248c 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -51,44 +51,43 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, minNumber <- rep_len(minNumber, nl) mergeWhitelists <- mergeWhitelists[1] - ## binary peak matrix (mask) - m <- .as.binary.matrix(.as.matrix.MassObjectList(l)) + ## use peaks occurrence list + o <- .as.occurrence.list(l) - ## whitelist - w <- matrix(0L, nrow=nrow(m), ncol=ncol(m)) - - ## group indices by labels + # group indices by labels idx <- lapply(ll, function(x)which(labels == x)) ## collect whitelists + + w <- matrix(FALSE, nrow = nl, ncol = length(o$masses)) + for (i in seq_along(idx)) { - wl <- .whitelist(m, idx[[i]], - minFrequency=minFrequency[i], minNumber=minNumber[i]) + + wl <- .whitelist.list(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) + if (sum(wl)) { if (mergeWhitelists) { ## R uses columnwise recycling w <- t(t(w) | wl) } else { ## R uses columnwise recycling - w[idx[[i]], ] <- t(t(w[idx[[i]], , drop=FALSE]) | wl) + w[i, ] <- t(t(w[i, , drop=FALSE]) | wl) } } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } } - - ## apply whitelist - w <- w & m - + ## turn matrix back into MassPeaks objects - for (i in seq_along(l)) { - j <- which(as.logical(m[i, ])) - include <- which(w[i, j]) - l[[i]]@mass <- l[[i]]@mass[include] - l[[i]]@intensity <- l[[i]]@intensity[include] - l[[i]]@snr <- l[[i]]@snr[include] + + for (i in seq_along(idx)) { + for (j in idx[[i]]) { + l[[j]]@mass <- l[[j]]@mass[w[i, o$i[o$r == j]]] + l[[j]]@intensity <- l[[j]]@intensity[w[i, o$i[o$r == j]]] + l[[j]]@snr <- l[[j]]@snr[w[i, o$i[o$r == j]]] + } } - + l } @@ -133,3 +132,43 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, colSums(m[rows, , drop=FALSE]) >= minPeakNumber } + + +.whitelist.list <- function(l, rows, minFrequency, minNumber) { + + ## test arguments + if (is.na(minFrequency) && is.na(minNumber)) { + stop(sQuote(minFrequency), " or ", sQuote(minNumber), + " has to be a meaningful number!") + } + + if (!is.na(minFrequency) && minFrequency < 0L) { + minFrequency <- 0L + warning(sQuote("minFrequency"), + " < 0 does not make sense! Using 0 instead.") + } + + if (!is.na(minNumber) && minNumber < 0L) { + minNumber <- 0L + warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.") + } + + if (!is.na(minFrequency) && !is.na(minNumber)) { + warning(sQuote("minFrequency"), " and ", sQuote("minNumber"), + " arguments are given. Choosing the higher one.") + } + + ## calculate minimal number of peaks + + keep.rows <- (l$r %in% rows) + l$r <- l$r[keep.rows] + l$i <- l$i[keep.rows] + + minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE) + + return( + sapply(seq_along(l$masses), function(m) { + sum(l$i == m) >= minPeakNumber + })) + +} \ No newline at end of file From 5e7bd0996388a9e0d1b5c67afb93d30f093cd8c0 Mon Sep 17 00:00:00 2001 From: Paolo Inglese Date: Thu, 28 Oct 2021 16:33:17 +0100 Subject: [PATCH 03/16] optimized speed --- .Rbuildignore | 2 ++ .gitignore | 1 + MALDIquant.Rproj | 17 +++++++++++++++ R/as.matrix-functions.R | 1 + R/filterPeaks-functions.R | 45 ++++++++++++++++++++++----------------- 5 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 MALDIquant.Rproj diff --git a/.Rbuildignore b/.Rbuildignore index abbad15..d887687 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ ^CRAN-RELEASE$ ^cran-comments.md$ ^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 1396e56..acc9fb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.o *.rds CRAN-RELEASE +.Rproj.user diff --git a/MALDIquant.Rproj b/MALDIquant.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/MALDIquant.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index f7a5115..60c7428 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -63,4 +63,5 @@ i <- findInterval(mass, uniqueMass) return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) + } diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index ab1248c..33ad79e 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -63,7 +63,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, for (i in seq_along(idx)) { - wl <- .whitelist.list(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) + wl <- .whitelistoccur(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) if (sum(wl)) { if (mergeWhitelists) { @@ -76,15 +76,17 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } + } ## turn matrix back into MassPeaks objects for (i in seq_along(idx)) { for (j in idx[[i]]) { - l[[j]]@mass <- l[[j]]@mass[w[i, o$i[o$r == j]]] - l[[j]]@intensity <- l[[j]]@intensity[w[i, o$i[o$r == j]]] - l[[j]]@snr <- l[[j]]@snr[w[i, o$i[o$r == j]]] + wmask <- w[i, o$i[o$r == j]] + l[[j]]@mass <- l[[j]]@mass[wmask] + l[[j]]@intensity <- l[[j]]@intensity[wmask] + l[[j]]@snr <- l[[j]]@snr[wmask] } } @@ -134,41 +136,46 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } -.whitelist.list <- function(l, rows, minFrequency, minNumber) { - +.whitelistoccur <- function(l, rows, minFrequency, minNumber) { + ## test arguments if (is.na(minFrequency) && is.na(minNumber)) { stop(sQuote(minFrequency), " or ", sQuote(minNumber), " has to be a meaningful number!") } - + if (!is.na(minFrequency) && minFrequency < 0L) { minFrequency <- 0L warning(sQuote("minFrequency"), " < 0 does not make sense! Using 0 instead.") } - + if (!is.na(minNumber) && minNumber < 0L) { minNumber <- 0L warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.") } - + if (!is.na(minFrequency) && !is.na(minNumber)) { warning(sQuote("minFrequency"), " and ", sQuote("minNumber"), " arguments are given. Choosing the higher one.") } - + ## calculate minimal number of peaks - + keep.rows <- (l$r %in% rows) l$r <- l$r[keep.rows] l$i <- l$i[keep.rows] - + l$i <- split(l$i, factor(l$i)) + minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE) - - return( - sapply(seq_along(l$masses), function(m) { - sum(l$i == m) >= minPeakNumber - })) - -} \ No newline at end of file + + above.min <- function(x) { length(x) >= minPeakNumber } + wl.vals <- unlist(lapply(l$i, above.min)) + + wl <- array(FALSE, length(l$masses)) + wl[as.numeric(names(l$i))] <- wl.vals + + return(c(wl)) + +} + From 808abe35199ce22e0c28ab5b96dfb2433941fce6 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 11:29:29 +0100 Subject: [PATCH 04/16] chore: move as.occurrence.list to as.list-functions.R --- R/as.list-functions.R | 22 ++++++++++++++++++++++ R/as.matrix-functions.R | 23 ----------------------- 2 files changed, 22 insertions(+), 23 deletions(-) create mode 100644 R/as.list-functions.R diff --git a/R/as.list-functions.R b/R/as.list-functions.R new file mode 100644 index 0000000..48ab71f --- /dev/null +++ b/R/as.list-functions.R @@ -0,0 +1,22 @@ +## .as.occurrence.list +## internal function to create a list of peaks occurrence +## +## params: +## l: list of AbstractMassObject objects +## +## returns: +## a list +.as.occurrence.list <- function(l) { + + .stopIfNotIsMassObjectList(l) + + mass <- .unlist(lapply(l, function(x)x@mass)) + uniqueMass <- sort.int(unique(mass)) + n <- lengths(l) + r <- rep.int(seq_along(l), n) + + i <- findInterval(mass, uniqueMass) + + return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) + +} diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index 60c7428..0fc0870 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -42,26 +42,3 @@ mode(m) <- "integer" m } - -## .as.occurrence.list -## internal function to create a list of peaks occurrence -## -## params: -## l: list of AbstractMassObject objects -## -## returns: -## a list -.as.occurrence.list <- function(l) { - - .stopIfNotIsMassObjectList(l) - - mass <- .unlist(lapply(l, function(x)x@mass)) - uniqueMass <- sort.int(unique(mass)) - n <- lengths(l) - r <- rep.int(seq_along(l), n) - - i <- findInterval(mass, uniqueMass) - - return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) - -} From 9de53b7e7460752b623346c6c70ae14eba0ecc7c Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 11:35:57 +0100 Subject: [PATCH 05/16] refactor: rename to as.occurrence.list.MassObjectList --- R/as.list-functions.R | 3 +-- R/filterPeaks-functions.R | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/R/as.list-functions.R b/R/as.list-functions.R index 48ab71f..78f309b 100644 --- a/R/as.list-functions.R +++ b/R/as.list-functions.R @@ -6,8 +6,7 @@ ## ## returns: ## a list -.as.occurrence.list <- function(l) { - +.as.occurrence.list.MassObjectList <- function(l) { .stopIfNotIsMassObjectList(l) mass <- .unlist(lapply(l, function(x)x@mass)) diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index 33ad79e..d60e7a4 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -52,7 +52,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, mergeWhitelists <- mergeWhitelists[1] ## use peaks occurrence list - o <- .as.occurrence.list(l) + o <- .as.occurrence.list.MassObjectList(l) # group indices by labels idx <- lapply(ll, function(x)which(labels == x)) From e85df6682f82062c7f4e3e6d64787395127ea545 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 11:40:18 +0100 Subject: [PATCH 06/16] chore: remove .Rproj file --- .gitignore | 1 + MALDIquant.Rproj | 17 ----------------- 2 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 MALDIquant.Rproj diff --git a/.gitignore b/.gitignore index acc9fb3..4ae3cb1 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ *.rds CRAN-RELEASE .Rproj.user +.Rproj diff --git a/MALDIquant.Rproj b/MALDIquant.Rproj deleted file mode 100644 index 21a4da0..0000000 --- a/MALDIquant.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source From b738775870e7e554c8804c1e527dfa91707b08a6 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 11:48:51 +0100 Subject: [PATCH 07/16] refactor: rename list elements --- R/as.list-functions.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/R/as.list-functions.R b/R/as.list-functions.R index 78f309b..2f7b3f5 100644 --- a/R/as.list-functions.R +++ b/R/as.list-functions.R @@ -5,17 +5,18 @@ ## l: list of AbstractMassObject objects ## ## returns: -## a list +## a list, where sample is the sample id, i is the index of the uniqueMass, +## and mass is the unique mass vector .as.occurrence.list.MassObjectList <- function(l) { - .stopIfNotIsMassObjectList(l) + .stopIfNotIsMassObjectList(l) - mass <- .unlist(lapply(l, function(x)x@mass)) - uniqueMass <- sort.int(unique(mass)) - n <- lengths(l) - r <- rep.int(seq_along(l), n) - - i <- findInterval(mass, uniqueMass) - - return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) + mass <- .unlist(lapply(l, function(x)x@mass)) + uniqueMass <- sort.int(unique(mass)) + n <- lengths(l) + list( + sample = rep.int(seq_along(l), lengths(l)), + i = findInterval(mass, uniqueMass), + mass = uniqueMass + ) } From 746c20ea62968a8b743d664623fa02670cf1e386 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 11:49:15 +0100 Subject: [PATCH 08/16] tests: add .as.occurrence.list.MassObjectList unit tests --- tests/testthat/test_as.list-functions.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/testthat/test_as.list-functions.R diff --git a/tests/testthat/test_as.list-functions.R b/tests/testthat/test_as.list-functions.R new file mode 100644 index 0000000..7120c00 --- /dev/null +++ b/tests/testthat/test_as.list-functions.R @@ -0,0 +1,21 @@ +p <- list(createMassPeaks(mass=1:4, intensity=11:14), + createMassPeaks(mass=2:5, intensity=22:25)) + +l <- list( + sample = rep(1:2, each = 4), + i = c(1:4, 2:5), + mass = 1:5 +) + +test_that(".as.occurrence.list.MassObjectList throws errors", { + expect_error(.as.occurrence.list.MassObjectList(p[[1]]), + "no list of MALDIquant::AbstractMassObject objects") + expect_error(.as.occurrence.list.MassObjectList(list()), + "no list of MALDIquant::AbstractMassObject objects") + expect_error(.as.occurrence.list.MassObjectList(list(p, l)), + "no list of MALDIquant::AbstractMassObject objects") +}) + +test_that(".as.occurrence.list.MassObjectList", { + expect_identical(.as.occurrence.list.MassObjectList(p), l) +}) From 97b873344d33f3cd167864863e6a09ecb5a99774 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 12:10:32 +0100 Subject: [PATCH 09/16] refactor: adapt to renamed arguments --- R/filterPeaks-functions.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index d60e7a4..ecf0926 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -58,13 +58,13 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, idx <- lapply(ll, function(x)which(labels == x)) ## collect whitelists - - w <- matrix(FALSE, nrow = nl, ncol = length(o$masses)) - + + w <- matrix(FALSE, nrow = nl, ncol = length(o$mass)) + for (i in seq_along(idx)) { - + wl <- .whitelistoccur(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) - + if (sum(wl)) { if (mergeWhitelists) { ## R uses columnwise recycling @@ -76,20 +76,20 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } - + } - + ## turn matrix back into MassPeaks objects - + for (i in seq_along(idx)) { for (j in idx[[i]]) { - wmask <- w[i, o$i[o$r == j]] + wmask <- w[i, o$i[o$sample == j]] l[[j]]@mass <- l[[j]]@mass[wmask] l[[j]]@intensity <- l[[j]]@intensity[wmask] l[[j]]@snr <- l[[j]]@snr[wmask] } } - + l } @@ -162,17 +162,17 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## calculate minimal number of peaks - keep.rows <- (l$r %in% rows) - l$r <- l$r[keep.rows] + keep.rows <- (l$sample %in% rows) + l$sample <- l$sample[keep.rows] l$i <- l$i[keep.rows] l$i <- split(l$i, factor(l$i)) - minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE) + minPeakNumber <- max(minFrequency * length(unique(l$sample)), minNumber, na.rm=TRUE) above.min <- function(x) { length(x) >= minPeakNumber } wl.vals <- unlist(lapply(l$i, above.min)) - wl <- array(FALSE, length(l$masses)) + wl <- array(FALSE, length(l$mass)) wl[as.numeric(names(l$i))] <- wl.vals return(c(wl)) From 6eca7af5b6c060129dbd9765156ba4eb63761bf5 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 13:14:17 +0100 Subject: [PATCH 10/16] refactor: replace .whitelist with whitelistoccur --- R/filterPeaks-functions.R | 42 +++++---------------------------------- 1 file changed, 5 insertions(+), 37 deletions(-) diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index ecf0926..63a2ae3 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -58,12 +58,12 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, idx <- lapply(ll, function(x)which(labels == x)) ## collect whitelists - w <- matrix(FALSE, nrow = nl, ncol = length(o$mass)) for (i in seq_along(idx)) { - - wl <- .whitelistoccur(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) + wl <- .whitelist( + o, idx[[i]], minFrequency = minFrequency[i], minNumber = minNumber[i] + ) if (sum(wl)) { if (mergeWhitelists) { @@ -97,7 +97,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## helper function to create whitelists for filtering ## ## params: -## m: matrix +## l: list of occurrences, generated by .as.occurrence.list.MassObjectList ## rows: index of rows which should filtered ## minFrequency: double, minimal frequency of a peak to be not removed ## minNumber: double, minimal (absolute) number of peaks to be not removed @@ -105,39 +105,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## returns: ## a logical vector representing the whitelist ## -.whitelist <- function(m, rows, minFrequency, minNumber) { - - ## test arguments - if (is.na(minFrequency) && is.na(minNumber)) { - stop(sQuote(minFrequency), " or ", sQuote(minNumber), - " has to be a meaningful number!") - } - - if (!is.na(minFrequency) && minFrequency < 0L) { - minFrequency <- 0L - warning(sQuote("minFrequency"), - " < 0 does not make sense! Using 0 instead.") - } - - if (!is.na(minNumber) && minNumber < 0L) { - minNumber <- 0L - warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.") - } - - if (!is.na(minFrequency) && !is.na(minNumber)) { - warning(sQuote("minFrequency"), " and ", sQuote("minNumber"), - " arguments are given. Choosing the higher one.") - } - - ## calculate minimal number of peaks - minPeakNumber <- max(minFrequency * length(rows), minNumber, na.rm=TRUE) - - colSums(m[rows, , drop=FALSE]) >= minPeakNumber -} - - -.whitelistoccur <- function(l, rows, minFrequency, minNumber) { - +.whitelist <- function(l, rows, minFrequency, minNumber) { ## test arguments if (is.na(minFrequency) && is.na(minNumber)) { stop(sQuote(minFrequency), " or ", sQuote(minNumber), From 9dd476406d4a08dc520c77f33629fde9a56fb932 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 13:14:42 +0100 Subject: [PATCH 11/16] refactor: replace split/lapply approach by tabulate --- R/filterPeaks-functions.R | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index 63a2ae3..a017c8b 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -129,21 +129,8 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } ## calculate minimal number of peaks + minPeakNumber <- + max(minFrequency * length(rows), minNumber, na.rm=TRUE) - keep.rows <- (l$sample %in% rows) - l$sample <- l$sample[keep.rows] - l$i <- l$i[keep.rows] - l$i <- split(l$i, factor(l$i)) - - minPeakNumber <- max(minFrequency * length(unique(l$sample)), minNumber, na.rm=TRUE) - - above.min <- function(x) { length(x) >= minPeakNumber } - wl.vals <- unlist(lapply(l$i, above.min)) - - wl <- array(FALSE, length(l$mass)) - wl[as.numeric(names(l$i))] <- wl.vals - - return(c(wl)) - + tabulate(l$i[l$sample %in% rows], nbins = length(l$mass)) >= minPeakNumber } - From c02dcfb69d42c93c51b5c3841c5d6607da5336bd Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 13:15:00 +0100 Subject: [PATCH 12/16] refactor: use isTRUE --- R/filterPeaks-functions.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index a017c8b..06137ee 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -49,7 +49,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, ## recycle arguments if needed minFrequency <- rep_len(minFrequency, nl) minNumber <- rep_len(minNumber, nl) - mergeWhitelists <- mergeWhitelists[1] + mergeWhitelists <- isTRUE(mergeWhitelists) ## use peaks occurrence list o <- .as.occurrence.list.MassObjectList(l) @@ -80,7 +80,6 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } ## turn matrix back into MassPeaks objects - for (i in seq_along(idx)) { for (j in idx[[i]]) { wmask <- w[i, o$i[o$sample == j]] From 0dc9c34926b7d88b7b5841366cb5e2dd9caad3e0 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 13:18:13 +0100 Subject: [PATCH 13/16] refactor: use .as.occurrence.list.MassObjectList in .as.matrix.MassObjectList --- R/as.matrix-functions.R | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index 0fc0870..eae3f8b 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -8,24 +8,20 @@ ## returns: ## a matrix .as.matrix.MassObjectList <- function(l) { - .stopIfNotIsMassObjectList(l) + .stopIfNotIsMassObjectList(l) - mass <- .unlist(lapply(l, function(x)x@mass)) - intensity <- .unlist(lapply(l, function(x)x@intensity)) - uniqueMass <- sort.int(unique(mass)) - n <- lengths(l) - r <- rep.int(seq_along(l), n) + intensity <- .unlist(lapply(l, function(x)x@intensity)) + o <- .as.occurrence.list.MassObjectList(l) - i <- findInterval(mass, uniqueMass) - - m <- matrix(NA_real_, nrow=length(l), ncol=length(uniqueMass), - dimnames=list(NULL, uniqueMass)) - m[cbind(r, i)] <- intensity - attr(m, "mass") <- uniqueMass - m + m <- matrix( + NA_real_, nrow=length(l), ncol=length(o$mass), + dimnames=list(NULL, o$mass) + ) + m[cbind(o$sample, o$i)] <- intensity + attr(m, "mass") <- o$mass + m } - ## .as.binary.matrix ## internal function to convert a matrix with NA to a binary one ## From 0a01a124f3f0ec0b7cfaab4d68acdf64c737bda8 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Wed, 3 Nov 2021 13:22:49 +0100 Subject: [PATCH 14/16] refactor: rewrite .as.binary.matrix --- R/as.matrix-functions.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index eae3f8b..d9c4aef 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -31,10 +31,9 @@ ## returns: ## a binary matrix .as.binary.matrix <- function(m) { - stopifnot(is.matrix(m)) - isNA <- which(is.na(m)) - m[] <- 1L - m[isNA] <- 0L - mode(m) <- "integer" - m + if (!is.matrix(m)) + stop("'x' has to be a matrix!") + m[] <- !is.na(m) + mode(m) <- "integer" + m } From f53059f97e4a18a22baa3aa312788f2c55cc9928 Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 5 Nov 2021 15:36:08 +0100 Subject: [PATCH 15/16] chore: add contributors to DESCRIPTION --- DESCRIPTION | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7a96f42..cc2068c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,11 +2,26 @@ Package: MALDIquant Version: 1.20.0.9999 Date: 2021-07-29 Title: Quantitative Analysis of Mass Spectrometry Data -Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"), +Authors@R: c( + person( + given = "Sebastian", family = "Gibb", email="mail@sebastiangibb.de", - comment=c(ORCID="0000-0001-7406-4443")), person("Korbinian", - "Strimmer", role="ths", - comment=c(ORCID="0000-0001-7917-2056"))) + comment=c(ORCID="0000-0001-7406-4443"), + role = c("aut", "cre") + ), + person( + given = "Korbinian", family = "Strimmer", + comment=c(ORCID="0000-0001-7917-2056"), + role="ths" + ), + person(given = "Sigurdur", family = "Smarason", role = "ctb"), + person( + given = "Laurent", family = "Gatto", + email = "laurent.gatto@uclouvain.be", + comment = c(ORCID = "0000-0002-1520-2268"), + role = "ctb" + ), + person(given = "Paolo", "family = Inglese", role = "ctb")) Depends: R (>= 4.0.0), methods Imports: parallel Suggests: knitr, testthat (>= 0.8) @@ -26,4 +41,4 @@ URL: https://www.strimmerlab.org/software/maldiquant/ BugReports: https://github.com/sgibb/MALDIquant/issues/ LazyLoad: yes VignetteBuilder: knitr -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 From f8174385c23a0b727c9e60a62044dbaa02f57d7d Mon Sep 17 00:00:00 2001 From: Sebastian Gibb Date: Fri, 5 Nov 2021 15:36:24 +0100 Subject: [PATCH 16/16] chore: update NEWS --- NEWS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/NEWS b/NEWS index d5df1ba..a9c3a71 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,13 @@ RELEASE HISTORY OF THE "MALDIquant" PACKAGE CHANGES IN MALDIquant VERSION 1.20.0.9999 [unreleased]: ------------------------------------------------------- +MODIFICATIONS + +* Reduce memory requirement for `filterPeaks`, especially for very sparse peak + lists by rewriting `filterPeaks` and `.as.matrix.MassObjectList` to use a + `list` internally instead of a `matrix`; see #71. + Contributed by Paolo Inglese (@paoloinglese). + INTERNAL CHANGES * Remove `is.null(getGeneric(...))` tests before setting generics for