From d6d956916e0c9c83635a97be271d1480adc5ac0d Mon Sep 17 00:00:00 2001 From: pi514 Date: Fri, 22 Oct 2021 12:01:16 +0100 Subject: [PATCH 1/2] using occurrence list --- R/as.matrix-functions.R | 23 ++++++++++++ R/filterPeaks-functions.R | 79 +++++++++++++++++++++++++++++---------- 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index e9472de..f7a5115 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -25,6 +25,7 @@ m } + ## .as.binary.matrix ## internal function to convert a matrix with NA to a binary one ## @@ -41,3 +42,25 @@ mode(m) <- "integer" m } + +## .as.occurrence.list +## internal function to create a list of peaks occurrence +## +## params: +## l: list of AbstractMassObject objects +## +## returns: +## a list +.as.occurrence.list <- function(l) { + + .stopIfNotIsMassObjectList(l) + + mass <- .unlist(lapply(l, function(x)x@mass)) + uniqueMass <- sort.int(unique(mass)) + n <- lengths(l) + r <- rep.int(seq_along(l), n) + + i <- findInterval(mass, uniqueMass) + + return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) +} diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index 9595939..ab1248c 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -51,44 +51,43 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, minNumber <- rep_len(minNumber, nl) mergeWhitelists <- mergeWhitelists[1] - ## binary peak matrix (mask) - m <- .as.binary.matrix(.as.matrix.MassObjectList(l)) + ## use peaks occurrence list + o <- .as.occurrence.list(l) - ## whitelist - w <- matrix(0L, nrow=nrow(m), ncol=ncol(m)) - - ## group indices by labels + # group indices by labels idx <- lapply(ll, function(x)which(labels == x)) ## collect whitelists + + w <- matrix(FALSE, nrow = nl, ncol = length(o$masses)) + for (i in seq_along(idx)) { - wl <- .whitelist(m, idx[[i]], - minFrequency=minFrequency[i], minNumber=minNumber[i]) + + wl <- .whitelist.list(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) + if (sum(wl)) { if (mergeWhitelists) { ## R uses columnwise recycling w <- t(t(w) | wl) } else { ## R uses columnwise recycling - w[idx[[i]], ] <- t(t(w[idx[[i]], , drop=FALSE]) | wl) + w[i, ] <- t(t(w[i, , drop=FALSE]) | wl) } } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } } - - ## apply whitelist - w <- w & m - + ## turn matrix back into MassPeaks objects - for (i in seq_along(l)) { - j <- which(as.logical(m[i, ])) - include <- which(w[i, j]) - l[[i]]@mass <- l[[i]]@mass[include] - l[[i]]@intensity <- l[[i]]@intensity[include] - l[[i]]@snr <- l[[i]]@snr[include] + + for (i in seq_along(idx)) { + for (j in idx[[i]]) { + l[[j]]@mass <- l[[j]]@mass[w[i, o$i[o$r == j]]] + l[[j]]@intensity <- l[[j]]@intensity[w[i, o$i[o$r == j]]] + l[[j]]@snr <- l[[j]]@snr[w[i, o$i[o$r == j]]] + } } - + l } @@ -133,3 +132,43 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, colSums(m[rows, , drop=FALSE]) >= minPeakNumber } + + +.whitelist.list <- function(l, rows, minFrequency, minNumber) { + + ## test arguments + if (is.na(minFrequency) && is.na(minNumber)) { + stop(sQuote(minFrequency), " or ", sQuote(minNumber), + " has to be a meaningful number!") + } + + if (!is.na(minFrequency) && minFrequency < 0L) { + minFrequency <- 0L + warning(sQuote("minFrequency"), + " < 0 does not make sense! Using 0 instead.") + } + + if (!is.na(minNumber) && minNumber < 0L) { + minNumber <- 0L + warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.") + } + + if (!is.na(minFrequency) && !is.na(minNumber)) { + warning(sQuote("minFrequency"), " and ", sQuote("minNumber"), + " arguments are given. Choosing the higher one.") + } + + ## calculate minimal number of peaks + + keep.rows <- (l$r %in% rows) + l$r <- l$r[keep.rows] + l$i <- l$i[keep.rows] + + minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE) + + return( + sapply(seq_along(l$masses), function(m) { + sum(l$i == m) >= minPeakNumber + })) + +} \ No newline at end of file From 11764c1d27106a18d378086bb08a563e5fc89d83 Mon Sep 17 00:00:00 2001 From: Paolo Inglese Date: Thu, 28 Oct 2021 16:33:17 +0100 Subject: [PATCH 2/2] optimized speed --- .Rbuildignore | 2 ++ .gitignore | 1 + MALDIquant.Rproj | 17 +++++++++++++++ R/as.matrix-functions.R | 1 + R/filterPeaks-functions.R | 45 ++++++++++++++++++++++----------------- 5 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 MALDIquant.Rproj diff --git a/.Rbuildignore b/.Rbuildignore index abbad15..d887687 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ ^CRAN-RELEASE$ ^cran-comments.md$ ^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 1396e56..acc9fb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.o *.rds CRAN-RELEASE +.Rproj.user diff --git a/MALDIquant.Rproj b/MALDIquant.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/MALDIquant.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index f7a5115..60c7428 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -63,4 +63,5 @@ i <- findInterval(mass, uniqueMass) return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) + } diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index ab1248c..33ad79e 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -63,7 +63,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, for (i in seq_along(idx)) { - wl <- .whitelist.list(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) + wl <- .whitelistoccur(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) if (sum(wl)) { if (mergeWhitelists) { @@ -76,15 +76,17 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } + } ## turn matrix back into MassPeaks objects for (i in seq_along(idx)) { for (j in idx[[i]]) { - l[[j]]@mass <- l[[j]]@mass[w[i, o$i[o$r == j]]] - l[[j]]@intensity <- l[[j]]@intensity[w[i, o$i[o$r == j]]] - l[[j]]@snr <- l[[j]]@snr[w[i, o$i[o$r == j]]] + wmask <- w[i, o$i[o$r == j]] + l[[j]]@mass <- l[[j]]@mass[wmask] + l[[j]]@intensity <- l[[j]]@intensity[wmask] + l[[j]]@snr <- l[[j]]@snr[wmask] } } @@ -134,41 +136,46 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, } -.whitelist.list <- function(l, rows, minFrequency, minNumber) { - +.whitelistoccur <- function(l, rows, minFrequency, minNumber) { + ## test arguments if (is.na(minFrequency) && is.na(minNumber)) { stop(sQuote(minFrequency), " or ", sQuote(minNumber), " has to be a meaningful number!") } - + if (!is.na(minFrequency) && minFrequency < 0L) { minFrequency <- 0L warning(sQuote("minFrequency"), " < 0 does not make sense! Using 0 instead.") } - + if (!is.na(minNumber) && minNumber < 0L) { minNumber <- 0L warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.") } - + if (!is.na(minFrequency) && !is.na(minNumber)) { warning(sQuote("minFrequency"), " and ", sQuote("minNumber"), " arguments are given. Choosing the higher one.") } - + ## calculate minimal number of peaks - + keep.rows <- (l$r %in% rows) l$r <- l$r[keep.rows] l$i <- l$i[keep.rows] - + l$i <- split(l$i, factor(l$i)) + minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE) - - return( - sapply(seq_along(l$masses), function(m) { - sum(l$i == m) >= minPeakNumber - })) - -} \ No newline at end of file + + above.min <- function(x) { length(x) >= minPeakNumber } + wl.vals <- unlist(lapply(l$i, above.min)) + + wl <- array(FALSE, length(l$masses)) + wl[as.numeric(names(l$i))] <- wl.vals + + return(c(wl)) + +} +