diff --git a/.Rbuildignore b/.Rbuildignore index abbad15..d887687 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ ^CRAN-RELEASE$ ^cran-comments.md$ ^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 1396e56..acc9fb3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.o *.rds CRAN-RELEASE +.Rproj.user diff --git a/MALDIquant.Rproj b/MALDIquant.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/MALDIquant.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R index e9472de..60c7428 100644 --- a/R/as.matrix-functions.R +++ b/R/as.matrix-functions.R @@ -25,6 +25,7 @@ m } + ## .as.binary.matrix ## internal function to convert a matrix with NA to a binary one ## @@ -41,3 +42,26 @@ mode(m) <- "integer" m } + +## .as.occurrence.list +## internal function to create a list of peaks occurrence +## +## params: +## l: list of AbstractMassObject objects +## +## returns: +## a list +.as.occurrence.list <- function(l) { + + .stopIfNotIsMassObjectList(l) + + mass <- .unlist(lapply(l, function(x)x@mass)) + uniqueMass <- sort.int(unique(mass)) + n <- lengths(l) + r <- rep.int(seq_along(l), n) + + i <- findInterval(mass, uniqueMass) + + return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass)) + +} diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R index 9595939..33ad79e 100644 --- a/R/filterPeaks-functions.R +++ b/R/filterPeaks-functions.R @@ -51,44 +51,45 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, minNumber <- rep_len(minNumber, nl) mergeWhitelists <- mergeWhitelists[1] - ## binary peak matrix (mask) - m <- .as.binary.matrix(.as.matrix.MassObjectList(l)) + ## use peaks occurrence list + o <- .as.occurrence.list(l) - ## whitelist - w <- matrix(0L, nrow=nrow(m), ncol=ncol(m)) - - ## group indices by labels + # group indices by labels idx <- lapply(ll, function(x)which(labels == x)) ## collect whitelists + + w <- matrix(FALSE, nrow = nl, ncol = length(o$masses)) + for (i in seq_along(idx)) { - wl <- .whitelist(m, idx[[i]], - minFrequency=minFrequency[i], minNumber=minNumber[i]) + + wl <- .whitelistoccur(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i]) + if (sum(wl)) { if (mergeWhitelists) { ## R uses columnwise recycling w <- t(t(w) | wl) } else { ## R uses columnwise recycling - w[idx[[i]], ] <- t(t(w[idx[[i]], , drop=FALSE]) | wl) + w[i, ] <- t(t(w[i, , drop=FALSE]) | wl) } } else { warning("Empty peak whitelist for level ", sQuote(ll[i]), ".") } + } - - ## apply whitelist - w <- w & m - + ## turn matrix back into MassPeaks objects - for (i in seq_along(l)) { - j <- which(as.logical(m[i, ])) - include <- which(w[i, j]) - l[[i]]@mass <- l[[i]]@mass[include] - l[[i]]@intensity <- l[[i]]@intensity[include] - l[[i]]@snr <- l[[i]]@snr[include] + + for (i in seq_along(idx)) { + for (j in idx[[i]]) { + wmask <- w[i, o$i[o$r == j]] + l[[j]]@mass <- l[[j]]@mass[wmask] + l[[j]]@intensity <- l[[j]]@intensity[wmask] + l[[j]]@snr <- l[[j]]@snr[wmask] + } } - + l } @@ -133,3 +134,48 @@ filterPeaks <- function(l, minFrequency, minNumber, labels, colSums(m[rows, , drop=FALSE]) >= minPeakNumber } + + +.whitelistoccur <- function(l, rows, minFrequency, minNumber) { + + ## test arguments + if (is.na(minFrequency) && is.na(minNumber)) { + stop(sQuote(minFrequency), " or ", sQuote(minNumber), + " has to be a meaningful number!") + } + + if (!is.na(minFrequency) && minFrequency < 0L) { + minFrequency <- 0L + warning(sQuote("minFrequency"), + " < 0 does not make sense! Using 0 instead.") + } + + if (!is.na(minNumber) && minNumber < 0L) { + minNumber <- 0L + warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.") + } + + if (!is.na(minFrequency) && !is.na(minNumber)) { + warning(sQuote("minFrequency"), " and ", sQuote("minNumber"), + " arguments are given. Choosing the higher one.") + } + + ## calculate minimal number of peaks + + keep.rows <- (l$r %in% rows) + l$r <- l$r[keep.rows] + l$i <- l$i[keep.rows] + l$i <- split(l$i, factor(l$i)) + + minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE) + + above.min <- function(x) { length(x) >= minPeakNumber } + wl.vals <- unlist(lapply(l$i, above.min)) + + wl <- array(FALSE, length(l$masses)) + wl[as.numeric(names(l$i))] <- wl.vals + + return(c(wl)) + +} +