From d6d956916e0c9c83635a97be271d1480adc5ac0d Mon Sep 17 00:00:00 2001
From: pi514
Date: Fri, 22 Oct 2021 12:01:16 +0100
Subject: [PATCH 1/2] using occurrence list
---
R/as.matrix-functions.R | 23 ++++++++++++
R/filterPeaks-functions.R | 79 +++++++++++++++++++++++++++++----------
2 files changed, 82 insertions(+), 20 deletions(-)
diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R
index e9472de..f7a5115 100644
--- a/R/as.matrix-functions.R
+++ b/R/as.matrix-functions.R
@@ -25,6 +25,7 @@
m
}
+
## .as.binary.matrix
## internal function to convert a matrix with NA to a binary one
##
@@ -41,3 +42,25 @@
mode(m) <- "integer"
m
}
+
+## .as.occurrence.list
+## internal function to create a list of peaks occurrence
+##
+## params:
+## l: list of AbstractMassObject objects
+##
+## returns:
+## a list
+.as.occurrence.list <- function(l) {
+
+ .stopIfNotIsMassObjectList(l)
+
+ mass <- .unlist(lapply(l, function(x)x@mass))
+ uniqueMass <- sort.int(unique(mass))
+ n <- lengths(l)
+ r <- rep.int(seq_along(l), n)
+
+ i <- findInterval(mass, uniqueMass)
+
+ return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass))
+}
diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R
index 9595939..ab1248c 100644
--- a/R/filterPeaks-functions.R
+++ b/R/filterPeaks-functions.R
@@ -51,44 +51,43 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
minNumber <- rep_len(minNumber, nl)
mergeWhitelists <- mergeWhitelists[1]
- ## binary peak matrix (mask)
- m <- .as.binary.matrix(.as.matrix.MassObjectList(l))
+ ## use peaks occurrence list
+ o <- .as.occurrence.list(l)
- ## whitelist
- w <- matrix(0L, nrow=nrow(m), ncol=ncol(m))
-
- ## group indices by labels
+ # group indices by labels
idx <- lapply(ll, function(x)which(labels == x))
## collect whitelists
+
+ w <- matrix(FALSE, nrow = nl, ncol = length(o$masses))
+
for (i in seq_along(idx)) {
- wl <- .whitelist(m, idx[[i]],
- minFrequency=minFrequency[i], minNumber=minNumber[i])
+
+ wl <- .whitelist.list(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i])
+
if (sum(wl)) {
if (mergeWhitelists) {
## R uses columnwise recycling
w <- t(t(w) | wl)
} else {
## R uses columnwise recycling
- w[idx[[i]], ] <- t(t(w[idx[[i]], , drop=FALSE]) | wl)
+ w[i, ] <- t(t(w[i, , drop=FALSE]) | wl)
}
} else {
warning("Empty peak whitelist for level ", sQuote(ll[i]), ".")
}
}
-
- ## apply whitelist
- w <- w & m
-
+
## turn matrix back into MassPeaks objects
- for (i in seq_along(l)) {
- j <- which(as.logical(m[i, ]))
- include <- which(w[i, j])
- l[[i]]@mass <- l[[i]]@mass[include]
- l[[i]]@intensity <- l[[i]]@intensity[include]
- l[[i]]@snr <- l[[i]]@snr[include]
+
+ for (i in seq_along(idx)) {
+ for (j in idx[[i]]) {
+ l[[j]]@mass <- l[[j]]@mass[w[i, o$i[o$r == j]]]
+ l[[j]]@intensity <- l[[j]]@intensity[w[i, o$i[o$r == j]]]
+ l[[j]]@snr <- l[[j]]@snr[w[i, o$i[o$r == j]]]
+ }
}
-
+
l
}
@@ -133,3 +132,43 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
colSums(m[rows, , drop=FALSE]) >= minPeakNumber
}
+
+
+.whitelist.list <- function(l, rows, minFrequency, minNumber) {
+
+ ## test arguments
+ if (is.na(minFrequency) && is.na(minNumber)) {
+ stop(sQuote(minFrequency), " or ", sQuote(minNumber),
+ " has to be a meaningful number!")
+ }
+
+ if (!is.na(minFrequency) && minFrequency < 0L) {
+ minFrequency <- 0L
+ warning(sQuote("minFrequency"),
+ " < 0 does not make sense! Using 0 instead.")
+ }
+
+ if (!is.na(minNumber) && minNumber < 0L) {
+ minNumber <- 0L
+ warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.")
+ }
+
+ if (!is.na(minFrequency) && !is.na(minNumber)) {
+ warning(sQuote("minFrequency"), " and ", sQuote("minNumber"),
+ " arguments are given. Choosing the higher one.")
+ }
+
+ ## calculate minimal number of peaks
+
+ keep.rows <- (l$r %in% rows)
+ l$r <- l$r[keep.rows]
+ l$i <- l$i[keep.rows]
+
+ minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE)
+
+ return(
+ sapply(seq_along(l$masses), function(m) {
+ sum(l$i == m) >= minPeakNumber
+ }))
+
+}
\ No newline at end of file
From 11764c1d27106a18d378086bb08a563e5fc89d83 Mon Sep 17 00:00:00 2001
From: Paolo Inglese
Date: Thu, 28 Oct 2021 16:33:17 +0100
Subject: [PATCH 2/2] optimized speed
---
.Rbuildignore | 2 ++
.gitignore | 1 +
MALDIquant.Rproj | 17 +++++++++++++++
R/as.matrix-functions.R | 1 +
R/filterPeaks-functions.R | 45 ++++++++++++++++++++++-----------------
5 files changed, 47 insertions(+), 19 deletions(-)
create mode 100644 MALDIquant.Rproj
diff --git a/.Rbuildignore b/.Rbuildignore
index abbad15..d887687 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -9,3 +9,5 @@
^CRAN-RELEASE$
^cran-comments.md$
^\.github$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
index 1396e56..acc9fb3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
*.o
*.rds
CRAN-RELEASE
+.Rproj.user
diff --git a/MALDIquant.Rproj b/MALDIquant.Rproj
new file mode 100644
index 0000000..21a4da0
--- /dev/null
+++ b/MALDIquant.Rproj
@@ -0,0 +1,17 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/R/as.matrix-functions.R b/R/as.matrix-functions.R
index f7a5115..60c7428 100644
--- a/R/as.matrix-functions.R
+++ b/R/as.matrix-functions.R
@@ -63,4 +63,5 @@
i <- findInterval(mass, uniqueMass)
return(list(r = as.integer(r), i = as.integer(i), masses = uniqueMass))
+
}
diff --git a/R/filterPeaks-functions.R b/R/filterPeaks-functions.R
index ab1248c..33ad79e 100644
--- a/R/filterPeaks-functions.R
+++ b/R/filterPeaks-functions.R
@@ -63,7 +63,7 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
for (i in seq_along(idx)) {
- wl <- .whitelist.list(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i])
+ wl <- .whitelistoccur(o, idx[[i]], minFrequency=minFrequency[i], minNumber=minNumber[i])
if (sum(wl)) {
if (mergeWhitelists) {
@@ -76,15 +76,17 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
} else {
warning("Empty peak whitelist for level ", sQuote(ll[i]), ".")
}
+
}
## turn matrix back into MassPeaks objects
for (i in seq_along(idx)) {
for (j in idx[[i]]) {
- l[[j]]@mass <- l[[j]]@mass[w[i, o$i[o$r == j]]]
- l[[j]]@intensity <- l[[j]]@intensity[w[i, o$i[o$r == j]]]
- l[[j]]@snr <- l[[j]]@snr[w[i, o$i[o$r == j]]]
+ wmask <- w[i, o$i[o$r == j]]
+ l[[j]]@mass <- l[[j]]@mass[wmask]
+ l[[j]]@intensity <- l[[j]]@intensity[wmask]
+ l[[j]]@snr <- l[[j]]@snr[wmask]
}
}
@@ -134,41 +136,46 @@ filterPeaks <- function(l, minFrequency, minNumber, labels,
}
-.whitelist.list <- function(l, rows, minFrequency, minNumber) {
-
+.whitelistoccur <- function(l, rows, minFrequency, minNumber) {
+
## test arguments
if (is.na(minFrequency) && is.na(minNumber)) {
stop(sQuote(minFrequency), " or ", sQuote(minNumber),
" has to be a meaningful number!")
}
-
+
if (!is.na(minFrequency) && minFrequency < 0L) {
minFrequency <- 0L
warning(sQuote("minFrequency"),
" < 0 does not make sense! Using 0 instead.")
}
-
+
if (!is.na(minNumber) && minNumber < 0L) {
minNumber <- 0L
warning(sQuote("minNumber"), " < 0 does not make sense! Using 0 instead.")
}
-
+
if (!is.na(minFrequency) && !is.na(minNumber)) {
warning(sQuote("minFrequency"), " and ", sQuote("minNumber"),
" arguments are given. Choosing the higher one.")
}
-
+
## calculate minimal number of peaks
-
+
keep.rows <- (l$r %in% rows)
l$r <- l$r[keep.rows]
l$i <- l$i[keep.rows]
-
+ l$i <- split(l$i, factor(l$i))
+
minPeakNumber <- max(minFrequency * length(unique(l$r)), minNumber, na.rm=TRUE)
-
- return(
- sapply(seq_along(l$masses), function(m) {
- sum(l$i == m) >= minPeakNumber
- }))
-
-}
\ No newline at end of file
+
+ above.min <- function(x) { length(x) >= minPeakNumber }
+ wl.vals <- unlist(lapply(l$i, above.min))
+
+ wl <- array(FALSE, length(l$masses))
+ wl[as.numeric(names(l$i))] <- wl.vals
+
+ return(c(wl))
+
+}
+