diff --git a/.Rbuildignore b/.Rbuildignore index 5f87fbb..5466049 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,5 @@ ^\.git$ ^\.gitignore$ ^\.travis.yml$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 1377554..076c102 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.swp +.Rproj.user diff --git a/DESCRIPTION b/DESCRIPTION index 0499f22..a9dd182 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: MALDIquantForeign -Version: 0.12 +Version: 0.12.75 Date: 2019-01-30 Title: Import/Export Routines for 'MALDIquant' Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"), @@ -7,9 +7,10 @@ Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"), comment=c(ORCID="0000-0001-7406-4443")), person("Pietro", "Franceschi", role=c("ctb"), email="pietro.franceschi@fmach.it")) -Depends: R (>= 3.2.2), methods, MALDIquant (>= 1.16.4) +biocViews: +Depends: R (>= 3.2.2), methods, MALDIquant (>= 1.19.15) Imports: base64enc, digest, readBrukerFlexData (>= 1.7), readMzXmlData - (>= 2.7), XML + (>= 2.7), XML, parallel Suggests: knitr, testthat (>= 0.8), RNetCDF (>= 1.6.1) Description: Functions for reading (tab, csv, Bruker fid, Ciphergen XML, mzXML, mzML, imzML, Analyze 7.5, CDF, mMass MSD) and diff --git a/MALDIquantForeign.Rproj b/MALDIquantForeign.Rproj new file mode 100644 index 0000000..7e331bc --- /dev/null +++ b/MALDIquantForeign.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 7 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/R/import-functions.R b/R/import-functions.R index e530f6b..b064e5b 100644 --- a/R/import-functions.R +++ b/R/import-functions.R @@ -354,7 +354,8 @@ importMzMl <- function(path, ...) 
{ #' Import imzML files #' #' This function imports files in imzML file format -#' into \code{\link[MALDIquant]{MassSpectrum-class}} or +#' into \code{\link[MALDIquant]{MassSpectrum-class}}, +#' \code{\link[MALDIquant]{MassSpectrumOnDisk-class}} or #' \code{\link[MALDIquant]{MassPeaks-class}} objects. #' #' @param path \code{character}, path to directory or file which should be read @@ -362,6 +363,13 @@ importMzMl <- function(path, ...) { #' @param coordinates \code{matrix}, 2 column matrix that contains the x- and #' y-coordinates for spectra that should be imported. Other spectra would be #' ignored. +#' @param attachOnly logical (defaults to \code{FALSE}), whether to attach the dataset via the +#' \code{OnDiskVector} class without loading it into memory. See \code{\link[MALDIquant]{MassSpectrumOnDisk-class}}. +#' @param duplicateFile logical, when \code{TRUE} (default), creates a temporary copy of the binary \code{ibd} +#' file in the \code{tempdir} and attaches the \code{\link[MALDIquant]{MassSpectrumOnDisk}} objects to it so +#' as not to affect the original \code{ibd} file. +#' @param mc.cores integer, specifying number of cores for parallel evaluation through \code{parallel::mclapply}. +#' Falls back to \code{mc.cores = 1} on Windows. #' @param \ldots arguments to be passed to #' \code{\link[MALDIquantForeign]{import}}. #' @@ -372,9 +380,11 @@ importMzMl <- function(path, ...) { #' \code{\link[MALDIquant]{MassSpectrum-class}}, #' \code{\link[MALDIquant]{MassPeaks-class}} #' @author Sebastian Gibb -#' @references \url{http://strimmerlab.org/software/maldiquant/}, \cr +#' @references \url{http://strimmerlab.org/software/maldiquant/}, \cr\cr #' Definition of \code{imzML} format: -#' \url{http://www.imzml.org/} +#' \url{http://www.imzml.org/}\cr\cr +#' \code{"matter"}: Kylie A. Bemis (2018). matter: A framework for rapid prototyping with binary data on disk. R +#' package version 1.8.0. \url{https://github.com/kuwisdelu/matter}. 
#' @examples #' #' library("MALDIquant") @@ -391,9 +401,12 @@ importMzMl <- function(path, ...) { #' coordinates = cbind(1:2, c(1, 1))) #' #' @rdname importImzMl-functions +#' #' @export -importImzMl <- function(path, coordinates=NULL, ...) { - import(path=path, type="imzml", coordinates=coordinates, ...) +importImzMl <- function(path, coordinates=NULL, attachOnly=FALSE, duplicateFile=TRUE, + mc.cores = 1L, ...) { + import(path=path, type="imzml", coordinates=coordinates, attachOnly=attachOnly, + duplicateFile=duplicateFile, mc.cores = mc.cores, ...) } #' Import Ciphergen XML files diff --git a/R/importImzMl-functions.R b/R/importImzMl-functions.R index e3e02a6..6e2cf9d 100644 --- a/R/importImzMl-functions.R +++ b/R/importImzMl-functions.R @@ -17,8 +17,8 @@ ## along with MALDIquantForeign. If not, see .importImzMl <- function(file, centroided=FALSE, massRange=c(0, Inf), - minIntensity=0, coordinates=NULL, - verbose=FALSE) { + minIntensity=0, coordinates=NULL, attachOnly=FALSE, + duplicateFile=TRUE, mc.cores = 1L, verbose=FALSE) { .msg(verbose, "Reading spectrum from ", sQuote(file), " ...") @@ -31,6 +31,15 @@ if (!file.exists(ibdFilename)) { stop("File ", sQuote(ibdFilename), " doesn't exists!") } + + if (attachOnly) { # attach rather than load + if (duplicateFile) { # duplicate the ibd file to the temp dir in order to keep the original ibd intact + tf <- paste0(tempfile(), "_", basename(ibdFilename)) + if (!file.copy(from=ibdFilename, to=tf)) stop("Could not copy ", sQuote(ibdFilename), " to ", sQuote(tf), "!") + ibdFilename <- tf + + } + } s <- .parseMzMl(file=file, verbose=verbose) @@ -89,7 +98,16 @@ } n <- x[column, "length"] e <- x[column, "encodedLength"] - readBin(file, double(), n=n, size=e/n, signed=TRUE, endian="little") + + if(attachOnly){ + + OnDiskVector(path=unname(summary(ibd)[[1]]), n=n, offset=x[column, "offset"], size=8L) + + }else{ + + readBin(file, double(), n=n, size=e/n, signed=TRUE, endian="little") + } + } n <- length(sel) @@ -97,27 +115,63 @@ isProcessed <- s$ims$type == "processed" isSeekNeeded <- length(s$ims$ibd) > 
length(sel) + + if(isProcessed && attachOnly){ + message("The imzML file is of type 'processed'. The 'attachOnly' option is only available ", + "for 'continuous' type and therefore will be overridden. In-memory MassPeaks objects will be created.") + attachOnly <- FALSE + } + if (!isProcessed) { mass <- .readValues(ibd, s$ims$ibd[[sel[1L]]], "mass", isSeekNeeded) } - ## read mass and intensity values - for (i in seq(along=sel)) { - .msg(verbose, "Reading binary data for spectrum ", i, "/", n, " ...") - - m <- modifyList(s$metaData, s$spectra[[sel[i]]]$metaData) - m$file <- file - - if (isProcessed) { - mass <- .readValues(ibd, s$ims$ibd[[sel[i]]], "mass", isSeekNeeded) - } - intensity <- .readValues(ibd, s$ims$ibd[[sel[i]]], "intensity", isSeekNeeded) - spectra[[i]] <- .createMassObject(mass=mass, intensity=intensity, - metaData=m, centroided=centroided, - massRange=massRange, - minIntensity=minIntensity, - verbose=verbose) + ## read mass and intensity values - in parallel only when attaching: forked workers share the ibd connection's file offset, so concurrent seek/readBin calls would race + mc.cores <- if (.Platform$OS.type == "windows" || !attachOnly) 1L else mc.cores + + spectra <- parallel::mclapply(X = seq_along(sel), + mc.cores = mc.cores, + FUN = function(i) { + + .msg(verbose, "Reading binary data for spectrum ", i, "/", n, " ...") + + m <- modifyList(s$metaData, s$spectra[[sel[i]]]$metaData) + m$file <- file + + if (isProcessed) { + mass <- .readValues(ibd, s$ims$ibd[[sel[i]]], "mass", isSeekNeeded) + } + intensity <- .readValues(ibd, s$ims$ibd[[sel[i]]], "intensity", isSeekNeeded) + + if(attachOnly){ + tmpSpectrum <- new("MassSpectrumOnDisk", mass=mass, intensity=intensity, + metaData=m) + }else{ + tmpSpectrum <- .createMassObject(mass=mass, intensity=intensity, + metaData=m, centroided=centroided, + massRange=massRange, + minIntensity=minIntensity, + verbose=verbose) + } + + tmpSpectrum + }) + + + + + .msg(verbose, "Done. ") + + if(attachOnly) + { + if(duplicateFile) + message("\nNOTE: imzML dataset was loaded via attachOnly option and a duplicate file was generated. 
", "Any changes made to the spectra are directly written to the duplicate file.\n ") + else + message("\nNOTE: imzML dataset was loaded via attachOnly option to the ORIGINAL FILE. ", + "Any changes made to the spectra are directly written to the imzML file.\n ") } + spectra } diff --git a/man/importImzMl-functions.Rd b/man/importImzMl-functions.Rd index 60dad34..45a6d23 100644 --- a/man/importImzMl-functions.Rd +++ b/man/importImzMl-functions.Rd @@ -4,7 +4,8 @@ \alias{importImzMl} \title{Import imzML files} \usage{ -importImzMl(path, coordinates = NULL, ...) +importImzMl(path, coordinates = NULL, attachOnly = FALSE, + duplicateFile = TRUE, ...) } \arguments{ \item{path}{\code{character}, path to directory or file which should be read @@ -14,6 +15,13 @@ in.} y-coordinates for spectra that should be imported. Other spectra would be ignored.} +\item{attachOnly}{logical (defaults to \code{FALSE}), whether to attach the dataset via the \code{matter} +package without loading it into memory. See \code{\link[MALDIquant]{MassSpectrumOnDisk-class}}.} + +\item{duplicateFile}{logical, when \code{TRUE} (default), creates a temporary copy of the binary \code{ibd} +file in the \code{tempdir} and attaches the \code{\link[MALDIquant]{MassSpectrumOnDisk}} objects to it so +as not to affect the original \code{ibd} file.} + \item{\ldots}{arguments to be passed to \code{\link[MALDIquantForeign]{import}}.} } @@ -24,7 +32,8 @@ a \code{list} of \code{\link[MALDIquant]{MassSpectrum-class}} or } \description{ This function imports files in imzML file format -into \code{\link[MALDIquant]{MassSpectrum-class}} or +into \code{\link[MALDIquant]{MassSpectrum-class}}, +\code{\link[MALDIquant]{MassSpectrumOnDisk-class}} or \code{\link[MALDIquant]{MassPeaks-class}} objects. 
} \examples{ @@ -44,9 +53,11 @@ s <- importImzMl(file.path(exampleDirectory, "tiny_continuous.imzML"), } \references{ -\url{http://strimmerlab.org/software/maldiquant/}, \cr +\url{http://strimmerlab.org/software/maldiquant/}, \cr\cr Definition of \code{imzML} format: -\url{http://www.imzml.org/} +\url{http://www.imzml.org/}\cr\cr +\code{"matter"}: Kylie A. Bemis (2018). matter: A framework for rapid prototyping with binary data on disk. R +package version 1.8.0. \url{https://github.com/kuwisdelu/matter}. } \seealso{ \code{\link[MALDIquant]{MassSpectrum-class}},