From 207e9deb5f386298fe24fb484c5068d8f6a60ce2 Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 14:10:47 +0800 Subject: [PATCH 1/7] add new filter method to get.placement.jplace() --- .gitignore | 21 ++- NAMESPACE | 1 + R/jplace.R | 428 ++++++++++++++++++++++++------------------ man/get-placements.Rd | 18 +- treeio.Rproj | 37 ++-- 5 files changed, 293 insertions(+), 212 deletions(-) diff --git a/.gitignore b/.gitignore index 5615568..1c15175 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,11 @@ -.Rproj.user -.Rhistory -.RData -*~ -.DS_Store -.svn -mkdocs/mysoftware -__pycache__ -*.Rcheck -*.html +.Rproj +.Rproj.user +.Rhistory +.RData +*~ +.DS_Store +.svn +mkdocs/mysoftware +__pycache__ +*.Rcheck +*.html diff --git a/NAMESPACE b/NAMESPACE index 936bcac..a80e317 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -163,6 +163,7 @@ importFrom(dplyr,select) importFrom(dplyr,summarize) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) +importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(methods,"slot<-") importFrom(methods,.hasSlot) diff --git a/R/jplace.R b/R/jplace.R index 4d4f131..97d7d1e 100644 --- a/R/jplace.R +++ b/R/jplace.R @@ -1,183 +1,245 @@ -##' read jplace file -##' -##' -##' @title read.jplace -##' @param file jplace file -##' @return \code{jplace} instance -##' @importFrom jsonlite fromJSON -##' @export -##' @author Guangchuang Yu -##' @examples -##' jp <- system.file("extdata", "sample.jplace", package="treeio") -##' read.jplace(jp) -read.jplace <- function(file) { - fields <- tree <- placements <- NULL - version <- metadata <- NULL - jtree <- fromJSON(file) - phylo <- jplace_treetext_to_phylo(jtree$tree) - placements <- extract.placement(jtree, phylo) - info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") - res <- new("jplace", - treetext = jtree$tree, - phylo = phylo, - placements = placements, - info = info, - file = filename(file) - ) - - res@data <- summarize_placement(res) - return(res) -} - -##' @importFrom dplyr summarize -##' @importFrom dplyr mutate -##' @importFrom dplyr group_by -##' @importFrom dplyr n -summarize_placement <- function(tree) { - place <- get.placements(tree, by="best") - ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) - group_by(place, .data$node) %>% summarize(nplace=n()) %>% - full_join(ids, by='node') %>% - mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) -} - -##' @method get.placements jplace -##' @param by one of 'best' and 'all' -##' @export -##' @rdname get-placements -##' @importFrom dplyr group_by -##' @importFrom dplyr filter -get.placements.jplace <- function(tree, by="best", ...) { - placements <- tree@placements - if (!'likelihood' %in% names(placements)) - return(placements) - - if (by == "best") { - ## http://astrostatistics.psu.edu/su07/R/html/base/html/all.equal.html - ## due to precision, number are identical maynot be equal, - ## so use all.equal which can test nearly equal number - ## if not equals, the output is a descript string of the differences - placements <- group_by(placements, .data$name) %>% - filter(.data$likelihood == min(.data$likelihood)) - } - return(placements) -} - -getplacedf <- function(places, nm){ - ## the first column of placements maybe a matrix or one numeric vector, - ## so when it is numeric vector, the nplaces will be 1. - ## and the type of nm also is various. - if (!inherits(places, "matrix")){ - nplaces <- 1 - } else{ - nplaces <- nrow(places) - } - if (inherits(nm, "matrix")){ - nmsize <- nrow(nm) - tmpn <- nm[,1] - } - if (inherits(nm, "list")){ - nmsize <- length(nm) - tmpn <- vapply(nm, function(x)x[1], character(1)) - } - if (inherits(nm, "character")){ - nmsize <- length(nm) - tmpn <- as.vector(nm) - } - ##example: - ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), - ## and the n column is ["read1", "read2"] (the type of n is character vector), so - ## will use "inherits(nm, "character")" block. - ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). - places.df <- rep(list(places), nmsize) - ## then this will generate the names of each matrix for the nm. - ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), - ## 2 is the length of nm. - name <- rep(tmpn, rep(nplaces, nmsize)) - places.df <- do.call("rbind", places.df) - places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) - return(places.df) -} - - -mergenm <- function(n, nm){ - ## merge the n and nm. - ## it is impossible that n and nm is empty simultaneously, - ## so we will keep the column not NULL. - if(is.null(n)&&!is.null(nm)) {return(nm)} - if(is.null(nm)&&!is.null(n)) {return(n)} - if(is.null(n)&&is.null(nm)){ - stop("the placements of jplace should have corresponding name!") - } -} - - -extract.placement <- function(object, phylo) { - placements <- object$placements - if (ncol(placements)==2){ - ## when placements contained p and n two columns, - ## this will process placements row by row with getplacedf function. - ## The order of `p` and `n` column is not fixed. I think colnames of - ## placements (`p`, `n`, `nm`) are fixed, but when column number is - ## two, the `n` or `nm` is not fixed. - nameidx <- match("p", colnames(placements)) - place.df <- mapply(getplacedf, - placements$p, - placements[,-nameidx], - SIMPLIFY=FALSE) - } - if(ncol(placements)==3){ - ## when placements contained p ,n and nm three columns, - ## first, we merge n and nm row by row. - tmpname <- mapply(mergenm, - placements$n, - placements$nm, - SIMPLIFY=FALSE) - ## then, it becomes the same as two columns. - place.df <- mapply(getplacedf, - placements$p, - tmpname, - SIMPLIFY=FALSE) - } - place.df <- do.call("rbind", place.df) - colnames(place.df) <- c("name", object$fields) - ## place <- placements[,1] - - ## ids <- NULL - ## if (length(placements) == 2) { - ## tmpids <- placements[,2] - ## }else{ - ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) - ## } - ## ids <- vapply(tmpids, function(x) x[1], character(1)) - ## names(place) <- ids - ## place.df <- do.call("rbind", place) - ## row.names(place.df) <- NULL - ## if (!is.null(ids)) { - ## nn <- rep(ids, vapply(place, function(x) { - ## nr <- nrow(x) - ## if (is.null(nr)) - ## return(1) - ## return(nr) - ## }, numeric(1))) - ## place.df <- data.frame(name=nn, place.df) - ## colnames(place.df) <- c("name", object$fields) - ## } else { - ## colnames(place.df) <- object$fields - ## } - edgeNum.df <- attr(phylo, "edgeNum") - place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") - place.df <- getnewplacements(place.df) - as_tibble(place.df) -} - -## To avoid the character column -getnewplacements <- function(placedf){ - tmpfile <- tempfile() - utils::write.csv(placedf, tmpfile) - placementdf <- utils::read.csv(tmpfile, row.names=1) - ## file.remove(tmpfile) - return(placementdf) -} - - +##' read jplace file +##' +##' +##' @title read.jplace +##' @param file jplace file +##' @return \code{jplace} instance +##' @importFrom jsonlite fromJSON +##' @export +##' @author Guangchuang Yu +##' @examples +##' jp <- system.file("extdata", "sample.jplace", package="treeio") +##' read.jplace(jp) +read.jplace <- function(file) { + fields <- tree <- placements <- NULL + version <- metadata <- NULL + jtree <- fromJSON(file) + phylo <- jplace_treetext_to_phylo(jtree$tree) + placements <- extract.placement(jtree, phylo) + info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") + res <- new("jplace", + treetext = jtree$tree, + phylo = phylo, + placements = placements, + info = info, + file = filename(file) + ) + + res@data <- summarize_placement(res) + return(res) +} + +##' @importFrom dplyr summarize +##' @importFrom dplyr mutate +##' @importFrom dplyr group_by +##' @importFrom dplyr n +summarize_placement <- function(tree) { + place <- get.placements(tree, by="best") + ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) + group_by(place, .data$node) %>% summarize(nplace=n()) %>% + full_join(ids, by='node') %>% + mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) +} + + +#' @method get.placements jplace +#' @param tree jtree +#' @param by filter methods "all","max_lwr","max_pendant", +#' "min_likelihood","lwr","pendant","likelihood" +#' @param filter_value a given value to filter placements. +#' @rdname get-placements +#' @importFrom dplyr group_by +#' @importFrom dplyr filter +#' @importFrom magrittr %<>% +#' @return a dataframe of placements +#' @export +#' +#' @examples +#' \donttest{ +#' jp <- system.file("extdata", "sample.jplace", package="treeio") +#' jplace <- read.jplace(jp) +#' placements <- get.placement(jp,by="all") +#' } +get.placements.jplace <- function(tree, by="all", filter_value = NULL) { + jplist <- c("all","max_lwr","max_pendant", + "min_likelihood","lwr","pendant","likelihood") + if(!(by %in% jplist)){ + stop("by should be one of all,max_lwr,max_pendant, + min_likelihood,lwr,pendant,likelihood") + } + + if(by %in% c("lwr","pendant","likelihood")){ + if(!is.null(filter_value)){ + message("Placement will be filtered by the given value...") + }else{ + stop("The filter_value should be given + in order to filter placement by the given value.") + } + + } + + placements <- tree@placements + + if (by == "all") + return(placements) + + if (by == "max_lwr") { + if (!'like_weight_ratio' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) + } + + } + + if (by == "max_pendant"){ + if (!'pendant_length' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$pendant_length == max(.data$pendant_length)) + } + } + + if (by == "min_likelihood"){ + if (!'likelihood' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$likelihood == min(.data$likelihood)) + } + } + + if (by == "lwr"){ + placements %<>% filter(.data$like_weight_ratio > filter_value) + } + if (by == "pendant"){ + placements %<>% filter(.data$pendant_length > filter_value) + } + if (by == "likelihood"){ + placements %<>% filter(.data$likelihood < filter_value) + } + + return(placements) +} + + +getplacedf <- function(places, nm){ + ## the first column of placements maybe a matrix or one numeric vector, + ## so when it is numeric vector, the nplaces will be 1. + ## and the type of nm also is various. + if (!inherits(places, "matrix")){ + nplaces <- 1 + } else{ + nplaces <- nrow(places) + } + if (inherits(nm, "matrix")){ + nmsize <- nrow(nm) + tmpn <- nm[,1] + } + if (inherits(nm, "list")){ + nmsize <- length(nm) + tmpn <- vapply(nm, function(x)x[1], character(1)) + } + if (inherits(nm, "character")){ + nmsize <- length(nm) + tmpn <- as.vector(nm) + } + ##example: + ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), + ## and the n column is ["read1", "read2"] (the type of n is character vector), so + ## will use "inherits(nm, "character")" block. + ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). + places.df <- rep(list(places), nmsize) + ## then this will generate the names of each matrix for the nm. + ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), + ## 2 is the length of nm. + name <- rep(tmpn, rep(nplaces, nmsize)) + places.df <- do.call("rbind", places.df) + places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) + return(places.df) +} + + +mergenm <- function(n, nm){ + ## merge the n and nm. + ## it is impossible that n and nm is empty simultaneously, + ## so we will keep the column not NULL. + if(is.null(n)&&!is.null(nm)) {return(nm)} + if(is.null(nm)&&!is.null(n)) {return(n)} + if(is.null(n)&&is.null(nm)){ + stop("the placements of jplace should have corresponding name!") + } +} + + +extract.placement <- function(object, phylo) { + placements <- object$placements + if (ncol(placements)==2){ + ## when placements contained p and n two columns, + ## this will process placements row by row with getplacedf function. + ## The order of `p` and `n` column is not fixed. I think colnames of + ## placements (`p`, `n`, `nm`) are fixed, but when column number is + ## two, the `n` or `nm` is not fixed. + nameidx <- match("p", colnames(placements)) + place.df <- mapply(getplacedf, + placements$p, + placements[,-nameidx], + SIMPLIFY=FALSE) + } + if(ncol(placements)==3){ + ## when placements contained p ,n and nm three columns, + ## first, we merge n and nm row by row. + tmpname <- mapply(mergenm, + placements$n, + placements$nm, + SIMPLIFY=FALSE) + ## then, it becomes the same as two columns. + place.df <- mapply(getplacedf, + placements$p, + tmpname, + SIMPLIFY=FALSE) + } + place.df <- do.call("rbind", place.df) + colnames(place.df) <- c("name", object$fields) + ## place <- placements[,1] + + ## ids <- NULL + ## if (length(placements) == 2) { + ## tmpids <- placements[,2] + ## }else{ + ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) + ## } + ## ids <- vapply(tmpids, function(x) x[1], character(1)) + ## names(place) <- ids + ## place.df <- do.call("rbind", place) + ## row.names(place.df) <- NULL + ## if (!is.null(ids)) { + ## nn <- rep(ids, vapply(place, function(x) { + ## nr <- nrow(x) + ## if (is.null(nr)) + ## return(1) + ## return(nr) + ## }, numeric(1))) + ## place.df <- data.frame(name=nn, place.df) + ## colnames(place.df) <- c("name", object$fields) + ## } else { + ## colnames(place.df) <- object$fields + ## } + edgeNum.df <- attr(phylo, "edgeNum") + place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") + place.df <- getnewplacements(place.df) + as_tibble(place.df) +} + +## To avoid the character column +getnewplacements <- function(placedf){ + tmpfile <- tempfile() + utils::write.csv(placedf, tmpfile) + placementdf <- utils::read.csv(tmpfile, row.names=1) + ## file.remove(tmpfile) + return(placementdf) +} + + diff --git a/man/get-placements.Rd b/man/get-placements.Rd index 809f9b2..b37aa72 100644 --- a/man/get-placements.Rd +++ b/man/get-placements.Rd @@ -7,18 +7,30 @@ \usage{ get.placements(tree, ...) -\method{get.placements}{jplace}(tree, by = "best", ...) +\method{get.placements}{jplace}(tree, by = "all", filter_value = NULL) } \arguments{ -\item{tree}{tree object} +\item{tree}{jtree} \item{...}{additional parameters} -\item{by}{one of 'best' and 'all'} +\item{by}{filter methods "all","max_lwr","max_pendant", +"min_likelihood","lwr","pendant","likelihood"} + +\item{filter_value}{a given value to filter placements.} } \value{ placement tibble + +a dataframe of placements } \description{ access placement information } +\examples{ +\donttest{ +jp <- system.file("extdata", "sample.jplace", package="treeio") +jplace <- read.jplace(jp) +placements <- get.placement(jp,by="all") +} +} diff --git a/treeio.Rproj b/treeio.Rproj index d848a9f..e351a53 100644 --- a/treeio.Rproj +++ b/treeio.Rproj @@ -1,16 +1,21 @@ -Version: 1.0 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -Encoding: UTF-8 - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 4 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: XeLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace From bbe50c15f8a96141cd84b3178ed69e46d1fcdb8b Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 14:15:30 +0800 Subject: [PATCH 2/7] remove Rproj --- .gitignore | 2 +- treeio.Rproj | 21 --------------------- 2 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 treeio.Rproj diff --git a/.gitignore b/.gitignore index 1c15175..ddcf790 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -.Rproj +*.Rproj .Rproj.user .Rhistory .RData diff --git a/treeio.Rproj b/treeio.Rproj deleted file mode 100644 index e351a53..0000000 --- a/treeio.Rproj +++ /dev/null @@ -1,21 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 4 -Encoding: UTF-8 - -RnwWeave: knitr -LaTeX: XeLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace From d98a782726951089013202a1cd1f47af73de6398 Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 15:44:20 +0800 Subject: [PATCH 3/7] replace \r\n --- R/jplace.R | 490 ++++++++++++++++++++++++++--------------------------- 1 file changed, 245 insertions(+), 245 deletions(-) diff --git a/R/jplace.R b/R/jplace.R index 97d7d1e..ac8e20f 100644 --- a/R/jplace.R +++ b/R/jplace.R @@ -1,245 +1,245 @@ -##' read jplace file -##' -##' -##' @title read.jplace -##' @param file jplace file -##' @return \code{jplace} instance -##' @importFrom jsonlite fromJSON -##' @export -##' @author Guangchuang Yu -##' @examples -##' jp <- system.file("extdata", "sample.jplace", package="treeio") -##' read.jplace(jp) -read.jplace <- function(file) { - fields <- tree <- placements <- NULL - version <- metadata <- NULL - jtree <- fromJSON(file) - phylo <- jplace_treetext_to_phylo(jtree$tree) - placements <- extract.placement(jtree, phylo) - info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") - res <- new("jplace", - treetext = jtree$tree, - phylo = phylo, - placements = placements, - info = info, - file = filename(file) - ) - - res@data <- summarize_placement(res) - return(res) -} - -##' @importFrom dplyr summarize -##' @importFrom dplyr mutate -##' @importFrom dplyr group_by -##' @importFrom dplyr n -summarize_placement <- function(tree) { - place <- get.placements(tree, by="best") - ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) - group_by(place, .data$node) %>% summarize(nplace=n()) %>% - full_join(ids, by='node') %>% - mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) -} - - -#' @method get.placements jplace -#' @param tree jtree -#' @param by filter methods "all","max_lwr","max_pendant", -#' "min_likelihood","lwr","pendant","likelihood" -#' @param filter_value a given value to filter placements. -#' @rdname get-placements -#' @importFrom dplyr group_by -#' @importFrom dplyr filter -#' @importFrom magrittr %<>% -#' @return a dataframe of placements -#' @export -#' -#' @examples -#' \donttest{ -#' jp <- system.file("extdata", "sample.jplace", package="treeio") -#' jplace <- read.jplace(jp) -#' placements <- get.placement(jp,by="all") -#' } -get.placements.jplace <- function(tree, by="all", filter_value = NULL) { - jplist <- c("all","max_lwr","max_pendant", - "min_likelihood","lwr","pendant","likelihood") - if(!(by %in% jplist)){ - stop("by should be one of all,max_lwr,max_pendant, - min_likelihood,lwr,pendant,likelihood") - } - - if(by %in% c("lwr","pendant","likelihood")){ - if(!is.null(filter_value)){ - message("Placement will be filtered by the given value...") - }else{ - stop("The filter_value should be given - in order to filter placement by the given value.") - } - - } - - placements <- tree@placements - - if (by == "all") - return(placements) - - if (by == "max_lwr") { - if (!'like_weight_ratio' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) - } - - } - - if (by == "max_pendant"){ - if (!'pendant_length' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$pendant_length == max(.data$pendant_length)) - } - } - - if (by == "min_likelihood"){ - if (!'likelihood' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$likelihood == min(.data$likelihood)) - } - } - - if (by == "lwr"){ - placements %<>% filter(.data$like_weight_ratio > filter_value) - } - if (by == "pendant"){ - placements %<>% filter(.data$pendant_length > filter_value) - } - if (by == "likelihood"){ - placements %<>% filter(.data$likelihood < filter_value) - } - - return(placements) -} - - -getplacedf <- function(places, nm){ - ## the first column of placements maybe a matrix or one numeric vector, - ## so when it is numeric vector, the nplaces will be 1. - ## and the type of nm also is various. - if (!inherits(places, "matrix")){ - nplaces <- 1 - } else{ - nplaces <- nrow(places) - } - if (inherits(nm, "matrix")){ - nmsize <- nrow(nm) - tmpn <- nm[,1] - } - if (inherits(nm, "list")){ - nmsize <- length(nm) - tmpn <- vapply(nm, function(x)x[1], character(1)) - } - if (inherits(nm, "character")){ - nmsize <- length(nm) - tmpn <- as.vector(nm) - } - ##example: - ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), - ## and the n column is ["read1", "read2"] (the type of n is character vector), so - ## will use "inherits(nm, "character")" block. - ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). - places.df <- rep(list(places), nmsize) - ## then this will generate the names of each matrix for the nm. - ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), - ## 2 is the length of nm. - name <- rep(tmpn, rep(nplaces, nmsize)) - places.df <- do.call("rbind", places.df) - places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) - return(places.df) -} - - -mergenm <- function(n, nm){ - ## merge the n and nm. - ## it is impossible that n and nm is empty simultaneously, - ## so we will keep the column not NULL. - if(is.null(n)&&!is.null(nm)) {return(nm)} - if(is.null(nm)&&!is.null(n)) {return(n)} - if(is.null(n)&&is.null(nm)){ - stop("the placements of jplace should have corresponding name!") - } -} - - -extract.placement <- function(object, phylo) { - placements <- object$placements - if (ncol(placements)==2){ - ## when placements contained p and n two columns, - ## this will process placements row by row with getplacedf function. - ## The order of `p` and `n` column is not fixed. I think colnames of - ## placements (`p`, `n`, `nm`) are fixed, but when column number is - ## two, the `n` or `nm` is not fixed. - nameidx <- match("p", colnames(placements)) - place.df <- mapply(getplacedf, - placements$p, - placements[,-nameidx], - SIMPLIFY=FALSE) - } - if(ncol(placements)==3){ - ## when placements contained p ,n and nm three columns, - ## first, we merge n and nm row by row. - tmpname <- mapply(mergenm, - placements$n, - placements$nm, - SIMPLIFY=FALSE) - ## then, it becomes the same as two columns. - place.df <- mapply(getplacedf, - placements$p, - tmpname, - SIMPLIFY=FALSE) - } - place.df <- do.call("rbind", place.df) - colnames(place.df) <- c("name", object$fields) - ## place <- placements[,1] - - ## ids <- NULL - ## if (length(placements) == 2) { - ## tmpids <- placements[,2] - ## }else{ - ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) - ## } - ## ids <- vapply(tmpids, function(x) x[1], character(1)) - ## names(place) <- ids - ## place.df <- do.call("rbind", place) - ## row.names(place.df) <- NULL - ## if (!is.null(ids)) { - ## nn <- rep(ids, vapply(place, function(x) { - ## nr <- nrow(x) - ## if (is.null(nr)) - ## return(1) - ## return(nr) - ## }, numeric(1))) - ## place.df <- data.frame(name=nn, place.df) - ## colnames(place.df) <- c("name", object$fields) - ## } else { - ## colnames(place.df) <- object$fields - ## } - edgeNum.df <- attr(phylo, "edgeNum") - place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") - place.df <- getnewplacements(place.df) - as_tibble(place.df) -} - -## To avoid the character column -getnewplacements <- function(placedf){ - tmpfile <- tempfile() - utils::write.csv(placedf, tmpfile) - placementdf <- utils::read.csv(tmpfile, row.names=1) - ## file.remove(tmpfile) - return(placementdf) -} - - +##' read jplace file +##' +##' +##' @title read.jplace +##' @param file jplace file +##' @return \code{jplace} instance +##' @importFrom jsonlite fromJSON +##' @export +##' @author Guangchuang Yu +##' @examples +##' jp <- system.file("extdata", "sample.jplace", package="treeio") +##' read.jplace(jp) +read.jplace <- function(file) { + fields <- tree <- placements <- NULL + version <- metadata <- NULL + jtree <- fromJSON(file) + phylo <- jplace_treetext_to_phylo(jtree$tree) + placements <- extract.placement(jtree, phylo) + info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") + res <- new("jplace", + treetext = jtree$tree, + phylo = phylo, + placements = placements, + info = info, + file = filename(file) + ) + + res@data <- summarize_placement(res) + return(res) +} + +##' @importFrom dplyr summarize +##' @importFrom dplyr mutate +##' @importFrom dplyr group_by +##' @importFrom dplyr n +summarize_placement <- function(tree) { + place <- get.placements(tree, by="best") + ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) + group_by(place, .data$node) %>% summarize(nplace=n()) %>% + full_join(ids, by='node') %>% + mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) +} + + +#' @method get.placements jplace +#' @param tree jtree +#' @param by filter methods "all","max_lwr","max_pendant", +#' "min_likelihood","lwr","pendant","likelihood" +#' @param filter_value a given value to filter placements. +#' @rdname get-placements +#' @importFrom dplyr group_by +#' @importFrom dplyr filter +#' @importFrom magrittr %<>% +#' @return a dataframe of placements +#' @export +#' +#' @examples +#' \donttest{ +#' jp <- system.file("extdata", "sample.jplace", package="treeio") +#' jplace <- read.jplace(jp) +#' placements <- get.placement(jp,by="all") +#' } +get.placements.jplace <- function(tree, by="all", filter_value = NULL) { + jplist <- c("all","max_lwr","max_pendant", + "min_likelihood","lwr","pendant","likelihood") + if(!(by %in% jplist)){ + stop("by should be one of all,max_lwr,max_pendant, + min_likelihood,lwr,pendant,likelihood") + } + + if(by %in% c("lwr","pendant","likelihood")){ + if(!is.null(filter_value)){ + message("Placement will be filtered by the given value...") + }else{ + stop("The filter_value should be given + in order to filter placement by the given value.") + } + + } + + placements <- tree@placements + + if (by == "all") + return(placements) + + if (by == "max_lwr") { + if (!'like_weight_ratio' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) + } + + } + + if (by == "max_pendant"){ + if (!'pendant_length' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$pendant_length == max(.data$pendant_length)) + } + } + + if (by == "min_likelihood"){ + if (!'likelihood' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$likelihood == min(.data$likelihood)) + } + } + + if (by == "lwr"){ + placements %<>% filter(.data$like_weight_ratio > filter_value) + } + if (by == "pendant"){ + placements %<>% filter(.data$pendant_length > filter_value) + } + if (by == "likelihood"){ + placements %<>% filter(.data$likelihood < filter_value) + } + + return(placements) +} + + +getplacedf <- function(places, nm){ + ## the first column of placements maybe a matrix or one numeric vector, + ## so when it is numeric vector, the nplaces will be 1. + ## and the type of nm also is various. + if (!inherits(places, "matrix")){ + nplaces <- 1 + } else{ + nplaces <- nrow(places) + } + if (inherits(nm, "matrix")){ + nmsize <- nrow(nm) + tmpn <- nm[,1] + } + if (inherits(nm, "list")){ + nmsize <- length(nm) + tmpn <- vapply(nm, function(x)x[1], character(1)) + } + if (inherits(nm, "character")){ + nmsize <- length(nm) + tmpn <- as.vector(nm) + } + ##example: + ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), + ## and the n column is ["read1", "read2"] (the type of n is character vector), so + ## will use "inherits(nm, "character")" block. + ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). + places.df <- rep(list(places), nmsize) + ## then this will generate the names of each matrix for the nm. + ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), + ## 2 is the length of nm. + name <- rep(tmpn, rep(nplaces, nmsize)) + places.df <- do.call("rbind", places.df) + places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) + return(places.df) +} + + +mergenm <- function(n, nm){ + ## merge the n and nm. + ## it is impossible that n and nm is empty simultaneously, + ## so we will keep the column not NULL. + if(is.null(n)&&!is.null(nm)) {return(nm)} + if(is.null(nm)&&!is.null(n)) {return(n)} + if(is.null(n)&&is.null(nm)){ + stop("the placements of jplace should have corresponding name!") + } +} + + +extract.placement <- function(object, phylo) { + placements <- object$placements + if (ncol(placements)==2){ + ## when placements contained p and n two columns, + ## this will process placements row by row with getplacedf function. + ## The order of `p` and `n` column is not fixed. I think colnames of + ## placements (`p`, `n`, `nm`) are fixed, but when column number is + ## two, the `n` or `nm` is not fixed. + nameidx <- match("p", colnames(placements)) + place.df <- mapply(getplacedf, + placements$p, + placements[,-nameidx], + SIMPLIFY=FALSE) + } + if(ncol(placements)==3){ + ## when placements contained p ,n and nm three columns, + ## first, we merge n and nm row by row. + tmpname <- mapply(mergenm, + placements$n, + placements$nm, + SIMPLIFY=FALSE) + ## then, it becomes the same as two columns. + place.df <- mapply(getplacedf, + placements$p, + tmpname, + SIMPLIFY=FALSE) + } + place.df <- do.call("rbind", place.df) + colnames(place.df) <- c("name", object$fields) + ## place <- placements[,1] + + ## ids <- NULL + ## if (length(placements) == 2) { + ## tmpids <- placements[,2] + ## }else{ + ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) + ## } + ## ids <- vapply(tmpids, function(x) x[1], character(1)) + ## names(place) <- ids + ## place.df <- do.call("rbind", place) + ## row.names(place.df) <- NULL + ## if (!is.null(ids)) { + ## nn <- rep(ids, vapply(place, function(x) { + ## nr <- nrow(x) + ## if (is.null(nr)) + ## return(1) + ## return(nr) + ## }, numeric(1))) + ## place.df <- data.frame(name=nn, place.df) + ## colnames(place.df) <- c("name", object$fields) + ## } else { + ## colnames(place.df) <- object$fields + ## } + edgeNum.df <- attr(phylo, "edgeNum") + place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") + place.df <- getnewplacements(place.df) + as_tibble(place.df) +} + +## To avoid the character column +getnewplacements <- function(placedf){ + tmpfile <- tempfile() + utils::write.csv(placedf, tmpfile) + placementdf <- utils::read.csv(tmpfile, row.names=1) + ## file.remove(tmpfile) + return(placementdf) +} + + From d1ee2bc282a2128f6b2b27394385c22e0c823d6d Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 16:30:51 +0800 Subject: [PATCH 4/7] Add filter method to get.placement() --- NAMESPACE | 3 - R/jplace.R | 490 ++++++++++++------------ man/get-placements.Rd | 4 +- test-tree-subset.R | 0 tests/testthat/test-treedata-accessor.R | 98 ++--- 5 files changed, 296 insertions(+), 299 deletions(-) create mode 100644 test-tree-subset.R diff --git a/NAMESPACE b/NAMESPACE index a80e317..9ec12b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -154,13 +154,10 @@ importFrom(dplyr,bind_rows) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) -importFrom(dplyr,mutate) importFrom(dplyr,mutate_if) -importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,rename) importFrom(dplyr,select) -importFrom(dplyr,summarize) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) importFrom(magrittr,"%<>%") diff --git a/R/jplace.R b/R/jplace.R index ac8e20f..bd4351d 100644 --- a/R/jplace.R +++ b/R/jplace.R @@ -1,245 +1,245 @@ -##' read jplace file -##' -##' -##' @title read.jplace -##' @param file jplace file -##' @return \code{jplace} instance -##' @importFrom jsonlite fromJSON -##' @export -##' @author Guangchuang Yu -##' @examples -##' jp <- system.file("extdata", "sample.jplace", package="treeio") -##' read.jplace(jp) -read.jplace <- function(file) { - fields <- tree <- placements <- NULL - version <- metadata <- NULL - jtree <- fromJSON(file) - phylo <- jplace_treetext_to_phylo(jtree$tree) - placements <- extract.placement(jtree, phylo) - info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") - res <- new("jplace", - treetext = jtree$tree, - phylo = phylo, - placements = placements, - info = info, - file = filename(file) - ) - - res@data <- summarize_placement(res) - return(res) -} - -##' @importFrom dplyr summarize -##' @importFrom dplyr mutate -##' @importFrom dplyr group_by -##' @importFrom dplyr n -summarize_placement <- function(tree) { - place <- get.placements(tree, by="best") - ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) - group_by(place, .data$node) %>% summarize(nplace=n()) %>% - full_join(ids, by='node') %>% - mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) -} - - -#' @method get.placements jplace -#' @param tree jtree -#' @param by filter methods "all","max_lwr","max_pendant", -#' "min_likelihood","lwr","pendant","likelihood" -#' @param filter_value a given value to filter placements. -#' @rdname get-placements -#' @importFrom dplyr group_by -#' @importFrom dplyr filter -#' @importFrom magrittr %<>% -#' @return a dataframe of placements -#' @export -#' -#' @examples -#' \donttest{ -#' jp <- system.file("extdata", "sample.jplace", package="treeio") -#' jplace <- read.jplace(jp) -#' placements <- get.placement(jp,by="all") -#' } -get.placements.jplace <- function(tree, by="all", filter_value = NULL) { - jplist <- c("all","max_lwr","max_pendant", - "min_likelihood","lwr","pendant","likelihood") - if(!(by %in% jplist)){ - stop("by should be one of all,max_lwr,max_pendant, - min_likelihood,lwr,pendant,likelihood") - } - - if(by %in% c("lwr","pendant","likelihood")){ - if(!is.null(filter_value)){ - message("Placement will be filtered by the given value...") - }else{ - stop("The filter_value should be given - in order to filter placement by the given value.") - } - - } - - placements <- tree@placements - - if (by == "all") - return(placements) - - if (by == "max_lwr") { - if (!'like_weight_ratio' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) - } - - } - - if (by == "max_pendant"){ - if (!'pendant_length' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$pendant_length == max(.data$pendant_length)) - } - } - - if (by == "min_likelihood"){ - if (!'likelihood' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$likelihood == min(.data$likelihood)) - } - } - - if (by == "lwr"){ - placements %<>% filter(.data$like_weight_ratio > filter_value) - } - if (by == "pendant"){ - placements %<>% filter(.data$pendant_length > filter_value) - } - if (by == "likelihood"){ - placements %<>% filter(.data$likelihood < filter_value) - } - - return(placements) -} - - -getplacedf <- function(places, nm){ - ## the first column of placements maybe a matrix or one numeric vector, - ## so when it is numeric vector, the nplaces will be 1. - ## and the type of nm also is various. - if (!inherits(places, "matrix")){ - nplaces <- 1 - } else{ - nplaces <- nrow(places) - } - if (inherits(nm, "matrix")){ - nmsize <- nrow(nm) - tmpn <- nm[,1] - } - if (inherits(nm, "list")){ - nmsize <- length(nm) - tmpn <- vapply(nm, function(x)x[1], character(1)) - } - if (inherits(nm, "character")){ - nmsize <- length(nm) - tmpn <- as.vector(nm) - } - ##example: - ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), - ## and the n column is ["read1", "read2"] (the type of n is character vector), so - ## will use "inherits(nm, "character")" block. - ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). - places.df <- rep(list(places), nmsize) - ## then this will generate the names of each matrix for the nm. - ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), - ## 2 is the length of nm. - name <- rep(tmpn, rep(nplaces, nmsize)) - places.df <- do.call("rbind", places.df) - places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) - return(places.df) -} - - -mergenm <- function(n, nm){ - ## merge the n and nm. - ## it is impossible that n and nm is empty simultaneously, - ## so we will keep the column not NULL. - if(is.null(n)&&!is.null(nm)) {return(nm)} - if(is.null(nm)&&!is.null(n)) {return(n)} - if(is.null(n)&&is.null(nm)){ - stop("the placements of jplace should have corresponding name!") - } -} - - -extract.placement <- function(object, phylo) { - placements <- object$placements - if (ncol(placements)==2){ - ## when placements contained p and n two columns, - ## this will process placements row by row with getplacedf function. - ## The order of `p` and `n` column is not fixed. I think colnames of - ## placements (`p`, `n`, `nm`) are fixed, but when column number is - ## two, the `n` or `nm` is not fixed. - nameidx <- match("p", colnames(placements)) - place.df <- mapply(getplacedf, - placements$p, - placements[,-nameidx], - SIMPLIFY=FALSE) - } - if(ncol(placements)==3){ - ## when placements contained p ,n and nm three columns, - ## first, we merge n and nm row by row. - tmpname <- mapply(mergenm, - placements$n, - placements$nm, - SIMPLIFY=FALSE) - ## then, it becomes the same as two columns. - place.df <- mapply(getplacedf, - placements$p, - tmpname, - SIMPLIFY=FALSE) - } - place.df <- do.call("rbind", place.df) - colnames(place.df) <- c("name", object$fields) - ## place <- placements[,1] - - ## ids <- NULL - ## if (length(placements) == 2) { - ## tmpids <- placements[,2] - ## }else{ - ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) - ## } - ## ids <- vapply(tmpids, function(x) x[1], character(1)) - ## names(place) <- ids - ## place.df <- do.call("rbind", place) - ## row.names(place.df) <- NULL - ## if (!is.null(ids)) { - ## nn <- rep(ids, vapply(place, function(x) { - ## nr <- nrow(x) - ## if (is.null(nr)) - ## return(1) - ## return(nr) - ## }, numeric(1))) - ## place.df <- data.frame(name=nn, place.df) - ## colnames(place.df) <- c("name", object$fields) - ## } else { - ## colnames(place.df) <- object$fields - ## } - edgeNum.df <- attr(phylo, "edgeNum") - place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") - place.df <- getnewplacements(place.df) - as_tibble(place.df) -} - -## To avoid the character column -getnewplacements <- function(placedf){ - tmpfile <- tempfile() - utils::write.csv(placedf, tmpfile) - placementdf <- utils::read.csv(tmpfile, row.names=1) - ## file.remove(tmpfile) - return(placementdf) -} - - +##' read jplace file +##' +##' +##' @title read.jplace +##' @param file jplace file +##' @return \code{jplace} instance +##' @importFrom jsonlite fromJSON +##' @export +##' @author Guangchuang Yu +##' @examples +##' jp <- system.file("extdata", "sample.jplace", package="treeio") +##' read.jplace(jp) +read.jplace <- function(file) { + fields <- tree <- placements <- NULL + version <- metadata <- NULL + jtree <- fromJSON(file) + phylo <- jplace_treetext_to_phylo(jtree$tree) + placements <- extract.placement(jtree, phylo) + info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") + res <- new("jplace", + treetext = jtree$tree, + phylo = phylo, + placements = placements, + info = info, + file = filename(file) + ) + + # res@data <- summarize_placement(res) + return(res) +} + +# ##' @importFrom dplyr summarize +# ##' @importFrom dplyr mutate +# ##' @importFrom dplyr group_by +# ##' @importFrom dplyr n +# summarize_placement <- function(tree) { +# place <- get.placements(tree, by="max_lwr") +# ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) +# group_by(place, .data$node) %>% summarize(nplace=n()) %>% +# full_join(ids, by='node') %>% +# mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) +# } + + +#' @method get.placements jplace +#' @param tree jtree +#' @param by filter methods "all","max_lwr","max_pendant", +#' "min_likelihood","lwr","pendant","likelihood" +#' @param filter_value a given value to filter placements. +#' @rdname get-placements +#' @importFrom dplyr group_by +#' @importFrom dplyr filter +#' @importFrom magrittr %<>% +#' @return a dataframe of placements +#' @export +#' +#' @examples +#' \donttest{ +#' jp <- system.file("extdata", "sample.jplace", package="treeio") +#' jplace <- read.jplace(jp) +#' placements <- get.placements(jplace,by="all") +#' } +get.placements.jplace <- function(tree, by="all", filter_value = NULL,...) { + jplist <- c("all","max_lwr","max_pendant", + "min_likelihood","lwr","pendant","likelihood") + if(!(by %in% jplist)){ + stop("by should be one of all,max_lwr,max_pendant, + min_likelihood,lwr,pendant,likelihood") + } + + if(by %in% c("lwr","pendant","likelihood")){ + if(!is.null(filter_value)){ + message("Placement will be filtered by the given value...") + }else{ + stop("The filter_value should be given + in order to filter placement by the given value.") + } + + } + + placements <- tree@placements + + if (by == "all") + return(placements) + + if (by == "max_lwr") { + if (!'like_weight_ratio' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) + } + + } + + if (by == "max_pendant"){ + if (!'pendant_length' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$pendant_length == max(.data$pendant_length)) + } + } + + if (by == "min_likelihood"){ + if (!'likelihood' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$likelihood == min(.data$likelihood)) + } + } + + if (by == "lwr"){ + placements %<>% filter(.data$like_weight_ratio > filter_value) + } + if (by == "pendant"){ + placements %<>% filter(.data$pendant_length > filter_value) + } + if (by == "likelihood"){ + placements %<>% filter(.data$likelihood < filter_value) + } + + return(placements) +} + + +getplacedf <- function(places, nm){ + ## the first column of placements maybe a matrix or one numeric vector, + ## so when it is numeric vector, the nplaces will be 1. + ## and the type of nm also is various. + if (!inherits(places, "matrix")){ + nplaces <- 1 + } else{ + nplaces <- nrow(places) + } + if (inherits(nm, "matrix")){ + nmsize <- nrow(nm) + tmpn <- nm[,1] + } + if (inherits(nm, "list")){ + nmsize <- length(nm) + tmpn <- vapply(nm, function(x)x[1], character(1)) + } + if (inherits(nm, "character")){ + nmsize <- length(nm) + tmpn <- as.vector(nm) + } + ##example: + ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), + ## and the n column is ["read1", "read2"] (the type of n is character vector), so + ## will use "inherits(nm, "character")" block. + ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). + places.df <- rep(list(places), nmsize) + ## then this will generate the names of each matrix for the nm. + ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), + ## 2 is the length of nm. + name <- rep(tmpn, rep(nplaces, nmsize)) + places.df <- do.call("rbind", places.df) + places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) + return(places.df) +} + + +mergenm <- function(n, nm){ + ## merge the n and nm. + ## it is impossible that n and nm is empty simultaneously, + ## so we will keep the column not NULL. + if(is.null(n)&&!is.null(nm)) {return(nm)} + if(is.null(nm)&&!is.null(n)) {return(n)} + if(is.null(n)&&is.null(nm)){ + stop("the placements of jplace should have corresponding name!") + } +} + + +extract.placement <- function(object, phylo) { + placements <- object$placements + if (ncol(placements)==2){ + ## when placements contained p and n two columns, + ## this will process placements row by row with getplacedf function. + ## The order of `p` and `n` column is not fixed. I think colnames of + ## placements (`p`, `n`, `nm`) are fixed, but when column number is + ## two, the `n` or `nm` is not fixed. + nameidx <- match("p", colnames(placements)) + place.df <- mapply(getplacedf, + placements$p, + placements[,-nameidx], + SIMPLIFY=FALSE) + } + if(ncol(placements)==3){ + ## when placements contained p ,n and nm three columns, + ## first, we merge n and nm row by row. + tmpname <- mapply(mergenm, + placements$n, + placements$nm, + SIMPLIFY=FALSE) + ## then, it becomes the same as two columns. + place.df <- mapply(getplacedf, + placements$p, + tmpname, + SIMPLIFY=FALSE) + } + place.df <- do.call("rbind", place.df) + colnames(place.df) <- c("name", object$fields) + ## place <- placements[,1] + + ## ids <- NULL + ## if (length(placements) == 2) { + ## tmpids <- placements[,2] + ## }else{ + ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) + ## } + ## ids <- vapply(tmpids, function(x) x[1], character(1)) + ## names(place) <- ids + ## place.df <- do.call("rbind", place) + ## row.names(place.df) <- NULL + ## if (!is.null(ids)) { + ## nn <- rep(ids, vapply(place, function(x) { + ## nr <- nrow(x) + ## if (is.null(nr)) + ## return(1) + ## return(nr) + ## }, numeric(1))) + ## place.df <- data.frame(name=nn, place.df) + ## colnames(place.df) <- c("name", object$fields) + ## } else { + ## colnames(place.df) <- object$fields + ## } + edgeNum.df <- attr(phylo, "edgeNum") + place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") + place.df <- getnewplacements(place.df) + as_tibble(place.df) +} + +## To avoid the character column +getnewplacements <- function(placedf){ + tmpfile <- tempfile() + utils::write.csv(placedf, tmpfile) + placementdf <- utils::read.csv(tmpfile, row.names=1) + ## file.remove(tmpfile) + return(placementdf) +} + + diff --git a/man/get-placements.Rd b/man/get-placements.Rd index b37aa72..870061a 100644 --- a/man/get-placements.Rd +++ b/man/get-placements.Rd @@ -7,7 +7,7 @@ \usage{ get.placements(tree, ...) -\method{get.placements}{jplace}(tree, by = "all", filter_value = NULL) +\method{get.placements}{jplace}(tree, by = "all", filter_value = NULL, ...) } \arguments{ \item{tree}{jtree} @@ -31,6 +31,6 @@ access placement information \donttest{ jp <- system.file("extdata", "sample.jplace", package="treeio") jplace <- read.jplace(jp) -placements <- get.placement(jp,by="all") +placements <- get.placements(jplace,by="all") } } diff --git a/test-tree-subset.R b/test-tree-subset.R new file mode 100644 index 0000000..e69de29 diff --git a/tests/testthat/test-treedata-accessor.R b/tests/testthat/test-treedata-accessor.R index 8dc9f94..88662f5 100644 --- a/tests/testthat/test-treedata-accessor.R +++ b/tests/testthat/test-treedata-accessor.R @@ -1,49 +1,49 @@ -context("accessor") - -library(treeio) - -jp <- system.file("extdata", "sample.jplace", package="treeio") -x <- read.jplace(jp) -pp <- get.placements(x) - -test_that("access placements slot for jplace object", { - expect_true(is(x, "jplace")) - expect_equal(nrow(pp), 3) - expect_equal(ncol(pp), 7) - expect_true('likelihood' %in% names(pp)) -}) - -mlcfile <- system.file("extdata/PAML_Codeml", "mlc", package="treeio") -mlc <- read.codeml_mlc(mlcfile) - -tree <- read.tree(text = get.treetext(mlc)) - -test_that("access treetext slot for treedata object", { - expect_true(ape::all.equal.phylo(mlc@phylo, tree, use.tip.label=FALSE)) -}) - -test_that("is.rooted method for treedata object", { - expect_equal(is.rooted(mlc), ape::is.rooted(as.phylo(mlc))) -}) - -test_that("convert edgeNum to nodeNum", { - expect_true(is.numeric(treeio:::edgeNum2nodeNum(x, 3))) - expect_true(is.na(treeio:::edgeNum2nodeNum(x, 100))) -}) - -p <- ggtree::ggtree(mlc) - -test_that("access phylo slot", { - expect_true(is(get.tree(mlc), "phylo")) - expect_true(is(as.phylo(p), "phylo")) - expect_true(is.ggtree(p)) -}) - -phy <- rtree(30) -nn <- treeio:::getNodeName(phy) -test_that("access node name", { - expect_equal(nn[1:Ntip(phy)], phy$tip.label) - expect_equal(nn[1:Nnode(phy) + Ntip(phy)], - as.character(1:Nnode(phy) + Ntip(phy))) - expect_equal(treeio:::tipIds(phy), 1:Ntip(phy)) -}) +context("accessor") + +library(treeio) + +jp <- system.file("extdata", "sample.jplace", package="treeio") +x <- read.jplace(jp) +pp <- get.placements(x) + +test_that("access placements slot for jplace object", { + expect_true(is(x, "jplace")) + expect_equal(nrow(pp), 7) + expect_equal(ncol(pp), 7) + expect_true('likelihood' %in% names(pp)) +}) + +mlcfile <- system.file("extdata/PAML_Codeml", "mlc", package="treeio") +mlc <- read.codeml_mlc(mlcfile) + +tree <- read.tree(text = get.treetext(mlc)) + +test_that("access treetext slot for treedata object", { + expect_true(ape::all.equal.phylo(mlc@phylo, tree, use.tip.label=FALSE)) +}) + +test_that("is.rooted method for treedata object", { + expect_equal(is.rooted(mlc), ape::is.rooted(as.phylo(mlc))) +}) + +test_that("convert edgeNum to nodeNum", { + expect_true(is.numeric(treeio:::edgeNum2nodeNum(x, 3))) + expect_true(is.na(treeio:::edgeNum2nodeNum(x, 100))) +}) + +p <- ggtree::ggtree(mlc) + +test_that("access phylo slot", { + expect_true(is(get.tree(mlc), "phylo")) + expect_true(is(as.phylo(p), "phylo")) + expect_true(is.ggtree(p)) +}) + +phy <- rtree(30) +nn <- treeio:::getNodeName(phy) +test_that("access node name", { + expect_equal(nn[1:Ntip(phy)], phy$tip.label) + expect_equal(nn[1:Nnode(phy) + Ntip(phy)], + as.character(1:Nnode(phy) + Ntip(phy))) + expect_equal(treeio:::tipIds(phy), 1:Ntip(phy)) +}) From 0a78e66f587020618a3beac7682e9cc65d406184 Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 16:43:09 +0800 Subject: [PATCH 5/7] Add filter method to get.placement() --- R/jplace.R | 490 ++++++++++++------------ tests/testthat/test-treedata-accessor.R | 98 ++--- 2 files changed, 294 insertions(+), 294 deletions(-) diff --git a/R/jplace.R b/R/jplace.R index bd4351d..742c243 100644 --- a/R/jplace.R +++ b/R/jplace.R @@ -1,245 +1,245 @@ -##' read jplace file -##' -##' -##' @title read.jplace -##' @param file jplace file -##' @return \code{jplace} instance -##' @importFrom jsonlite fromJSON -##' @export -##' @author Guangchuang Yu -##' @examples -##' jp <- system.file("extdata", "sample.jplace", package="treeio") -##' read.jplace(jp) -read.jplace <- function(file) { - fields <- tree <- placements <- NULL - version <- metadata <- NULL - jtree <- fromJSON(file) - phylo <- jplace_treetext_to_phylo(jtree$tree) - placements <- extract.placement(jtree, phylo) - info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") - res <- new("jplace", - treetext = jtree$tree, - phylo = phylo, - placements = placements, - info = info, - file = filename(file) - ) - - # res@data <- summarize_placement(res) - return(res) -} - -# ##' @importFrom dplyr summarize -# ##' @importFrom dplyr mutate -# ##' @importFrom dplyr group_by -# ##' @importFrom dplyr n -# summarize_placement <- function(tree) { -# place <- get.placements(tree, by="max_lwr") -# ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) -# group_by(place, .data$node) %>% summarize(nplace=n()) %>% -# full_join(ids, by='node') %>% -# mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) -# } - - -#' @method get.placements jplace -#' @param tree jtree -#' @param by filter methods "all","max_lwr","max_pendant", -#' "min_likelihood","lwr","pendant","likelihood" -#' @param filter_value a given value to filter placements. -#' @rdname get-placements -#' @importFrom dplyr group_by -#' @importFrom dplyr filter -#' @importFrom magrittr %<>% -#' @return a dataframe of placements -#' @export -#' -#' @examples -#' \donttest{ -#' jp <- system.file("extdata", "sample.jplace", package="treeio") -#' jplace <- read.jplace(jp) -#' placements <- get.placements(jplace,by="all") -#' } -get.placements.jplace <- function(tree, by="all", filter_value = NULL,...) { - jplist <- c("all","max_lwr","max_pendant", - "min_likelihood","lwr","pendant","likelihood") - if(!(by %in% jplist)){ - stop("by should be one of all,max_lwr,max_pendant, - min_likelihood,lwr,pendant,likelihood") - } - - if(by %in% c("lwr","pendant","likelihood")){ - if(!is.null(filter_value)){ - message("Placement will be filtered by the given value...") - }else{ - stop("The filter_value should be given - in order to filter placement by the given value.") - } - - } - - placements <- tree@placements - - if (by == "all") - return(placements) - - if (by == "max_lwr") { - if (!'like_weight_ratio' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) - } - - } - - if (by == "max_pendant"){ - if (!'pendant_length' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$pendant_length == max(.data$pendant_length)) - } - } - - if (by == "min_likelihood"){ - if (!'likelihood' %in% names(placements)){ - return(placements) - } else{ - placements <- group_by(placements, .data$name) %>% - filter(.data$likelihood == min(.data$likelihood)) - } - } - - if (by == "lwr"){ - placements %<>% filter(.data$like_weight_ratio > filter_value) - } - if (by == "pendant"){ - placements %<>% filter(.data$pendant_length > filter_value) - } - if (by == "likelihood"){ - placements %<>% filter(.data$likelihood < filter_value) - } - - return(placements) -} - - -getplacedf <- function(places, nm){ - ## the first column of placements maybe a matrix or one numeric vector, - ## so when it is numeric vector, the nplaces will be 1. - ## and the type of nm also is various. - if (!inherits(places, "matrix")){ - nplaces <- 1 - } else{ - nplaces <- nrow(places) - } - if (inherits(nm, "matrix")){ - nmsize <- nrow(nm) - tmpn <- nm[,1] - } - if (inherits(nm, "list")){ - nmsize <- length(nm) - tmpn <- vapply(nm, function(x)x[1], character(1)) - } - if (inherits(nm, "character")){ - nmsize <- length(nm) - tmpn <- as.vector(nm) - } - ##example: - ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), - ## and the n column is ["read1", "read2"] (the type of n is character vector), so - ## will use "inherits(nm, "character")" block. - ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). - places.df <- rep(list(places), nmsize) - ## then this will generate the names of each matrix for the nm. - ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), - ## 2 is the length of nm. - name <- rep(tmpn, rep(nplaces, nmsize)) - places.df <- do.call("rbind", places.df) - places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) - return(places.df) -} - - -mergenm <- function(n, nm){ - ## merge the n and nm. - ## it is impossible that n and nm is empty simultaneously, - ## so we will keep the column not NULL. - if(is.null(n)&&!is.null(nm)) {return(nm)} - if(is.null(nm)&&!is.null(n)) {return(n)} - if(is.null(n)&&is.null(nm)){ - stop("the placements of jplace should have corresponding name!") - } -} - - -extract.placement <- function(object, phylo) { - placements <- object$placements - if (ncol(placements)==2){ - ## when placements contained p and n two columns, - ## this will process placements row by row with getplacedf function. - ## The order of `p` and `n` column is not fixed. I think colnames of - ## placements (`p`, `n`, `nm`) are fixed, but when column number is - ## two, the `n` or `nm` is not fixed. - nameidx <- match("p", colnames(placements)) - place.df <- mapply(getplacedf, - placements$p, - placements[,-nameidx], - SIMPLIFY=FALSE) - } - if(ncol(placements)==3){ - ## when placements contained p ,n and nm three columns, - ## first, we merge n and nm row by row. - tmpname <- mapply(mergenm, - placements$n, - placements$nm, - SIMPLIFY=FALSE) - ## then, it becomes the same as two columns. - place.df <- mapply(getplacedf, - placements$p, - tmpname, - SIMPLIFY=FALSE) - } - place.df <- do.call("rbind", place.df) - colnames(place.df) <- c("name", object$fields) - ## place <- placements[,1] - - ## ids <- NULL - ## if (length(placements) == 2) { - ## tmpids <- placements[,2] - ## }else{ - ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) - ## } - ## ids <- vapply(tmpids, function(x) x[1], character(1)) - ## names(place) <- ids - ## place.df <- do.call("rbind", place) - ## row.names(place.df) <- NULL - ## if (!is.null(ids)) { - ## nn <- rep(ids, vapply(place, function(x) { - ## nr <- nrow(x) - ## if (is.null(nr)) - ## return(1) - ## return(nr) - ## }, numeric(1))) - ## place.df <- data.frame(name=nn, place.df) - ## colnames(place.df) <- c("name", object$fields) - ## } else { - ## colnames(place.df) <- object$fields - ## } - edgeNum.df <- attr(phylo, "edgeNum") - place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") - place.df <- getnewplacements(place.df) - as_tibble(place.df) -} - -## To avoid the character column -getnewplacements <- function(placedf){ - tmpfile <- tempfile() - utils::write.csv(placedf, tmpfile) - placementdf <- utils::read.csv(tmpfile, row.names=1) - ## file.remove(tmpfile) - return(placementdf) -} - - +##' read jplace file +##' +##' +##' @title read.jplace +##' @param file jplace file +##' @return \code{jplace} instance +##' @importFrom jsonlite fromJSON +##' @export +##' @author Guangchuang Yu +##' @examples +##' jp <- system.file("extdata", "sample.jplace", package="treeio") +##' read.jplace(jp) +read.jplace <- function(file) { + fields <- tree <- placements <- NULL + version <- metadata <- NULL + jtree <- fromJSON(file) + phylo <- jplace_treetext_to_phylo(jtree$tree) + placements <- extract.placement(jtree, phylo) + info <- c(jtree$metadata, version=jtree$version, parser = "read.jplace") + res <- new("jplace", + treetext = jtree$tree, + phylo = phylo, + placements = placements, + info = info, + file = filename(file) + ) + + # res@data <- summarize_placement(res) + return(res) +} + +# ##' @importFrom dplyr summarize +# ##' @importFrom dplyr mutate +# ##' @importFrom dplyr group_by +# ##' @importFrom dplyr n +# summarize_placement <- function(tree) { +# place <- get.placements(tree, by="max_lwr") +# ids <- tibble(node = nodeIds(tree, internal.only = FALSE)) +# group_by(place, .data$node) %>% summarize(nplace=n()) %>% +# full_join(ids, by='node') %>% +# mutate(nplace = ifelse(is.na(.data$nplace), 0, .data$nplace)) +# } + + +#' @method get.placements jplace +#' @param tree jtree +#' @param by filter methods "all","max_lwr","max_pendant", +#' "min_likelihood","lwr","pendant","likelihood" +#' @param filter_value a given value to filter placements. +#' @rdname get-placements +#' @importFrom dplyr group_by +#' @importFrom dplyr filter +#' @importFrom magrittr %<>% +#' @return a dataframe of placements +#' @export +#' +#' @examples +#' \donttest{ +#' jp <- system.file("extdata", "sample.jplace", package="treeio") +#' jplace <- read.jplace(jp) +#' placements <- get.placements(jplace,by="all") +#' } +get.placements.jplace <- function(tree, by="all", filter_value = NULL,...) { + jplist <- c("all","max_lwr","max_pendant", + "min_likelihood","lwr","pendant","likelihood") + if(!(by %in% jplist)){ + stop("by should be one of all,max_lwr,max_pendant, + min_likelihood,lwr,pendant,likelihood") + } + + if(by %in% c("lwr","pendant","likelihood")){ + if(!is.null(filter_value)){ + message("Placement will be filtered by the given value...") + }else{ + stop("The filter_value should be given + in order to filter placement by the given value.") + } + + } + + placements <- tree@placements + + if (by == "all") + return(placements) + + if (by == "max_lwr") { + if (!'like_weight_ratio' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$like_weight_ratio == max(.data$like_weight_ratio)) + } + + } + + if (by == "max_pendant"){ + if (!'pendant_length' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$pendant_length == max(.data$pendant_length)) + } + } + + if (by == "min_likelihood"){ + if (!'likelihood' %in% names(placements)){ + return(placements) + } else{ + placements <- group_by(placements, .data$name) %>% + filter(.data$likelihood == min(.data$likelihood)) + } + } + + if (by == "lwr"){ + placements %<>% filter(.data$like_weight_ratio > filter_value) + } + if (by == "pendant"){ + placements %<>% filter(.data$pendant_length > filter_value) + } + if (by == "likelihood"){ + placements %<>% filter(.data$likelihood < filter_value) + } + + return(placements) +} + + +getplacedf <- function(places, nm){ + ## the first column of placements maybe a matrix or one numeric vector, + ## so when it is numeric vector, the nplaces will be 1. + ## and the type of nm also is various. + if (!inherits(places, "matrix")){ + nplaces <- 1 + } else{ + nplaces <- nrow(places) + } + if (inherits(nm, "matrix")){ + nmsize <- nrow(nm) + tmpn <- nm[,1] + } + if (inherits(nm, "list")){ + nmsize <- length(nm) + tmpn <- vapply(nm, function(x)x[1], character(1)) + } + if (inherits(nm, "character")){ + nmsize <- length(nm) + tmpn <- as.vector(nm) + } + ##example: + ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), + ## and the n column is ["read1", "read2"] (the type of n is character vector), so + ## will use "inherits(nm, "character")" block. + ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). + places.df <- rep(list(places), nmsize) + ## then this will generate the names of each matrix for the nm. + ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), + ## 2 is the length of nm. + name <- rep(tmpn, rep(nplaces, nmsize)) + places.df <- do.call("rbind", places.df) + places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) + return(places.df) +} + + +mergenm <- function(n, nm){ + ## merge the n and nm. + ## it is impossible that n and nm is empty simultaneously, + ## so we will keep the column not NULL. + if(is.null(n)&&!is.null(nm)) {return(nm)} + if(is.null(nm)&&!is.null(n)) {return(n)} + if(is.null(n)&&is.null(nm)){ + stop("the placements of jplace should have corresponding name!") + } +} + + +extract.placement <- function(object, phylo) { + placements <- object$placements + if (ncol(placements)==2){ + ## when placements contained p and n two columns, + ## this will process placements row by row with getplacedf function. + ## The order of `p` and `n` column is not fixed. I think colnames of + ## placements (`p`, `n`, `nm`) are fixed, but when column number is + ## two, the `n` or `nm` is not fixed. + nameidx <- match("p", colnames(placements)) + place.df <- mapply(getplacedf, + placements$p, + placements[,-nameidx], + SIMPLIFY=FALSE) + } + if(ncol(placements)==3){ + ## when placements contained p ,n and nm three columns, + ## first, we merge n and nm row by row. + tmpname <- mapply(mergenm, + placements$n, + placements$nm, + SIMPLIFY=FALSE) + ## then, it becomes the same as two columns. + place.df <- mapply(getplacedf, + placements$p, + tmpname, + SIMPLIFY=FALSE) + } + place.df <- do.call("rbind", place.df) + colnames(place.df) <- c("name", object$fields) + ## place <- placements[,1] + + ## ids <- NULL + ## if (length(placements) == 2) { + ## tmpids <- placements[,2] + ## }else{ + ## tmpids <- list(unlist(placements[,2]), unlist(placements[,3])) + ## } + ## ids <- vapply(tmpids, function(x) x[1], character(1)) + ## names(place) <- ids + ## place.df <- do.call("rbind", place) + ## row.names(place.df) <- NULL + ## if (!is.null(ids)) { + ## nn <- rep(ids, vapply(place, function(x) { + ## nr <- nrow(x) + ## if (is.null(nr)) + ## return(1) + ## return(nr) + ## }, numeric(1))) + ## place.df <- data.frame(name=nn, place.df) + ## colnames(place.df) <- c("name", object$fields) + ## } else { + ## colnames(place.df) <- object$fields + ## } + edgeNum.df <- attr(phylo, "edgeNum") + place.df <- merge(place.df, edgeNum.df, by.x = "edge_num", by.y = "edgeNum") + place.df <- getnewplacements(place.df) + as_tibble(place.df) +} + +## To avoid the character column +getnewplacements <- function(placedf){ + tmpfile <- tempfile() + utils::write.csv(placedf, tmpfile) + placementdf <- utils::read.csv(tmpfile, row.names=1) + ## file.remove(tmpfile) + return(placementdf) +} + + diff --git a/tests/testthat/test-treedata-accessor.R b/tests/testthat/test-treedata-accessor.R index 88662f5..78c07a4 100644 --- a/tests/testthat/test-treedata-accessor.R +++ b/tests/testthat/test-treedata-accessor.R @@ -1,49 +1,49 @@ -context("accessor") - -library(treeio) - -jp <- system.file("extdata", "sample.jplace", package="treeio") -x <- read.jplace(jp) -pp <- get.placements(x) - -test_that("access placements slot for jplace object", { - expect_true(is(x, "jplace")) - expect_equal(nrow(pp), 7) - expect_equal(ncol(pp), 7) - expect_true('likelihood' %in% names(pp)) -}) - -mlcfile <- system.file("extdata/PAML_Codeml", "mlc", package="treeio") -mlc <- read.codeml_mlc(mlcfile) - -tree <- read.tree(text = get.treetext(mlc)) - -test_that("access treetext slot for treedata object", { - expect_true(ape::all.equal.phylo(mlc@phylo, tree, use.tip.label=FALSE)) -}) - -test_that("is.rooted method for treedata object", { - expect_equal(is.rooted(mlc), ape::is.rooted(as.phylo(mlc))) -}) - -test_that("convert edgeNum to nodeNum", { - expect_true(is.numeric(treeio:::edgeNum2nodeNum(x, 3))) - expect_true(is.na(treeio:::edgeNum2nodeNum(x, 100))) -}) - -p <- ggtree::ggtree(mlc) - -test_that("access phylo slot", { - expect_true(is(get.tree(mlc), "phylo")) - expect_true(is(as.phylo(p), "phylo")) - expect_true(is.ggtree(p)) -}) - -phy <- rtree(30) -nn <- treeio:::getNodeName(phy) -test_that("access node name", { - expect_equal(nn[1:Ntip(phy)], phy$tip.label) - expect_equal(nn[1:Nnode(phy) + Ntip(phy)], - as.character(1:Nnode(phy) + Ntip(phy))) - expect_equal(treeio:::tipIds(phy), 1:Ntip(phy)) -}) +context("accessor") + +library(treeio) + +jp <- system.file("extdata", "sample.jplace", package="treeio") +x <- read.jplace(jp) +pp <- get.placements(x) + +test_that("access placements slot for jplace object", { + expect_true(is(x, "jplace")) + expect_equal(nrow(pp), 7) + expect_equal(ncol(pp), 7) + expect_true('likelihood' %in% names(pp)) +}) + +mlcfile <- system.file("extdata/PAML_Codeml", "mlc", package="treeio") +mlc <- read.codeml_mlc(mlcfile) + +tree <- read.tree(text = get.treetext(mlc)) + +test_that("access treetext slot for treedata object", { + expect_true(ape::all.equal.phylo(mlc@phylo, tree, use.tip.label=FALSE)) +}) + +test_that("is.rooted method for treedata object", { + expect_equal(is.rooted(mlc), ape::is.rooted(as.phylo(mlc))) +}) + +test_that("convert edgeNum to nodeNum", { + expect_true(is.numeric(treeio:::edgeNum2nodeNum(x, 3))) + expect_true(is.na(treeio:::edgeNum2nodeNum(x, 100))) +}) + +p <- ggtree::ggtree(mlc) + +test_that("access phylo slot", { + expect_true(is(get.tree(mlc), "phylo")) + expect_true(is(as.phylo(p), "phylo")) + expect_true(is.ggtree(p)) +}) + +phy <- rtree(30) +nn <- treeio:::getNodeName(phy) +test_that("access node name", { + expect_equal(nn[1:Ntip(phy)], phy$tip.label) + expect_equal(nn[1:Nnode(phy) + Ntip(phy)], + as.character(1:Nnode(phy) + Ntip(phy))) + expect_equal(treeio:::tipIds(phy), 1:Ntip(phy)) +}) From 467faee0a332c1814596784c13faa7cf0069a594 Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 16:55:27 +0800 Subject: [PATCH 6/7] Add filter method --- R/jplace.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/jplace.R b/R/jplace.R index 742c243..cc6ed7f 100644 --- a/R/jplace.R +++ b/R/jplace.R @@ -148,7 +148,7 @@ getplacedf <- function(places, nm){ } ##example: ## when first column of plamcements is [[1,2,3,4,5],[3,4,5,6,7],[6,7,3,2,4]] (3 row x 5 columns matrix), - ## and the n column is ["read1", "read2"] (the type of n is character vector), so + ## and the n column is ["read1", "read2"] (the type of n is character vector), so ## will use "inherits(nm, "character")" block. ## this will first generate two same matrix contained 3 row x 5 columns, because the length of n is two (the nmsize argument). places.df <- rep(list(places), nmsize) From dc976450acfbcf8e4fb5443f028f4cdefd6d6d8a Mon Sep 17 00:00:00 2001 From: mjchen1996 Date: Fri, 19 Aug 2022 17:02:35 +0800 Subject: [PATCH 7/7] Add filter method --- R/jplace.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/jplace.R b/R/jplace.R index cc6ed7f..6fc1b80 100644 --- a/R/jplace.R +++ b/R/jplace.R @@ -155,7 +155,7 @@ getplacedf <- function(places, nm){ ## then this will generate the names of each matrix for the nm. ## example result is: rep(c("read1", "read2"), rep(3,2)), here 3 is nplaces (the nrow of first column of placements), ## 2 is the length of nm. - name <- rep(tmpn, rep(nplaces, nmsize)) + name <- rep(tmpn, rep(nplaces, nmsize)) places.df <- do.call("rbind", places.df) places.df <- data.frame(name=name, places.df, stringsAsFactors=FALSE) return(places.df) @@ -168,7 +168,7 @@ mergenm <- function(n, nm){ ## so we will keep the column not NULL. if(is.null(n)&&!is.null(nm)) {return(nm)} if(is.null(nm)&&!is.null(n)) {return(n)} - if(is.null(n)&&is.null(nm)){ + if(is.null(n)&&is.null(nm)){ stop("the placements of jplace should have corresponding name!") } } @@ -193,7 +193,7 @@ extract.placement <- function(object, phylo) { ## first, we merge n and nm row by row. tmpname <- mapply(mergenm, placements$n, - placements$nm, + placements$nm, SIMPLIFY=FALSE) ## then, it becomes the same as two columns. place.df <- mapply(getplacedf, @@ -204,7 +204,7 @@ extract.placement <- function(object, phylo) { place.df <- do.call("rbind", place.df) colnames(place.df) <- c("name", object$fields) ## place <- placements[,1] - + ## ids <- NULL ## if (length(placements) == 2) { ## tmpids <- placements[,2]