From 6d81716038a3c6d5a2db38fe4d06d6c2124f4de6 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb Date: Fri, 16 Sep 2022 10:04:27 -0700 Subject: [PATCH 1/3] replaced loop in windfetch() with a parallel backend loop from foreach package, cuts processing time in ~half --- DESCRIPTION | 5 ++- NAMESPACE | 8 ++++ R/internals.R | 7 ++-- R/windfetch.R | 99 ++++++++++++++++++++++++++++++++++-------------- man/windfetch.Rd | 9 ++++- 5 files changed, 95 insertions(+), 33 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ba40ab0..6223e82 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,10 @@ Imports: magrittr, sf, utils, - rlang + rlang, + foreach, + doParallel, + parallel URL: https://github.com/blasee/windfetch BugReports: https://github.com/blasee/windfetch/issues Description: Wind fetch is the unobstructed length of water over which wind can diff --git a/NAMESPACE b/NAMESPACE index 7498c41..1ca47b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,13 +6,17 @@ exportMethods(as_sf) exportMethods(crs_transform) exportMethods(plot) exportMethods(summary) +importFrom(doParallel,registerDoParallel) importFrom(dplyr,as_tibble) importFrom(dplyr,bind_cols) importFrom(dplyr,group_by) importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,relocate) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(dplyr,tibble) +importFrom(foreach,foreach) importFrom(magrittr,"%>%") importFrom(methods,"S3Part<-") importFrom(methods,as) @@ -25,6 +29,9 @@ importFrom(methods,setMethod) importFrom(methods,setValidity) importFrom(methods,show) importFrom(methods,validObject) +importFrom(parallel,detectCores) +importFrom(parallel,makeCluster) +importFrom(parallel,stopCluster) importFrom(rlang,.data) importFrom(sf,"st_geometry<-") importFrom(sf,st_as_sf) @@ -44,6 +51,7 @@ importFrom(sf,st_point) importFrom(sf,st_sf) importFrom(sf,st_sfc) importFrom(sf,st_transform) +importFrom(stats,setNames) importFrom(utils,head) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) diff --git a/R/internals.R b/R/internals.R index 877da41..6076617 100644 --- a/R/internals.R +++ b/R/internals.R @@ -3,14 +3,15 @@ #' @importFrom sf st_intersection st_cast st_nearest_points st_nearest_feature return_fetch_vector = function(vector, origin, polygon) { + fetch_on_land = suppressWarnings(st_intersection(vector, polygon)) if (nrow(fetch_on_land) == 0) { fetch = vector } else { - fetch_ends = suppressWarnings(st_cast(fetch_on_land, "POINT")) - fetch_vector = st_nearest_points(origin, fetch_ends) - which_closest = st_nearest_feature(origin, fetch_ends) + fetch_ends = suppressWarnings(st_cast(fetch_on_land, "POINT")) + fetch_vector = st_nearest_points(origin, fetch_ends) + which_closest = st_nearest_feature(origin, fetch_ends) fetch = fetch_vector[which_closest] } fetch diff --git a/R/windfetch.R b/R/windfetch.R index da3e188..5562965 100644 --- a/R/windfetch.R +++ b/R/windfetch.R @@ -37,6 +37,7 @@ #' quadrant (default 9). #' @param quiet logical. Suppress diagnostic messages? (Default \code{FALSE}). #' @param progress_bar logical. Show a text progress bar? (Default \code{TRUE}) +#' @param ncores numeric. The number of cores on ones computer to use for parallel processing backend. Default is 2. #' #' @return Returns a \code{\link{WindFetch}} object. #' @@ -53,16 +54,25 @@ #' #' @importFrom utils head txtProgressBar setTxtProgressBar #' @importFrom methods new is -#' @importFrom sf st_is st_is_longlat st_crs st_transform st_intersects +#' @importFrom sf st_is st_is_longlat st_crs st_transform st_intersects st_as_sf #' st_buffer st_coordinates st_sf st_sfc st_linestring st_point #' st_length st_geometry<- st_as_sf -#' @importFrom dplyr left_join +#' @importFrom dplyr left_join mutate relocate +#' @importFrom stats setNames +#' @importFrom parallel detectCores makeCluster stopCluster +#' @importFrom doParallel registerDoParallel +#' @importFrom foreach foreach +#' #' @include windfetch_class.R #' @examples #' #' # Create the polygon layer ---------------------------------------- #' #' library(sf) +#' library(doParallel) +#' library(parallel) +#' library(foreach) +#' library(dplyr) #' #' # Read in North Carolina shape file #' nc = st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) @@ -133,8 +143,15 @@ #' st_write(nc_fetch_sf, "nc_fetch.shp", driver = "ESRI Shapefile") #' } #' @export -windfetch = function(polygon_layer, site_layer, max_dist = 300, n_directions = 9, - quiet = FALSE, progress_bar = TRUE) { +windfetch <- function( + polygon_layer, + site_layer, + max_dist = 300, + n_directions = 9, + quiet = FALSE, + progress_bar = TRUE, + ncores = 2 + ) { if (!any(is(polygon_layer, "sf"), is(polygon_layer, "sfc"))) stop(paste("polygon_layer must be either an sf or sfc object.\nSee", @@ -202,7 +219,7 @@ windfetch = function(polygon_layer, site_layer, max_dist = 300, n_directions = 9 call. = FALSE) site_layer = site_layer[!sites_on_land, ] } - + if (!is.data.frame(site_layer)) { site_layer = st_as_sf(site_layer) } @@ -269,28 +286,54 @@ windfetch = function(polygon_layer, site_layer, max_dist = 300, n_directions = 9 poly_subset = subset(polygon_layer, lengths(st_intersects(polygon_layer, fetch_df)) > 0) - if (progress_bar) - pb = txtProgressBar(max = nrow(fetch_df), style = 3) - - for (i in 1:nrow(fetch_df)) { - fetch_df$fetch[i] = as.data.frame( - return_fetch_vector(fetch_df[i, "geom"], - fetch_df$origin[i], - poly_subset)) - if (progress_bar) - setTxtProgressBar(pb, i) - } - cat("\n") - - fetch_df$fetch = st_sfc(lapply(fetch_df$fetch, `[[`, 1), - crs = st_crs(polygon_layer)) - - st_geometry(fetch_df) = fetch_df$fetch - fetch_df = fetch_df[, 1:2] - fetch_df$quadrant = factor(quadrant, - levels = c("North", "East", "South", "West")) - fetch_df$fetch = st_length(fetch_df$geom) - - new("WindFetch", fetch_df, names = site_names, max_dist = max_dist / 1000) + # detect system cores + # cores <- parallel::detectCores() + + # make a cluster + cl <- parallel::makeCluster(ncores) #not to overload your computer + + # register parallel backend + doParallel::registerDoParallel(cl) + + # run a loop using a parallel backend. ~2x faster + fetch_loop <- foreach::foreach(i = 1:nrow(fetch_df), + .combine = "rbind", + .export = c('return_fetch_vector'), + .packages = c("dplyr", "sf", "stats")) %dopar% { + + fetch_calc <- return_fetch_vector( + vector = fetch_df[i, "geom"], + origin = fetch_df$origin[i], + polygon = poly_subset + ) %>% + as.data.frame() %>% + stats::setNames(c("geometry")) %>% + dplyr::mutate( + site_names = fetch_df$site_names[i], + directions = fetch_df$directions[i] + ) %>% + dplyr::relocate(site_names, directions) + + fetch_calc + } + + # stop extra workers + doParallel::stopImplicitCluster() + parallel::stopCluster(cl) + + # convert linestring dataframe to sf dataframe + final_fetch <- + fetch_loop %>% + sf::st_as_sf() %>% + sf::st_transform(sf::st_crs(polygon_layer)) + + # add quadrant factor column + final_fetch$quadrant <- factor(quadrant, levels = c("North", "East", "South", "West")) + + # add length of fetch linestring + final_fetch$fetch <- sf::st_length(final_fetch$geometry) + + # make the "WindFetch" object + new("WindFetch", final_fetch, names = site_names, max_dist = max_dist / 1000) } diff --git a/man/windfetch.Rd b/man/windfetch.Rd index a6e31e9..bcdb6f6 100644 --- a/man/windfetch.Rd +++ b/man/windfetch.Rd @@ -11,7 +11,8 @@ windfetch( max_dist = 300, n_directions = 9, quiet = FALSE, - progress_bar = TRUE + progress_bar = TRUE, + ncores = 2 ) } \arguments{ @@ -33,6 +34,8 @@ quadrant (default 9).} \item{quiet}{logical. Suppress diagnostic messages? (Default \code{FALSE}).} \item{progress_bar}{logical. Show a text progress bar? (Default \code{TRUE})} + +\item{ncores}{numeric. The number of cores on ones computer to use for parallel processing backend. Default is 2.} } \value{ Returns a \code{\link{WindFetch}} object. @@ -95,6 +98,10 @@ At least one of the inputs to the \code{polygon_layer} or # Create the polygon layer ---------------------------------------- library(sf) +library(doParallel) +library(parallel) +library(foreach) +library(dplyr) # Read in North Carolina shape file nc = st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) From 027769df9b6f1526087eaf408a3ddb6b7d9818ec Mon Sep 17 00:00:00 2001 From: anguswg-ucsb Date: Fri, 16 Sep 2022 14:01:35 -0700 Subject: [PATCH 2/3] as_sf as a parameter --- NAMESPACE | 1 + R/windfetch.R | 55 +++++++++++++++++++++++++++++++++++------------- man/windfetch.Rd | 3 +++ 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 1ca47b7..593f84d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,6 +39,7 @@ importFrom(sf,st_buffer) importFrom(sf,st_cast) importFrom(sf,st_coordinates) importFrom(sf,st_crs) +importFrom(sf,st_drop_geometry) importFrom(sf,st_intersection) importFrom(sf,st_intersects) importFrom(sf,st_is) diff --git a/R/windfetch.R b/R/windfetch.R index 5562965..527e206 100644 --- a/R/windfetch.R +++ b/R/windfetch.R @@ -37,6 +37,7 @@ #' quadrant (default 9). #' @param quiet logical. Suppress diagnostic messages? (Default \code{FALSE}). #' @param progress_bar logical. Show a text progress bar? (Default \code{TRUE}) +#' @param as_sf logical. Should an SF object be returned on only the fetch distances. Default is TRUE, returns SF object #' @param ncores numeric. The number of cores on ones computer to use for parallel processing backend. Default is 2. #' #' @return Returns a \code{\link{WindFetch}} object. @@ -55,7 +56,7 @@ #' @importFrom utils head txtProgressBar setTxtProgressBar #' @importFrom methods new is #' @importFrom sf st_is st_is_longlat st_crs st_transform st_intersects st_as_sf -#' st_buffer st_coordinates st_sf st_sfc st_linestring st_point +#' st_buffer st_coordinates st_sf st_sfc st_linestring st_point st_drop_geometry #' st_length st_geometry<- st_as_sf #' @importFrom dplyr left_join mutate relocate #' @importFrom stats setNames @@ -150,9 +151,11 @@ windfetch <- function( n_directions = 9, quiet = FALSE, progress_bar = TRUE, + as_sf = TRUE, ncores = 2 ) { - + # polygon_layer <- nc_proj + # site_layer <- sites_points if (!any(is(polygon_layer, "sf"), is(polygon_layer, "sfc"))) stop(paste("polygon_layer must be either an sf or sfc object.\nSee", "`?read_sf` for details on how to read in an", @@ -273,22 +276,28 @@ windfetch <- function( fetch_df = st_sf(fetch_df[, c("site_names", "directions")], - geom = st_sfc(sapply(apply(fetch_df, 1, function(x) { - X0 = as.numeric(x['X0']) - Y0 = as.numeric(x['Y0']) - X = as.numeric(x['X']) - Y = as.numeric(x['Y']) - st_sfc(st_linestring(matrix(c(X0, X, Y0, Y), 2, 2), dim = "XY")) - }), st_sfc), crs = st_crs(polygon_layer)), - origin = st_sfc(sapply(apply(fetch_df, 1, function(x) { + geom = st_sfc( + sapply( + apply(fetch_df, 1, function(x) { + X0 = as.numeric(x['X0']) + Y0 = as.numeric(x['Y0']) + X = as.numeric(x['X']) + Y = as.numeric(x['Y']) + st_sfc( + st_linestring(matrix(c(X0, X, Y0, Y), 2, 2), dim = "XY") + ) + }), + st_sfc + ), + crs = st_crs(polygon_layer) + ), + origin = st_sfc( + sapply(apply(fetch_df, 1, function(x) { st_sfc(st_point(c(as.numeric(x['X0']), as.numeric(x['Y0'])))) }), st_sfc), crs = st_crs(polygon_layer))) poly_subset = subset(polygon_layer, lengths(st_intersects(polygon_layer, fetch_df)) > 0) - # detect system cores - # cores <- parallel::detectCores() - # make a cluster cl <- parallel::makeCluster(ncores) #not to overload your computer @@ -333,7 +342,23 @@ windfetch <- function( # add length of fetch linestring final_fetch$fetch <- sf::st_length(final_fetch$geometry) - # make the "WindFetch" object - new("WindFetch", final_fetch, names = site_names, max_dist = max_dist / 1000) + # if no SF object is desired, just return fetch distances + if(as_sf == TRUE) { + + # make the "WindFetch" object + return(new("WindFetch", final_fetch, names = site_names, max_dist = max_dist / 1000)) + + } else { + + # return only distances, no sf object + final_fetch <- + final_fetch %>% + sf::st_drop_geometry() + + return(final_fetch) + + } + + } diff --git a/man/windfetch.Rd b/man/windfetch.Rd index bcdb6f6..ad97fc9 100644 --- a/man/windfetch.Rd +++ b/man/windfetch.Rd @@ -12,6 +12,7 @@ windfetch( n_directions = 9, quiet = FALSE, progress_bar = TRUE, + as_sf = TRUE, ncores = 2 ) } @@ -35,6 +36,8 @@ quadrant (default 9).} \item{progress_bar}{logical. Show a text progress bar? (Default \code{TRUE})} +\item{as_sf}{logical. Should an SF object be returned on only the fetch distances. Default is TRUE, returns SF object} + \item{ncores}{numeric. The number of cores on ones computer to use for parallel processing backend. Default is 2.} } \value{ From 3a0e3b793cb23ca2eca2efea1b6255a4f4d4a955 Mon Sep 17 00:00:00 2001 From: anguswg-ucsb Date: Sat, 17 Sep 2022 06:38:31 -0700 Subject: [PATCH 3/3] error searching --- R/windfetch.R | 67 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/R/windfetch.R b/R/windfetch.R index 527e206..65ff351 100644 --- a/R/windfetch.R +++ b/R/windfetch.R @@ -154,8 +154,6 @@ windfetch <- function( as_sf = TRUE, ncores = 2 ) { - # polygon_layer <- nc_proj - # site_layer <- sites_points if (!any(is(polygon_layer, "sf"), is(polygon_layer, "sfc"))) stop(paste("polygon_layer must be either an sf or sfc object.\nSee", "`?read_sf` for details on how to read in an", @@ -174,8 +172,11 @@ windfetch <- function( if (!is.numeric(max_dist) || length(max_dist) != 1) stop("max_dist must be a single number.", call. = FALSE) - if (!is.numeric(n_directions) || length(n_directions) != 1) + if (!is.numeric(n_directions) || length(n_directions) != 1) { + stop("n_directions must be a single integer.", call. = FALSE) + + } n_directions = round(n_directions) if (n_directions < 1 || n_directions > 90) @@ -188,32 +189,42 @@ windfetch <- function( which_proj = c(!st_is_longlat(polygon_layer), !st_is_longlat(site_layer)) - if (all(!which_proj)) + if (all(!which_proj)) { + stop("The polygon_layer or site_layer argument must be projected to calculate fetch.", call. = FALSE) - if (all(which_proj) && !(st_crs(site_layer) == st_crs(polygon_layer))){ + } + + if (all(which_proj) && !(st_crs(site_layer) == st_crs(polygon_layer))) { warning(paste("The CRS for the polygon_layer and site_layer arguments", "differ; transforming site_layer CRS to match."), call. = FALSE) site_layer = st_transform(site_layer, st_crs(polygon_layer)) + } if (!which_proj[1]){ - if (!quiet) + if (!quiet) { + message("projecting polygon_layer onto the site_layer CRS") + + } polygon_layer = st_transform(polygon_layer, st_crs(site_layer)) } if (!which_proj[2]){ - if (!quiet) + + if (!quiet) { message("projecting site_layer onto the polygon_layer CRS") + } site_layer = st_transform(site_layer, st_crs(polygon_layer)) } - if (!quiet) + if (!quiet) { message("checking site locations are not on land") + } sites_on_land = st_intersects(site_layer, polygon_layer, sparse = FALSE)[, 1] @@ -223,17 +234,26 @@ windfetch <- function( site_layer = site_layer[!sites_on_land, ] } + # convert dataframes to SF if (!is.data.frame(site_layer)) { + site_layer = st_as_sf(site_layer) + } + # name sites if (any(grepl("^[Nn]ames{0,1}$", names(site_layer)))){ - name_col = grep("^[Nn]ames{0,1}$", names(site_layer)) + message("name section1") + name_col = grep("^[Nn]ames{0,1}$", names(site_layer)) + site_names = as.character(site_layer[[name_col]]) + } else { + message("name section2") site_names = paste("Site", seq_len(dim(site_layer)[[1]])) - } + } + message("name section3") site_layer$site_names = site_names # Convert max_dist to appropriate units. @@ -243,12 +263,18 @@ windfetch <- function( # Double check if metres are the correct units, and warn otherwise. proj_unit = st_crs(polygon_layer)$units - if (proj_unit != "m") + if (proj_unit != "m") { + warning(paste("The CRS unit is not metres; ensure that the max_dist argument", "has been scaled appropriately."), call. = FALSE) + } + + message("dir section1") directions = head(seq(0, 360, by = 360 / (n_directions * 4)), -1) + + message("dir section2") # Return the quadrant the directions belong to dirs = as.numeric(directions) dirs_bin = findInterval(dirs, seq(45, 315, by = 90)) @@ -257,16 +283,24 @@ windfetch <- function( quadrant[dirs_bin == 2] = "South" quadrant[dirs_bin == 3] = "West" + message("dir section3") + # Rearrange sequence order to start at 90 degrees directions = unlist(split(directions, directions < 90), use.names = FALSE) fetch_ends = st_buffer(site_layer, max_dist, n_directions) fetch_ends_df = as.data.frame(st_coordinates(fetch_ends)) + message("name section4") + fetch_ends_df$site_names = site_names[fetch_ends_df[, 4]] fetch_locs_df = as.data.frame(st_coordinates(site_layer)) + colnames(fetch_locs_df) = c("X0", "Y0") + + message("name section5") + fetch_locs_df$site_names = site_names fetch_df = unique(left_join(fetch_ends_df, fetch_locs_df, by = "site_names")) @@ -274,7 +308,7 @@ windfetch <- function( head(seq(0, 90, by = 360 / (n_directions * 4)), -1)) fetch_df = fetch_df[with(fetch_df, order(site_names, directions)), ] - + message("nested section") fetch_df = st_sf(fetch_df[, c("site_names", "directions")], geom = st_sfc( sapply( @@ -296,8 +330,10 @@ windfetch <- function( st_sfc(st_point(c(as.numeric(x['X0']), as.numeric(x['Y0'])))) }), st_sfc), crs = st_crs(polygon_layer))) - poly_subset = subset(polygon_layer, lengths(st_intersects(polygon_layer, fetch_df)) > 0) + message("Subset section") + poly_subset = subset(polygon_layer, lengths(st_intersects(polygon_layer, fetch_df)) > 0) + message("parralel") # make a cluster cl <- parallel::makeCluster(ncores) #not to overload your computer @@ -336,6 +372,8 @@ windfetch <- function( sf::st_as_sf() %>% sf::st_transform(sf::st_crs(polygon_layer)) + message("quadrant add") + # add quadrant factor column final_fetch$quadrant <- factor(quadrant, levels = c("North", "East", "South", "West")) @@ -344,12 +382,13 @@ windfetch <- function( # if no SF object is desired, just return fetch distances if(as_sf == TRUE) { - + message("new part") # make the "WindFetch" object return(new("WindFetch", final_fetch, names = site_names, max_dist = max_dist / 1000)) } else { + message("no new part") # return only distances, no sf object final_fetch <- final_fetch %>%