diff --git a/.Rbuildignore b/.Rbuildignore index 5fa08928..8fa96f42 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,6 +15,7 @@ ^docker-compose_opal\.yml$ ^docker-compose\.yml$ ^R/secure.global.ranking.md$ +^PULL_REQUEST_TEMPLATE\.md\.R$ ^_pkgdown\.yml$ ^docs$ ^dsBase_7.0.0\.tar\.gz$ diff --git a/DESCRIPTION b/DESCRIPTION index ad8e28a6..a38b3ec1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -69,6 +69,7 @@ Imports: gridExtra, data.table, methods, + cli, dplyr Suggests: lme4, @@ -80,7 +81,8 @@ Suggests: DescTools, DSOpal, DSMolgenisArmadillo, - DSLite + DSLite, + mockery RoxygenNote: 7.3.3 Encoding: UTF-8 Language: en-GB diff --git a/NAMESPACE b/NAMESPACE index 8bdab82e..bd539a11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,6 +119,8 @@ export(ds.var) export(ds.vectorCalc) import(DSI) import(data.table) +importFrom(DSI,datashield.connections_find) +importFrom(cli,cli_abort) importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..8510656d --- /dev/null +++ b/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,27 @@ +## Instructions & checklist for PR author + +### Description of changes +[Add a description of what they have changed] + +### Refactor instructions +- [ ] Removed `exists` and `isDefined` from clientside function and add appropriate checks on server-side function +- [ ] Removed any client-side code checking whether an object has been successfully created +- [ ] Reviewed code to determine if additional refactoring could reduce calls to server-side package +- [ ] Replaced check relating to datashield connections object with `.set_datasources()` (defined in `utils.r`) +- [ ] If relevant, replaced argument check that dataset name has been provided to `.check_df_name_provided()` (defined in `utils.r`) + +### Testing instructions +- [ ] Writen client-side unit tests for unhappy flow +- [ ] Run `devtools::test(filter = "smk-|disc|arg")` and check it passes +- [ ] Run `devtools::check(args = '--no-tests')` and check it passes +- [ ] Run `devtools::build()` and check it builds without errors +- [ ] Check that the continuous integration checks pass on the pull request branch. Note that the performance test relating to your function may fail, as failure is also triggered by a dramatic improval in performance! + +## Instructions & checklist for PR reviewers +- [ ] Checkout this branch as well as the corresponding branch of dsBase +- [ ] Review the code and suggest any changes +- [ ] Install the dsBase branch on an Armadillo or Opal test server +- [ ] Run `devtools::test(filter = "smk-|disc|arg")` and check it passes +- [ ] Run `devtools::check(args = '--no-tests')` and check it passes (we run tests separately to skip performance checks) +- [ ] Run `devtools::build()` and check it builds without errors +- [ ] Check that the continuous integration checks pass on the pull request branch (see above note on performance checks) diff --git a/R/ds.colnames.R b/R/ds.colnames.R index a4b98b1a..8bfddcb4 100644 --- a/R/ds.colnames.R +++ b/R/ds.colnames.R @@ -1,84 +1,61 @@ #' #' @title Produces column names of the R object in the server-side -#' @description Retrieves column names of an R object on the server-side. +#' @description Retrieves column names of an R object on the server-side. #' This function is similar to R function \code{colnames}. -#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}. -#' +#' @details The input is restricted to the object of type \code{data.frame} or \code{matrix}. +#' #' Server function called: \code{colnamesDS} #' @param x a character string providing the name of the input data frame or matrix. -#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +#' @param datasources a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. #' If the \code{datasources} argument is not specified #' the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}. -#' @return \code{ds.colnames} returns the column names of -#' the specified server-side data frame or matrix. +#' @return \code{ds.colnames} returns the column names of +#' the specified server-side data frame or matrix. #' @author DataSHIELD Development Team #' @seealso \code{\link{ds.dim}} to obtain the dimensions of a matrix or a data frame. -#' @examples +#' @examples #' \dontrun{ -#' +#' #' ## Version 6, for version 5 see the Wiki #' # Connecting to the Opal servers -#' +#' #' require('DSI') #' require('DSOpal') #' require('dsBaseClient') -#' +#' #' builder <- DSI::newDSLoginBuilder() -#' builder$append(server = "study1", -#' url = "http://192.168.56.100:8080/", -#' user = "administrator", password = "datashield_test&", +#' builder$append(server = "study1", +#' url = "http://192.168.56.100:8080/", +#' user = "administrator", password = "datashield_test&", #' table = "CNSIM.CNSIM1", driver = "OpalDriver") -#' builder$append(server = "study2", -#' url = "http://192.168.56.100:8080/", -#' user = "administrator", password = "datashield_test&", +#' builder$append(server = "study2", +#' url = "http://192.168.56.100:8080/", +#' user = "administrator", password = "datashield_test&", #' table = "CNSIM.CNSIM2", driver = "OpalDriver") #' builder$append(server = "study3", -#' url = "http://192.168.56.100:8080/", -#' user = "administrator", password = "datashield_test&", +#' url = "http://192.168.56.100:8080/", +#' user = "administrator", password = "datashield_test&", #' table = "CNSIM.CNSIM3", driver = "OpalDriver") #' logindata <- builder$build() -#' +#' #' # Log onto the remote Opal training servers -#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") -#' +#' connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") +#' #' # Getting column names of the R objects stored in the server-side #' ds.colnames(x = "D", #' datasources = connections[1]) #only the first server ("study1") is used #' # Clear the Datashield R sessions and logout -#' datashield.logout(connections) +#' datashield.logout(connections) #' } #' @export #' ds.colnames <- function(x=NULL, datasources=NULL) { - # look for DS connections - if(is.null(datasources)){ - datasources <- datashield.connections_find() - } - - # ensure datasources is a list of DSConnection-class - if(!(is.list(datasources) && all(unlist(lapply(datasources, function(d) {methods::is(d,"DSConnection")}))))){ - stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE) - } - - if(is.null(x)){ - stop("Please provide the name of a data.frame or matrix!", call.=FALSE) - } - - # check if the input object(s) is(are) defined in all the studies - defined <- isDefined(datasources, x) - - # call the internal function that checks the input object is of the same class in all studies. - typ <- checkClass(datasources, x) - - # if the input object is not a matrix or a dataframe stop - if(!('data.frame' %in% typ) & !('matrix' %in% typ)){ - stop("The input vector must be of type 'data.frame' or a 'matrix'!", call.=FALSE) - } + datasources <- .set_datasources(datasources) + .check_df_name_provided(x) cally <- call("colnamesDS", x) column_names <- DSI::datashield.aggregate(datasources, cally) - return(column_names) } diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 00000000..85d8d7e2 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,51 @@ +#' Retrieve datasources if not specified +#' +#' @param datasources An optional list of data sources. If not provided, the function will attempt +#' to find available data sources. +#' @importFrom DSI datashield.connections_find +#' @return A list of data sources. +#' @noRd +.get_datasources <- function(datasources) { + if (is.null(datasources)) { + datasources <- datashield.connections_find() + } + return(datasources) +} + +#' Verify that the provided data sources are of class 'DSConnection'. +#' +#' @param datasources A list of data sources. +#' @importFrom cli cli_abort +#' @noRd +.verify_datasources <- function(datasources) { + is_connection_class <- sapply(datasources, function(x) inherits(unlist(x), "DSConnection")) + if (!all(is_connection_class)) { + cli_abort("The 'datasources' were expected to be a list of DSConnection-class objects") + } +} + +#' Set and verify data sources. +#' +#' @param datasources An optional list of data sources. If not provided, the function will attempt +#' to find available data sources. +#' @return A list of verified data sources. +#' @noRd +.set_datasources <- function(datasources) { + datasources <- .get_datasources(datasources) + .verify_datasources(datasources) + return(datasources) +} + +#' Check That a Data Frame Name Is Provided +#' +#' Internal helper that checks whether a data frame or matrix object +#' has been provided. If `NULL`, it aborts with a user-friendly error. +#' +#' @param df A data.frame or matrix. +#' @return Invisibly returns `NULL`. Called for its side effect (error checking). +#' @noRd +.check_df_name_provided <- function(df) { + if(is.null(df)){ + cli_abort("Please provide the name of a data.frame or matrix!", call.=FALSE) + } +} diff --git a/man/ds.colnames.Rd b/man/ds.colnames.Rd index e7391081..9460a567 100644 --- a/man/ds.colnames.Rd +++ b/man/ds.colnames.Rd @@ -9,20 +9,20 @@ ds.colnames(x = NULL, datasources = NULL) \arguments{ \item{x}{a character string providing the name of the input data frame or matrix.} -\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. +\item{datasources}{a list of \code{\link[DSI]{DSConnection-class}} objects obtained after login. If the \code{datasources} argument is not specified the default set of connections will be used: see \code{\link[DSI]{datashield.connections_default}}.} } \value{ -\code{ds.colnames} returns the column names of +\code{ds.colnames} returns the column names of the specified server-side data frame or matrix. } \description{ -Retrieves column names of an R object on the server-side. +Retrieves column names of an R object on the server-side. This function is similar to R function \code{colnames}. } \details{ -The input is restricted to the object of type \code{data.frame} or \code{matrix}. +The input is restricted to the object of type \code{data.frame} or \code{matrix}. Server function called: \code{colnamesDS} } @@ -37,28 +37,28 @@ Server function called: \code{colnamesDS} require('dsBaseClient') builder <- DSI::newDSLoginBuilder() - builder$append(server = "study1", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + builder$append(server = "study1", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM1", driver = "OpalDriver") - builder$append(server = "study2", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + builder$append(server = "study2", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM2", driver = "OpalDriver") builder$append(server = "study3", - url = "http://192.168.56.100:8080/", - user = "administrator", password = "datashield_test&", + url = "http://192.168.56.100:8080/", + user = "administrator", password = "datashield_test&", table = "CNSIM.CNSIM3", driver = "OpalDriver") logindata <- builder$build() - + # Log onto the remote Opal training servers - connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") + connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") # Getting column names of the R objects stored in the server-side ds.colnames(x = "D", datasources = connections[1]) #only the first server ("study1") is used # Clear the Datashield R sessions and logout - datashield.logout(connections) + datashield.logout(connections) } } \seealso{ diff --git a/tests/testthat/perf_files/default_perf_profile.csv b/tests/testthat/perf_files/default_perf_profile.csv index 9f1ae6e5..a4669eac 100644 --- a/tests/testthat/perf_files/default_perf_profile.csv +++ b/tests/testthat/perf_files/default_perf_profile.csv @@ -1,14 +1,14 @@ "refer_name","rate","lower_tolerance","upper_tolerance" -"conndisconn::perf::simple0","0.2725","0.5","2" -"ds.abs::perf::0","2.677","0.5","2" -"ds.asInteger::perf:0","2.294","0.5","2" -"ds.asList::perf:0","4.587","0.5","2" -"ds.asNumeric::perf:0","2.185","0.5","2" -"ds.assign::perf::0","5.490","0.5","2" -"ds.class::perf::combine:0","4.760","0.5","2" -"ds.colnames::perf:0","4.218","0.5","2" -"ds.exists::perf::combine:0","11.09","0.5","2" -"ds.length::perf::combine:0","9.479","0.5","2" -"ds.mean::perf::combine:0","9.650","0.5","2" -"ds.mean::perf::split:0","11.26","0.5","2" -"void::perf::void::0","46250.0","0.5","2" +"conndisconn::perf::simple0","0.2725","0.5","10" +"ds.abs::perf::0","2.677","0.5","10" +"ds.asInteger::perf:0","2.294","0.5","10" +"ds.asList::perf:0","4.587","0.5","10" +"ds.asNumeric::perf:0","2.185","0.5","10" +"ds.assign::perf::0","5.490","0.5","10" +"ds.class::perf::combine:0","4.760","0.5","10" +"ds.colnames::perf:0","4.218","0.5","10" +"ds.exists::perf::combine:0","11.09","0.5","10" +"ds.length::perf::combine:0","9.479","0.5","10" +"ds.mean::perf::combine:0","9.650","0.5","10" +"ds.mean::perf::split:0","11.26","0.5","10" +"void::perf::void::0","46250.0","0.5","10" diff --git a/tests/testthat/test-smk-ds.colnames.R b/tests/testthat/test-smk-ds.colnames.R index b7d289ac..09e57946 100644 --- a/tests/testthat/test-smk-ds.colnames.R +++ b/tests/testthat/test-smk-ds.colnames.R @@ -47,7 +47,8 @@ test_that("simple colnames", { test_that("fails if the object does not exist", { expect_error( ds.colnames("non_existing_df"), - regexp = "The input object non_existing_df is not defined in sim1, sim2, sim3!", +# regexp = "object 'non_existing_df' not found", + regexp = "There are some DataSHIELD errors, list them with datashield.errors()", ignore.case = TRUE ) }) diff --git a/tests/testthat/test-smk-utils.R b/tests/testthat/test-smk-utils.R new file mode 100644 index 00000000..f220fb4e --- /dev/null +++ b/tests/testthat/test-smk-utils.R @@ -0,0 +1,139 @@ +library(testthat) +library(DSI) +library(cli) + +library(testthat) +library(mockery) +library(cli) +library(testthat) +library(mockery) +library(cli) + +if (!isClass("DSConnection")) { + setClass("DSConnection", contains = "VIRTUAL") +} + +setClass("MockDSConnection", contains = "DSConnection") +mock_ds_conn <- new("MockDSConnection") + +test_that(".get_datasources retrieves connections when input is NULL", { + + mock_connections <- list( + server1 = mock_ds_conn, + server2 = mock_ds_conn + ) + + stub(.get_datasources, "datashield.connections_find", mock_connections) + + result <- .get_datasources(NULL) + + expect_type(result, "list") + expect_length(result, 2) + expect_named(result, c("server1", "server2")) + expect_true(is(result$server1, "DSConnection")) +}) + +test_that(".get_datasources returns input when provided", { + + input_datasources <- list(A = "connA", B = "connB") + + result <- with_mocked_bindings( + datashield.connections_find = function() stop("Should not be called!"), + .get_datasources(input_datasources), + .package = "dsBaseClient" + ) + + expect_equal(result, input_datasources) +}) + +test_that(".verify_datasources passes with valid DSConnection list", { + + valid_datasources <- list( + conn_list1 = mock_ds_conn, + conn_list2 = mock_ds_conn + ) + + expect_no_error(.verify_datasources(valid_datasources)) + expect_null(.verify_datasources(valid_datasources)) +}) + +test_that(".verify_datasources aborts with invalid object types", { + + invalid_datasources <- list( + conn_list1 = mock_ds_conn, + conn_list2 = "not_a_connection" + ) + + expect_error( + .verify_datasources(invalid_datasources) + ) +}) + +test_that(".set_datasources works with valid input", { + + input_datasources <- list( + mock_ds_conn + ) + + result <- with_mocked_bindings( + .get_datasources = function(d) d, + .verify_datasources = function(d) {}, + .set_datasources(input_datasources), + .package = "dsBaseClient" + ) + + expect_equal(result, input_datasources) +}) + +test_that(".set_datasources calls .get_datasources and .verify_datasources", { + + get_called <- FALSE + verify_called <- FALSE + + mock_get <- function(d) { + get_called <<- TRUE + return(list(list(mock_ds_conn))) + } + + mock_verify <- function(d) { + verify_called <<- TRUE + } + + with_mocked_bindings( + .get_datasources = mock_get, + .verify_datasources = mock_verify, + .set_datasources(NULL), + .package = "dsBaseClient" + ) + + expect_true(get_called) + expect_true(verify_called) +}) + +test_that(".set_datasources aborts if verification fails", { + + expect_error( + with_mocked_bindings( + .get_datasources = function(d) list(list("bad_conn")), + .verify_datasources = function(d) cli::cli_abort("Verification failed mock"), + .set_datasources(NULL), + .package = "dsBaseClient" + ) + ) +}) + +test_that(".check_df_name_provided passes when df is not NULL", { + + expect_no_error(.check_df_name_provided("D")) + expect_null(.check_df_name_provided("D")) + + expect_no_error(.check_df_name_provided(data.frame(a=1))) +}) + +test_that(".check_df_name_provided aborts when df is NULL", { + + expect_error( + .check_df_name_provided(NULL) + ) +}) +