From 366e88b13cf3258fb86412f8e0a6ee4959403916 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:49:10 +0200 Subject: [PATCH 01/18] feat: replacement to data frame fill --- R/ds.standardiseDf.R | 603 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 603 insertions(+) create mode 100644 R/ds.standardiseDf.R diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R new file mode 100644 index 00000000..206375cb --- /dev/null +++ b/R/ds.standardiseDf.R @@ -0,0 +1,603 @@ +#' Fill DataFrame with Missing Columns and Adjust Classes +#' +#' This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. +#' It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. +#' +#' @param df.name Name of the input DataFrame to fill. +#' @param newobj Name of the new DataFrame object created after filling. +#' @param fix_class Character, determines behaviour if class of variables is not the same in all +#' studies. Option "ask" (default) provides the user with a prompt asking if they want to set the +#' class across all studies, option "no" will throw an error if class conflicts are present. +#' @param fix_levels Character, determines behaviour if levels of factor variables is not the same +#' in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set +#' the levels of factor variables to be the same across all studies, whilst option "no" will throw +#' an error if factor variables do not have the same class. +#' @param datasources Data sources from which to aggregate data. Default is `NULL`. +#' @importFrom assertthat assert_that +#' @importFrom DSI datashield.aggregate datashield.assign +#' @return The filled DataFrame with added columns and adjusted classes or factor levels. +#' @export +ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", fix_levels = "ask", + datasources = NULL) { + fill_warnings <- list() + + .check_arguments(df.name, newobj, fix_class, fix_levels) + + if(is.null(datasources)){ + datasources <- datashield.connections_find() + } + + col_names <- datashield.aggregate(datasources, call("colnamesDS", df.name)) + .stop_if_cols_identical(col_names) + + var_classes <- .get_var_classes(df.name, datasources) + class_conflicts <- .identify_class_conflicts(var_classes) + + datashield.assign(datasources, newobj, as.symbol(df.name)) + + if (length(class_conflicts) > 0 & fix_class == "no") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Variables do not have the same class in all studies and `fix_class` is 'no'") + } else if (length(class_conflicts) > 0 & fix_class == "ask") { + class_decisions <- prompt_user_class_decision_all_vars( + names(class_conflicts), + var_classes$server, + dplyr::select(var_classes, all_of(names(class_conflicts))), + newobj, + datasources + ) + + withCallingHandlers({ + .fix_classes(newobj, names(class_conflicts), class_decisions, newobj, datasources) + }, warning = function(w) { + fill_warnings <<- c(fill_warnings, conditionMessage(w)) # Append warning to the list + invokeRestart("muffleWarning") # Suppress immediate display of the warning + }) + } + + unique_cols <- .get_unique_cols(col_names) + .add_missing_cols_to_df(newobj, unique_cols, newobj, datasources) + new_names <- datashield.aggregate(datasources, call("colnamesDS", newobj)) + added_cols <- .get_added_cols(col_names, new_names) + + new_classes <- .get_var_classes(newobj, datasources) + factor_vars <- .identify_factor_vars(new_classes) + factor_levels <- .get_factor_levels(factor_vars, newobj, datasources) + level_conflicts <- .identify_level_conflicts(factor_levels) + + if (length(level_conflicts) > 0 & fix_levels == "no") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Factor variables do not have the same levels in all studies and `fix_levels` is 'no'") + } else if (length(level_conflicts) > 0 & fix_levels == "ask") { + levels_decision <- ask_question_wait_response_levels(level_conflicts, newobj, datasources) + } + + if (levels_decision == "1") { + unique_levels <- .get_unique_levels(factor_levels, level_conflicts) + .set_factor_levels(newobj, unique_levels, datasources) + } + + .print_out_messages(added_cols, class_decisions, names(class_conflicts), unique_levels, + level_conflicts, levels_decision, newobj) + + .handle_warnings(fill_warnings) + .print_class_warning(class_conflicts, fix_class, class_decisions) +} + +#' Check Function Arguments for Validity +#' +#' This function validates the arguments provided to ensure they meet specified conditions. +#' It checks that the `fix_class` and `fix_levels` arguments are set to accepted values +#' and that `df.name` and `newobj` are character strings. +#' +#' @param df.name A character string representing the name of the data frame. +#' @param newobj A character string representing the name of the new object to be created. +#' @param fix_class A character string indicating the method for handling class issues. +#' Must be either `"ask"` or `"no"`. +#' @param fix_levels A character string indicating the method for handling level issues. +#' Must be either `"ask"` or `"no"`. +#' @return NULL. This function is used for validation and does not return a value. +#' @importFrom assertthat assert_that +#' @noRd +.check_arguments <- function(df.name, newobj, fix_class, fix_levels) { + assert_that(fix_class %in% c("ask", "no")) + assert_that(fix_levels %in% c("ask", "no")) + assert_that(is.character(df.name)) + assert_that(is.character(newobj)) +} + +#' Stop If Columns Are Identical +#' +#' Checks if the columns in the data frames are identical and throws an error if they are. +#' +#' @param col_names A list of column names from different data sources. +#' @return None. Throws an error if columns are identical. +#' @importFrom cli cli_abort +#' @noRd +.stop_if_cols_identical <- function(col_names) { + are_identical <- all(sapply(col_names, identical, col_names[[1]])) + if (are_identical) { + cli_abort("Columns are identical in all data frames: nothing to fill") + } +} + +#' Get Variable Classes from DataFrame +#' +#' Retrieves the class of each variable in the specified DataFrame from different data sources. +#' +#' @param df.name Name of the input DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return A DataFrame containing the variable classes from each data source. +#' @import dplyr +#' @noRd +.get_var_classes <- function(df.name, datasources) { + cally <- call("getClassAllColsDS", df.name) + classes <- datashield.aggregate(datasources, cally) %>% + bind_rows(.id = "server") + return(classes) +} + +#' Identify Class Conflicts +#' +#' Identifies conflicts in variable classes across different data sources. +#' +#' @param classes A DataFrame containing variable classes across data sources. +#' @return A list of variables that have class conflicts. +#' @import dplyr +#' @importFrom purrr map +#' @noRd +.identify_class_conflicts <- function(classes) { + server <- NULL + different_class <- classes |> + dplyr::select(-server) |> + map(~ unique(na.omit(.))) + + out <- different_class[which(different_class %>% map(length) > 1)] + return(out) +} + +#' Prompt User for Class Decision for All Variables +#' +#' Prompts the user to resolve class conflicts for all variables. +#' +#' @param vars A vector of variable names with class conflicts. +#' @param all_servers The names of all servers. +#' @param all_classes The classes of the variables across servers. +#' @return A vector of decisions for each variable's class. +#' @noRd +prompt_user_class_decision_all_vars <- function(vars, all_servers, all_classes, newobj, datasources) { + decisions <- c() + for (i in 1:length(vars)) { + decisions[i] <- prompt_user_class_decision(vars[i], all_servers, all_classes[[i]], newobj, datasources) + } + return(decisions) +} + +#' Prompt User for Class Decision for a Single Variable +#' +#' Prompts the user to resolve a class conflict for a single variable. +#' +#' @param var The variable name with a class conflict. +#' @param all_servers The names of all servers. +#' @param all_classes The classes of the variable across servers. +#' @importFrom cli cli_alert_warning cli_alert_danger +#' @return A decision for the variable's class. +#' @noRd +prompt_user_class_decision <- function(var, servers, classes, newobj, datasources) { + cli_alert_warning("`ds.dataFrameFill` requires that all columns have the same class.") + cli_alert_danger("Column {.strong {var}} has following classes:") + print_all_classes(servers, classes) + cli_text("") + return(ask_question_wait_response_class(var, newobj, datasources)) +} + +#' Print All Server-Class Pairs +#' +#' This function prints out a list of server names along with their corresponding +#' class types. It formats the output with a bullet-point list using the `cli` package. +#' +#' @param all_servers A character vector containing the names of servers. +#' @param all_classes A character vector containing the class types corresponding +#' to each server. +#' @return This function does not return a value. It prints the server-class pairs +#' to the console as a bulleted list. +#' @importFrom cli cli_ul cli_li cli_end +#' @noRd +print_all_classes <- function(all_servers, all_classes) { + combined <- paste(all_servers, all_classes, sep = ": ") + cli_ul() + for (i in 1:length(combined)) { + cli_li("{combined[i]}") + } + cli_end() +} + +#' Ask Question and Wait for Class Response +#' +#' Prompts the user with a question and waits for a response related to class decisions. +#' +#' @param question The question to ask the user. +#' @return The user's decision. +#' @importFrom cli cli_text cli_alert_warning cli_abort +#' @noRd +ask_question_wait_response_class <- function(var, newobj, datasources) { + readline <- NULL + ask_question_class(var) + answer <- readline() + if (answer == "6") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Aborted `ds.dataFrameFill`", .call = NULL) + } else if (!answer %in% as.character(1:5)) { + cli_text("") + cli_alert_warning("Invalid input. Please try again.") + cli_text("") + ask_question_wait_response_class(var, newobj, datasources) + } else { + return(answer) + } +} + +#' Prompt User for Class Conversion Options +#' +#' This function prompts the user with options to convert a variable to a specific class (e.g., factor, integer, numeric, character, or logical). +#' The function provides a list of class conversion options for the specified variable and includes an option to cancel the operation. +#' +#' @param var The name of the variable for which the user is prompted to select a class conversion option. +#' +#' @importFrom cli cli_alert_info cli_ol +#' @return None. This function is used for prompting the user and does not return a value. +#' @examples +#' ask_question("variable_name") +#' @noRd +ask_question_class <- function(var) { + cli_alert_info("Would you like to:") + class_options <- c("a factor", "an integer", "numeric", "a character", "a logical vector") + class_message <- paste0("Convert `{var}` to ", class_options, " in all studies") + cli_ol( + c(class_message, "Cancel `ds.dataFrameFill` operation") + ) +} + +#' Fix Variable Classes +#' +#' Applies the user's class decisions to fix the classes of variables across different data sources. +#' +#' @param df.name The name of the DataFrame. +#' @param different_classes A list of variables with class conflicts. +#' @param class_decisions The decisions made by the user. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with consistent variable classes. +#' @noRd +.fix_classes <- function(df.name, different_classes, class_decisions, newobj, datasources) { + cally <- call("fixClassDS", df.name, different_classes, class_decisions) + datashield.assign(datasources, newobj, cally) +} + +#' Get Unique Columns from Data Sources +#' +#' Retrieves all unique columns from the data sources. +#' +#' @param col_names A list of column names. +#' @return A vector of unique column names. +#' @noRd +.get_unique_cols <- function(col_names) { + return( + unique( + unlist(col_names) + ) + ) +} + +#' Add Missing Columns to DataFrame +#' +#' Adds any missing columns to the DataFrame to ensure all columns are present across data sources. +#' +#' @param df.name The name of the DataFrame. +#' @param unique_cols A vector of unique column names. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with added columns. +#' @noRd +.add_missing_cols_to_df <- function(df.name, cols_to_add_if_missing, newobj, datasources) { + cally <- call("fixColsDS", df.name, cols_to_add_if_missing) + datashield.assign(datasources, newobj, cally) +} + +#' Get Added Columns +#' +#' Compares the old and new column names and identifies newly added columns. +#' +#' @param old_names A list of old column names. +#' @param new_names A list of new column names. +#' @importFrom purrr pmap +#' @return A list of added column names. +#' @noRd +.get_added_cols <- function(old_names, new_names) { + list(old_names, new_names) %>% + pmap(function(.x, .y) { + .y[!.y %in% .x] + }) +} + +#' Identify Factor Variables +#' +#' Identifies which variables are factors in the DataFrame. +#' +#' @param var_classes A DataFrame containing variable classes. +#' @return A vector of factor variables. +#' @noRd +.identify_factor_vars <- function(var_classes) { + return( + var_classes %>% + dplyr::filter(row_number() == 1) %>% + dplyr::select(where(~ . == "factor")) + ) +} + +#' Get Factor Levels from Data Sources +#' +#' Retrieves the levels of factor variables from different data sources. +#' +#' @param factor_vars A vector of factor variables. +#' @param newobj The name of the new DataFrame. +#' @param datasources Data sources from which to aggregate data. +#' @return A list of factor levels. +#' @noRd +.get_factor_levels <- function(factor_vars, df, datasources) { + cally <- call("getAllLevelsDS", df, names(factor_vars)) + return(datashield.aggregate(datasources, cally)) +} + +#' Identify Factor Level Conflicts +#' +#' Identifies conflicts in factor levels across different data sources. +#' +#' @param factor_levels A list of factor levels. +#' @return A list of variables with level conflicts. +#' @importFrom purrr map_lgl pmap_lgl +#' @noRd +.identify_level_conflicts <- function(factor_levels) { + levels <- factor_levels %>% + pmap_lgl(function(...) { + args <- list(...) + !all(map_lgl(args[-1], ~ identical(.x, args[[1]]))) + }) + + return(names(levels[levels == TRUE])) +} + +#' Ask Question and Wait for Response on Factor Levels +#' +#' Prompts the user with options for resolving factor level conflicts and waits for a response. +#' +#' @param level_conflicts A list of variables with factor level conflicts. +#' @return The user's decision. +#' @noRd +ask_question_wait_response_levels <- function(level_conflicts, newobj, datasources) { + .make_levels_message(level_conflicts) + answer <- readline() + if (answer == "3") { + DSI::datashield.aggregate(datasources, call("rmDS", newobj)) + cli_abort("Aborted `ds.dataFrameFill`", .call = NULL) + } else if (!answer %in% as.character(1:2)) { + cli_alert_warning("Invalid input. Please try again.") + cli_alert_info("") + .make_levels_message(level_conflicts) + return(ask_question_wait_response_levels(level_conflicts, newobj, datasources)) + } else { + return(answer) + } +} + +#' Make Factor Level Conflict Message +#' +#' Creates a message to alert the user about factor level conflicts and prompt for action. +#' +#' @param level_conflicts A list of variables with factor level conflicts. +#' @importFrom cli cli_alert_warning cli_alert_info cli_ol +#' @return None. Prints the message to the console. +#' @noRd +.make_levels_message <- function(level_conflicts) { + cli_alert_warning("Warning: factor variables {level_conflicts} do not have the same levels in all studies") + cli_alert_info("Would you like to:") + cli_ol(c("Create the missing levels where they are not present", "Do nothing", "Cancel `ds.dataFrameFill` operation")) +} + +#' Get Unique Factor Levels +#' +#' Retrieves the unique factor levels for variables with conflicts. +#' +#' @param factor_levels A list of factor levels. +#' @param level_conflicts A list of variables with level conflicts. +#' @importFrom purrr pmap +#' @return A list of unique factor levels. +#' @noRd +.get_unique_levels <- function(factor_levels, level_conflicts) { + unique_levels <- factor_levels %>% + map(~ .[level_conflicts]) %>% + pmap(function(...) { + as.character(c(...)) + }) %>% + map(~ unique(.)) + return(unique_levels) +} + +#' Set Factor Levels in DataFrame +#' +#' Applies the unique factor levels to the DataFrame. +#' +#' @param newobj The name of the new DataFrame. +#' @param unique_levels A list of unique factor levels. +#' @param datasources Data sources from which to aggregate data. +#' @return None. Updates the DataFrame with the new factor levels. +#' @noRd +.set_factor_levels <- function(df, unique_levels, datasources) { + cally <- call("fixLevelsDS", df, names(unique_levels), unique_levels) + datashield.assign(datasources, df, cally) +} + +#' Print Out Summary Messages +#' +#' Prints summary messages regarding the filled DataFrame, including added columns, class decisions, and factor level adjustments. +#' +#' @param added_cols A list of added columns. +#' @param class_decisions A vector of class decisions. +#' @param different_classes A list of variables with class conflicts. +#' @param unique_levels A list of unique factor levels. +#' @param level_conflicts A list of variables with level conflicts. +#' @param levels_decision The decision made regarding factor levels. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_text +#' @return None. Prints messages to the console. +#' @noRd +.print_out_messages <- function(added_cols, class_decisions, different_classes, unique_levels, + level_conflicts, levels_decision, newobj) { + .print_var_recode_message(added_cols, newobj) + + if (length(different_classes) > 0) { + .print_class_recode_message(class_decisions, different_classes, newobj) + cli_text("") + } + + if (length(level_conflicts) > 0 & levels_decision == "1") { + .print_levels_recode_message(unique_levels, newobj) + } +} + +#' Print Variable Recode Message +#' +#' Prints a message summarizing the columns that were added to the DataFrame. +#' +#' @param added_cols A list of added columns. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_text +#' @return None. Prints the message to the console. +#' @noRd +.print_var_recode_message <- function(added_cols, newobj) { + cli_alert_success("The following variables have been added to {newobj}:") + added_cols_neat <- added_cols %>% map(~ ifelse(length(.) == 0, "", .)) + var_message <- paste0(names(added_cols), " --> ", added_cols_neat) + for (i in 1:length(var_message)) { + cli_alert_info("{var_message[[i]]}") + } + cli_text("") +} + +#' Print Class Recode Message +#' +#' Prints a message summarizing the class decisions that were made for variables with conflicts. +#' +#' @param class_decisions A vector of class decisions. +#' @param different_classes A list of variables with class conflicts. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_alert_info cli_alert_success +#' @return None. Prints the message to the console. +#' @noRd +.print_class_recode_message <- function(class_decisions, different_classes, newobj) { + choice_neat <- .change_choice_to_string(class_decisions) + class_message <- paste0(different_classes, " --> ", choice_neat) + cli_alert_success("The following classes have been set for all datasources in {newobj}: ") + for (i in 1:length(class_message)) { + cli_alert_info("{class_message[[i]]}") + } +} + +#' Convert Class Decision Code to String +#' +#' This function converts a numeric class decision input (represented as a string) +#' into the corresponding class type string (e.g., "factor", "integer", "numeric", etc.). +#' @param class_decision A string representing the class decision. It should be +#' one of the following values: "1", "2", "3", "4", or "5". +#' @return A string representing the class type corresponding to the input: +#' "factor", "integer", "numeric", "character", or "logical". +#' @noRd +.change_choice_to_string <- function(class_decision) { + case_when( + class_decision == "1" ~ "factor", + class_decision == "2" ~ "integer", + class_decision == "3" ~ "numeric", + class_decision == "4" ~ "character", + class_decision == "5" ~ "logical" + ) +} + +#' Print Factor Levels Recode Message +#' +#' Prints a message summarizing the factor level decisions that were made for variables with conflicts. +#' +#' @param unique_levels A list of unique factor levels. +#' @param newobj The name of the new DataFrame. +#' @importFrom cli cli_alert_success cli_alert_info +#' @return None. Prints the message to the console. +#' @noRd +.print_levels_recode_message <- function(unique_levels, newobj) { + levels_message <- .make_levels_recode_message(unique_levels) + cli_alert_success("The following levels have been set for all datasources in {newobj}: ") + for (i in 1:length(levels_message)) { + cli_alert_info("{levels_message[[i]]}") + } +} + +#' Make Levels Recode Message +#' +#' Creates a message to alert the user about factor level recoding. +#' +#' @param unique_levels A list of unique factor levels. +#' @return A formatted string summarizing the level recoding. +#' @importFrom purrr pmap +#' @noRd +.make_levels_recode_message <- function(unique_levels) { + return( + list(names(unique_levels), unique_levels) %>% + pmap(function(.x, .y) { + paste0(.x, " --> ", paste0(.y, collapse = ", ")) + }) + ) +} + +#' Handle Warnings for Class Conversion Issues +#' +#' This function iterates through a list of warnings generated during class conversion and +#' triggers a danger alert if any warnings indicate that the conversion has resulted in `NA` values. +#' +#' @param fill_warnings A list or vector of warning messages generated during class conversion. +#' If any warnings indicate that `NA` values were introduced, a danger alert will be displayed. +#' @return NULL. This function is used for its side effects of printing alerts. +#' @importFrom cli cli_alert_danger +#' @importFrom stringr str_detect +#' @noRd +.handle_warnings <- function(fill_warnings) { + if(length(fill_warnings) > 0) { + for(i in 1:length(fill_warnings)) { + if(str_detect(fill_warnings[[i]], "NAs introduced by coercion")){ + cli_alert_danger("Class conversion resulted in the creation of NA values.") + } else { + cli_alert_danger(fill_warnings[[i]]) + } + } + } +} + +#' Print Warning for Class Conflicts in Data Conversion +#' +#' This function displays a warning when there are class conflicts in a dataset that may have resulted +#' from incompatible class changes during data conversion. It alerts users to verify column classes, +#' as incompatible changes could corrupt the data. +#' +#' @param class_conflicts A list or vector of conflicting classes identified during conversion. +#' @param fix_class A string indicating the user's choice for fixing class conflicts. Typically, +#' this is "ask" if the user is prompted to confirm class changes. +#' @param class_decisions A vector of decisions made for class conversions. When any value is not +#' "6", it indicates unresolved class conflicts. +#' @return NULL. This function is used for its side effects of printing alerts. +#' @importFrom cli cli_alert_warning +#' @noRd +.print_class_warning <- function(class_conflicts, fix_class, class_decisions) { + if(length(class_conflicts) > 0 & fix_class == "ask" & all(!class_decisions == "6")) { + cli_alert_warning("Please check all columns that have changed class. Not all class changes + are compatible with all data types, so this could have corrupted the data.") + } +} + +readline <- NULL From 2d251dd706390e32dc929c620dd9e27e1df21901 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:50:48 +0200 Subject: [PATCH 02/18] test: added tests for standardise function --- tests/testthat/test-smk-standardiseDf.R | 743 ++++++++++++++++++++++++ 1 file changed, 743 insertions(+) create mode 100644 tests/testthat/test-smk-standardiseDf.R diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R new file mode 100644 index 00000000..92ebd7b1 --- /dev/null +++ b/tests/testthat/test-smk-standardiseDf.R @@ -0,0 +1,743 @@ +suppressWarnings(library(DSLite)) +library(purrr) +library(dplyr) +library(dsBase) +library(dsBaseClient) +library(purrr) +library(dsTidyverse) +# devtools::load_all("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsTidyverse") +options("datashield.return_errors" = TRUE) + +df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 10) + +df_1 <- df %>% select(1:5, 6, 9, 12, 15, 18) %>% + mutate( + fac_col2 = factor(fac_col2, levels = c("Blue", "Green")), + fac_col4 = as.numeric(fac_col4), + fac_col5 = as.logical(fac_col5)) + +df_2 <- df %>% select(1:5, 7, 10, 13, 16, 19) %>% + mutate( + fac_col2 = factor(fac_col2, levels = c("Green", "Red")), + fac_col3 = factor(fac_col3, levels = "No"), + fac_col4 = as.character(fac_col4), + fac_col5 = as.integer(fac_col5)) + +df_3 <- df %>% select(1:5, 11, 14, 17, 20) %>% + mutate( + fac_col2 = factor(fac_col2, levels = "Blue"), + fac_col3 = factor(fac_col3, levels = "Yes")) + +options(datashield.env = environment()) + +dslite.server <- newDSLiteServer( + tables = list( + df_1 = df_1, + df_2 = df_2, + df_3 = df_3 + ) +) + +dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse", "dsDanger"))) +dslite.server$aggregateMethod("getClassAllColsDS", "getClassAllColsDS") +dslite.server$assignMethod("fixClassDS", "fixClassDS") +dslite.server$assignMethod("fixColsDS", "fixColsDS") +dslite.server$aggregateMethod("getAllLevelsDS", "getAllLevelsDS") +dslite.server$assignMethod("fixLevelsDS", "fixLevelsDS") + +builder <- DSI::newDSLoginBuilder() + +builder$append( + server = "server_1", + url = "dslite.server", + driver = "DSLiteDriver" +) + +builder$append( + server = "server_2", + url = "dslite.server", + driver = "DSLiteDriver" +) + +builder$append( + server = "server_3", + url = "dslite.server", + driver = "DSLiteDriver" +) + +logindata <- builder$build() +conns <- DSI::datashield.login(logins = logindata, assign = FALSE) + +datashield.assign.table(conns["server_1"], "df", "df_1") +datashield.assign.table(conns["server_2"], "df", "df_2") +datashield.assign.table(conns["server_3"], "df", "df_3") + +datashield.assign.table(conns["server_1"], "df_ident", "df_1") +datashield.assign.table(conns["server_2"], "df_ident", "df_1") +datashield.assign.table(conns["server_3"], "df_ident", "df_1") + +#################################################################################################### +# Code that will be used in multiple tests +#################################################################################################### +var_class <- .get_var_classes("df", datasources = conns) + +class_conflicts <- .identify_class_conflicts(var_class) + +different_classes <- c("fac_col4", "fac_col5") + +class_decisions <- c("1", "5") + +.fix_classes( + df.name = "df", + different_classes = different_classes, + class_decisions = class_decisions, + newobj = "new_classes", + datasources = conns) + +cols_to_set <- c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col9", "col12", + "col15", "col18", "fac_col7", "fac_col10", "col13", "col16", "col19", "col11", "col14", "col17", + "col20") + +.add_missing_cols_to_df( + df.name = "df", + cols_to_add_if_missing = cols_to_set, + newobj = "with_new_cols", + datasources = conns) + +old_cols <- ds.colnames("df") + +new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + +new_cols_servers <- list( + server_1 = new_cols, + server_2 = new_cols, + server_3 = new_cols +) + +added_cols <- .get_added_cols(old_cols, new_cols_servers) + +var_class_fact <- .get_var_classes("with_new_cols", datasources = conns) + +fac_vars <- .identify_factor_vars(var_class_fact) + +fac_levels <- .get_factor_levels(fac_vars, "with_new_cols", conns) + +level_conflicts <- .identify_level_conflicts(fac_levels) + +unique_levs <- .get_unique_levels(fac_levels, level_conflicts) + +#################################################################################################### +# Tests +#################################################################################################### +test_that(".stop_if_cols_identical throws error if columns are identical", { + + identical_cols <- list( + c("col1", "col2", "col3"), + c("col1", "col2", "col3"), + c("col1", "col2", "col3") + ) + + expect_error( + .stop_if_cols_identical(identical_cols), + "Columns are identical in all data frames: nothing to fill" + ) + +}) + +test_that(".stop_if_cols_identical doesn't throw error if data frames have different columns", { + + different_cols <- list( + c("col1", "col2", "col3"), + c("col1", "col2", "col4"), + c("col1", "col5", "col3") + ) + + expect_silent( + .stop_if_cols_identical(different_cols) + ) + +}) + +test_that(".get_var_classes returns correct output", { + + expected <- tibble( + server = c("server_1", "server_2", "server_3"), + fac_col1 = c("factor", "factor", "factor"), + fac_col2 = c("factor", "factor", "factor"), + fac_col3 = c("factor", "factor", "factor"), + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor"), + fac_col6 = c("factor", NA, NA), + fac_col9 = c("factor", NA, NA), + col12 = c("numeric", NA, NA), + col15 = c("integer", NA, NA), + col18 = c("logical", NA, NA), + fac_col7 = c(NA, "factor", NA), + fac_col10 = c(NA, "factor", NA), + col13 = c(NA, "character", NA), + col16 = c(NA, "numeric", NA), + col19 = c(NA, "integer", NA), + col11 = c(NA, NA, "integer"), + col14 = c(NA, NA, "logical"), + col17 = c(NA, NA, "character"), + col20 = c(NA, NA, "numeric") + ) + + expect_equal(var_class, expected) + +}) + +test_that(".identify_class_conflicts returns correct output", { + expected <- list( + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor") + ) + + expect_equal(class_conflicts, expected) + +}) + +test_that("ask_question displays the correct prompt", { + expect_snapshot(ask_question_class("my_var")) +}) + +test_that("ask_question_wait_response_class continues with valid response", { + expect_equal( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "1" + ), "1" + ) +}) + +test_that("ask_question_wait_response_class throws error if option 6 selected", { + expect_error( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "6") + ) +}) + +test_that("print_all_classes prints the correct message", { + expect_snapshot( + print_all_classes( + c("server_1", "server_2", "server_3"), + c("numeric", "factor", "integer") + ) + ) +}) + +test_that("prompt_user_class_decision function properly", { + expect_message( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("server_1", "server_2", "server_3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ) + ) + + expect_equal( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("server_1", "server_2", "server_3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ), + "test_col" + ) +}) + +test_that("prompt_user_class_decision_all_vars returns correct value", { + expect_equal( + with_mocked_bindings( + prompt_user_class_decision_all_vars( + vars = c("test_var_1", "test_var_2"), + all_servers = c("server_1", "server_2", "server_3"), + all_classes = tibble( + test_var_1 = c("numeric", "character", "factor"), + test_var_2 = c("logical", "integer", "factor") + ), + "test_df", + conns), + prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" + ), + c("1", "1") + ) +}) + +test_that(".fix_classes sets the correct classes in serverside data frame", { + + expect_equal( + unname(unlist(ds.class("df$fac_col4"))), + c("numeric", "character", "factor") + ) + + expect_equal( + unname(unlist(ds.class("df$fac_col5"))), + c("logical", "integer", "factor") + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col4"))), + rep("factor", 3) + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col5"))), + rep("logical", 3) + ) + +}) + +test_that(".get_unique_cols extracts unique names from a list", { + expect_equal( + .get_unique_cols( + list( + server_1 = c("col_1", "col_2", "col_3"), + server_1 = c("col_1", "col_2", "col_4"), + server_1 = c("col_2", "col_3", "col_3", "col_5") + ) + ), + c("col_1", "col_2", "col_3", "col_4", "col_5") + ) +}) + +test_that(".add_missing_cols_to_df correctly creates missing columns", { + + new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + + observed <- ds.colnames("with_new_cols") + + expected <- list( + server_1 = new_cols, + server_2 = new_cols, + server_3 = new_cols + ) + + expect_equal(observed, expected) +}) + +test_that(".get_added_cols correctly identifies newly added columns", { + + expect_equal( + added_cols, + list( + server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), + server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), + server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") + ) + ) +}) + +test_that(".identify_factor_vars correctly identifies factor variables", { + + + + var_class_fact <- var_class |> dplyr::select(server: col18) + expect_equal( + names(fac_vars), + c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) +}) + +test_that(".get_factor_levels correctly identifies factor levels", { + expected <- list( + server_1 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue", "Green"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ), + server_2 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Green", "Red"), + fac_col3 = c("No"), + fac_col6 = NULL, + fac_col9 = NULL + ), + server_3 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue"), + fac_col3 = c("Yes"), + fac_col6 = NULL, + fac_col9 = NULL + ) + ) + + expect_equal(fac_levels, expected) +}) + +test_that(".identify_level_conflicts correctly factor columns with different levels", { + expect_equal( + .identify_level_conflicts(fac_levels), + c("fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) + +}) + +test_that("ask_question_wait_response_levels continues with valid response", { + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + +}) + +test_that("ask_question_wait_response_levels aborts with response of 3", { + expect_error( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "3") + ) +}) + +test_that(".make_levels_message makes correct message", { + expect_snapshot(.make_levels_message(level_conflicts)) +}) + +test_that(".get_unique_levels extracts all possible levels", { + + expected <- list( + fac_col2 = c("Blue", "Green", "Red"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ) + + expect_equal(unique_levs, expected) + +}) + +test_that(".set_factor_levels sets levels correctly", { + .set_factor_levels("with_new_cols", unique_levs, conns) + + expect_equal( + ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), + list( + server_1 = c("Blue", "Green", "Red"), + server_2 = c("Blue", "Green", "Red"), + server_3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), + list( + server_1 = c("No", "Yes"), + server_2 = c("No", "Yes"), + server_3 = c("No", "Yes") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), + list( + server_1 = c("Bird", "Cat", "Dog"), + server_2 = c("Bird", "Cat", "Dog"), + server_3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), + list( + server_1 = c("False", "True"), + server_2 = c("False", "True"), + server_3 = c("False", "True") + ) + ) + +}) + +test_that(".print_var_recode_message prints the correct message", { + expect_snapshot(.print_var_recode_message(added_cols, "test_df")) +}) + +test_that(".print_class_recode_message prints the correct message", { + expect_snapshot( + .print_class_recode_message(class_decisions, different_classes, "test_df") + ) +}) + +test_that(".print_levels_recode_message prints the correct message", { + expect_snapshot( + .print_levels_recode_message(unique_levs, "test_df") + ) +}) + +test_that(".make_levels_recode_message prints the correct message", { + expect_equal( + .make_levels_recode_message(unique_levs), + list( + "fac_col2 --> Blue, Green, Red", + "fac_col3 --> No, Yes", + "fac_col6 --> Bird, Cat, Dog", + "fac_col9 --> False, True" + ) + ) +}) + +test_that(".print_out_messages prints the correct messages", { + expect_snapshot( + .print_out_messages( + added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" + ) + ) +}) + +test_that(".change_choice_to_string converts numeric class codes to strings correctly", { + expect_equal(.change_choice_to_string("1"), "factor") + expect_equal(.change_choice_to_string("2"), "integer") + expect_equal(.change_choice_to_string("3"), "numeric") + expect_equal(.change_choice_to_string("4"), "character") + expect_equal(.change_choice_to_string("5"), "logical") +}) + +test_that("ds.standardiseDf doesn't run if dataframes are identical", { + expect_error( + ds.standardiseDf( + df = "df_ident", + newobj = "test_fill" + ), + "Columns are identical" + ) +}) + +test_that("ds.standardiseDf works when called directly and class conversion is factor", { + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "factor" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "integer" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "integer" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "numeric" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "numeric" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "character" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "character" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "logical" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "logical" + ) +}) + +test_that("ds.standardiseDf changes levels if this option is selected", { + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" + ) + + levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) + levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) + levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) + levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) + levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) + levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) + + expect_equal( + levels_2, + list( + server_1 = c("Blue", "Green", "Red"), + server_2 = c("Blue", "Green", "Red"), + server_3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + levels_3, + list( + server_1 = c("No", "Yes"), + server_2 = c("No", "Yes"), + server_3 = c("No", "Yes") + ) + ) + + expect_equal( + levels_4, + list( + server_1 = c("1", "2", "3", "A", "B", "C"), + server_2 = c("1", "2", "3", "A", "B", "C"), + server_3 = c("1", "2", "3", "A", "B", "C") + ) + ) + + expect_equal( + levels_5, + list( + server_1 = c("1", "2", "3", "One", "Three", "Two"), + server_2 = c("1", "2", "3", "One", "Three", "Two"), + server_3 = c("1", "2", "3", "One", "Three", "Two") + ) + ) + + expect_equal( + levels_6, + list( + server_1 = c("Bird", "Cat", "Dog"), + server_2 = c("Bird", "Cat", "Dog"), + server_3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + levels_9, + list( + server_1 = c("False", "True"), + server_2 = c("False", "True"), + server_3 = c("False", "True") + ) + ) + +}) + +test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { + expect_error( + ds.standardiseDf( + df = "df", + newobj = "shouldnt_exist", + fix_class = "no" + ), + "Variables do not have the same class in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + +test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "df", + newobj = "shouldnt_exist", + fix_levels = "no" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") + ), + "Factor variables do not have the same levels in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + + +## 9. Handle incorrect response for level fix + + + + From 5fac73bdbe9836a20a2066e1483d6c35f9ffca26 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 21 Jul 2025 16:56:02 +0200 Subject: [PATCH 03/18] docs: updated following check --- DESCRIPTION | 9 +++++++-- NAMESPACE | 20 ++++++++++++++++++++ man/ds.standardiseDf.Rd | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 man/ds.standardiseDf.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 882df32c..102604cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,7 +54,11 @@ Imports: gridExtra, data.table, methods, - dplyr + dplyr, + cli, + purrr, + stringr, + assertthat Suggests: lme4, httr, @@ -64,6 +68,7 @@ Suggests: DescTools, DSOpal, DSMolgenisArmadillo, - DSLite + DSLite, + assertthat RoxygenNote: 7.3.2 Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index d737d5e6..7919f665 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -104,6 +104,7 @@ export(ds.seq) export(ds.setSeed) export(ds.skewness) export(ds.sqrt) +export(ds.standardiseDf) export(ds.summary) export(ds.table) export(ds.table1D) @@ -117,7 +118,26 @@ export(ds.var) export(ds.vectorCalc) import(DSI) import(data.table) +import(dplyr) +importFrom(DSI,datashield.aggregate) +importFrom(DSI,datashield.assign) +importFrom(assertthat,assert_that) +importFrom(cli,cli_abort) +importFrom(cli,cli_alert_danger) +importFrom(cli,cli_alert_info) +importFrom(cli,cli_alert_success) +importFrom(cli,cli_alert_warning) +importFrom(cli,cli_end) +importFrom(cli,cli_li) +importFrom(cli,cli_ol) +importFrom(cli,cli_text) +importFrom(cli,cli_ul) +importFrom(purrr,map) +importFrom(purrr,map_lgl) +importFrom(purrr,pmap) +importFrom(purrr,pmap_lgl) importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) importFrom(stats,weighted.mean) +importFrom(stringr,str_detect) diff --git a/man/ds.standardiseDf.Rd b/man/ds.standardiseDf.Rd new file mode 100644 index 00000000..4f544f1d --- /dev/null +++ b/man/ds.standardiseDf.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ds.standardiseDf.R +\name{ds.standardiseDf} +\alias{ds.standardiseDf} +\title{Fill DataFrame with Missing Columns and Adjust Classes} +\usage{ +ds.standardiseDf( + df.name = NULL, + newobj = NULL, + fix_class = "ask", + fix_levels = "ask", + datasources = NULL +) +} +\arguments{ +\item{df.name}{Name of the input DataFrame to fill.} + +\item{newobj}{Name of the new DataFrame object created after filling.} + +\item{fix_class}{Character, determines behaviour if class of variables is not the same in all +studies. Option "ask" (default) provides the user with a prompt asking if they want to set the +class across all studies, option "no" will throw an error if class conflicts are present.} + +\item{fix_levels}{Character, determines behaviour if levels of factor variables is not the same +in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set +the levels of factor variables to be the same across all studies, whilst option "no" will throw +an error if factor variables do not have the same class.} + +\item{datasources}{Data sources from which to aggregate data. Default is `NULL`.} +} +\value{ +The filled DataFrame with added columns and adjusted classes or factor levels. +} +\description{ +This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary. +It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts. +} From c2f6c427d6f0f9289101270d8ba8860e7b9e3d58 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 22 Jul 2025 09:58:11 +0200 Subject: [PATCH 04/18] removed dependency on stringr --- DESCRIPTION | 3 +-- NAMESPACE | 1 - R/ds.standardiseDf.R | 7 +++---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 102604cd..a3a4ceaf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,13 +57,12 @@ Imports: dplyr, cli, purrr, - stringr, assertthat Suggests: lme4, httr, tibble, - testthat, + testthat (>= 3.0.0), e1071, DescTools, DSOpal, diff --git a/NAMESPACE b/NAMESPACE index 7919f665..59da737b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -140,4 +140,3 @@ importFrom(stats,as.formula) importFrom(stats,na.omit) importFrom(stats,ts) importFrom(stats,weighted.mean) -importFrom(stringr,str_detect) diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R index 206375cb..4ba24f90 100644 --- a/R/ds.standardiseDf.R +++ b/R/ds.standardiseDf.R @@ -565,12 +565,11 @@ ask_question_wait_response_levels <- function(level_conflicts, newobj, datasourc #' If any warnings indicate that `NA` values were introduced, a danger alert will be displayed. #' @return NULL. This function is used for its side effects of printing alerts. #' @importFrom cli cli_alert_danger -#' @importFrom stringr str_detect #' @noRd .handle_warnings <- function(fill_warnings) { - if(length(fill_warnings) > 0) { - for(i in 1:length(fill_warnings)) { - if(str_detect(fill_warnings[[i]], "NAs introduced by coercion")){ + if (length(fill_warnings) > 0) { + for (i in seq_along(fill_warnings)) { + if (grepl("NAs introduced by coercion", fill_warnings[[i]])) { cli_alert_danger("Class conversion resulted in the creation of NA values.") } else { cli_alert_danger(fill_warnings[[i]]) From 574e54d9b4e780fda08d24f16cd01f3148443c81 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 22 Jul 2025 09:58:47 +0200 Subject: [PATCH 05/18] added helper file, saved snapshots --- tests/testthat/_snaps/smk-standardiseDf.md | 86 ++++++++ tests/testthat/helpers.R | 218 +++++++++++++++++++++ tests/testthat/test-smk-standardiseDf.R | 3 + 3 files changed, 307 insertions(+) create mode 100644 tests/testthat/_snaps/smk-standardiseDf.md create mode 100644 tests/testthat/helpers.R diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md new file mode 100644 index 00000000..ac1558bd --- /dev/null +++ b/tests/testthat/_snaps/smk-standardiseDf.md @@ -0,0 +1,86 @@ +# ask_question displays the correct prompt + + Code + ask_question_class("my_var") + Message + i Would you like to: + 1. Convert `my_var` to a factor in all studies + 2. Convert `my_var` to an integer in all studies + 3. Convert `my_var` to numeric in all studies + 4. Convert `my_var` to a character in all studies + 5. Convert `my_var` to a logical vector in all studies + 6. Cancel `ds.dataFrameFill` operation + +# print_all_classes prints the correct message + + Code + print_all_classes(c("server_1", "server_2", "server_3"), c("numeric", "factor", + "integer")) + Message + * server_1: numeric + * server_2: factor + * server_3: integer + +# .make_levels_message makes correct message + + Code + .make_levels_message(level_conflicts) + Message + ! Warning: factor variables fac_col2, fac_col3, fac_col6, and fac_col9 do not have the same levels in all studies + i Would you like to: + 1. Create the missing levels where they are not present + 2. Do nothing + 3. Cancel `ds.dataFrameFill` operation + +# .print_var_recode_message prints the correct message + + Code + .print_var_recode_message(added_cols, "test_df") + Message + v The following variables have been added to test_df: + i server_1 --> col11 + i server_2 --> col11 + i server_3 --> col12 + + +# .print_class_recode_message prints the correct message + + Code + .print_class_recode_message(class_decisions, different_classes, "test_df") + Message + v The following classes have been set for all datasources in test_df: + i fac_col4 --> factor + i fac_col5 --> logical + +# .print_levels_recode_message prints the correct message + + Code + .print_levels_recode_message(unique_levs, "test_df") + Message + v The following levels have been set for all datasources in test_df: + i fac_col2 --> Blue, Green, Red + i fac_col3 --> No, Yes + i fac_col6 --> Bird, Cat, Dog + i fac_col9 --> False, True + +# .print_out_messages prints the correct messages + + Code + .print_out_messages(added_cols, class_decisions, different_classes, unique_levs, + level_conflicts, "1", "test_df") + Message + v The following variables have been added to test_df: + i server_1 --> col11 + i server_2 --> col11 + i server_3 --> col12 + + v The following classes have been set for all datasources in test_df: + i fac_col4 --> factor + i fac_col5 --> logical + + v The following levels have been set for all datasources in test_df: + i fac_col2 --> Blue, Green, Red + i fac_col3 --> No, Yes + i fac_col6 --> Bird, Cat, Dog + i fac_col9 --> False, True + diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R new file mode 100644 index 00000000..919df923 --- /dev/null +++ b/tests/testthat/helpers.R @@ -0,0 +1,218 @@ +#' Create a DSLite login object that can be used for testing +#' +#' @param assign_method A string specifying the name of the custom assign method to be added +#' to the DSLite server. If `NULL`, no additional assign method is added. Default is `NULL`. +#' @param aggregate_method A string specifying the name of the custom aggregate method to be +#' added to the DSLite server. If `NULL`, no additional aggregate method is added. Default is `NULL`. +#' @param tables A named list of tables to be made available on the DSLite server. Default is `NULL`. +#' +#' @return A DataSHIELD login object containing the necessary connection information for the DSLite server. +#' +#' @examples +#' \dontrun{ +#' # Prepare a DSLite server with default methods and custom assign/aggregate methods +#' login_data <- .prepare_dslite( +#' assign_method = "customAssign", +#' aggregate_method = "customAggregate", +#' tables = list(mtcars = mtcars, mtcars_group = mtcars_group) +#' ) +#' +#' @importFrom DSLite newDSLiteServer +#' @importFrom DSI newDSLoginBuilder +#' @export +.prepare_dslite <- function(assign_method = NULL, aggregate_method = NULL, tables = NULL) { + + options(datashield.env = environment()) + dslite.server <- DSLite::newDSLiteServer(tables = tables) + dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse"))) + dslite.server$aggregateMethod("exists", "base::exists") + dslite.server$aggregateMethod("classDS", "dsBase::classDS") + dslite.server$aggregateMethod("lsDS", "dsBase::lsDS") + dslite.server$aggregateMethod("dsListDisclosureSettings", "dsTidyverse::dsListDisclosureSettings") + + if (!is.null(assign_method)) { + dslite.server$assignMethod(assign_method, paste0("dsTidyverse::", assign_method)) + } + + if (!is.null(aggregate_method)) { + dslite.server$aggregateMethod(assign_method, paste0("dsTidyverse::", assign_method)) + } + + builder <- DSI::newDSLoginBuilder() + builder$append(server = "server_1", url = "dslite.server", driver = "DSLiteDriver") + builder$append(server = "server_2", url = "dslite.server", driver = "DSLiteDriver") + builder$append(server = "server_3", url = "dslite.server", driver = "DSLiteDriver") + login_data <- builder$build() + return(login_data) +} + +#' Create a mixed dataframe with factor and other types of columns +#' +#' This function generates a dataframe with a specified number of rows, +#' factor columns, and other columns (integer, numeric, and string). +#' +#' @param n_rows Number of rows in the dataframe. Default is 10,000. +#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15. +#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15. +#' +#' @return A dataframe with the specified number of rows and columns, containing mixed data types. +#' @importFrom dplyr bind_cols +#' @importFrom purrr map_dfc +#' @examples +#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5) +create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) { + + # Function to create a factor column with defined levels + create_factor_column <- function(levels, n = n_rows) { + set.seed(123) # Set seed before sample for reproducibility + factor(sample(levels, n, replace = TRUE)) + } + + # Define factor levels for different columns + factor_levels <- list( + c("Low", "Medium", "High"), + c("Red", "Green", "Blue"), + c("Yes", "No"), + c("A", "B", "C"), + c("One", "Two", "Three"), + c("Cat", "Dog", "Bird"), + c("Small", "Medium", "Large"), + c("Alpha", "Beta", "Gamma"), + c("True", "False"), + c("Left", "Right"), + c("North", "South", "East", "West"), + c("Day", "Night"), + c("Up", "Down"), + c("Male", "Female"), + c("Summer", "Winter", "Spring", "Fall") + ) + + # Create factor columns + factor_columns <- map_dfc(factor_levels[1:n_factor_cols], create_factor_column) + colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols) + + # Function to create other types of columns + create_other_column <- function(type, n = n_rows) { + set.seed(123) # Set seed before sample for reproducibility + switch(type, + "int" = sample(1:100, n, replace = TRUE), # Integer column + "num" = runif(n, 0, 100), # Numeric column + "str" = sample(letters, n, replace = TRUE), # Character column + "log" = sample(c(TRUE, FALSE), n, replace = TRUE) # Logical column + ) + } + + # Ensure that each data type is included + column_types <- c( + "int", "num", "str", "log", "int", + "num", "str", "log", "int", "num", + "str", "int", "num", "log", "str" + ) + + # Create other columns with specified types + other_columns <- map_dfc(column_types[1:n_other_cols], create_other_column) + colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols)) + + # Combine factor and other columns into a single dataframe + df <- bind_cols(factor_columns, other_columns) + + return(df) +} + + +#' Modify factor levels for partial overlap +#' +#' This function takes two sets of factor levels, computes the common and unique levels, +#' and returns a new set of levels with partial overlap. +#' +#' @param levels1 First set of factor levels. +#' @param levels2 Second set of factor levels. +#' +#' @return A character vector of new factor levels with partial overlap. +#' @examples +#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D")) +partial_overlap_levels <- function(levels1, levels2) { + common <- intersect(levels1, levels2) + unique1 <- setdiff(levels1, common) + unique2 <- setdiff(levels2, common) + + # Set seed before each sample call + set.seed(123) + sampled_unique1 <- sample(unique1, length(unique1) * 0.5) + + set.seed(123) + sampled_unique2 <- sample(unique2, length(unique2) * 0.5) + + new_levels <- c(common, sampled_unique1, sampled_unique2) + return(new_levels) +} + + +#' Create additional dataframes with specific conditions +#' +#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels, +#' and adding new columns with unique names. Different seeds are used for each iteration of the loop, +#' ensuring reproducibility of the generated dataframes. +#' +#' @param base_df The base dataframe used to create the additional dataframes. +#' @param n_rows Number of rows in the additional dataframes. Default is 10,000. +#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3"). +#' +#' @return A list of dataframes with the specified modifications. +#' @importFrom dplyr bind_cols +#' @examples +#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5) +#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2")) +create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) { + + # Define a fixed sequence of seeds, one for each dataframe to be created + seeds <- c(123, 456, 789, 101112) + + df_list <- list() + + for (i in seq_along(df_names)) { + # Set the seed for this iteration based on the pre-defined seeds + set.seed(seeds[i]) + + # Proceed with the dataframe generation process + overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df))) + df <- base_df + cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols))) + + # Modify columns to have different data types + for (col in cols_to_modify_class) { + current_class <- class(df[[col]]) + new_class <- switch(current_class, + "factor" = as.character(df[[col]]), + "character" = as.factor(df[[col]]), + "numeric" = as.integer(df[[col]]), + "integer" = as.numeric(df[[col]]), + df[[col]]) + df[[col]] <- new_class + } + + # Modify factor levels for partial overlap + factor_cols <- colnames(base_df)[sapply(base_df, is.factor)] + overlap_factor_cols <- intersect(overlap_cols, factor_cols) + cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols))) + + for (col in cols_to_modify_levels) { + original_levels <- levels(base_df[[col]]) + new_levels <- partial_overlap_levels(original_levels, original_levels) + df[[col]] <- factor(df[[col]], levels = new_levels) + } + + # Create new random columns for each dataframe (these will vary by seed) + set.seed(seeds[i]) # Set the seed again for generating new columns + n_new_cols <- round(0.2 * ncol(base_df)) + new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols) + new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols)) + colnames(new_cols) <- new_col_names + + # Bind new columns to the dataframe + df <- bind_cols(df, new_cols) + df_list[[df_names[i]]] <- df + } + + return(df_list) +} diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R index 92ebd7b1..fca136ac 100644 --- a/tests/testthat/test-smk-standardiseDf.R +++ b/tests/testthat/test-smk-standardiseDf.R @@ -1,12 +1,15 @@ suppressWarnings(library(DSLite)) library(purrr) library(dplyr) +# devtools::install_github("datashield/dsBase", ref = "v6.4.0-dev") library(dsBase) library(dsBaseClient) library(purrr) library(dsTidyverse) +source("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsBaseClient/tests/testthat/helpers.R") # devtools::load_all("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsTidyverse") options("datashield.return_errors" = TRUE) +testthat::local_edition(3) df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 10) From de444d043eec5ff99ae22b807902795f2ddff1b7 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 22 Jul 2025 09:59:40 +0200 Subject: [PATCH 06/18] removed duplicate dependency --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a3a4ceaf..b0779e7f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,6 @@ Suggests: DescTools, DSOpal, DSMolgenisArmadillo, - DSLite, - assertthat + DSLite RoxygenNote: 7.3.2 Encoding: UTF-8 From f664e047bbabc6f7230de643dbe29e1bdcff13a5 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:38:24 +0200 Subject: [PATCH 07/18] fix: corrected how to pass variable names through the parser --- R/ds.standardiseDf.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R index 4ba24f90..daaadbd6 100644 --- a/R/ds.standardiseDf.R +++ b/R/ds.standardiseDf.R @@ -62,7 +62,7 @@ ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", f new_classes <- .get_var_classes(newobj, datasources) factor_vars <- .identify_factor_vars(new_classes) - factor_levels <- .get_factor_levels(factor_vars, newobj, datasources) + factor_levels <- .get_factor_levels(newobj, factor_vars, datasources) level_conflicts <- .identify_level_conflicts(factor_levels) if (length(level_conflicts) > 0 & fix_levels == "no") { @@ -344,8 +344,9 @@ ask_question_class <- function(var) { #' @param datasources Data sources from which to aggregate data. #' @return A list of factor levels. #' @noRd -.get_factor_levels <- function(factor_vars, df, datasources) { - cally <- call("getAllLevelsDS", df, names(factor_vars)) +.get_factor_levels <- function(df, factor_vars, datasources) { + factor_vars <- paste(names(factor_vars), collapse = ",") + cally <- call("getAllLevelsDS", df, factor_vars) return(datashield.aggregate(datasources, cally)) } From eb97f82534c12400405901583925f2ee88469787 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:41:27 +0200 Subject: [PATCH 08/18] put test file in correct format --- tests/testthat/test-smk-standardiseDf.R | 1201 +++++++++++------------ 1 file changed, 570 insertions(+), 631 deletions(-) diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R index fca136ac..d3ceb647 100644 --- a/tests/testthat/test-smk-standardiseDf.R +++ b/tests/testthat/test-smk-standardiseDf.R @@ -1,88 +1,25 @@ -suppressWarnings(library(DSLite)) -library(purrr) -library(dplyr) -# devtools::install_github("datashield/dsBase", ref = "v6.4.0-dev") -library(dsBase) -library(dsBaseClient) -library(purrr) -library(dsTidyverse) -source("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsBaseClient/tests/testthat/helpers.R") -# devtools::load_all("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsTidyverse") -options("datashield.return_errors" = TRUE) -testthat::local_edition(3) - -df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 10) - -df_1 <- df %>% select(1:5, 6, 9, 12, 15, 18) %>% - mutate( - fac_col2 = factor(fac_col2, levels = c("Blue", "Green")), - fac_col4 = as.numeric(fac_col4), - fac_col5 = as.logical(fac_col5)) - -df_2 <- df %>% select(1:5, 7, 10, 13, 16, 19) %>% - mutate( - fac_col2 = factor(fac_col2, levels = c("Green", "Red")), - fac_col3 = factor(fac_col3, levels = "No"), - fac_col4 = as.character(fac_col4), - fac_col5 = as.integer(fac_col5)) - -df_3 <- df %>% select(1:5, 11, 14, 17, 20) %>% - mutate( - fac_col2 = factor(fac_col2, levels = "Blue"), - fac_col3 = factor(fac_col3, levels = "Yes")) - -options(datashield.env = environment()) - -dslite.server <- newDSLiteServer( - tables = list( - df_1 = df_1, - df_2 = df_2, - df_3 = df_3 - ) -) - -dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse", "dsDanger"))) -dslite.server$aggregateMethod("getClassAllColsDS", "getClassAllColsDS") -dslite.server$assignMethod("fixClassDS", "fixClassDS") -dslite.server$assignMethod("fixColsDS", "fixColsDS") -dslite.server$aggregateMethod("getAllLevelsDS", "getAllLevelsDS") -dslite.server$assignMethod("fixLevelsDS", "fixLevelsDS") - -builder <- DSI::newDSLoginBuilder() - -builder$append( - server = "server_1", - url = "dslite.server", - driver = "DSLiteDriver" -) +# +# Set up +# -builder$append( - server = "server_2", - url = "dslite.server", - driver = "DSLiteDriver" -) - -builder$append( - server = "server_3", - url = "dslite.server", - driver = "DSLiteDriver" -) +context("ds.standardiseDf::smk::setup") +options(datashield.errors.print = TRUE) -logindata <- builder$build() -conns <- DSI::datashield.login(logins = logindata, assign = FALSE) +connect.studies.dataset.stand(c("fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col9", "col12", "col15", "col18")) -datashield.assign.table(conns["server_1"], "df", "df_1") -datashield.assign.table(conns["server_2"], "df", "df_2") -datashield.assign.table(conns["server_3"], "df", "df_3") +test_that("setup", { + ds_expect_variables(c("D")) +}) -datashield.assign.table(conns["server_1"], "df_ident", "df_1") -datashield.assign.table(conns["server_2"], "df_ident", "df_1") -datashield.assign.table(conns["server_3"], "df_ident", "df_1") +# +# Tests +# #################################################################################################### # Code that will be used in multiple tests #################################################################################################### -var_class <- .get_var_classes("df", datasources = conns) +var_class <- .get_var_classes("D", datasources = ds.test_env$connections) class_conflicts <- .identify_class_conflicts(var_class) @@ -91,11 +28,11 @@ different_classes <- c("fac_col4", "fac_col5") class_decisions <- c("1", "5") .fix_classes( - df.name = "df", + df.name = "D", different_classes = different_classes, class_decisions = class_decisions, newobj = "new_classes", - datasources = conns) + datasources = ds.test_env$connections) cols_to_set <- c( "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col9", "col12", @@ -103,12 +40,12 @@ cols_to_set <- c( "col20") .add_missing_cols_to_df( - df.name = "df", + df.name = "D", cols_to_add_if_missing = cols_to_set, newobj = "with_new_cols", - datasources = conns) + datasources = ds.test_env$connections) -old_cols <- ds.colnames("df") +old_cols <- ds.colnames("D") new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", @@ -122,11 +59,11 @@ new_cols_servers <- list( added_cols <- .get_added_cols(old_cols, new_cols_servers) -var_class_fact <- .get_var_classes("with_new_cols", datasources = conns) +var_class_fact <- .get_var_classes("with_new_cols", datasources = ds.test_env$connections) fac_vars <- .identify_factor_vars(var_class_fact) -fac_levels <- .get_factor_levels(fac_vars, "with_new_cols", conns) +fac_levels <- .get_factor_levels("with_new_cols", fac_vars, ds.test_env$connections) level_conflicts <- .identify_level_conflicts(fac_levels) @@ -192,554 +129,556 @@ test_that(".get_var_classes returns correct output", { expect_equal(var_class, expected) }) +# +# test_that(".identify_class_conflicts returns correct output", { +# expected <- list( +# fac_col4 = c("numeric", "character", "factor"), +# fac_col5 = c("logical", "integer", "factor") +# ) +# +# expect_equal(class_conflicts, expected) +# +# }) +# +# test_that("ask_question displays the correct prompt", { +# expect_snapshot(ask_question_class("my_var")) +# }) +# +# test_that("ask_question_wait_response_class continues with valid response", { +# expect_equal( +# with_mocked_bindings( +# ask_question_wait_response_class("a variable"), +# ask_question_class = function(var) "A question", +# readline = function() "1" +# ), "1" +# ) +# }) +# +# test_that("ask_question_wait_response_class throws error if option 6 selected", { +# expect_error( +# with_mocked_bindings( +# ask_question_wait_response_class("a variable"), +# ask_question_class = function(var) "A question", +# readline = function() "6") +# ) +# }) +# +# test_that("print_all_classes prints the correct message", { +# expect_snapshot( +# print_all_classes( +# c("server_1", "server_2", "server_3"), +# c("numeric", "factor", "integer") +# ) +# ) +# }) +# +# test_that("prompt_user_class_decision function properly", { +# expect_message( +# with_mocked_bindings( +# prompt_user_class_decision( +# var = "test_col", +# servers = c("server_1", "server_2", "server_3"), +# classes = c("numeric", "character", "factor"), +# newobj = "test_df", +# datasources = datasources), +# ask_question_wait_response_class = function(var, newobj, datasources) "test_col" +# ) +# ) +# +# expect_equal( +# with_mocked_bindings( +# prompt_user_class_decision( +# var = "test_col", +# servers = c("server_1", "server_2", "server_3"), +# classes = c("numeric", "character", "factor"), +# newobj = "test_df", +# datasources = datasources), +# ask_question_wait_response_class = function(var, newobj, datasources) "test_col" +# ), +# "test_col" +# ) +# }) +# +# test_that("prompt_user_class_decision_all_vars returns correct value", { +# expect_equal( +# with_mocked_bindings( +# prompt_user_class_decision_all_vars( +# vars = c("test_var_1", "test_var_2"), +# all_servers = c("server_1", "server_2", "server_3"), +# all_classes = tibble( +# test_var_1 = c("numeric", "character", "factor"), +# test_var_2 = c("logical", "integer", "factor") +# ), +# "test_df", +# conns), +# prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" +# ), +# c("1", "1") +# ) +# }) +# +# test_that(".fix_classes sets the correct classes in serverside data frame", { +# +# expect_equal( +# unname(unlist(ds.class("df$fac_col4"))), +# c("numeric", "character", "factor") +# ) +# +# expect_equal( +# unname(unlist(ds.class("df$fac_col5"))), +# c("logical", "integer", "factor") +# ) +# +# expect_equal( +# unname(unlist(ds.class("new_classes$fac_col4"))), +# rep("factor", 3) +# ) +# +# expect_equal( +# unname(unlist(ds.class("new_classes$fac_col5"))), +# rep("logical", 3) +# ) +# +# }) +# +# test_that(".get_unique_cols extracts unique names from a list", { +# expect_equal( +# .get_unique_cols( +# list( +# server_1 = c("col_1", "col_2", "col_3"), +# server_1 = c("col_1", "col_2", "col_4"), +# server_1 = c("col_2", "col_3", "col_3", "col_5") +# ) +# ), +# c("col_1", "col_2", "col_3", "col_4", "col_5") +# ) +# }) +# +# test_that(".add_missing_cols_to_df correctly creates missing columns", { +# +# new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", +# "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", +# "fac_col6", "fac_col7", "fac_col9") +# +# observed <- ds.colnames("with_new_cols") +# +# expected <- list( +# server_1 = new_cols, +# server_2 = new_cols, +# server_3 = new_cols +# ) +# +# expect_equal(observed, expected) +# }) +# +# test_that(".get_added_cols correctly identifies newly added columns", { +# +# expect_equal( +# added_cols, +# list( +# server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), +# server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), +# server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") +# ) +# ) +# }) +# +# test_that(".identify_factor_vars correctly identifies factor variables", { +# +# +# +# var_class_fact <- var_class |> dplyr::select(server: col18) +# expect_equal( +# names(fac_vars), +# c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") +# ) +# }) +# +# test_that(".get_factor_levels correctly identifies factor levels", { +# expected <- list( +# server_1 = list( +# fac_col1 = c("High", "Low", "Medium"), +# fac_col2 = c("Blue", "Green"), +# fac_col3 = c("No", "Yes"), +# fac_col6 = c("Bird", "Cat", "Dog"), +# fac_col9 = c("False", "True") +# ), +# server_2 = list( +# fac_col1 = c("High", "Low", "Medium"), +# fac_col2 = c("Green", "Red"), +# fac_col3 = c("No"), +# fac_col6 = NULL, +# fac_col9 = NULL +# ), +# server_3 = list( +# fac_col1 = c("High", "Low", "Medium"), +# fac_col2 = c("Blue"), +# fac_col3 = c("Yes"), +# fac_col6 = NULL, +# fac_col9 = NULL +# ) +# ) +# +# expect_equal(fac_levels, expected) +# }) +# +# test_that(".identify_level_conflicts correctly factor columns with different levels", { +# expect_equal( +# .identify_level_conflicts(fac_levels), +# c("fac_col2", "fac_col3", "fac_col6", "fac_col9") +# ) +# +# }) +# +# test_that("ask_question_wait_response_levels continues with valid response", { +# expect_equal( +# with_mocked_bindings( +# suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), +# readline = function() "1" +# ), "1" +# ) +# +# expect_equal( +# with_mocked_bindings( +# suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), +# readline = function() "1" +# ), "1" +# ) +# +# }) +# +# test_that("ask_question_wait_response_levels aborts with response of 3", { +# expect_error( +# with_mocked_bindings( +# suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), +# readline = function() "3") +# ) +# }) +# +# test_that(".make_levels_message makes correct message", { +# expect_snapshot(.make_levels_message(level_conflicts)) +# }) +# +# test_that(".get_unique_levels extracts all possible levels", { +# +# expected <- list( +# fac_col2 = c("Blue", "Green", "Red"), +# fac_col3 = c("No", "Yes"), +# fac_col6 = c("Bird", "Cat", "Dog"), +# fac_col9 = c("False", "True") +# ) +# +# expect_equal(unique_levs, expected) +# +# }) +# +# test_that(".set_factor_levels sets levels correctly", { +# .set_factor_levels("with_new_cols", unique_levs, conns) +# +# expect_equal( +# ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), +# list( +# server_1 = c("Blue", "Green", "Red"), +# server_2 = c("Blue", "Green", "Red"), +# server_3 = c("Blue", "Green", "Red") +# ) +# ) +# +# expect_equal( +# ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), +# list( +# server_1 = c("No", "Yes"), +# server_2 = c("No", "Yes"), +# server_3 = c("No", "Yes") +# ) +# ) +# +# expect_equal( +# ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), +# list( +# server_1 = c("Bird", "Cat", "Dog"), +# server_2 = c("Bird", "Cat", "Dog"), +# server_3 = c("Bird", "Cat", "Dog") +# ) +# ) +# +# expect_equal( +# ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), +# list( +# server_1 = c("False", "True"), +# server_2 = c("False", "True"), +# server_3 = c("False", "True") +# ) +# ) +# +# }) +# +# test_that(".print_var_recode_message prints the correct message", { +# expect_snapshot(.print_var_recode_message(added_cols, "test_df")) +# }) +# +# test_that(".print_class_recode_message prints the correct message", { +# expect_snapshot( +# .print_class_recode_message(class_decisions, different_classes, "test_df") +# ) +# }) +# +# test_that(".print_levels_recode_message prints the correct message", { +# expect_snapshot( +# .print_levels_recode_message(unique_levs, "test_df") +# ) +# }) +# +# test_that(".make_levels_recode_message prints the correct message", { +# expect_equal( +# .make_levels_recode_message(unique_levs), +# list( +# "fac_col2 --> Blue, Green, Red", +# "fac_col3 --> No, Yes", +# "fac_col6 --> Bird, Cat, Dog", +# "fac_col9 --> False, True" +# ) +# ) +# }) +# +# test_that(".print_out_messages prints the correct messages", { +# expect_snapshot( +# .print_out_messages( +# added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" +# ) +# ) +# }) +# +# test_that(".change_choice_to_string converts numeric class codes to strings correctly", { +# expect_equal(.change_choice_to_string("1"), "factor") +# expect_equal(.change_choice_to_string("2"), "integer") +# expect_equal(.change_choice_to_string("3"), "numeric") +# expect_equal(.change_choice_to_string("4"), "character") +# expect_equal(.change_choice_to_string("5"), "logical") +# }) +# +# test_that("ds.standardiseDf doesn't run if dataframes are identical", { +# expect_error( +# ds.standardiseDf( +# df = "df_ident", +# newobj = "test_fill" +# ), +# "Columns are identical" +# ) +# }) +# +# test_that("ds.standardiseDf works when called directly and class conversion is factor", { +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "test_fill" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", +# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col4")[[1]], +# "factor" +# ) +# }) +# +# test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "test_fill" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), +# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col4")[[1]], +# "integer" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col5")[[1]], +# "integer" +# ) +# }) +# +# test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "test_fill" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), +# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col4")[[1]], +# "numeric" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col5")[[1]], +# "numeric" +# ) +# }) +# +# test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "test_fill" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), +# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col4")[[1]], +# "character" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col5")[[1]], +# "character" +# ) +# }) +# +# test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "test_fill" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), +# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col4")[[1]], +# "logical" +# ) +# +# expect_equal( +# ds.class("test_fill$fac_col5")[[1]], +# "logical" +# ) +# }) +# +# test_that("ds.standardiseDf changes levels if this option is selected", { +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "test_fill" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), +# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" +# ) +# +# levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) +# levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) +# levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) +# levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) +# levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) +# levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) +# +# expect_equal( +# levels_2, +# list( +# server_1 = c("Blue", "Green", "Red"), +# server_2 = c("Blue", "Green", "Red"), +# server_3 = c("Blue", "Green", "Red") +# ) +# ) +# +# expect_equal( +# levels_3, +# list( +# server_1 = c("No", "Yes"), +# server_2 = c("No", "Yes"), +# server_3 = c("No", "Yes") +# ) +# ) +# +# expect_equal( +# levels_4, +# list( +# server_1 = c("1", "2", "3", "A", "B", "C"), +# server_2 = c("1", "2", "3", "A", "B", "C"), +# server_3 = c("1", "2", "3", "A", "B", "C") +# ) +# ) +# +# expect_equal( +# levels_5, +# list( +# server_1 = c("1", "2", "3", "One", "Three", "Two"), +# server_2 = c("1", "2", "3", "One", "Three", "Two"), +# server_3 = c("1", "2", "3", "One", "Three", "Two") +# ) +# ) +# +# expect_equal( +# levels_6, +# list( +# server_1 = c("Bird", "Cat", "Dog"), +# server_2 = c("Bird", "Cat", "Dog"), +# server_3 = c("Bird", "Cat", "Dog") +# ) +# ) +# +# expect_equal( +# levels_9, +# list( +# server_1 = c("False", "True"), +# server_2 = c("False", "True"), +# server_3 = c("False", "True") +# ) +# ) +# +# }) +# +# test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { +# expect_error( +# ds.standardiseDf( +# df = "df", +# newobj = "shouldnt_exist", +# fix_class = "no" +# ), +# "Variables do not have the same class in all studies" +# ) +# +# expect_equal( +# ds.exists("shouldnt_exist")[[1]], +# FALSE +# ) +# }) +# +# test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { +# expect_error( +# with_mocked_bindings( +# ds.standardiseDf( +# df = "df", +# newobj = "shouldnt_exist", +# fix_levels = "no" +# ), +# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") +# ), +# "Factor variables do not have the same levels in all studies" +# ) +# +# expect_equal( +# ds.exists("shouldnt_exist")[[1]], +# FALSE +# ) +# }) + + +## Add disclosure check levels +## Push change to dsBase -test_that(".identify_class_conflicts returns correct output", { - expected <- list( - fac_col4 = c("numeric", "character", "factor"), - fac_col5 = c("logical", "integer", "factor") - ) - - expect_equal(class_conflicts, expected) - -}) - -test_that("ask_question displays the correct prompt", { - expect_snapshot(ask_question_class("my_var")) -}) - -test_that("ask_question_wait_response_class continues with valid response", { - expect_equal( - with_mocked_bindings( - ask_question_wait_response_class("a variable"), - ask_question_class = function(var) "A question", - readline = function() "1" - ), "1" - ) -}) - -test_that("ask_question_wait_response_class throws error if option 6 selected", { - expect_error( - with_mocked_bindings( - ask_question_wait_response_class("a variable"), - ask_question_class = function(var) "A question", - readline = function() "6") - ) -}) - -test_that("print_all_classes prints the correct message", { - expect_snapshot( - print_all_classes( - c("server_1", "server_2", "server_3"), - c("numeric", "factor", "integer") - ) - ) -}) - -test_that("prompt_user_class_decision function properly", { - expect_message( - with_mocked_bindings( - prompt_user_class_decision( - var = "test_col", - servers = c("server_1", "server_2", "server_3"), - classes = c("numeric", "character", "factor"), - newobj = "test_df", - datasources = datasources), - ask_question_wait_response_class = function(var, newobj, datasources) "test_col" - ) - ) - - expect_equal( - with_mocked_bindings( - prompt_user_class_decision( - var = "test_col", - servers = c("server_1", "server_2", "server_3"), - classes = c("numeric", "character", "factor"), - newobj = "test_df", - datasources = datasources), - ask_question_wait_response_class = function(var, newobj, datasources) "test_col" - ), - "test_col" - ) -}) - -test_that("prompt_user_class_decision_all_vars returns correct value", { - expect_equal( - with_mocked_bindings( - prompt_user_class_decision_all_vars( - vars = c("test_var_1", "test_var_2"), - all_servers = c("server_1", "server_2", "server_3"), - all_classes = tibble( - test_var_1 = c("numeric", "character", "factor"), - test_var_2 = c("logical", "integer", "factor") - ), - "test_df", - conns), - prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" - ), - c("1", "1") - ) -}) - -test_that(".fix_classes sets the correct classes in serverside data frame", { - - expect_equal( - unname(unlist(ds.class("df$fac_col4"))), - c("numeric", "character", "factor") - ) - - expect_equal( - unname(unlist(ds.class("df$fac_col5"))), - c("logical", "integer", "factor") - ) - - expect_equal( - unname(unlist(ds.class("new_classes$fac_col4"))), - rep("factor", 3) - ) - - expect_equal( - unname(unlist(ds.class("new_classes$fac_col5"))), - rep("logical", 3) - ) - -}) - -test_that(".get_unique_cols extracts unique names from a list", { - expect_equal( - .get_unique_cols( - list( - server_1 = c("col_1", "col_2", "col_3"), - server_1 = c("col_1", "col_2", "col_4"), - server_1 = c("col_2", "col_3", "col_3", "col_5") - ) - ), - c("col_1", "col_2", "col_3", "col_4", "col_5") - ) -}) - -test_that(".add_missing_cols_to_df correctly creates missing columns", { - - new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", - "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", - "fac_col6", "fac_col7", "fac_col9") - - observed <- ds.colnames("with_new_cols") - - expected <- list( - server_1 = new_cols, - server_2 = new_cols, - server_3 = new_cols - ) - - expect_equal(observed, expected) -}) - -test_that(".get_added_cols correctly identifies newly added columns", { - - expect_equal( - added_cols, - list( - server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), - server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), - server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") - ) - ) -}) - -test_that(".identify_factor_vars correctly identifies factor variables", { - - - - var_class_fact <- var_class |> dplyr::select(server: col18) - expect_equal( - names(fac_vars), - c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") - ) -}) - -test_that(".get_factor_levels correctly identifies factor levels", { - expected <- list( - server_1 = list( - fac_col1 = c("High", "Low", "Medium"), - fac_col2 = c("Blue", "Green"), - fac_col3 = c("No", "Yes"), - fac_col6 = c("Bird", "Cat", "Dog"), - fac_col9 = c("False", "True") - ), - server_2 = list( - fac_col1 = c("High", "Low", "Medium"), - fac_col2 = c("Green", "Red"), - fac_col3 = c("No"), - fac_col6 = NULL, - fac_col9 = NULL - ), - server_3 = list( - fac_col1 = c("High", "Low", "Medium"), - fac_col2 = c("Blue"), - fac_col3 = c("Yes"), - fac_col6 = NULL, - fac_col9 = NULL - ) - ) - - expect_equal(fac_levels, expected) -}) - -test_that(".identify_level_conflicts correctly factor columns with different levels", { - expect_equal( - .identify_level_conflicts(fac_levels), - c("fac_col2", "fac_col3", "fac_col6", "fac_col9") - ) - -}) - -test_that("ask_question_wait_response_levels continues with valid response", { - expect_equal( - with_mocked_bindings( - suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), - readline = function() "1" - ), "1" - ) - - expect_equal( - with_mocked_bindings( - suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), - readline = function() "1" - ), "1" - ) - -}) - -test_that("ask_question_wait_response_levels aborts with response of 3", { - expect_error( - with_mocked_bindings( - suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), - readline = function() "3") - ) -}) - -test_that(".make_levels_message makes correct message", { - expect_snapshot(.make_levels_message(level_conflicts)) -}) - -test_that(".get_unique_levels extracts all possible levels", { - - expected <- list( - fac_col2 = c("Blue", "Green", "Red"), - fac_col3 = c("No", "Yes"), - fac_col6 = c("Bird", "Cat", "Dog"), - fac_col9 = c("False", "True") - ) - - expect_equal(unique_levs, expected) - -}) - -test_that(".set_factor_levels sets levels correctly", { - .set_factor_levels("with_new_cols", unique_levs, conns) - - expect_equal( - ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), - list( - server_1 = c("Blue", "Green", "Red"), - server_2 = c("Blue", "Green", "Red"), - server_3 = c("Blue", "Green", "Red") - ) - ) - - expect_equal( - ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), - list( - server_1 = c("No", "Yes"), - server_2 = c("No", "Yes"), - server_3 = c("No", "Yes") - ) - ) - - expect_equal( - ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), - list( - server_1 = c("Bird", "Cat", "Dog"), - server_2 = c("Bird", "Cat", "Dog"), - server_3 = c("Bird", "Cat", "Dog") - ) - ) - - expect_equal( - ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), - list( - server_1 = c("False", "True"), - server_2 = c("False", "True"), - server_3 = c("False", "True") - ) - ) - -}) - -test_that(".print_var_recode_message prints the correct message", { - expect_snapshot(.print_var_recode_message(added_cols, "test_df")) -}) - -test_that(".print_class_recode_message prints the correct message", { - expect_snapshot( - .print_class_recode_message(class_decisions, different_classes, "test_df") - ) -}) - -test_that(".print_levels_recode_message prints the correct message", { - expect_snapshot( - .print_levels_recode_message(unique_levs, "test_df") - ) -}) - -test_that(".make_levels_recode_message prints the correct message", { - expect_equal( - .make_levels_recode_message(unique_levs), - list( - "fac_col2 --> Blue, Green, Red", - "fac_col3 --> No, Yes", - "fac_col6 --> Bird, Cat, Dog", - "fac_col9 --> False, True" - ) - ) -}) - -test_that(".print_out_messages prints the correct messages", { - expect_snapshot( - .print_out_messages( - added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" - ) - ) -}) - -test_that(".change_choice_to_string converts numeric class codes to strings correctly", { - expect_equal(.change_choice_to_string("1"), "factor") - expect_equal(.change_choice_to_string("2"), "integer") - expect_equal(.change_choice_to_string("3"), "numeric") - expect_equal(.change_choice_to_string("4"), "character") - expect_equal(.change_choice_to_string("5"), "logical") -}) - -test_that("ds.standardiseDf doesn't run if dataframes are identical", { - expect_error( - ds.standardiseDf( - df = "df_ident", - newobj = "test_fill" - ), - "Columns are identical" - ) -}) - -test_that("ds.standardiseDf works when called directly and class conversion is factor", { - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "test_fill" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", - ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" - ) - - expect_equal( - ds.class("test_fill$fac_col4")[[1]], - "factor" - ) -}) - -test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "test_fill" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), - ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" - ) - - expect_equal( - ds.class("test_fill$fac_col4")[[1]], - "integer" - ) - - expect_equal( - ds.class("test_fill$fac_col5")[[1]], - "integer" - ) -}) - -test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "test_fill" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), - ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" - ) - - expect_equal( - ds.class("test_fill$fac_col4")[[1]], - "numeric" - ) - - expect_equal( - ds.class("test_fill$fac_col5")[[1]], - "numeric" - ) -}) - -test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "test_fill" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), - ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" - ) - - expect_equal( - ds.class("test_fill$fac_col4")[[1]], - "character" - ) - - expect_equal( - ds.class("test_fill$fac_col5")[[1]], - "character" - ) -}) - -test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "test_fill" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), - ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" - ) - - expect_equal( - ds.class("test_fill$fac_col4")[[1]], - "logical" - ) - - expect_equal( - ds.class("test_fill$fac_col5")[[1]], - "logical" - ) -}) - -test_that("ds.standardiseDf changes levels if this option is selected", { - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "test_fill" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), - ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" - ) - - levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) - levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) - levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) - levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) - levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) - levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) - - expect_equal( - levels_2, - list( - server_1 = c("Blue", "Green", "Red"), - server_2 = c("Blue", "Green", "Red"), - server_3 = c("Blue", "Green", "Red") - ) - ) - - expect_equal( - levels_3, - list( - server_1 = c("No", "Yes"), - server_2 = c("No", "Yes"), - server_3 = c("No", "Yes") - ) - ) - - expect_equal( - levels_4, - list( - server_1 = c("1", "2", "3", "A", "B", "C"), - server_2 = c("1", "2", "3", "A", "B", "C"), - server_3 = c("1", "2", "3", "A", "B", "C") - ) - ) - - expect_equal( - levels_5, - list( - server_1 = c("1", "2", "3", "One", "Three", "Two"), - server_2 = c("1", "2", "3", "One", "Three", "Two"), - server_3 = c("1", "2", "3", "One", "Three", "Two") - ) - ) - - expect_equal( - levels_6, - list( - server_1 = c("Bird", "Cat", "Dog"), - server_2 = c("Bird", "Cat", "Dog"), - server_3 = c("Bird", "Cat", "Dog") - ) - ) - - expect_equal( - levels_9, - list( - server_1 = c("False", "True"), - server_2 = c("False", "True"), - server_3 = c("False", "True") - ) - ) - -}) - -test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { - expect_error( - ds.standardiseDf( - df = "df", - newobj = "shouldnt_exist", - fix_class = "no" - ), - "Variables do not have the same class in all studies" - ) - - expect_equal( - ds.exists("shouldnt_exist")[[1]], - FALSE - ) -}) - -test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { - expect_error( - with_mocked_bindings( - ds.standardiseDf( - df = "df", - newobj = "shouldnt_exist", - fix_levels = "no" - ), - prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") - ), - "Factor variables do not have the same levels in all studies" - ) - - expect_equal( - ds.exists("shouldnt_exist")[[1]], - FALSE - ) -}) - - -## 9. Handle incorrect response for level fix From 3d29e879ae054b60bcfa614f8c875cde60678adb Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:11:25 +0200 Subject: [PATCH 09/18] test: all tests working --- tests/testthat/test-smk-standardiseDf.R | 1109 ++++++++++++----------- 1 file changed, 560 insertions(+), 549 deletions(-) diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R index d3ceb647..765d655c 100644 --- a/tests/testthat/test-smk-standardiseDf.R +++ b/tests/testthat/test-smk-standardiseDf.R @@ -1,12 +1,15 @@ # # Set up # - context("ds.standardiseDf::smk::setup") options(datashield.errors.print = TRUE) -connect.studies.dataset.stand(c("fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", - "fac_col6", "fac_col9", "col12", "col15", "col18")) +connect.studies.dataset.stand( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) test_that("setup", { ds_expect_variables(c("D")) @@ -104,7 +107,7 @@ test_that(".stop_if_cols_identical doesn't throw error if data frames have diffe test_that(".get_var_classes returns correct output", { expected <- tibble( - server = c("server_1", "server_2", "server_3"), + server = c("sim1", "sim2", "sim3"), fac_col1 = c("factor", "factor", "factor"), fac_col2 = c("factor", "factor", "factor"), fac_col3 = c("factor", "factor", "factor"), @@ -129,551 +132,559 @@ test_that(".get_var_classes returns correct output", { expect_equal(var_class, expected) }) -# -# test_that(".identify_class_conflicts returns correct output", { -# expected <- list( -# fac_col4 = c("numeric", "character", "factor"), -# fac_col5 = c("logical", "integer", "factor") -# ) -# -# expect_equal(class_conflicts, expected) -# -# }) -# -# test_that("ask_question displays the correct prompt", { -# expect_snapshot(ask_question_class("my_var")) -# }) -# -# test_that("ask_question_wait_response_class continues with valid response", { -# expect_equal( -# with_mocked_bindings( -# ask_question_wait_response_class("a variable"), -# ask_question_class = function(var) "A question", -# readline = function() "1" -# ), "1" -# ) -# }) -# -# test_that("ask_question_wait_response_class throws error if option 6 selected", { -# expect_error( -# with_mocked_bindings( -# ask_question_wait_response_class("a variable"), -# ask_question_class = function(var) "A question", -# readline = function() "6") -# ) -# }) -# -# test_that("print_all_classes prints the correct message", { -# expect_snapshot( -# print_all_classes( -# c("server_1", "server_2", "server_3"), -# c("numeric", "factor", "integer") -# ) -# ) -# }) -# -# test_that("prompt_user_class_decision function properly", { -# expect_message( -# with_mocked_bindings( -# prompt_user_class_decision( -# var = "test_col", -# servers = c("server_1", "server_2", "server_3"), -# classes = c("numeric", "character", "factor"), -# newobj = "test_df", -# datasources = datasources), -# ask_question_wait_response_class = function(var, newobj, datasources) "test_col" -# ) -# ) -# -# expect_equal( -# with_mocked_bindings( -# prompt_user_class_decision( -# var = "test_col", -# servers = c("server_1", "server_2", "server_3"), -# classes = c("numeric", "character", "factor"), -# newobj = "test_df", -# datasources = datasources), -# ask_question_wait_response_class = function(var, newobj, datasources) "test_col" -# ), -# "test_col" -# ) -# }) -# -# test_that("prompt_user_class_decision_all_vars returns correct value", { -# expect_equal( -# with_mocked_bindings( -# prompt_user_class_decision_all_vars( -# vars = c("test_var_1", "test_var_2"), -# all_servers = c("server_1", "server_2", "server_3"), -# all_classes = tibble( -# test_var_1 = c("numeric", "character", "factor"), -# test_var_2 = c("logical", "integer", "factor") -# ), -# "test_df", -# conns), -# prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" -# ), -# c("1", "1") -# ) -# }) -# -# test_that(".fix_classes sets the correct classes in serverside data frame", { -# -# expect_equal( -# unname(unlist(ds.class("df$fac_col4"))), -# c("numeric", "character", "factor") -# ) -# -# expect_equal( -# unname(unlist(ds.class("df$fac_col5"))), -# c("logical", "integer", "factor") -# ) -# -# expect_equal( -# unname(unlist(ds.class("new_classes$fac_col4"))), -# rep("factor", 3) -# ) -# -# expect_equal( -# unname(unlist(ds.class("new_classes$fac_col5"))), -# rep("logical", 3) -# ) -# -# }) -# -# test_that(".get_unique_cols extracts unique names from a list", { -# expect_equal( -# .get_unique_cols( -# list( -# server_1 = c("col_1", "col_2", "col_3"), -# server_1 = c("col_1", "col_2", "col_4"), -# server_1 = c("col_2", "col_3", "col_3", "col_5") -# ) -# ), -# c("col_1", "col_2", "col_3", "col_4", "col_5") -# ) -# }) -# -# test_that(".add_missing_cols_to_df correctly creates missing columns", { -# -# new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", -# "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", -# "fac_col6", "fac_col7", "fac_col9") -# -# observed <- ds.colnames("with_new_cols") -# -# expected <- list( -# server_1 = new_cols, -# server_2 = new_cols, -# server_3 = new_cols -# ) -# -# expect_equal(observed, expected) -# }) -# -# test_that(".get_added_cols correctly identifies newly added columns", { -# -# expect_equal( -# added_cols, -# list( -# server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), -# server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), -# server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") -# ) -# ) -# }) -# -# test_that(".identify_factor_vars correctly identifies factor variables", { -# -# -# -# var_class_fact <- var_class |> dplyr::select(server: col18) -# expect_equal( -# names(fac_vars), -# c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") -# ) -# }) -# -# test_that(".get_factor_levels correctly identifies factor levels", { -# expected <- list( -# server_1 = list( -# fac_col1 = c("High", "Low", "Medium"), -# fac_col2 = c("Blue", "Green"), -# fac_col3 = c("No", "Yes"), -# fac_col6 = c("Bird", "Cat", "Dog"), -# fac_col9 = c("False", "True") -# ), -# server_2 = list( -# fac_col1 = c("High", "Low", "Medium"), -# fac_col2 = c("Green", "Red"), -# fac_col3 = c("No"), -# fac_col6 = NULL, -# fac_col9 = NULL -# ), -# server_3 = list( -# fac_col1 = c("High", "Low", "Medium"), -# fac_col2 = c("Blue"), -# fac_col3 = c("Yes"), -# fac_col6 = NULL, -# fac_col9 = NULL -# ) -# ) -# -# expect_equal(fac_levels, expected) -# }) -# -# test_that(".identify_level_conflicts correctly factor columns with different levels", { -# expect_equal( -# .identify_level_conflicts(fac_levels), -# c("fac_col2", "fac_col3", "fac_col6", "fac_col9") -# ) -# -# }) -# -# test_that("ask_question_wait_response_levels continues with valid response", { -# expect_equal( -# with_mocked_bindings( -# suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), -# readline = function() "1" -# ), "1" -# ) -# -# expect_equal( -# with_mocked_bindings( -# suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), -# readline = function() "1" -# ), "1" -# ) -# -# }) -# -# test_that("ask_question_wait_response_levels aborts with response of 3", { -# expect_error( -# with_mocked_bindings( -# suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), -# readline = function() "3") -# ) -# }) -# -# test_that(".make_levels_message makes correct message", { -# expect_snapshot(.make_levels_message(level_conflicts)) -# }) -# -# test_that(".get_unique_levels extracts all possible levels", { -# -# expected <- list( -# fac_col2 = c("Blue", "Green", "Red"), -# fac_col3 = c("No", "Yes"), -# fac_col6 = c("Bird", "Cat", "Dog"), -# fac_col9 = c("False", "True") -# ) -# -# expect_equal(unique_levs, expected) -# -# }) -# -# test_that(".set_factor_levels sets levels correctly", { -# .set_factor_levels("with_new_cols", unique_levs, conns) -# -# expect_equal( -# ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), -# list( -# server_1 = c("Blue", "Green", "Red"), -# server_2 = c("Blue", "Green", "Red"), -# server_3 = c("Blue", "Green", "Red") -# ) -# ) -# -# expect_equal( -# ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), -# list( -# server_1 = c("No", "Yes"), -# server_2 = c("No", "Yes"), -# server_3 = c("No", "Yes") -# ) -# ) -# -# expect_equal( -# ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), -# list( -# server_1 = c("Bird", "Cat", "Dog"), -# server_2 = c("Bird", "Cat", "Dog"), -# server_3 = c("Bird", "Cat", "Dog") -# ) -# ) -# -# expect_equal( -# ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), -# list( -# server_1 = c("False", "True"), -# server_2 = c("False", "True"), -# server_3 = c("False", "True") -# ) -# ) -# -# }) -# -# test_that(".print_var_recode_message prints the correct message", { -# expect_snapshot(.print_var_recode_message(added_cols, "test_df")) -# }) -# -# test_that(".print_class_recode_message prints the correct message", { -# expect_snapshot( -# .print_class_recode_message(class_decisions, different_classes, "test_df") -# ) -# }) -# -# test_that(".print_levels_recode_message prints the correct message", { -# expect_snapshot( -# .print_levels_recode_message(unique_levs, "test_df") -# ) -# }) -# -# test_that(".make_levels_recode_message prints the correct message", { -# expect_equal( -# .make_levels_recode_message(unique_levs), -# list( -# "fac_col2 --> Blue, Green, Red", -# "fac_col3 --> No, Yes", -# "fac_col6 --> Bird, Cat, Dog", -# "fac_col9 --> False, True" -# ) -# ) -# }) -# -# test_that(".print_out_messages prints the correct messages", { -# expect_snapshot( -# .print_out_messages( -# added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" -# ) -# ) -# }) -# -# test_that(".change_choice_to_string converts numeric class codes to strings correctly", { -# expect_equal(.change_choice_to_string("1"), "factor") -# expect_equal(.change_choice_to_string("2"), "integer") -# expect_equal(.change_choice_to_string("3"), "numeric") -# expect_equal(.change_choice_to_string("4"), "character") -# expect_equal(.change_choice_to_string("5"), "logical") -# }) -# -# test_that("ds.standardiseDf doesn't run if dataframes are identical", { -# expect_error( -# ds.standardiseDf( -# df = "df_ident", -# newobj = "test_fill" -# ), -# "Columns are identical" -# ) -# }) -# -# test_that("ds.standardiseDf works when called directly and class conversion is factor", { -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "test_fill" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", -# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col4")[[1]], -# "factor" -# ) -# }) -# -# test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "test_fill" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), -# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col4")[[1]], -# "integer" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col5")[[1]], -# "integer" -# ) -# }) -# -# test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "test_fill" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), -# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col4")[[1]], -# "numeric" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col5")[[1]], -# "numeric" -# ) -# }) -# -# test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "test_fill" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), -# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col4")[[1]], -# "character" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col5")[[1]], -# "character" -# ) -# }) -# -# test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "test_fill" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), -# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col4")[[1]], -# "logical" -# ) -# -# expect_equal( -# ds.class("test_fill$fac_col5")[[1]], -# "logical" -# ) -# }) -# -# test_that("ds.standardiseDf changes levels if this option is selected", { -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "test_fill" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), -# ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" -# ) -# -# levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) -# levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) -# levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) -# levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) -# levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) -# levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) -# -# expect_equal( -# levels_2, -# list( -# server_1 = c("Blue", "Green", "Red"), -# server_2 = c("Blue", "Green", "Red"), -# server_3 = c("Blue", "Green", "Red") -# ) -# ) -# -# expect_equal( -# levels_3, -# list( -# server_1 = c("No", "Yes"), -# server_2 = c("No", "Yes"), -# server_3 = c("No", "Yes") -# ) -# ) -# -# expect_equal( -# levels_4, -# list( -# server_1 = c("1", "2", "3", "A", "B", "C"), -# server_2 = c("1", "2", "3", "A", "B", "C"), -# server_3 = c("1", "2", "3", "A", "B", "C") -# ) -# ) -# -# expect_equal( -# levels_5, -# list( -# server_1 = c("1", "2", "3", "One", "Three", "Two"), -# server_2 = c("1", "2", "3", "One", "Three", "Two"), -# server_3 = c("1", "2", "3", "One", "Three", "Two") -# ) -# ) -# -# expect_equal( -# levels_6, -# list( -# server_1 = c("Bird", "Cat", "Dog"), -# server_2 = c("Bird", "Cat", "Dog"), -# server_3 = c("Bird", "Cat", "Dog") -# ) -# ) -# -# expect_equal( -# levels_9, -# list( -# server_1 = c("False", "True"), -# server_2 = c("False", "True"), -# server_3 = c("False", "True") -# ) -# ) -# -# }) -# -# test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { -# expect_error( -# ds.standardiseDf( -# df = "df", -# newobj = "shouldnt_exist", -# fix_class = "no" -# ), -# "Variables do not have the same class in all studies" -# ) -# -# expect_equal( -# ds.exists("shouldnt_exist")[[1]], -# FALSE -# ) -# }) -# -# test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { -# expect_error( -# with_mocked_bindings( -# ds.standardiseDf( -# df = "df", -# newobj = "shouldnt_exist", -# fix_levels = "no" -# ), -# prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") -# ), -# "Factor variables do not have the same levels in all studies" -# ) -# -# expect_equal( -# ds.exists("shouldnt_exist")[[1]], -# FALSE -# ) -# }) + +test_that(".identify_class_conflicts returns correct output", { + expected <- list( + fac_col4 = c("numeric", "character", "factor"), + fac_col5 = c("logical", "integer", "factor") + ) + + expect_equal(class_conflicts, expected) + +}) + +test_that("ask_question displays the correct prompt", { + expect_snapshot(ask_question_class("my_var")) +}) + +test_that("ask_question_wait_response_class continues with valid response", { + expect_equal( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "1" + ), "1" + ) +}) + +test_that("ask_question_wait_response_class throws error if option 6 selected", { + expect_error( + with_mocked_bindings( + ask_question_wait_response_class("a variable"), + ask_question_class = function(var) "A question", + readline = function() "6") + ) +}) + +test_that("print_all_classes prints the correct message", { + expect_snapshot( + print_all_classes( + c("server_1", "server_2", "server_3"), + c("numeric", "factor", "integer") + ) + ) +}) + +test_that("prompt_user_class_decision function properly", { + expect_message( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("sim2", "sim2", "sim3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ) + ) + + expect_equal( + with_mocked_bindings( + prompt_user_class_decision( + var = "test_col", + servers = c("sim2", "sim2", "sim3"), + classes = c("numeric", "character", "factor"), + newobj = "test_df", + datasources = datasources), + ask_question_wait_response_class = function(var, newobj, datasources) "test_col" + ), + "test_col" + ) +}) + +test_that("prompt_user_class_decision_all_vars returns correct value", { + expect_equal( + with_mocked_bindings( + prompt_user_class_decision_all_vars( + vars = c("test_var_1", "test_var_2"), + all_servers = c("sim2", "sim2", "sim3"), + all_classes = tibble( + test_var_1 = c("numeric", "character", "factor"), + test_var_2 = c("logical", "integer", "factor") + ), + "test_df", + conns), + prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1" + ), + c("1", "1") + ) +}) + +test_that(".fix_classes sets the correct classes in serverside data frame", { + + expect_equal( + unname(unlist(ds.class("D$fac_col4"))), + c("numeric", "character", "factor") + ) + + expect_equal( + unname(unlist(ds.class("D$fac_col5"))), + c("logical", "integer", "factor") + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col4"))), + rep("factor", 3) + ) + + expect_equal( + unname(unlist(ds.class("new_classes$fac_col5"))), + rep("logical", 3) + ) + +}) + +test_that(".get_unique_cols extracts unique names from a list", { + expect_equal( + .get_unique_cols( + list( + server_1 = c("col_1", "col_2", "col_3"), + server_1 = c("col_1", "col_2", "col_4"), + server_1 = c("col_2", "col_3", "col_3", "col_5") + ) + ), + c("col_1", "col_2", "col_3", "col_4", "col_5") + ) +}) + +test_that(".add_missing_cols_to_df correctly creates missing columns", { + + new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5", + "fac_col6", "fac_col7", "fac_col9") + + observed <- ds.colnames("with_new_cols") + + expected <- list( + sim1 = new_cols, + sim2 = new_cols, + sim3 = new_cols + ) + + expect_equal(observed, expected) +}) + +test_that(".get_added_cols correctly identifies newly added columns", { + + expect_equal( + added_cols, + list( + sim1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"), + sim2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"), + sim3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9") + ) + ) +}) + +test_that(".identify_factor_vars correctly identifies factor variables", { + + + + var_class_fact <- var_class |> dplyr::select(server: col18) + expect_equal( + names(fac_vars), + c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) +}) + +test_that(".get_factor_levels correctly identifies factor levels", { + expected <- list( + sim1 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue", "Green"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ), + sim2 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Green", "Red"), + fac_col3 = c("No"), + fac_col6 = NULL, + fac_col9 = NULL + ), + sim3 = list( + fac_col1 = c("High", "Low", "Medium"), + fac_col2 = c("Blue"), + fac_col3 = c("Yes"), + fac_col6 = NULL, + fac_col9 = NULL + ) + ) + + expect_equal(fac_levels, expected) +}) + +test_that(".identify_level_conflicts correctly factor columns with different levels", { + expect_equal( + .identify_level_conflicts(fac_levels), + c("fac_col2", "fac_col3", "fac_col6", "fac_col9") + ) + +}) + +test_that("ask_question_wait_response_levels continues with valid response", { + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + + expect_equal( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "1" + ), "1" + ) + +}) + +test_that("ask_question_wait_response_levels aborts with response of 3", { + expect_error( + with_mocked_bindings( + suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)), + readline = function() "3") + ) +}) + +test_that(".make_levels_message makes correct message", { + expect_snapshot(.make_levels_message(level_conflicts)) +}) + +test_that(".get_unique_levels extracts all possible levels", { + + expected <- list( + fac_col2 = c("Blue", "Green", "Red"), + fac_col3 = c("No", "Yes"), + fac_col6 = c("Bird", "Cat", "Dog"), + fac_col9 = c("False", "True") + ) + + expect_equal(unique_levs, expected) + +}) + +test_that(".set_factor_levels sets levels correctly", { + .set_factor_levels("with_new_cols", unique_levs, ds.test_env$connections) + + expect_equal( + ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]), + list( + sim1 = c("Blue", "Green", "Red"), + sim2 = c("Blue", "Green", "Red"), + sim3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]), + list( + sim1 = c("No", "Yes"), + sim2 = c("No", "Yes"), + sim3 = c("No", "Yes") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]), + list( + sim1 = c("Bird", "Cat", "Dog"), + sim2 = c("Bird", "Cat", "Dog"), + sim3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]), + list( + sim1 = c("False", "True"), + sim2 = c("False", "True"), + sim3 = c("False", "True") + ) + ) + +}) + +test_that(".print_var_recode_message prints the correct message", { + expect_snapshot(.print_var_recode_message(added_cols, "test_df")) +}) + +test_that(".print_class_recode_message prints the correct message", { + expect_snapshot( + .print_class_recode_message(class_decisions, different_classes, "test_df") + ) +}) + +test_that(".print_levels_recode_message prints the correct message", { + expect_snapshot( + .print_levels_recode_message(unique_levs, "test_df") + ) +}) + +test_that(".make_levels_recode_message prints the correct message", { + expect_equal( + .make_levels_recode_message(unique_levs), + list( + "fac_col2 --> Blue, Green, Red", + "fac_col3 --> No, Yes", + "fac_col6 --> Bird, Cat, Dog", + "fac_col9 --> False, True" + ) + ) +}) + +test_that(".print_out_messages prints the correct messages", { + expect_snapshot( + .print_out_messages( + added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df" + ) + ) +}) + +test_that(".change_choice_to_string converts numeric class codes to strings correctly", { + expect_equal(.change_choice_to_string("1"), "factor") + expect_equal(.change_choice_to_string("2"), "integer") + expect_equal(.change_choice_to_string("3"), "numeric") + expect_equal(.change_choice_to_string("4"), "character") + expect_equal(.change_choice_to_string("5"), "logical") +}) + +test_that("ds.standardiseDf doesn't run if dataframes are identical", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_error( + ds.standardiseDf( + df = "test_fill", + newobj = "shouldn't_exist"), + "Columns are identical" + ) + }) + +test_that("ds.standardiseDf works when called directly and class conversion is factor", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "factor" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "integer" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "integer" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "numeric" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "numeric" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is character", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "character" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "character" + ) +}) + +test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + + expect_equal( + ds.class("test_fill$fac_col4")[[1]], + "logical" + ) + + expect_equal( + ds.class("test_fill$fac_col5")[[1]], + "logical" + ) +}) + +test_that("ds.standardiseDf changes levels if this option is selected", { + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"), + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1" + ) + + levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels) + levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels) + levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels) + levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels) + levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels) + levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels) + + expect_equal( + levels_2, + list( + sim1 = c("Blue", "Green", "Red"), + sim2 = c("Blue", "Green", "Red"), + sim3 = c("Blue", "Green", "Red") + ) + ) + + expect_equal( + levels_3, + list( + sim1 = c("No", "Yes"), + sim2 = c("No", "Yes"), + sim3 = c("No", "Yes") + ) + ) + + expect_equal( + levels_4, + list( + sim1 = c("1", "2", "3", "A", "B", "C"), + sim2 = c("1", "2", "3", "A", "B", "C"), + sim3 = c("1", "2", "3", "A", "B", "C") + ) + ) + + expect_equal( + levels_5, + list( + sim1 = c("1", "2", "3", "One", "Three", "Two"), + sim2 = c("1", "2", "3", "One", "Three", "Two"), + sim3 = c("1", "2", "3", "One", "Three", "Two") + ) + ) + + expect_equal( + levels_6, + list( + sim1 = c("Bird", "Cat", "Dog"), + sim2 = c("Bird", "Cat", "Dog"), + sim3 = c("Bird", "Cat", "Dog") + ) + ) + + expect_equal( + levels_9, + list( + sim1 = c("False", "True"), + sim2 = c("False", "True"), + sim3 = c("False", "True") + ) + ) + +}) + +test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", { + expect_error( + ds.standardiseDf( + df = "D", + newobj = "shouldnt_exist", + fix_class = "no" + ), + "Variables do not have the same class in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) + +test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", { + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "shouldnt_exist", + fix_levels = "no" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1") + ), + "Factor variables do not have the same levels in all studies" + ) + + expect_equal( + ds.exists("shouldnt_exist")[[1]], + FALSE + ) +}) ## Add disclosure check levels From de99b8ffb43adee5e244968ce6a890d3db3d08d7 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:13:05 +0200 Subject: [PATCH 10/18] test: added snapshots --- tests/testthat/_snaps/smk-standardiseDf.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md index ac1558bd..4fa52eca 100644 --- a/tests/testthat/_snaps/smk-standardiseDf.md +++ b/tests/testthat/_snaps/smk-standardiseDf.md @@ -38,9 +38,9 @@ .print_var_recode_message(added_cols, "test_df") Message v The following variables have been added to test_df: - i server_1 --> col11 - i server_2 --> col11 - i server_3 --> col12 + i sim1 --> col11 + i sim2 --> col11 + i sim3 --> col12 # .print_class_recode_message prints the correct message @@ -70,9 +70,9 @@ level_conflicts, "1", "test_df") Message v The following variables have been added to test_df: - i server_1 --> col11 - i server_2 --> col11 - i server_3 --> col12 + i sim1 --> col11 + i sim2 --> col11 + i sim3 --> col12 v The following classes have been set for all datasources in test_df: i fac_col4 --> factor From 7c49bab4c9a353c034d60ebc71e6efffef14561a Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:13:47 +0200 Subject: [PATCH 11/18] require dsbase --- tests/testthat/setup.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 4c55c6e7..1b0f9b8a 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -24,6 +24,7 @@ library(DescTools) library(DSOpal) library(DSMolgenisArmadillo) library(DSLite) +library(dsBase) source("dstest_functions/ds_expect_variables.R") source("perf_tests/perf_rate.R") From 02ef0aa4b9cecff43ad7abf59111d5146f011821 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:14:16 +0200 Subject: [PATCH 12/18] set minimum testthat edition as 3 --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b0779e7f..d5dc606a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,6 +67,8 @@ Suggests: DescTools, DSOpal, DSMolgenisArmadillo, - DSLite + DSLite, + dsBase RoxygenNote: 7.3.2 Encoding: UTF-8 +Config/testthat/edition: 3 From 4c419b3c592839943ba48a04e0d419ba800dd762 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:14:36 +0200 Subject: [PATCH 13/18] test: define new dataset for standardiseDf testing --- .../init_studies_datasets.R | 213 +++++++++++------- 1 file changed, 127 insertions(+), 86 deletions(-) diff --git a/tests/testthat/connection_to_datasets/init_studies_datasets.R b/tests/testthat/connection_to_datasets/init_studies_datasets.R index 0639aac6..e7e412ab 100644 --- a/tests/testthat/connection_to_datasets/init_studies_datasets.R +++ b/tests/testthat/connection_to_datasets/init_studies_datasets.R @@ -1,85 +1,85 @@ init.studies.dataset.cnsim <- function(variables) { - if (ds.test_env$secure_login_details) + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") { - if (ds.test_env$driver == "OpalDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "CNSIM.CNSIM1", options=ds.test_env$options_1) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "CNSIM.CNSIM2", options=ds.test_env$options_2) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "CNSIM.CNSIM3", options=ds.test_env$options_3) - ds.test_env$login.data <- builder$build() - } - else if (ds.test_env$driver == "ArmadilloDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/cnsim/CNSIM1", driver = ds.test_env$driver) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/cnsim/CNSIM2", driver = ds.test_env$driver) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cnsim/CNSIM3", driver = ds.test_env$driver) - ds.test_env$login.data <- builder$build() - } - else - { - ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) - } - ds.test_env$stats.var <- variables + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "CNSIM.CNSIM1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "CNSIM.CNSIM2", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "CNSIM.CNSIM3", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/cnsim/CNSIM1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/cnsim/CNSIM2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cnsim/CNSIM3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } } init.studies.dataset.dasim <- function(variables) { - if (ds.test_env$secure_login_details) + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "DASIM.DASIM1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "DASIM.DASIM2", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "DASIM.DASIM3", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/dasim/DASIM1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/dasim/DASIM2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/dasim/DASIM3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else { - if (ds.test_env$driver == "OpalDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "DASIM.DASIM1", options=ds.test_env$options_1) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "DASIM.DASIM2", options=ds.test_env$options_2) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "DASIM.DASIM3", options=ds.test_env$options_3) - ds.test_env$login.data <- builder$build() - } - else if (ds.test_env$driver == "ArmadilloDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/dasim/DASIM1", driver = ds.test_env$driver) - builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/dasim/DASIM2", driver = ds.test_env$driver) - builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/dasim/DASIM3", driver = ds.test_env$driver) - ds.test_env$login.data <- builder$build() - } - else - { - ds.test_env$login.data <- DSLite::setupDASIMTest("dsBase", env = ds.test_env) - } - ds.test_env$stats.var <- variables + ds.test_env$login.data <- DSLite::setupDASIMTest("dsBase", env = ds.test_env) } + ds.test_env$stats.var <- variables + } } init.studies.dataset.survival <- function(variables) { - if (ds.test_env$secure_login_details) + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") { - if (ds.test_env$driver == "OpalDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "SURVIVAL.EXPAND_WITH_MISSING1", options=ds.test_env$options_1) - builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "SURVIVAL.EXPAND_WITH_MISSING2", options=ds.test_env$options_2) - builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "SURVIVAL.EXPAND_WITH_MISSING3", options=ds.test_env$options_3) - ds.test_env$login.data <- builder$build() - } - else if (ds.test_env$driver == "ArmadilloDriver") - { - builder <- DSI::newDSLoginBuilder(.silent = TRUE) - builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/survival/EXPAND_WITH_MISSING1", driver = ds.test_env$driver) - builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/survival/EXPAND_WITH_MISSING2", driver = ds.test_env$driver) - builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/survival/EXPAND_WITH_MISSING3", driver = ds.test_env$driver) - ds.test_env$login.data <- builder$build() - } - else - { - ds.test_env$login.data <- DSLite::setupSURVIVALTest("dsBase", env = ds.test_env) - } - ds.test_env$stats.var <- variables + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "SURVIVAL.EXPAND_WITH_MISSING1", options=ds.test_env$options_1) + builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "SURVIVAL.EXPAND_WITH_MISSING2", options=ds.test_env$options_2) + builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "SURVIVAL.EXPAND_WITH_MISSING3", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/survival/EXPAND_WITH_MISSING1", driver = ds.test_env$driver) + builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/survival/EXPAND_WITH_MISSING2", driver = ds.test_env$driver) + builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/survival/EXPAND_WITH_MISSING3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupSURVIVALTest("dsBase", env = ds.test_env) } + ds.test_env$stats.var <- variables + } } init.studies.dataset.cluster.int <- function(variables) @@ -107,7 +107,7 @@ init.studies.dataset.cluster.int <- function(variables) builder$append(server = "cluster.int3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cluster/CLUSTER_INT3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) @@ -141,7 +141,7 @@ init.studies.dataset.cluster.slo <- function(variables) builder$append(server = "cluster.slo3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cluster/CLUSTER_SLO3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) @@ -176,7 +176,7 @@ init.studies.dataset.anthro <- function(variables) builder$append(server = "study3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/anthro/anthro3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) @@ -211,39 +211,67 @@ init.studies.dataset.gamlss <- function(variables) builder$append(server = "study3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/gamlss/gamlss3", driver = ds.test_env$driver) ds.test_env$login.data <- builder$build() } - else + else { #to do #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env) } ds.test_env$stats.var <- variables - + + } +} + +init.studies.dataset.stand <- function(variables) +{ + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_1", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_1", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables } } connect.studies.dataset.cnsim <- function(variables) { - log.out.data.server() - source("connection_to_datasets/login_details.R") - init.studies.dataset.cnsim(variables) - log.in.data.server() + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.cnsim(variables) + log.in.data.server() } connect.studies.dataset.dasim <- function(variables) { - log.out.data.server() - source("connection_to_datasets/login_details.R") - init.studies.dataset.dasim(variables) - log.in.data.server() + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.dasim(variables) + log.in.data.server() } connect.studies.dataset.survival <- function(variables) { - log.out.data.server() - source("connection_to_datasets/login_details.R") - init.studies.dataset.survival(variables) - log.in.data.server() + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.survival(variables) + log.in.data.server() } connect.studies.dataset.cluster.int <- function(variables) @@ -278,19 +306,27 @@ connect.studies.dataset.gamlss <- function(variables) log.in.data.server() } +connect.studies.dataset.stand <- function(variables) +{ + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.stand(variables) + log.in.data.server() +} + disconnect.studies.dataset.cnsim <- function() { - log.out.data.server() + log.out.data.server() } disconnect.studies.dataset.dasim <- function() { - log.out.data.server() + log.out.data.server() } disconnect.studies.dataset.survival <- function() { - log.out.data.server() + log.out.data.server() } disconnect.studies.dataset.cluster.int <- function() @@ -312,3 +348,8 @@ disconnect.studies.dataset.gamlss <- function() { log.out.data.server() } + +disconnect.studies.dataset.stand <- function() +{ + log.out.data.server() +} From b237582c56bc2bbc07a7f50b547cbe445601096a Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:16:34 +0200 Subject: [PATCH 14/18] remove old helper file --- tests/testthat/helpers.R | 218 --------------------------------------- 1 file changed, 218 deletions(-) delete mode 100644 tests/testthat/helpers.R diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R deleted file mode 100644 index 919df923..00000000 --- a/tests/testthat/helpers.R +++ /dev/null @@ -1,218 +0,0 @@ -#' Create a DSLite login object that can be used for testing -#' -#' @param assign_method A string specifying the name of the custom assign method to be added -#' to the DSLite server. If `NULL`, no additional assign method is added. Default is `NULL`. -#' @param aggregate_method A string specifying the name of the custom aggregate method to be -#' added to the DSLite server. If `NULL`, no additional aggregate method is added. Default is `NULL`. -#' @param tables A named list of tables to be made available on the DSLite server. Default is `NULL`. -#' -#' @return A DataSHIELD login object containing the necessary connection information for the DSLite server. -#' -#' @examples -#' \dontrun{ -#' # Prepare a DSLite server with default methods and custom assign/aggregate methods -#' login_data <- .prepare_dslite( -#' assign_method = "customAssign", -#' aggregate_method = "customAggregate", -#' tables = list(mtcars = mtcars, mtcars_group = mtcars_group) -#' ) -#' -#' @importFrom DSLite newDSLiteServer -#' @importFrom DSI newDSLoginBuilder -#' @export -.prepare_dslite <- function(assign_method = NULL, aggregate_method = NULL, tables = NULL) { - - options(datashield.env = environment()) - dslite.server <- DSLite::newDSLiteServer(tables = tables) - dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse"))) - dslite.server$aggregateMethod("exists", "base::exists") - dslite.server$aggregateMethod("classDS", "dsBase::classDS") - dslite.server$aggregateMethod("lsDS", "dsBase::lsDS") - dslite.server$aggregateMethod("dsListDisclosureSettings", "dsTidyverse::dsListDisclosureSettings") - - if (!is.null(assign_method)) { - dslite.server$assignMethod(assign_method, paste0("dsTidyverse::", assign_method)) - } - - if (!is.null(aggregate_method)) { - dslite.server$aggregateMethod(assign_method, paste0("dsTidyverse::", assign_method)) - } - - builder <- DSI::newDSLoginBuilder() - builder$append(server = "server_1", url = "dslite.server", driver = "DSLiteDriver") - builder$append(server = "server_2", url = "dslite.server", driver = "DSLiteDriver") - builder$append(server = "server_3", url = "dslite.server", driver = "DSLiteDriver") - login_data <- builder$build() - return(login_data) -} - -#' Create a mixed dataframe with factor and other types of columns -#' -#' This function generates a dataframe with a specified number of rows, -#' factor columns, and other columns (integer, numeric, and string). -#' -#' @param n_rows Number of rows in the dataframe. Default is 10,000. -#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15. -#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15. -#' -#' @return A dataframe with the specified number of rows and columns, containing mixed data types. -#' @importFrom dplyr bind_cols -#' @importFrom purrr map_dfc -#' @examples -#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5) -create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) { - - # Function to create a factor column with defined levels - create_factor_column <- function(levels, n = n_rows) { - set.seed(123) # Set seed before sample for reproducibility - factor(sample(levels, n, replace = TRUE)) - } - - # Define factor levels for different columns - factor_levels <- list( - c("Low", "Medium", "High"), - c("Red", "Green", "Blue"), - c("Yes", "No"), - c("A", "B", "C"), - c("One", "Two", "Three"), - c("Cat", "Dog", "Bird"), - c("Small", "Medium", "Large"), - c("Alpha", "Beta", "Gamma"), - c("True", "False"), - c("Left", "Right"), - c("North", "South", "East", "West"), - c("Day", "Night"), - c("Up", "Down"), - c("Male", "Female"), - c("Summer", "Winter", "Spring", "Fall") - ) - - # Create factor columns - factor_columns <- map_dfc(factor_levels[1:n_factor_cols], create_factor_column) - colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols) - - # Function to create other types of columns - create_other_column <- function(type, n = n_rows) { - set.seed(123) # Set seed before sample for reproducibility - switch(type, - "int" = sample(1:100, n, replace = TRUE), # Integer column - "num" = runif(n, 0, 100), # Numeric column - "str" = sample(letters, n, replace = TRUE), # Character column - "log" = sample(c(TRUE, FALSE), n, replace = TRUE) # Logical column - ) - } - - # Ensure that each data type is included - column_types <- c( - "int", "num", "str", "log", "int", - "num", "str", "log", "int", "num", - "str", "int", "num", "log", "str" - ) - - # Create other columns with specified types - other_columns <- map_dfc(column_types[1:n_other_cols], create_other_column) - colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols)) - - # Combine factor and other columns into a single dataframe - df <- bind_cols(factor_columns, other_columns) - - return(df) -} - - -#' Modify factor levels for partial overlap -#' -#' This function takes two sets of factor levels, computes the common and unique levels, -#' and returns a new set of levels with partial overlap. -#' -#' @param levels1 First set of factor levels. -#' @param levels2 Second set of factor levels. -#' -#' @return A character vector of new factor levels with partial overlap. -#' @examples -#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D")) -partial_overlap_levels <- function(levels1, levels2) { - common <- intersect(levels1, levels2) - unique1 <- setdiff(levels1, common) - unique2 <- setdiff(levels2, common) - - # Set seed before each sample call - set.seed(123) - sampled_unique1 <- sample(unique1, length(unique1) * 0.5) - - set.seed(123) - sampled_unique2 <- sample(unique2, length(unique2) * 0.5) - - new_levels <- c(common, sampled_unique1, sampled_unique2) - return(new_levels) -} - - -#' Create additional dataframes with specific conditions -#' -#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels, -#' and adding new columns with unique names. Different seeds are used for each iteration of the loop, -#' ensuring reproducibility of the generated dataframes. -#' -#' @param base_df The base dataframe used to create the additional dataframes. -#' @param n_rows Number of rows in the additional dataframes. Default is 10,000. -#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3"). -#' -#' @return A list of dataframes with the specified modifications. -#' @importFrom dplyr bind_cols -#' @examples -#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5) -#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2")) -create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) { - - # Define a fixed sequence of seeds, one for each dataframe to be created - seeds <- c(123, 456, 789, 101112) - - df_list <- list() - - for (i in seq_along(df_names)) { - # Set the seed for this iteration based on the pre-defined seeds - set.seed(seeds[i]) - - # Proceed with the dataframe generation process - overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df))) - df <- base_df - cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols))) - - # Modify columns to have different data types - for (col in cols_to_modify_class) { - current_class <- class(df[[col]]) - new_class <- switch(current_class, - "factor" = as.character(df[[col]]), - "character" = as.factor(df[[col]]), - "numeric" = as.integer(df[[col]]), - "integer" = as.numeric(df[[col]]), - df[[col]]) - df[[col]] <- new_class - } - - # Modify factor levels for partial overlap - factor_cols <- colnames(base_df)[sapply(base_df, is.factor)] - overlap_factor_cols <- intersect(overlap_cols, factor_cols) - cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols))) - - for (col in cols_to_modify_levels) { - original_levels <- levels(base_df[[col]]) - new_levels <- partial_overlap_levels(original_levels, original_levels) - df[[col]] <- factor(df[[col]], levels = new_levels) - } - - # Create new random columns for each dataframe (these will vary by seed) - set.seed(seeds[i]) # Set the seed again for generating new columns - n_new_cols <- round(0.2 * ncol(base_df)) - new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols) - new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols)) - colnames(new_cols) <- new_col_names - - # Bind new columns to the dataframe - df <- bind_cols(df, new_cols) - df_list[[df_names[i]]] <- df - } - - return(df_list) -} From ea62922988afa61df72fac6f74e50fad7cf1d1d6 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:16:45 +0200 Subject: [PATCH 15/18] add test data --- .../testthat/data_files/STANDARDISE/std_1.csv | 101 ++++++++++++++++++ .../testthat/data_files/STANDARDISE/std_1.rda | Bin 0 -> 1474 bytes .../testthat/data_files/STANDARDISE/std_2.csv | 101 ++++++++++++++++++ .../testthat/data_files/STANDARDISE/std_2.rda | Bin 0 -> 1706 bytes .../testthat/data_files/STANDARDISE/std_3.csv | 101 ++++++++++++++++++ .../testthat/data_files/STANDARDISE/std_3.rda | Bin 0 -> 1597 bytes 6 files changed, 303 insertions(+) create mode 100644 tests/testthat/data_files/STANDARDISE/std_1.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_1.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_2.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_2.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_3.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_3.rda diff --git a/tests/testthat/data_files/STANDARDISE/std_1.csv b/tests/testthat/data_files/STANDARDISE/std_1.csv new file mode 100644 index 00000000..32890959 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_1.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18 +High,Blue,Yes,3,NA,Bird,True,28.757752012461424,31,TRUE +High,Blue,Yes,3,NA,Bird,True,78.83051354438066,79,TRUE +High,Blue,Yes,3,NA,Bird,True,40.89769218116999,51,TRUE +Medium,Green,No,2,NA,Dog,False,88.301740400493145,14,FALSE +High,Blue,Yes,3,NA,Bird,True,94.04672842938453,67,TRUE +Medium,Green,No,2,NA,Dog,False,4.555649938993156,42,FALSE +Medium,Green,No,2,NA,Dog,False,52.810548804700375,50,FALSE +Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE +High,Blue,Yes,3,NA,Bird,True,55.14350144658238,14,TRUE +Low,NA,Yes,1,NA,Cat,True,45.661473530344665,25,TRUE +Medium,Green,No,2,NA,Dog,False,95.68333453498781,90,FALSE +Medium,Green,No,2,NA,Dog,False,45.33341561909765,91,FALSE +Low,NA,No,1,NA,Cat,False,67.75706354528666,69,FALSE +Medium,Green,Yes,2,NA,Dog,True,57.26334019564092,91,TRUE +High,Blue,No,3,NA,Bird,False,10.292468266561627,57,FALSE +Low,NA,Yes,1,NA,Cat,True,89.98249704018235,92,TRUE +High,Blue,No,3,NA,Bird,False,24.60877343546599,9,FALSE +High,Blue,Yes,3,NA,Bird,True,4.205953353084624,93,TRUE +Low,NA,Yes,1,NA,Cat,True,32.79207192827016,99,TRUE +Low,NA,Yes,1,NA,Cat,True,95.45036491472274,72,TRUE +Low,NA,Yes,1,NA,Cat,True,88.95393160637468,26,TRUE +Low,NA,No,1,NA,Cat,False,69.28034061565995,7,FALSE +High,Blue,Yes,3,NA,Bird,True,64.05068137682974,42,TRUE +Medium,Green,Yes,2,NA,Dog,True,99.42697766236961,9,TRUE +High,Blue,Yes,3,NA,Bird,True,65.57057991158217,83,TRUE +Medium,Green,Yes,2,NA,Dog,True,70.85304681677371,36,TRUE +Low,NA,No,1,NA,Cat,False,54.40660247113556,78,FALSE +Medium,Green,No,2,NA,Dog,False,59.41420204471797,81,FALSE +High,Blue,Yes,3,NA,Bird,True,28.91597372945398,43,TRUE +Medium,Green,No,2,NA,Dog,False,14.711364731192589,76,FALSE +Low,NA,Yes,1,NA,Cat,True,96.30242325365543,15,TRUE +High,Blue,No,3,NA,Bird,False,90.22990451194346,32,FALSE +High,Blue,Yes,3,NA,Bird,True,69.07052784226835,7,TRUE +Low,NA,No,1,NA,Cat,False,79.54674176871777,9,FALSE +High,Blue,No,3,NA,Bird,False,2.461368450894952,41,FALSE +Medium,Green,Yes,2,NA,Dog,True,47.77959710918367,74,TRUE +Low,NA,Yes,1,NA,Cat,True,75.84595375228673,23,TRUE +High,Blue,Yes,3,NA,Bird,True,21.640793583355844,27,TRUE +Low,NA,Yes,1,NA,Cat,True,31.818100763484836,60,TRUE +Low,NA,No,1,NA,Cat,False,23.16257853526622,53,FALSE +Medium,Green,Yes,2,NA,Dog,True,14.280002238228917,7,TRUE +High,Blue,No,3,NA,Bird,False,41.45463358145207,53,FALSE +High,Blue,No,3,NA,Bird,False,41.372432629577816,27,FALSE +Low,NA,Yes,1,NA,Cat,True,36.884545092470944,96,TRUE +High,Blue,Yes,3,NA,Bird,True,15.244474774226546,38,TRUE +Low,NA,Yes,1,NA,Cat,True,13.880606344901025,89,TRUE +High,Blue,Yes,3,NA,Bird,True,23.303409945219755,34,TRUE +Medium,Green,No,2,NA,Dog,False,46.59624502528459,93,FALSE +Low,NA,Yes,1,NA,Cat,True,26.597264036536217,69,TRUE +Medium,Green,Yes,2,NA,Dog,True,85.78277153428644,72,TRUE +Low,NA,No,1,NA,Cat,False,4.583116667345166,76,FALSE +Low,NA,Yes,1,NA,Cat,True,44.220007420517504,63,TRUE +High,Blue,Yes,3,NA,Bird,True,79.89248456433415,13,TRUE +Low,NA,Yes,1,NA,Cat,True,12.189925997518003,82,TRUE +Medium,Green,Yes,2,NA,Dog,True,56.094798375852406,97,TRUE +Low,NA,No,1,NA,Cat,False,20.65313896164298,91,FALSE +Low,NA,No,1,NA,Cat,False,12.753165024332702,25,FALSE +High,Blue,Yes,3,NA,Bird,True,75.33078643027693,38,TRUE +Low,NA,No,1,NA,Cat,False,89.50453591533005,21,FALSE +Medium,Green,Yes,2,NA,Dog,True,37.44627758860588,79,TRUE +Low,NA,Yes,1,NA,Cat,True,66.51151946280152,41,TRUE +High,Blue,No,3,NA,Bird,False,9.484066092409194,47,FALSE +Low,NA,No,1,NA,Cat,False,38.39696377981454,90,FALSE +High,Blue,Yes,3,NA,Bird,True,27.43836445733905,60,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.46400388795882,95,TRUE +High,Blue,No,3,NA,Bird,False,44.851634139195085,16,FALSE +Medium,Green,Yes,2,NA,Dog,True,81.00643530488014,94,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.23895095195621,6,TRUE +High,Blue,Yes,3,NA,Bird,True,79.43423211108893,72,TRUE +Medium,Green,Yes,2,NA,Dog,True,43.983168760314584,86,TRUE +Medium,Green,No,2,NA,Dog,False,75.44751586392522,86,FALSE +High,Blue,Yes,3,NA,Bird,True,62.922113155946136,39,TRUE +High,Blue,Yes,3,NA,Bird,True,71.01824013516307,31,TRUE +Low,NA,Yes,1,NA,Cat,True,0.062477332539856434,81,TRUE +Medium,Green,Yes,2,NA,Dog,True,47.53165740985423,50,TRUE +Medium,Green,No,2,NA,Dog,False,22.011888516135514,34,FALSE +Low,NA,No,1,NA,Cat,False,37.98165377229452,4,FALSE +Medium,Green,Yes,2,NA,Dog,True,61.277100327424705,13,TRUE +Low,NA,No,1,NA,Cat,False,35.179790924303234,69,FALSE +Low,NA,No,1,NA,Cat,False,11.113542434759438,25,FALSE +Medium,Green,No,2,NA,Dog,False,24.361947271972895,52,FALSE +High,Blue,No,3,NA,Bird,False,66.80555874481797,22,FALSE +High,Blue,Yes,3,NA,Bird,True,41.764677967876196,89,TRUE +Low,NA,No,1,NA,Cat,False,78.81958340294659,32,FALSE +Medium,Green,No,2,NA,Dog,False,10.286464425735176,25,FALSE +Low,NA,No,1,NA,Cat,False,43.489274149760604,87,FALSE +Medium,Green,Yes,2,NA,Dog,True,98.49569799844176,35,TRUE +Low,NA,Yes,1,NA,Cat,True,89.30511143989861,40,TRUE +High,Blue,No,3,NA,Bird,False,88.64690607879311,30,FALSE +High,Blue,Yes,3,NA,Bird,True,17.505265027284622,12,TRUE +Medium,Green,No,2,NA,Dog,False,13.069569156505167,31,FALSE +High,Blue,No,3,NA,Bird,False,65.31019250396639,30,FALSE +Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE +Medium,Green,No,2,NA,Dog,False,65.67581279668957,99,FALSE +Medium,Green,No,2,NA,Dog,False,32.03732424881309,14,FALSE +High,Blue,Yes,3,NA,Bird,True,18.769111926667392,93,TRUE +Medium,Green,Yes,2,NA,Dog,True,78.22943013161421,96,TRUE +Low,NA,No,1,NA,Cat,False,9.359498671256006,71,FALSE +High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE +High,Blue,Yes,3,NA,Bird,True,51.15054599009454,23,TRUE diff --git a/tests/testthat/data_files/STANDARDISE/std_1.rda b/tests/testthat/data_files/STANDARDISE/std_1.rda new file mode 100644 index 0000000000000000000000000000000000000000..df5a8854fcbee0ae5fbf212f51ae0b47675348b7 GIT binary patch literal 1474 zcmV;z1wHy7iwFP!000001MOIQP*hbIKipm3U`$LYNs22Y0zvUuK!I3b0YhHP0s?`! zunSTPYYRw?ql3m&HVzr|Aasx>CoMFc3Pz0~a0tq=DIG0Jr=o{C_EM)wE0Ml$_guOh z?%f6c$v@ngkKg&e^PTT`&b^15t1gUiD|91-$cS9d5t$%&oOcb!B|MFJx+voVC)Fx9J9uOnPcs8%)Bib zYeSebJC@2{Dn3?+tt}_wOQ1KUXHG`LMuV{qMqbtyTRXX`x@tWg%Q8%jymF>NU)@l{ zm~gBx>gsq4lXfe074;@FjBZKn1hxwnzKK|V7G5ysaDz@uSEMXb2VG1iwbs@HVSoej zG7sCUaZC=8!J^g9%uzn=$*|pE`lI!QF zXD<%PKHHPevj4NLpB+2)_N`Q0Gu;=&a_=GfCNlL6b%*|6x^ov5jqx_Qi%!0tCU2if z)n+~af=bra^Fy_%>dsZfhEFkjxBFGZF;!=*$i->ssUqmjkXy9y-xpRLb=LNzWXxT zIe&Vj6!qdgqXPgnnyWrTXixTV>3x8O=d>M*kk>cscHw+)yXGLq6Y0LAA272=-uen) z+~v)QzJMXd=!qs==Usf!3+=Y_4nBtK)K5Pfh53I!ehTxF9Ui-?0!;hilP`7v=Kkrw zPzk8q>+Vqkm~hJE3IDXKq60fWj0Ak(g_3%-r#==|djauK_qPRrNl(NK!;4H6x;gkH z+6_Mb&DkTkF7rSt@@W~G!4rTh+b^sh0L(Wp_;m#9obS8c59{X}7|CH=e=TZ!32?bC z{%`C@hKHgU@mS~78m{mUy9PX;J|MCyl@^CeG9b&S`=CfwF)!)Fwu77my#-MZG|vbfEb_3xOb7qNMXmDq9Bx zr;!u^DX8BR>KAP$(7bYibTm#My5=65C;EAi=7T=U=+k5^K1ryp0!jh2J{npN^eLx4 zQmAd%4dh-LlOL6>0a^;Q90*P`@t}5atfIA=N8`tw(rDc=cQ-QqIzj|U1iYAxMKL?9 zOlmFbODY~)erD=Z%-&pcAU0&SaUXVLU5$bNuH{CBF7O&Et1ubE7(SfgBN#rC z;gt*@#qcX8KdynVa3P}eY2 c=+I5CtJj5An!y&azH@K=3;o3R#|st!04bEq-v9sr literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_2.csv b/tests/testthat/data_files/STANDARDISE/std_2.csv new file mode 100644 index 00000000..9c20866c --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_2.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19 +High,NA,NA,C,2,Large,Left,o,28.757752012461424,31 +High,NA,NA,C,2,Large,Left,s,78.83051354438066,79 +High,NA,NA,C,2,Large,Left,n,40.89769218116999,51 +Medium,Green,No,B,3,Medium,Right,c,88.301740400493145,14 +High,NA,NA,C,2,Large,Left,j,94.04672842938453,67 +Medium,Green,No,B,3,Medium,Right,r,4.555649938993156,42 +Medium,Green,No,B,3,Medium,Right,v,52.810548804700375,50 +Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43 +High,NA,NA,C,2,Large,Left,e,55.14350144658238,14 +Low,Red,NA,A,1,Small,Left,t,45.661473530344665,25 +Medium,Green,No,B,3,Medium,Right,n,95.68333453498781,90 +Medium,Green,No,B,3,Medium,Right,v,45.33341561909765,91 +Low,Red,No,A,1,Small,Right,y,67.75706354528666,69 +Medium,Green,NA,B,3,Medium,Left,z,57.26334019564092,91 +High,NA,No,C,2,Large,Right,e,10.292468266561627,57 +Low,Red,NA,A,1,Small,Left,s,89.98249704018235,92 +High,NA,No,C,2,Large,Right,y,24.60877343546599,9 +High,NA,NA,C,2,Large,Left,y,4.205953353084624,93 +Low,Red,NA,A,1,Small,Left,i,32.79207192827016,99 +Low,Red,NA,A,1,Small,Left,c,95.45036491472274,72 +Low,Red,NA,A,1,Small,Left,h,88.95393160637468,26 +Low,Red,No,A,1,Small,Right,z,69.28034061565995,7 +High,NA,NA,C,2,Large,Left,g,64.05068137682974,42 +Medium,Green,NA,B,3,Medium,Left,j,99.42697766236961,9 +High,NA,NA,C,2,Large,Left,i,65.57057991158217,83 +Medium,Green,NA,B,3,Medium,Left,s,70.85304681677371,36 +Low,Red,No,A,1,Small,Right,d,54.40660247113556,78 +Medium,Green,No,B,3,Medium,Right,n,59.41420204471797,81 +High,NA,NA,C,2,Large,Left,q,28.91597372945398,43 +Medium,Green,No,B,3,Medium,Right,k,14.711364731192589,76 +Low,Red,NA,A,1,Small,Left,g,96.30242325365543,15 +High,NA,No,C,2,Large,Right,u,90.22990451194346,32 +High,NA,NA,C,2,Large,Left,l,69.07052784226835,7 +Low,Red,No,A,1,Small,Right,o,79.54674176871777,9 +High,NA,No,C,2,Large,Right,j,2.461368450894952,41 +Medium,Green,NA,B,3,Medium,Left,m,47.77959710918367,74 +Low,Red,NA,A,1,Small,Left,g,75.84595375228673,23 +High,NA,NA,C,2,Large,Left,i,21.640793583355844,27 +Low,Red,NA,A,1,Small,Left,i,31.818100763484836,60 +Low,Red,No,A,1,Small,Right,j,23.16257853526622,53 +Medium,Green,NA,B,3,Medium,Left,w,14.280002238228917,7 +High,NA,No,C,2,Large,Right,u,41.45463358145207,53 +High,NA,No,C,2,Large,Right,g,41.372432629577816,27 +Low,Red,NA,A,1,Small,Left,u,36.884545092470944,96 +High,NA,NA,C,2,Large,Left,f,15.244474774226546,38 +Low,Red,NA,A,1,Small,Left,y,13.880606344901025,89 +High,NA,NA,C,2,Large,Left,b,23.303409945219755,34 +Medium,Green,No,B,3,Medium,Right,e,46.59624502528459,93 +Low,Red,NA,A,1,Small,Left,h,26.597264036536217,69 +Medium,Green,NA,B,3,Medium,Left,l,85.78277153428644,72 +Low,Red,No,A,1,Small,Right,m,4.583116667345166,76 +Low,Red,NA,A,1,Small,Left,r,44.220007420517504,63 +High,NA,NA,C,2,Large,Left,a,79.89248456433415,13 +Low,Red,NA,A,1,Small,Left,y,12.189925997518003,82 +Medium,Green,NA,B,3,Medium,Left,y,56.094798375852406,97 +Low,Red,No,A,1,Small,Right,f,20.65313896164298,91 +Low,Red,No,A,1,Small,Right,u,12.753165024332702,25 +High,NA,NA,C,2,Large,Left,o,75.33078643027693,38 +Low,Red,No,A,1,Small,Right,i,89.50453591533005,21 +Medium,Green,NA,B,3,Medium,Left,o,37.44627758860588,79 +Low,Red,NA,A,1,Small,Left,z,66.51151946280152,41 +High,NA,No,C,2,Large,Right,p,9.484066092409194,47 +Low,Red,No,A,1,Small,Right,t,38.39696377981454,90 +High,NA,NA,C,2,Large,Left,f,27.43836445733905,60 +Medium,Green,NA,B,3,Medium,Left,k,81.46400388795882,95 +High,NA,No,C,2,Large,Right,h,44.851634139195085,16 +Medium,Green,NA,B,3,Medium,Left,v,81.00643530488014,94 +Medium,Green,NA,B,3,Medium,Left,v,81.23895095195621,6 +High,NA,NA,C,2,Large,Left,g,79.43423211108893,72 +Medium,Green,NA,B,3,Medium,Left,p,43.983168760314584,86 +Medium,Green,No,B,3,Medium,Right,q,75.44751586392522,86 +High,NA,NA,C,2,Large,Left,v,62.922113155946136,39 +High,NA,NA,C,2,Large,Left,r,71.01824013516307,31 +Low,Red,NA,A,1,Small,Left,q,0.062477332539856434,81 +Medium,Green,NA,B,3,Medium,Left,b,47.53165740985423,50 +Medium,Green,No,B,3,Medium,Right,d,22.011888516135514,34 +Low,Red,No,A,1,Small,Right,m,37.98165377229452,4 +Medium,Green,NA,B,3,Medium,Left,e,61.277100327424705,13 +Low,Red,No,A,1,Small,Right,v,35.179790924303234,69 +Low,Red,No,A,1,Small,Right,s,11.113542434759438,25 +Medium,Green,No,B,3,Medium,Right,y,24.361947271972895,52 +High,NA,No,C,2,Large,Right,t,66.80555874481797,22 +High,NA,NA,C,2,Large,Left,v,41.764677967876196,89 +Low,Red,No,A,1,Small,Right,y,78.81958340294659,32 +Medium,Green,No,B,3,Medium,Right,n,10.286464425735176,25 +Low,Red,No,A,1,Small,Right,y,43.489274149760604,87 +Medium,Green,NA,B,3,Medium,Left,w,98.49569799844176,35 +Low,Red,NA,A,1,Small,Left,c,89.30511143989861,40 +High,NA,No,C,2,Large,Right,h,88.64690607879311,30 +High,NA,NA,C,2,Large,Left,p,17.505265027284622,12 +Medium,Green,No,B,3,Medium,Right,l,13.069569156505167,31 +High,NA,No,C,2,Large,Right,y,65.31019250396639,30 +Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64 +Medium,Green,No,B,3,Medium,Right,c,65.67581279668957,99 +Medium,Green,No,B,3,Medium,Right,n,32.03732424881309,14 +High,NA,NA,C,2,Large,Left,g,18.769111926667392,93 +Medium,Green,NA,B,3,Medium,Left,c,78.22943013161421,96 +Low,Red,No,A,1,Small,Right,w,9.359498671256006,71 +High,NA,NA,C,2,Large,Left,v,46.677904156968,67 +High,NA,NA,C,2,Large,Left,z,51.15054599009454,23 diff --git a/tests/testthat/data_files/STANDARDISE/std_2.rda b/tests/testthat/data_files/STANDARDISE/std_2.rda new file mode 100644 index 0000000000000000000000000000000000000000..cbe3d2791b8807083caa46ecb97bb452bf1a303f GIT binary patch literal 1706 zcmV;b237eViwFP!000001Ep9CY?MV1-uv%bY-zEPC{z&2L4dY`^hL{~axHyeIVjhI zQm|02_h<{(9$b%BAP~St#Sld51AzcWA}An4EdeA*34#)gLO`n!6(2zG2{D37sWaT} z@88}1FGp!I{bpxpznPuc%S~I7SC$rC7EK5-5|c?G#t2YJ#I&ge!?Fkg10e=tB2j=Q zm!~2X{Ca{82aK_Rt~>kxi&29{&05i{70zzZ=vv|V6v^7a@o4P|cgDc&@w^JRAIipa zXz$GHdiU4MkN4qgt4LyL{5th4((7(;d+R}q3${4e$q6-8)l@fHeT~d{lG|0Yu$H5U zaC#l}j6vn~IGh2WAGF)-B`k+uOwf}d6S@|<#_XE2onC^3PvO_vv%FoyWsvxF^4FmD ztgemc&~OPlcUFj}sa$k+^(RXk@?+Yhh;V% zWjwr1?<^lnFP~6x!i_28>*^hdq$YaYGC%n}I(3+JPMPcrr>A#zTy)1UUSG-8w}^`B zlcVn=Zi%PFG;+dF!pS*AbkQe78sVH0GZce{U=AmiXbHh{kIvyF7UGeFv(>ft)3rK> zLm1A-Aw;U-JuO1mET#SC)6N0f*+DjI2;U@}@O-kFN;d28Gn#BB;{9dwRw$cnzT#&k zJ}!TMz5I9|zP1KQERCOZ#^r0x&yq5J0~d>*jrHbB{;aIldUGQ0uQxa9+4Op(_ts?h z*UL}#9%Xa6PAxip@3E;i!s_r>xw%=EZrN)cUT^r5PM95+*?5%k@HV}(p+3#w?B7jH z8DCfLAghTUcc>So(WSGj$?En5Sc^;GZ<@f}4?aGo)H8(*#|9@;<}$_4lm@2EV+wnC zhJe7MIxk|%V&PQ1;G-%vLO#_3PnD2rjlk>@5?dhLLgiV=6t7SyHAJm&szy*$=SG29 zjO7vTUMUo+T1ZSyL+y@3znC5&s87gQwW#HtFEm%>nI|OHAShJ=&jP{6FKnrIcL_Ij z3&+Lx6A*mVtvbP?c1L_;YR+DL9;aZb61EzJd=|s69{5?bjBULYHG&X}y=91b0$`s1 za`Wear9F%H-2|Mzvi-Gezz4E!S<$bk>PQ@7Y4MWsqkt3F&CaO=EI7L}X)yZ#{K1jq zxPE(g`EP(z{-{5=1JL?<%Ya>ggI+%DS&QeFT^y4Kn6c`kXwS9q^EMK(N_d!YXoSry_1CE)WwH@=AI_uqRtaCxFQuZgVEj7Nah{Z)Wf5-UePcB>nnBU&EXe!3J zuq__*DQ@WY=_#yVYf>BZb=;ozZq)?ubu4zq2EdWV?g?0*-1axm?ggCu##51zfH|)k zo!FPbqs?QkVE@Z^G-KbVytk$R-$UNT?`~k7?C%F+uj6}Jx3jS~#y`AuRSfoT%YmkN zyr=z{>t@76&y`ldloc~#u)j$IH^2BC^4Y&mc?;tWQ;r|P^*l?H4d0jPMD#Mi)MGz3 z{ep4Uyz?){Gw+YfKrAf%Xzf_QCz|tnSqc8Iv7|ppF-RKy?BE{{Ng$~p$39qxZoW6(somJsTvFa+OjU zLhECr^}v`W8Y7=_!)qW9(VP;fZ92$6kf9*(X(ln02XzarRS%j!-l>q*y%+zV(mF@2 zoBgLWni}te(WAoY^QLg?RBoNdt<$-62Di?n>nO%Qf@Oh=H0@%fb}@=eI?C^B9L^HQ zN-4|XS4T(3CT^BuV|HYq(p%wT-+x9}Q8n7-2snm&{J=$mgVyc;04=|izFrsr0J@22 A82|tP literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_3.csv b/tests/testthat/data_files/STANDARDISE/std_3.csv new file mode 100644 index 00000000..583c9a22 --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_3.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20 +High,Blue,Yes,C,Three,31,TRUE,o,28.757752012461424 +High,Blue,Yes,C,Three,79,TRUE,s,78.83051354438066 +High,Blue,Yes,C,Three,51,TRUE,n,40.89769218116999 +Medium,NA,NA,B,Two,14,FALSE,c,88.301740400493145 +High,Blue,Yes,C,Three,67,TRUE,j,94.04672842938453 +Medium,NA,NA,B,Two,42,FALSE,r,4.555649938993156 +Medium,NA,NA,B,Two,50,FALSE,v,52.810548804700375 +Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046 +High,Blue,Yes,C,Three,14,TRUE,e,55.14350144658238 +Low,NA,Yes,A,One,25,TRUE,t,45.661473530344665 +Medium,NA,NA,B,Two,90,FALSE,n,95.68333453498781 +Medium,NA,NA,B,Two,91,FALSE,v,45.33341561909765 +Low,NA,NA,A,One,69,FALSE,y,67.75706354528666 +Medium,NA,Yes,B,Two,91,TRUE,z,57.26334019564092 +High,Blue,NA,C,Three,57,FALSE,e,10.292468266561627 +Low,NA,Yes,A,One,92,TRUE,s,89.98249704018235 +High,Blue,NA,C,Three,9,FALSE,y,24.60877343546599 +High,Blue,Yes,C,Three,93,TRUE,y,4.205953353084624 +Low,NA,Yes,A,One,99,TRUE,i,32.79207192827016 +Low,NA,Yes,A,One,72,TRUE,c,95.45036491472274 +Low,NA,Yes,A,One,26,TRUE,h,88.95393160637468 +Low,NA,NA,A,One,7,FALSE,z,69.28034061565995 +High,Blue,Yes,C,Three,42,TRUE,g,64.05068137682974 +Medium,NA,Yes,B,Two,9,TRUE,j,99.42697766236961 +High,Blue,Yes,C,Three,83,TRUE,i,65.57057991158217 +Medium,NA,Yes,B,Two,36,TRUE,s,70.85304681677371 +Low,NA,NA,A,One,78,FALSE,d,54.40660247113556 +Medium,NA,NA,B,Two,81,FALSE,n,59.41420204471797 +High,Blue,Yes,C,Three,43,TRUE,q,28.91597372945398 +Medium,NA,NA,B,Two,76,FALSE,k,14.711364731192589 +Low,NA,Yes,A,One,15,TRUE,g,96.30242325365543 +High,Blue,NA,C,Three,32,FALSE,u,90.22990451194346 +High,Blue,Yes,C,Three,7,TRUE,l,69.07052784226835 +Low,NA,NA,A,One,9,FALSE,o,79.54674176871777 +High,Blue,NA,C,Three,41,FALSE,j,2.461368450894952 +Medium,NA,Yes,B,Two,74,TRUE,m,47.77959710918367 +Low,NA,Yes,A,One,23,TRUE,g,75.84595375228673 +High,Blue,Yes,C,Three,27,TRUE,i,21.640793583355844 +Low,NA,Yes,A,One,60,TRUE,i,31.818100763484836 +Low,NA,NA,A,One,53,FALSE,j,23.16257853526622 +Medium,NA,Yes,B,Two,7,TRUE,w,14.280002238228917 +High,Blue,NA,C,Three,53,FALSE,u,41.45463358145207 +High,Blue,NA,C,Three,27,FALSE,g,41.372432629577816 +Low,NA,Yes,A,One,96,TRUE,u,36.884545092470944 +High,Blue,Yes,C,Three,38,TRUE,f,15.244474774226546 +Low,NA,Yes,A,One,89,TRUE,y,13.880606344901025 +High,Blue,Yes,C,Three,34,TRUE,b,23.303409945219755 +Medium,NA,NA,B,Two,93,FALSE,e,46.59624502528459 +Low,NA,Yes,A,One,69,TRUE,h,26.597264036536217 +Medium,NA,Yes,B,Two,72,TRUE,l,85.78277153428644 +Low,NA,NA,A,One,76,FALSE,m,4.583116667345166 +Low,NA,Yes,A,One,63,TRUE,r,44.220007420517504 +High,Blue,Yes,C,Three,13,TRUE,a,79.89248456433415 +Low,NA,Yes,A,One,82,TRUE,y,12.189925997518003 +Medium,NA,Yes,B,Two,97,TRUE,y,56.094798375852406 +Low,NA,NA,A,One,91,FALSE,f,20.65313896164298 +Low,NA,NA,A,One,25,FALSE,u,12.753165024332702 +High,Blue,Yes,C,Three,38,TRUE,o,75.33078643027693 +Low,NA,NA,A,One,21,FALSE,i,89.50453591533005 +Medium,NA,Yes,B,Two,79,TRUE,o,37.44627758860588 +Low,NA,Yes,A,One,41,TRUE,z,66.51151946280152 +High,Blue,NA,C,Three,47,FALSE,p,9.484066092409194 +Low,NA,NA,A,One,90,FALSE,t,38.39696377981454 +High,Blue,Yes,C,Three,60,TRUE,f,27.43836445733905 +Medium,NA,Yes,B,Two,95,TRUE,k,81.46400388795882 +High,Blue,NA,C,Three,16,FALSE,h,44.851634139195085 +Medium,NA,Yes,B,Two,94,TRUE,v,81.00643530488014 +Medium,NA,Yes,B,Two,6,TRUE,v,81.23895095195621 +High,Blue,Yes,C,Three,72,TRUE,g,79.43423211108893 +Medium,NA,Yes,B,Two,86,TRUE,p,43.983168760314584 +Medium,NA,NA,B,Two,86,FALSE,q,75.44751586392522 +High,Blue,Yes,C,Three,39,TRUE,v,62.922113155946136 +High,Blue,Yes,C,Three,31,TRUE,r,71.01824013516307 +Low,NA,Yes,A,One,81,TRUE,q,0.062477332539856434 +Medium,NA,Yes,B,Two,50,TRUE,b,47.53165740985423 +Medium,NA,NA,B,Two,34,FALSE,d,22.011888516135514 +Low,NA,NA,A,One,4,FALSE,m,37.98165377229452 +Medium,NA,Yes,B,Two,13,TRUE,e,61.277100327424705 +Low,NA,NA,A,One,69,FALSE,v,35.179790924303234 +Low,NA,NA,A,One,25,FALSE,s,11.113542434759438 +Medium,NA,NA,B,Two,52,FALSE,y,24.361947271972895 +High,Blue,NA,C,Three,22,FALSE,t,66.80555874481797 +High,Blue,Yes,C,Three,89,TRUE,v,41.764677967876196 +Low,NA,NA,A,One,32,FALSE,y,78.81958340294659 +Medium,NA,NA,B,Two,25,FALSE,n,10.286464425735176 +Low,NA,NA,A,One,87,FALSE,y,43.489274149760604 +Medium,NA,Yes,B,Two,35,TRUE,w,98.49569799844176 +Low,NA,Yes,A,One,40,TRUE,c,89.30511143989861 +High,Blue,NA,C,Three,30,FALSE,h,88.64690607879311 +High,Blue,Yes,C,Three,12,TRUE,p,17.505265027284622 +Medium,NA,NA,B,Two,31,FALSE,l,13.069569156505167 +High,Blue,NA,C,Three,30,FALSE,y,65.31019250396639 +Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399 +Medium,NA,NA,B,Two,99,FALSE,c,65.67581279668957 +Medium,NA,NA,B,Two,14,FALSE,n,32.03732424881309 +High,Blue,Yes,C,Three,93,TRUE,g,18.769111926667392 +Medium,NA,Yes,B,Two,96,TRUE,c,78.22943013161421 +Low,NA,NA,A,One,71,FALSE,w,9.359498671256006 +High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968 +High,Blue,Yes,C,Three,23,TRUE,z,51.15054599009454 diff --git a/tests/testthat/data_files/STANDARDISE/std_3.rda b/tests/testthat/data_files/STANDARDISE/std_3.rda new file mode 100644 index 0000000000000000000000000000000000000000..8036267480506dd3af4291f4e142386a083abad1 GIT binary patch literal 1597 zcmV-D2EzFtiwFP!000001D#k4P*hbIKHR%XAQ+QVN}A%zpg@|yBBYWR70wNfQ z%RT^ISX~x2b#&C2%Elp+9;A-iFL< zf^~yH23-yZcJ$2u2U*9mPM37L#Eb{&WIb4Wl0Xk)?C8fOW={|^$J&*c`9Nc=4Smk6 zY?QxIeC!&wwvxb?ZryD?>+g0pI^FdkMfA3~x0BOqs;a3h=Xw0<;u%gyO+zhXqMzMu zt5+>(c3n2R&*KH@mgpp`Bt3sUtb*0qas6JSc_Vv9enorOb^jMvps_%R+10*OMP!E^Guq2%X54KfKYjHnvUj&R=)sN!$|10S zVOQl+XZ;;DRewRV7ZQ6xvlpP~g4178qKi_m8(m&T7pCYTU>^_BgTju99uvtxHp<^9 zKGCBfdh85paa&W>YdR=h6*5#|>K!$iZKyuzzBELiydJDQ(F<*KaM%Wk3e4ISz4At5 zqC>|vgY_#qXrlPoH31qbs)ex0a(VIvJkF%pKUZM(1xn1?#8cnF-;;H8BRB@r!aF!ihV2imdG&>Ed;N}XY zxv1rK)o_Bg*w0z!eYv>YE4e~dbAf4LXx*_H7t_T#^>8t3BU*VEan052EaU=fkfa$w3yw-cI@0p6*uMQ;#o?1?@os@x#MQY zc(_mt+ee$>)>bmTms&mpE*_M>_XhBs)m^Wq0pFK;GY{A0RvnH&F3w+8b_96(`g!S< zz}aWo;$m?9PwyW-hWcB(%6(atg-~~~v)~k_6(VqNRIlPWa$+^M}!%=rM**4<*-d5`_ zjK?xy?IGY9?WXlF15duPJbf7OSa(YIO58VS?xhH{+q}190q)Cu>X`|c|94%dFfVz} zg{um1&i5aGz81LP&*-7>aw^H&286*&txP%P2~P!1iQW7Gyjse`&zJeoUgm*>x{JuW z+rEKUPsv!EdJJA6W!&=klV~^o$k%80;l3F=vr*5=fbvwpURQ^sH);YUYD)|F-rN*-rIY0Nt zZ|MKriH2psSzVn=iqOybZIKvHeq-<_C$WAVah=fDDZ9IZs|n_HG`w^J@C1246xJuB z>y0yefM>q`WPkWFP|{zK?bw%?iRMX{vHxW|TCnf4-fhmt^N@Mr+v`}T!uNdP*YLco zZ}Sg9|A)4&3B&$v+20h2c@{o>&5RuDy4(Sru&Oi+`x`fE^9#?Sy~3{&-b8<6rDF$C zU#K)$@qC$%hpqrlJo-b^&*-Q5?SIgpd0#{_a!&Dw>)?w+mAn^-Jf})V_ v?(R*@_pJH}+}~H}u5hS#stg(xLmf7sZJf&swn*>C?yY|Tz&gqhuoM6QKWHU0 literal 0 HcmV?d00001 From 254c57d25ce7c128336dc7fd95d7fbeebbde3db3 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:24:56 +0200 Subject: [PATCH 16/18] added extra data to test levels disclosure --- .../data_files/STANDARDISE/std_1_d.csv | 101 ++++++++++++++++++ .../data_files/STANDARDISE/std_1_d.rda | Bin 0 -> 2228 bytes .../data_files/STANDARDISE/std_2_d.csv | 101 ++++++++++++++++++ .../data_files/STANDARDISE/std_2_d.rda | Bin 0 -> 2461 bytes .../data_files/STANDARDISE/std_3_d.csv | 101 ++++++++++++++++++ .../data_files/STANDARDISE/std_3_d.rda | Bin 0 -> 2343 bytes ...lgenis_armadillo-upload_testing_datasets.R | 12 ++- .../obiba_opal-upload_testing_datasets.R | 16 ++- 8 files changed, 326 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/data_files/STANDARDISE/std_1_d.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_1_d.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_2_d.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_2_d.rda create mode 100644 tests/testthat/data_files/STANDARDISE/std_3_d.csv create mode 100644 tests/testthat/data_files/STANDARDISE/std_3_d.rda diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.csv b/tests/testthat/data_files/STANDARDISE/std_1_d.csv new file mode 100644 index 00000000..9fe7462f --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_1_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18 +High,Blue,Yes,3,NA,Bird,True,28.7577520124614,31,TRUE +High,Blue,Yes,3,NA,Bird,True,78.8305135443807,79,TRUE +High,Blue,Yes,3,NA,Bird,True,40.89769218117,51,TRUE +Medium,Green,No,2,NA,Dog,False,88.3017404004931,14,FALSE +High,Blue,Yes,3,NA,Bird,True,94.0467284293845,67,TRUE +Medium,Green,No,2,NA,Dog,False,4.55564993899316,42,FALSE +Medium,Green,No,2,NA,Dog,False,52.8105488047004,50,FALSE +Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE +High,Blue,Yes,3,NA,Bird,True,55.1435014465824,14,TRUE +Low,NA,Yes,1,NA,Cat,True,45.6614735303447,25,TRUE +Medium,Green,No,2,NA,Dog,False,95.6833345349878,90,FALSE +Medium,Green,No,2,NA,Dog,False,45.3334156190977,91,FALSE +Low,NA,No,1,NA,Cat,False,67.7570635452867,69,FALSE +Medium,Green,Yes,2,NA,Dog,True,57.2633401956409,91,TRUE +High,Blue,No,3,NA,Bird,False,10.2924682665616,57,FALSE +Low,NA,Yes,1,NA,Cat,True,89.9824970401824,92,TRUE +High,Blue,No,3,NA,Bird,False,24.608773435466,9,FALSE +High,Blue,Yes,3,NA,Bird,True,4.20595335308462,93,TRUE +Low,NA,Yes,1,NA,Cat,True,32.7920719282702,99,TRUE +Low,NA,Yes,1,NA,Cat,True,95.4503649147227,72,TRUE +Low,NA,Yes,1,NA,Cat,True,88.9539316063747,26,TRUE +Low,NA,No,1,NA,Cat,False,69.28034061566,7,FALSE +High,Blue,Yes,3,NA,Bird,True,64.0506813768297,42,TRUE +Medium,Green,Yes,2,NA,Dog,True,99.4269776623696,9,TRUE +High,Blue,Yes,3,NA,Bird,True,65.5705799115822,83,TRUE +Medium,Green,Yes,2,NA,Dog,True,70.8530468167737,36,TRUE +Low,NA,No,1,NA,Cat,False,54.4066024711356,78,FALSE +Medium,Green,No,2,NA,Dog,False,59.414202044718,81,FALSE +High,Blue,Yes,3,NA,Bird,True,28.915973729454,43,TRUE +Medium,Green,No,2,NA,Dog,False,14.7113647311926,76,FALSE +Low,NA,Yes,1,NA,Cat,True,96.3024232536554,15,TRUE +High,Blue,No,3,NA,Bird,False,90.2299045119435,32,FALSE +High,Blue,Yes,3,NA,Bird,True,69.0705278422683,7,TRUE +Low,NA,No,1,NA,Cat,False,79.5467417687178,9,FALSE +High,Blue,No,3,NA,Bird,False,2.46136845089495,41,FALSE +Medium,Green,Yes,2,NA,Dog,True,47.7795971091837,74,TRUE +Low,NA,Yes,1,NA,Cat,True,75.8459537522867,23,TRUE +High,Blue,Yes,3,NA,Bird,True,21.6407935833558,27,TRUE +Low,NA,Yes,1,NA,Cat,True,31.8181007634848,60,TRUE +Low,NA,No,1,NA,Cat,False,23.1625785352662,53,FALSE +Medium,Green,Yes,2,NA,Dog,True,14.2800022382289,7,TRUE +High,Blue,No,3,NA,Bird,False,41.4546335814521,53,FALSE +High,Blue,No,3,NA,Bird,False,41.3724326295778,27,FALSE +Low,NA,Yes,1,NA,Cat,True,36.8845450924709,96,TRUE +High,Blue,Yes,3,NA,Bird,True,15.2444747742265,38,TRUE +Low,NA,Yes,1,NA,Cat,True,13.880606344901,89,TRUE +High,Blue,Yes,3,NA,Bird,True,23.3034099452198,34,TRUE +Medium,Green,No,2,NA,Dog,False,46.5962450252846,93,FALSE +Low,NA,Yes,1,NA,Cat,True,26.5972640365362,69,TRUE +Medium,Green,Yes,2,NA,Dog,True,85.7827715342864,72,TRUE +Low,NA,No,1,NA,Cat,False,4.58311666734517,76,FALSE +Low,NA,Yes,1,NA,Cat,True,44.2200074205175,63,TRUE +High,Blue,Yes,3,NA,Bird,True,79.8924845643342,13,TRUE +Low,NA,Yes,1,NA,Cat,True,12.189925997518,82,TRUE +Medium,Green,Yes,2,NA,Dog,True,56.0947983758524,97,TRUE +Low,NA,No,1,NA,Cat,False,20.653138961643,91,FALSE +Low,NA,No,1,NA,Cat,False,12.7531650243327,25,FALSE +High,Blue,Yes,3,NA,Bird,True,75.3307864302769,38,TRUE +Low,NA,No,1,NA,Cat,False,89.50453591533,21,FALSE +Medium,Green,Yes,2,NA,Dog,True,37.4462775886059,79,TRUE +Low,NA,Yes,1,NA,Cat,True,66.5115194628015,41,TRUE +High,Blue,No,3,NA,Bird,False,9.48406609240919,47,FALSE +Low,NA,No,1,NA,Cat,False,38.3969637798145,90,FALSE +High,Blue,Yes,3,NA,Bird,True,27.438364457339,60,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.4640038879588,95,TRUE +High,Blue,No,3,NA,Bird,False,44.8516341391951,16,FALSE +Medium,Green,Yes,2,NA,Dog,True,81.0064353048801,94,TRUE +Medium,Green,Yes,2,NA,Dog,True,81.2389509519562,6,TRUE +High,Blue,Yes,3,NA,Bird,True,79.4342321110889,72,TRUE +Medium,Green,Yes,2,NA,Dog,True,43.9831687603146,86,TRUE +Medium,Green,No,2,NA,Dog,False,75.4475158639252,86,FALSE +High,Blue,Yes,3,NA,Bird,True,62.9221131559461,39,TRUE +High,Blue,Yes,3,NA,Bird,True,71.0182401351631,31,TRUE +Low,NA,Yes,1,NA,Cat,True,0.0624773325398564,81,TRUE +Medium,Green,Yes,2,NA,Dog,True,47.5316574098542,50,TRUE +Medium,Green,No,2,NA,Dog,False,22.0118885161355,34,FALSE +Low,NA,No,1,NA,Cat,False,37.9816537722945,4,FALSE +Medium,Green,Yes,2,NA,Dog,True,61.2771003274247,13,TRUE +Low,NA,No,1,NA,Cat,False,35.1797909243032,69,FALSE +Low,NA,No,1,NA,Cat,False,11.1135424347594,25,FALSE +Medium,Green,No,2,NA,Dog,False,24.3619472719729,52,FALSE +High,Blue,No,3,NA,Bird,False,66.805558744818,22,FALSE +High,Blue,Yes,3,NA,Bird,True,41.7646779678762,89,TRUE +Low,NA,No,1,NA,Cat,False,78.8195834029466,32,FALSE +Medium,Green,No,2,NA,Dog,False,10.2864644257352,25,FALSE +Low,NA,No,1,NA,Cat,False,43.4892741497606,87,FALSE +Medium,Green,Yes,2,NA,Dog,True,98.4956979984418,35,TRUE +Low,NA,Yes,1,NA,Cat,True,89.3051114398986,40,TRUE +High,Blue,No,3,NA,Bird,False,88.6469060787931,30,FALSE +High,Blue,Yes,3,NA,Bird,True,17.5052650272846,12,TRUE +Medium,Green,No,2,NA,Dog,False,13.0695691565052,31,FALSE +High,Blue,No,3,NA,Bird,False,65.3101925039664,30,FALSE +Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE +Medium,Green,No,2,NA,Dog,False,65.6758127966896,99,FALSE +Medium,Green,No,2,NA,Dog,False,32.0373242488131,14,FALSE +High,Blue,Yes,3,NA,Bird,True,18.7691119266674,93,TRUE +Medium,Green,Yes,2,NA,Dog,True,78.2294301316142,96,TRUE +Low,NA,No,1,NA,Cat,False,9.35949867125601,71,FALSE +High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE +High,Blue,Yes,3,NA,Bird,True,51.1505459900945,23,TRUE diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.rda b/tests/testthat/data_files/STANDARDISE/std_1_d.rda new file mode 100644 index 0000000000000000000000000000000000000000..4387f72b8b0e93973132f43c9ddd0f762f606166 GIT binary patch literal 2228 zcmV;l2ut@LiwFP!000001MQk=a~;JMhDTSDZ5+pPAjE+z5JEx{!qEHTEOv|+LReyA zh@DMbTNg)FLQY9CRKD>;@*7k61t?GJ_RO?Kcdle%@E5!4t-13~&pD^h)@M|H|H>_W z`qt^HsuruIrBSuG%+G51gPYf03snVCRSVTp^%U2W+dFHka&?WR&v45-3;*R>E;;kr z*7#qm>zg~1dy}o+v!y%Z?MddoxyMhhUrf0JvYr`lZQcLu!UZnlf3JQ#zPoe3FLHTr z$$-psUCUg9#p1eH+>eU;QIB6JewXOW8_l?1=*cPhQSY6F-m|{kQSbRYt}mn9bN~0y z{)g=Auj%`CG(%UhZeL2B*qD4e+1Tb+Kl;z_i~jb`*81kX)lVl~5-jib;(jj2{oJ0; zoO5rk-}`6&z4$h%lGVO5SzCYb$@GPN1})#+7;kT9DXo=l@-=xi`n+oXrPojFb~`w^ z`xV%CYImMVP3erX%Ei9w8D+LKWahl`>8I}AeRjpDa7)QAG`~yq<(2Oh_wBoVzK}EO zubZ)cRO&s9>&vM2@45eb=Kkl{H|kyY`L>i*&G5xB({0qG+vSZ1lP1%zZ%rnf)e%$d zk!1asH{&bK^nPusO=}htWAp-f{OZ%O^v!XRVu|>r18B z=-vBGTfIHmt{z90AJc^XL3n=OLAZ8jWBw_4Hj7EuwM(a_`#5l|2JZXU{QTnoWf#(By4_kl0p1z&|d56mH-4DKh%yoy^F&O8=_=k-DL;LR#Ifv!# zkX=X3i)vac>Fa0h^J@Qy@&u0;U*lN&^eVr%&BedtqpnC*8PX=8UT^ny~Hy=|=H@6;4s>dLFk5v8`~zGAH}Y03 zcpE$mUI6F83it*1Id~a-tC~*P-vtVM4}6$h0c!9*_y)KDz75WRx4_@PPrw`CBzOgU zAAA5_2Y&)L!5_hE;1A%p;P=`627Cwn6#NGKxSCEd&w(r8Rq!rIGtMu;UGP_M3p|~} zcnQ1&E`t|A`g`s>DKbCmQ^tF(mDMp=XY!MCsy0q)7mO7lnrIu| z)JoWBwYFLW<5b66%j+@f0HiDUprv%qd2Oup4IgW5ksF+s$~qxC-UK6@6HyrvWo&pU zY89Mu#whExRVB|-7nF0>b$X?&rPS6aqm4&X=2xVuB^D|hqqj1&%T?_yW?R7vTD4|M zjfbLjQCgn0YFFSx5KcI4Ozfln7Vwk-~MX6q*W)wHG|ANfJpD3Az<;X$w@Aq5Op? zoNpD$15Z&CL(wLcHT7OMBC@HSv~I8@1IaBUA+|wv>s^gl0;mbQEqGrmf?@=|A^+P- zvGhBME0w?pT|08JCUMARS`m5Mn(tH{mBJEf>1nR@vNN@?!jaydP8hrOcweGZSwU`f z-5uU~VSS{RV7@ANhv8C@CXQy(@a;6ppHfYDP6efE>=f^8$IU8qOzh$Q9(MA2u@qMk<`bV^a{o7(N%OL>SR45iA=5B z7d)AVfAK;Lo%f@|N|nOJ(PX=0C5GCh0f1Tw#&kIziLQe7oRgZC^a%bMyuP_J|F6OQ zFTXU)zL#gbG`pr5G|jYWetk7_X^u?ucA6Ql=4+b0)7;w3+G(atvviudPi7hES(;tb zY@FuL8(C(WqtopCVqTlpr5QTS!fCEfb90)rFB*=6$6( zdz|fjF2D0^-e=PDdfrdcv6`Ko=KT{{)`uCdWdGdA{zy8PvW}}+b~Ah5&ihGR`c_FZ zc=~Qh-xO!Eob>xr_SM;Jf7;Xa?Dz9k@z=*{|CeM{T&v=K7T3SdlGEpg-tFrevS*&2 zqw1U`@3?!Y=5X`&Fh~FX{}!-0{$$epw}8``JVm9h-o3ved-tk$uY32VcW-<5u6N&@ z1ItO5DsOCg)Bo=}Pthpq#_sl8e|LBHKLefT^xAl5Tz|a9E7deLzxY2L7VDxOBme-K C!h~@E literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.csv b/tests/testthat/data_files/STANDARDISE/std_2_d.csv new file mode 100644 index 00000000..0a5a46eb --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_2_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19 +High,NA,NA,C,2,Large,Left,o,28.7577520124614,31 +High,NA,NA,C,2,Large,Left,s,78.8305135443807,79 +High,NA,NA,C,2,Large,Left,n,40.89769218117,51 +Medium,Green,No,B,3,Medium,Right,c,88.3017404004931,14 +High,NA,NA,C,2,Large,Left,j,94.0467284293845,67 +Medium,Green,No,B,3,Medium,Right,r,4.55564993899316,42 +Medium,Green,No,B,3,Medium,Right,v,52.8105488047004,50 +Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43 +High,NA,NA,C,2,Large,Left,e,55.1435014465824,14 +Low,Red,NA,A,1,Small,Left,t,45.6614735303447,25 +Medium,Green,No,B,3,Medium,Right,n,95.6833345349878,90 +Medium,Green,No,B,3,Medium,Right,v,45.3334156190977,91 +Low,Red,No,A,1,Small,Right,y,67.7570635452867,69 +Medium,Green,NA,B,3,Medium,Left,z,57.2633401956409,91 +High,NA,No,C,2,Large,Right,e,10.2924682665616,57 +Low,Red,NA,A,1,Small,Left,s,89.9824970401824,92 +High,NA,No,C,2,Large,Right,y,24.608773435466,9 +High,NA,NA,C,2,Large,Left,y,4.20595335308462,93 +Low,Red,NA,A,1,Small,Left,i,32.7920719282702,99 +Low,Red,NA,A,1,Small,Left,c,95.4503649147227,72 +Low,Red,NA,A,1,Small,Left,h,88.9539316063747,26 +Low,Red,No,A,1,Small,Right,z,69.28034061566,7 +High,NA,NA,C,2,Large,Left,g,64.0506813768297,42 +Medium,Green,NA,B,3,Medium,Left,j,99.4269776623696,9 +High,NA,NA,C,2,Large,Left,i,65.5705799115822,83 +Medium,Green,NA,B,3,Medium,Left,s,70.8530468167737,36 +Low,Red,No,A,1,Small,Right,d,54.4066024711356,78 +Medium,Green,No,B,3,Medium,Right,n,59.414202044718,81 +High,NA,NA,C,2,Large,Left,q,28.915973729454,43 +Medium,Green,No,B,3,Medium,Right,k,14.7113647311926,76 +Low,Red,NA,A,1,Small,Left,g,96.3024232536554,15 +High,NA,No,C,2,Large,Right,u,90.2299045119435,32 +High,NA,NA,C,2,Large,Left,l,69.0705278422683,7 +Low,Red,No,A,1,Small,Right,o,79.5467417687178,9 +High,NA,No,C,2,Large,Right,j,2.46136845089495,41 +Medium,Green,NA,B,3,Medium,Left,m,47.7795971091837,74 +Low,Red,NA,A,1,Small,Left,g,75.8459537522867,23 +High,NA,NA,C,2,Large,Left,i,21.6407935833558,27 +Low,Red,NA,A,1,Small,Left,i,31.8181007634848,60 +Low,Red,No,A,1,Small,Right,j,23.1625785352662,53 +Medium,Green,NA,B,3,Medium,Left,w,14.2800022382289,7 +High,NA,No,C,2,Large,Right,u,41.4546335814521,53 +High,NA,No,C,2,Large,Right,g,41.3724326295778,27 +Low,Red,NA,A,1,Small,Left,u,36.8845450924709,96 +High,NA,NA,C,2,Large,Left,f,15.2444747742265,38 +Low,Red,NA,A,1,Small,Left,y,13.880606344901,89 +High,NA,NA,C,2,Large,Left,b,23.3034099452198,34 +Medium,Green,No,B,3,Medium,Right,e,46.5962450252846,93 +Low,Red,NA,A,1,Small,Left,h,26.5972640365362,69 +Medium,Green,NA,B,3,Medium,Left,l,85.7827715342864,72 +Low,Red,No,A,1,Small,Right,m,4.58311666734517,76 +Low,Red,NA,A,1,Small,Left,r,44.2200074205175,63 +High,NA,NA,C,2,Large,Left,a,79.8924845643342,13 +Low,Red,NA,A,1,Small,Left,y,12.189925997518,82 +Medium,Green,NA,B,3,Medium,Left,y,56.0947983758524,97 +Low,Red,No,A,1,Small,Right,f,20.653138961643,91 +Low,Red,No,A,1,Small,Right,u,12.7531650243327,25 +High,NA,NA,C,2,Large,Left,o,75.3307864302769,38 +Low,Red,No,A,1,Small,Right,i,89.50453591533,21 +Medium,Green,NA,B,3,Medium,Left,o,37.4462775886059,79 +Low,Red,NA,A,1,Small,Left,z,66.5115194628015,41 +High,NA,No,C,2,Large,Right,p,9.48406609240919,47 +Low,Red,No,A,1,Small,Right,t,38.3969637798145,90 +High,NA,NA,C,2,Large,Left,f,27.438364457339,60 +Medium,Green,NA,B,3,Medium,Left,k,81.4640038879588,95 +High,NA,No,C,2,Large,Right,h,44.8516341391951,16 +Medium,Green,NA,B,3,Medium,Left,v,81.0064353048801,94 +Medium,Green,NA,B,3,Medium,Left,v,81.2389509519562,6 +High,NA,NA,C,2,Large,Left,g,79.4342321110889,72 +Medium,Green,NA,B,3,Medium,Left,p,43.9831687603146,86 +Medium,Green,No,B,3,Medium,Right,q,75.4475158639252,86 +High,NA,NA,C,2,Large,Left,v,62.9221131559461,39 +High,NA,NA,C,2,Large,Left,r,71.0182401351631,31 +Low,Red,NA,A,1,Small,Left,q,0.0624773325398564,81 +Medium,Green,NA,B,3,Medium,Left,b,47.5316574098542,50 +Medium,Green,No,B,3,Medium,Right,d,22.0118885161355,34 +Low,Red,No,A,1,Small,Right,m,37.9816537722945,4 +Medium,Green,NA,B,3,Medium,Left,e,61.2771003274247,13 +Low,Red,No,A,1,Small,Right,v,35.1797909243032,69 +Low,Red,No,A,1,Small,Right,s,11.1135424347594,25 +Medium,Green,No,B,3,Medium,Right,y,24.3619472719729,52 +High,NA,No,C,2,Large,Right,t,66.805558744818,22 +High,NA,NA,C,2,Large,Left,v,41.7646779678762,89 +Low,Red,No,A,1,Small,Right,y,78.8195834029466,32 +Medium,Green,No,B,3,Medium,Right,n,10.2864644257352,25 +Low,Red,No,A,1,Small,Right,y,43.4892741497606,87 +Medium,Green,NA,B,3,Medium,Left,w,98.4956979984418,35 +Low,Red,NA,A,1,Small,Left,c,89.3051114398986,40 +High,NA,No,C,2,Large,Right,h,88.6469060787931,30 +High,NA,NA,C,2,Large,Left,p,17.5052650272846,12 +Medium,Green,No,B,3,Medium,Right,l,13.0695691565052,31 +High,NA,No,C,2,Large,Right,y,65.3101925039664,30 +Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64 +Medium,Green,No,B,3,Medium,Right,c,65.6758127966896,99 +Medium,Green,No,B,3,Medium,Right,n,32.0373242488131,14 +High,NA,NA,C,2,Large,Left,g,18.7691119266674,93 +Medium,Green,NA,B,3,Medium,Left,c,78.2294301316142,96 +Low,Red,No,A,1,Small,Right,w,9.35949867125601,71 +High,NA,NA,C,2,Large,Left,v,46.677904156968,67 +High,NA,NA,C,2,Large,Left,z,51.1505459900945,23 diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.rda b/tests/testthat/data_files/STANDARDISE/std_2_d.rda new file mode 100644 index 0000000000000000000000000000000000000000..cad2198a28fd31f5f70c1df6d297389e536bef15 GIT binary patch literal 2461 zcmV;O31apiiwFP!000001I1eDb{xeO?ioqm;#dw2aUdIkBqSjewJ**BCKy9vAT~=( zHZhh)vVttZNLUAcm|KT^^a`A97`ebc; z_h7s=o)qtv?~V4yf%ffV`trR;f_Jr4`|AY11s`3*as2L+#;J}k}*igzX7Vex(*t>ls0T(0}<@6)f`Q~Gw; zAuHQAlTv4P#t+9k`&gCHe|jeR`v;Tl-L18U<2DIaj*7Tn4spMHEHmWX8{1p=!+Pl! zQYDCeYrL`j*@NkU8HHBXcSigBflJadC11>|;eVR=#~(j))LL-%=xfmL+|fLhV(AQn z$fZ(rN0}}6sX0?V-pmO9KK@38=PK1BJv%#7J{|mq<#T?vq-VSa9s6eXXTSN`URuc` z<5jJTvz5G_eKWEOIf++k^Yr;d={AhneRDD%@5X$)Gu~(x%&@N{=})c2 zr=<0rEoJrTGf&QFR)ld=Q+~rJ#*Ap^YbDM7b%B1sxFpVGj*G)`s&P}imT83DYd+vYdx&b&Ku0 z36^KOV%T@1EB6nZIv(?AeX;M$;V%38^y|v=GUxj8d~nL!+sducWNTd9*}|)LAB=W( zns0Il`O39=>~+vZD}THAl(fFH^LpnZ?;k~G7r&F*(B+dp-t+1#Tc7Gew|r~7c@R_V z&i2;*gXVGED^H=uiH7mLxY&=2c&Qz%$Hl+nViFe* zHBxj&{m$I1gzi;3CrWbfCslA0pN}iW9#)I3iqEIjo=Np8*}hR(Iun@*W^dQJHRi2zXEH%9RLRS8{hy2i~tJU0DcR2 zX8#19Zl+VgPl1ns4}qJ&%YpxO;0?e6zXN{UOnbpS;9tP&!0l$*GZb(OcoO&ta1~eu zehvHz_!;mW;1ckCfCE1OJ^=hbz7XIJ@GamP@Lk{{@F(COz|R4H8956)5Bv~#A9x9P z7kCf&3-BWFXW$ROo6WS3N#J|HFM!_z&o#&Yj?+8^yav1gybYWKegmune+TXYPc+kh z_zLhW@G9^Oa0Pe^I0FbkXKnm))Aq|JD5085V}#&JIIFY_Z!U8pH51xOMXi%gMY<%M zDyM`{f?BD0OIHfVof8&dI-^^`nAX||shEj$M}z`js}18yQ`XX@l~il$xTKD`NN1FA ztEHBbDDa0**IMaTE@Ol-p(K|=8b}KCRmKT}hFm#k6tihNCq_YSMR7pzq$U&a zkW<<*g?CBpD(KcyO|_8Hwc0bd$5Ki;7nXDDGP;2}S{nGm@m5PkxRlZ$1d?-|yvz*o z(!4r`!_A3qiGeQ1JmAq}6{kc)efSe!SOKHp7Z}Ci9GBD)iZJ7h^O*^6wEHzu0eUF^!q9d86CQ4gxmArETbEg4|r>1 zO;)!81?RYtOhO3N8Qr09HTF`$+6|P@ga8$_3C|gm z(KTXfd>=s#wRE2&=t;pDIP6-!Zt7Q~025N172N4ssc-2zi^-*PJ-cp$SvyX*zjXOF|XZNN&XmssX9Y-j{a$|kBv$0QI0v&AO9gXNFp$3VM>_Bg42z?tv!_d9pbIN?3wgqi` zs!@Zlgh1QWE~CS_3WjO{Pw!Md$9EFBV=YR+CjM3tV|`jFFt=?oSTr8WP(e5&UF%QK(J>spa_AVdbU4j_C_2!AX_t zkoxc@dIiVq(N?rW)RFZVNtkctCZi+s;9q#b+1C3`Lrd<%g-?@eKP%1>=?4JRij}g> zafj$~kPW_*0xjuLF16bxb}l- z9H#xy=!a!LRG$qz{5wCK`r+3Pn70E@KPda*_?fWR@AE^lA9nrV><42%aI5fM1=>f= zbcpqX`m;gqjUd`ut??kVM7P-}W3G9sRc_ bhc9f54n|~if+JHOG++K7j6ZI{q$U6WOC;Vg literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.csv b/tests/testthat/data_files/STANDARDISE/std_3_d.csv new file mode 100644 index 00000000..5dfce08f --- /dev/null +++ b/tests/testthat/data_files/STANDARDISE/std_3_d.csv @@ -0,0 +1,101 @@ +fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20 +High,Blue,Yes,C,Three,31,TRUE,o,28.7577520124614 +High,Blue,Yes,C,Three,79,TRUE,s,78.8305135443807 +High,Blue,Yes,C,Three,51,TRUE,n,40.89769218117 +Medium,NA,NA,B,Two,14,FALSE,c,88.3017404004931 +High,Blue,Yes,C,Three,67,TRUE,j,94.0467284293845 +Medium,NA,NA,B,Two,42,FALSE,r,4.55564993899316 +Medium,NA,NA,B,Two,50,FALSE,v,52.8105488047004 +Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046 +High,Blue,Yes,C,Three,14,TRUE,e,55.1435014465824 +Low,NA,Yes,A,One,25,TRUE,t,45.6614735303447 +Medium,NA,NA,B,Two,90,FALSE,n,95.6833345349878 +Medium,NA,NA,B,Two,91,FALSE,v,45.3334156190977 +Low,NA,NA,A,One,69,FALSE,y,67.7570635452867 +Medium,NA,Yes,B,Two,91,TRUE,z,57.2633401956409 +High,Blue,NA,C,Three,57,FALSE,e,10.2924682665616 +Low,NA,Yes,A,One,92,TRUE,s,89.9824970401824 +High,Blue,NA,C,Three,9,FALSE,y,24.608773435466 +High,Blue,Yes,C,Three,93,TRUE,y,4.20595335308462 +Low,NA,Yes,A,One,99,TRUE,i,32.7920719282702 +Low,NA,Yes,A,One,72,TRUE,c,95.4503649147227 +Low,NA,Yes,A,One,26,TRUE,h,88.9539316063747 +Low,NA,NA,A,One,7,FALSE,z,69.28034061566 +High,Blue,Yes,C,Three,42,TRUE,g,64.0506813768297 +Medium,NA,Yes,B,Two,9,TRUE,j,99.4269776623696 +High,Blue,Yes,C,Three,83,TRUE,i,65.5705799115822 +Medium,NA,Yes,B,Two,36,TRUE,s,70.8530468167737 +Low,NA,NA,A,One,78,FALSE,d,54.4066024711356 +Medium,NA,NA,B,Two,81,FALSE,n,59.414202044718 +High,Blue,Yes,C,Three,43,TRUE,q,28.915973729454 +Medium,NA,NA,B,Two,76,FALSE,k,14.7113647311926 +Low,NA,Yes,A,One,15,TRUE,g,96.3024232536554 +High,Blue,NA,C,Three,32,FALSE,u,90.2299045119435 +High,Blue,Yes,C,Three,7,TRUE,l,69.0705278422683 +Low,NA,NA,A,One,9,FALSE,o,79.5467417687178 +High,Blue,NA,C,Three,41,FALSE,j,2.46136845089495 +Medium,NA,Yes,B,Two,74,TRUE,m,47.7795971091837 +Low,NA,Yes,A,One,23,TRUE,g,75.8459537522867 +High,Blue,Yes,C,Three,27,TRUE,i,21.6407935833558 +Low,NA,Yes,A,One,60,TRUE,i,31.8181007634848 +Low,NA,NA,A,One,53,FALSE,j,23.1625785352662 +Medium,NA,Yes,B,Two,7,TRUE,w,14.2800022382289 +High,Blue,NA,C,Three,53,FALSE,u,41.4546335814521 +High,Blue,NA,C,Three,27,FALSE,g,41.3724326295778 +Low,NA,Yes,A,One,96,TRUE,u,36.8845450924709 +High,Blue,Yes,C,Three,38,TRUE,f,15.2444747742265 +Low,NA,Yes,A,One,89,TRUE,y,13.880606344901 +High,Blue,Yes,C,Three,34,TRUE,b,23.3034099452198 +Medium,NA,NA,B,Two,93,FALSE,e,46.5962450252846 +Low,NA,Yes,A,One,69,TRUE,h,26.5972640365362 +Medium,NA,Yes,B,Two,72,TRUE,l,85.7827715342864 +Low,NA,NA,A,One,76,FALSE,m,4.58311666734517 +Low,NA,Yes,A,One,63,TRUE,r,44.2200074205175 +High,Blue,Yes,C,Three,13,TRUE,a,79.8924845643342 +Low,NA,Yes,A,One,82,TRUE,y,12.189925997518 +Medium,NA,Yes,B,Two,97,TRUE,y,56.0947983758524 +Low,NA,NA,A,One,91,FALSE,f,20.653138961643 +Low,NA,NA,A,One,25,FALSE,u,12.7531650243327 +High,Blue,Yes,C,Three,38,TRUE,o,75.3307864302769 +Low,NA,NA,A,One,21,FALSE,i,89.50453591533 +Medium,NA,Yes,B,Two,79,TRUE,o,37.4462775886059 +Low,NA,Yes,A,One,41,TRUE,z,66.5115194628015 +High,Blue,NA,C,Three,47,FALSE,p,9.48406609240919 +Low,NA,NA,A,One,90,FALSE,t,38.3969637798145 +High,Blue,Yes,C,Three,60,TRUE,f,27.438364457339 +Medium,NA,Yes,B,Two,95,TRUE,k,81.4640038879588 +High,Blue,NA,C,Three,16,FALSE,h,44.8516341391951 +Medium,NA,Yes,B,Two,94,TRUE,v,81.0064353048801 +Medium,NA,Yes,B,Two,6,TRUE,v,81.2389509519562 +High,Blue,Yes,C,Three,72,TRUE,g,79.4342321110889 +Medium,NA,Yes,B,Two,86,TRUE,p,43.9831687603146 +Medium,NA,NA,B,Two,86,FALSE,q,75.4475158639252 +High,Blue,Yes,C,Three,39,TRUE,v,62.9221131559461 +High,Blue,Yes,C,Three,31,TRUE,r,71.0182401351631 +Low,NA,Yes,A,One,81,TRUE,q,0.0624773325398564 +Medium,NA,Yes,B,Two,50,TRUE,b,47.5316574098542 +Medium,NA,NA,B,Two,34,FALSE,d,22.0118885161355 +Low,NA,NA,A,One,4,FALSE,m,37.9816537722945 +Medium,NA,Yes,B,Two,13,TRUE,e,61.2771003274247 +Low,NA,NA,A,One,69,FALSE,v,35.1797909243032 +Low,NA,NA,A,One,25,FALSE,s,11.1135424347594 +Medium,NA,NA,B,Two,52,FALSE,y,24.3619472719729 +High,Blue,NA,C,Three,22,FALSE,t,66.805558744818 +High,Blue,Yes,C,Three,89,TRUE,v,41.7646779678762 +Low,NA,NA,A,One,32,FALSE,y,78.8195834029466 +Medium,NA,NA,B,Two,25,FALSE,n,10.2864644257352 +Low,NA,NA,A,One,87,FALSE,y,43.4892741497606 +Medium,NA,Yes,B,Two,35,TRUE,w,98.4956979984418 +Low,NA,Yes,A,One,40,TRUE,c,89.3051114398986 +High,Blue,NA,C,Three,30,FALSE,h,88.6469060787931 +High,Blue,Yes,C,Three,12,TRUE,p,17.5052650272846 +Medium,NA,NA,B,Two,31,FALSE,l,13.0695691565052 +High,Blue,NA,C,Three,30,FALSE,y,65.3101925039664 +Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399 +Medium,NA,NA,B,Two,99,FALSE,c,65.6758127966896 +Medium,NA,NA,B,Two,14,FALSE,n,32.0373242488131 +High,Blue,Yes,C,Three,93,TRUE,g,18.7691119266674 +Medium,NA,Yes,B,Two,96,TRUE,c,78.2294301316142 +Low,NA,NA,A,One,71,FALSE,w,9.35949867125601 +High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968 +High,Blue,Yes,C,Three,23,TRUE,z,51.1505459900945 diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.rda b/tests/testthat/data_files/STANDARDISE/std_3_d.rda new file mode 100644 index 0000000000000000000000000000000000000000..4165f09836d3a23ed62cd6ad8e3179416d1a16d5 GIT binary patch literal 2343 zcmV+?3E1`@iwFP!000001I1bCb{t0)F3m{sLa;;(goG_DAz?{T`(lWGckuw-lRUw{3jz!(6=R@f@LfakgS;r6E9+{V$%*aBMl51!?bCy!>6k2ZJr52riR zS@mx9=43ui*cVUm=J{EJ^>|&L%w`8iSFYeO`DF9fWb5#tX1R89WPm3 z{ixWFDtx6_msIl`E!eMAd`f&&eX~-%tND(q_sg)FM`?4t92`^JgPlyh(F?%Yo6@ta7MB=)W8 z_U_#~%?GCxTHD&2%;$+q)-oqwn^&WMvGyN*{={)-!MWo{px^o9Wh%9$GfE=Iwde(9 zwmPKdVEH7IPo6E+`x-C59eO{+Z%FQPkeqWLwZ$1yjp?oglQ)5Hl*0P55StjSKG8gf?UmHrQv1K+am;XRnHU3~a zXOAPDKab!?n$6`!rXJko@NV}j6<%Ki%3)p)`FcMz5jISRZu*Z@0 z4@IH+XUx;5e#YR3@$%m=b;5re^bSaaE`g{Fe*{vXA1CY?(4Rqn0bNVa2>L5K`Hjy0 z2HFC>0eTj6F7cpue*k?S^fu_-#PdziI_SA{md^bU^jk6q^mWkBK{ma&3Ht}=2cQo? z-%WC_CAq}&yr^z0Yv6-CxcXD*%^~XdnB>%jd{6R8TlZGWK}{sj6S=ta=CKu;zQd65Z2EQb`_m~l7g_)B3aogtV!sHhpNsg^ z*wDANtRa1t$vLacMf~p92DSuSX2UZ1pQq-x+qrhoF6QlGzg=v#i+{F@S-ZH`E;nL^uF5d zE%M=`9_PNSTRq!v_8fJ)r&v}aM`tqZ%v(L72R)rLlHL1JFSwb{hdssa^%grlJ|Fka z%zCe~?b|&|r@hbn`E%GilV9!kkQ|+Uj%A(qhWTvuke%MEqnIY}dRyn>rE9=L}tCp_TMY?Tt+L4EihE-*R= zQF3R5j~X%1)@Ey!;zG(0f|Ure&0>}2(!syM!o#i$E^sY0;t`EiGIo3n*G@*`m6y@0 znAI`diGZ{+h=9x5E5|L4K_v}sGy8SrxzPdMF$k^BPE#<}dT*7G2(avo4IJT!UPGd> zIb*ftK?W&=M^J*vN@^IZy;ewt5Tup`?xPaWTm|JtHdezZ_ytBOI45RxwB^!AA1O28 zjqbcdDnJiQFTCi^!zvUA3<$xYW(qt8WSaJn00S}}xQ;G5;CP?$OGfYz5p@OQy81Da z!+xWcQxQ%I1rA*t!c9DsEO3wV0dMDnFX~pK;G*(I8VC`tz#|IR;jGZI%Rqw+SIBVW zt3Wy10;g=!1}>A)c5*G(8YpTLp7XxI9b)RKkD!J+oN@#`37Kz;%wGpi$_wHYU1u%| zh04|=IRmP_z;PN$BGE*G&g?DP0xHX({Dml-ZxxaUIEorE6gjD^;V0b)k)?K$-C&Un zNNyn!VjEO<-f^%+05!sH3*2*spcnz)ApdivSoAvyS1JJ?bnVDVj>JJOqZJ`Vh*dsNw+n5bx&$0-;T?zQW}zO5 zkL*Bimk50qLod<2;Bz6VPP>Bklxo6bN@%oA7YiKDwJ_9actRC8>K3yMEvD4c2Tp~l z!5{6h%PHUi;S10cuM0haYZL>lMH>?xAH-H0r4cVFLN^S6qe4+}h?SRJ=>kV3qIDrd zkSbYjfV~0``2zVV3mju6qVHJBs`5PMByuMNlt9S*gXKO@T3Ij;T`~kT9wAXd_{PM} zpTN;EBBmTV#%UZ*qalw7`z|>HqG(X%@InMLSK#CxiGqm%6Tgi*a4QT(4Z6A3#p#IP zMh|slQCU$?RKOja5S(1O5vdPvqE{%~J-UjHh&r+!R}x9B+!r`95B`N0V(7dd9kf&w zE{rDIjg=UxMPrCCiN|H@?^?Q7L#~c5uX7CG(%=j*pN3Jz(p_?aASU- Ne*r@x227A5001-BpXmSq literal 0 HcmV?d00001 diff --git a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R index 9e129971..e6dac369 100644 --- a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R +++ b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R @@ -1,4 +1,4 @@ -# +# # Molgenis' Armadillo - Upload Testing Datasets # @@ -15,7 +15,7 @@ upload_testing_dataset_table <- function(project_name, folder_name, table_name, MolgenisArmadillo::armadillo.login_basic(armadillo = 'http://127.0.0.1:8080', username = "admin", password = "admin") if (! 'datashield' %in% MolgenisArmadillo::armadillo.list_projects()) - MolgenisArmadillo::armadillo.create_project('datashield') + MolgenisArmadillo::armadillo.create_project('datashield', overwrite_existing = "no") upload_testing_dataset_table('datashield', 'anthro', 'anthro1', 'ANTHRO/anthro1.rda') upload_testing_dataset_table('datashield', 'anthro', 'anthro2', 'ANTHRO/anthro2.rda') @@ -69,4 +69,12 @@ upload_testing_dataset_table('datashield', 'testing', 'DATASET1', 'TESTING/DATAS upload_testing_dataset_table('datashield', 'testing', 'DATASET2', 'TESTING/DATASET2.rda') upload_testing_dataset_table('datashield', 'testing', 'DATASET3', 'TESTING/DATASET3.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_1', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_2', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_3', 'STANDARDISE/std_3.rda') + +upload_testing_dataset_table('datashield', 'standardise', 'std_1_d', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_2_d', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('datashield', 'standardise', 'std_3_d', 'STANDARDISE/std_3.rda') + print(MolgenisArmadillo::armadillo.list_tables('datashield')) diff --git a/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R b/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R index ae79d2e6..1fce513b 100644 --- a/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R +++ b/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R @@ -1,4 +1,4 @@ -# +# # Obiba's Opal - Upload Testing Datasets # @@ -9,11 +9,11 @@ library(tibble) upload_testing_dataset_table <- function(opal, project_name, table_name, local_file_path) { if (! opal.project_exists(opal, project_name)) opal.project_create(opal, project_name, database = "mongodb") - + dataset_name <- load(file = local_file_path) dataset <- eval(as.symbol(dataset_name)) data <- as_tibble(dataset, rownames = '_row_id_') - + opal.table_save(opal, data, project_name, table_name, id.name = "_row_id_", force = TRUE) } @@ -72,4 +72,14 @@ upload_testing_dataset_table(opal, 'TESTING', 'DATASET1', 'TESTING/DATASET1.rda' upload_testing_dataset_table(opal, 'TESTING', 'DATASET2', 'TESTING/DATASET2.rda') upload_testing_dataset_table(opal, 'TESTING', 'DATASET3', 'TESTING/DATASET3.rda') +upload_testing_dataset_table('STANDARDISE', 'std_1', 'STANDARDISE/std_1.rda') +upload_testing_dataset_table('STANDARDISE', 'std_2', 'STANDARDISE/std_2.rda') +upload_testing_dataset_table('STANDARDISE', 'std_3', 'STANDARDISE/std_3.rda') + +upload_testing_dataset_table('STANDARDISE', 'std_1_d', 'STANDARDISE/std_1_d.rda') +upload_testing_dataset_table('STANDARDISE', 'std_2_d', 'STANDARDISE/std_2_d.rda') +upload_testing_dataset_table('STANDARDISE', 'std_3_d', 'STANDARDISE/std_3_d.rda') + + + opal.logout(opal) From d6153ec7b5da5ea1c799f68d50871e2db5df354c Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:25:14 +0200 Subject: [PATCH 17/18] helper function to load extra data --- .../init_studies_datasets.R | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/testthat/connection_to_datasets/init_studies_datasets.R b/tests/testthat/connection_to_datasets/init_studies_datasets.R index e7e412ab..b95ed48a 100644 --- a/tests/testthat/connection_to_datasets/init_studies_datasets.R +++ b/tests/testthat/connection_to_datasets/init_studies_datasets.R @@ -249,6 +249,34 @@ init.studies.dataset.stand <- function(variables) } } +init.studies.dataset.stand_disclosure <- function(variables) +{ + if (ds.test_env$secure_login_details) + { + if (ds.test_env$driver == "OpalDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1_d", options=ds.test_env$options_1) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_2_d", options=ds.test_env$options_2) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_3_d", options=ds.test_env$options_3) + ds.test_env$login.data <- builder$build() + } + else if (ds.test_env$driver == "ArmadilloDriver") + { + builder <- DSI::newDSLoginBuilder(.silent = TRUE) + builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1_d", driver = ds.test_env$driver) + builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2_d", driver = ds.test_env$driver) + builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3_d", driver = ds.test_env$driver) + ds.test_env$login.data <- builder$build() + } + else + { + ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env) + } + ds.test_env$stats.var <- variables + } +} + connect.studies.dataset.cnsim <- function(variables) { @@ -314,6 +342,14 @@ connect.studies.dataset.stand <- function(variables) log.in.data.server() } +connect.studies.dataset.stand_disclosure <- function(variables) +{ + log.out.data.server() + source("connection_to_datasets/login_details.R") + init.studies.dataset.stand_disclosure(variables) + log.in.data.server() +} + disconnect.studies.dataset.cnsim <- function() { log.out.data.server() From a046eb4a0fb47aeb780f1f884f02cc603fff2349 Mon Sep 17 00:00:00 2001 From: Tim Cadman <41470917+timcadman@users.noreply.github.com> Date: Tue, 7 Oct 2025 15:25:37 +0200 Subject: [PATCH 18/18] test: test that disclosive levels are not returned --- tests/testthat/test-smk-standardiseDf.R | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R index 765d655c..d557c325 100644 --- a/tests/testthat/test-smk-standardiseDf.R +++ b/tests/testthat/test-smk-standardiseDf.R @@ -686,11 +686,27 @@ test_that("ds.standardiseDf doesn't run if levels are not identical and fix_clas ) }) +test_that("ds.standardiseDf doesn't run if a factor variable has too many levels", { + connect.studies.dataset.stand_disclosure( + c( + "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9", + "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19", + "col20") + ) -## Add disclosure check levels -## Push change to dsBase - - + expect_error( + with_mocked_bindings( + ds.standardiseDf( + df = "D", + newobj = "test_fill" + ), + prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1", + ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2" + ) + ) +}) +disconnect.studies.dataset.stand() +context("ds.standardiseDf::smk::done")