From 366e88b13cf3258fb86412f8e0a6ee4959403916 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Mon, 21 Jul 2025 16:49:10 +0200
Subject: [PATCH 01/18] feat: replacement to data frame fill

---
 R/ds.standardiseDf.R | 603 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 603 insertions(+)
 create mode 100644 R/ds.standardiseDf.R

diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R
new file mode 100644
index 00000000..206375cb
--- /dev/null
+++ b/R/ds.standardiseDf.R
@@ -0,0 +1,603 @@
+#' Fill DataFrame with Missing Columns and Adjust Classes
+#'
+#' This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary.
+#' It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts.
+#'
+#' @param df.name Name of the input DataFrame to fill.
+#' @param newobj Name of the new DataFrame object created after filling.
+#' @param fix_class Character, determines behaviour if class of variables is not the same in all
+#' studies. Option "ask" (default) provides the user with a prompt asking if they want to set the
+#' class across all studies, option "no" will throw an error if class conflicts are present.
+#' @param fix_levels Character, determines behaviour if levels of factor variables is not the same
+#' in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set
+#' the levels of factor variables to be the same across all studies, whilst option "no" will throw
+#' an error if factor variables do not have the same class.
+#' @param datasources Data sources from which to aggregate data. Default is `NULL`.
+#' @importFrom assertthat assert_that
+#' @importFrom DSI datashield.aggregate datashield.assign
+#' @return The filled DataFrame with added columns and adjusted classes or factor levels.
+#' @export
+ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", fix_levels = "ask",
+                             datasources = NULL) {
+  fill_warnings <- list()
+
+  .check_arguments(df.name, newobj, fix_class, fix_levels)
+
+  if(is.null(datasources)){
+    datasources <- datashield.connections_find()
+  }
+
+  col_names <- datashield.aggregate(datasources, call("colnamesDS", df.name))
+  .stop_if_cols_identical(col_names)
+
+  var_classes <- .get_var_classes(df.name, datasources)
+  class_conflicts <- .identify_class_conflicts(var_classes)
+
+  datashield.assign(datasources, newobj, as.symbol(df.name))
+
+  if (length(class_conflicts) > 0 & fix_class == "no") {
+    DSI::datashield.aggregate(datasources, call("rmDS", newobj))
+    cli_abort("Variables do not have the same class in all studies and `fix_class` is 'no'")
+  } else if (length(class_conflicts) > 0 & fix_class == "ask") {
+    class_decisions <- prompt_user_class_decision_all_vars(
+      names(class_conflicts),
+      var_classes$server,
+      dplyr::select(var_classes, all_of(names(class_conflicts))),
+      newobj,
+      datasources
+    )
+
+    withCallingHandlers({
+      .fix_classes(newobj, names(class_conflicts), class_decisions, newobj, datasources)
+    }, warning = function(w) {
+      fill_warnings <<- c(fill_warnings, conditionMessage(w))  # Append warning to the list
+      invokeRestart("muffleWarning")  # Suppress immediate display of the warning
+    })
+  }
+
+  unique_cols <- .get_unique_cols(col_names)
+  .add_missing_cols_to_df(newobj, unique_cols, newobj, datasources)
+  new_names <- datashield.aggregate(datasources, call("colnamesDS", newobj))
+  added_cols <- .get_added_cols(col_names, new_names)
+
+  new_classes <- .get_var_classes(newobj, datasources)
+  factor_vars <- .identify_factor_vars(new_classes)
+  factor_levels <- .get_factor_levels(factor_vars, newobj, datasources)
+  level_conflicts <- .identify_level_conflicts(factor_levels)
+
+  if (length(level_conflicts) > 0 & fix_levels == "no") {
+    DSI::datashield.aggregate(datasources, call("rmDS", newobj))
+    cli_abort("Factor variables do not have the same levels in all studies and `fix_levels` is 'no'")
+  } else if (length(level_conflicts) > 0 & fix_levels == "ask") {
+    levels_decision <- ask_question_wait_response_levels(level_conflicts, newobj, datasources)
+  }
+
+  if (levels_decision == "1") {
+    unique_levels <- .get_unique_levels(factor_levels, level_conflicts)
+    .set_factor_levels(newobj, unique_levels, datasources)
+  }
+
+  .print_out_messages(added_cols, class_decisions, names(class_conflicts), unique_levels,
+                      level_conflicts, levels_decision, newobj)
+
+  .handle_warnings(fill_warnings)
+  .print_class_warning(class_conflicts, fix_class, class_decisions)
+}
+
+#' Check Function Arguments for Validity
+#'
+#' This function validates the arguments provided to ensure they meet specified conditions.
+#' It checks that the `fix_class` and `fix_levels` arguments are set to accepted values
+#' and that `df.name` and `newobj` are character strings.
+#'
+#' @param df.name A character string representing the name of the data frame.
+#' @param newobj A character string representing the name of the new object to be created.
+#' @param fix_class A character string indicating the method for handling class issues.
+#'   Must be either `"ask"` or `"no"`.
+#' @param fix_levels A character string indicating the method for handling level issues.
+#'   Must be either `"ask"` or `"no"`.
+#' @return NULL. This function is used for validation and does not return a value.
+#' @importFrom assertthat assert_that
+#' @noRd
+.check_arguments <- function(df.name, newobj, fix_class, fix_levels) {
+  assert_that(fix_class %in% c("ask", "no"))
+  assert_that(fix_levels %in% c("ask", "no"))
+  assert_that(is.character(df.name))
+  assert_that(is.character(newobj))
+}
+
+#' Stop If Columns Are Identical
+#'
+#' Checks if the columns in the data frames are identical and throws an error if they are.
+#'
+#' @param col_names A list of column names from different data sources.
+#' @return None. Throws an error if columns are identical.
+#' @importFrom cli cli_abort
+#' @noRd
+.stop_if_cols_identical <- function(col_names) {
+  are_identical <- all(sapply(col_names, identical, col_names[[1]]))
+  if (are_identical) {
+    cli_abort("Columns are identical in all data frames: nothing to fill")
+  }
+}
+
+#' Get Variable Classes from DataFrame
+#'
+#' Retrieves the class of each variable in the specified DataFrame from different data sources.
+#'
+#' @param df.name Name of the input DataFrame.
+#' @param datasources Data sources from which to aggregate data.
+#' @return A DataFrame containing the variable classes from each data source.
+#' @import dplyr
+#' @noRd
+.get_var_classes <- function(df.name, datasources) {
+  cally <- call("getClassAllColsDS", df.name)
+  classes <- datashield.aggregate(datasources, cally) %>%
+    bind_rows(.id = "server")
+  return(classes)
+}
+
+#' Identify Class Conflicts
+#'
+#' Identifies conflicts in variable classes across different data sources.
+#'
+#' @param classes A DataFrame containing variable classes across data sources.
+#' @return A list of variables that have class conflicts.
+#' @import dplyr
+#' @importFrom purrr map
+#' @noRd
+.identify_class_conflicts <- function(classes) {
+  server <- NULL
+  different_class <- classes |>
+    dplyr::select(-server) |>
+    map(~ unique(na.omit(.)))
+
+  out <- different_class[which(different_class %>% map(length) > 1)]
+  return(out)
+}
+
+#' Prompt User for Class Decision for All Variables
+#'
+#' Prompts the user to resolve class conflicts for all variables.
+#'
+#' @param vars A vector of variable names with class conflicts.
+#' @param all_servers The names of all servers.
+#' @param all_classes The classes of the variables across servers.
+#' @return A vector of decisions for each variable's class.
+#' @noRd
+prompt_user_class_decision_all_vars <- function(vars, all_servers, all_classes, newobj, datasources) {
+  decisions <- c()
+  for (i in 1:length(vars)) {
+    decisions[i] <- prompt_user_class_decision(vars[i], all_servers, all_classes[[i]], newobj, datasources)
+  }
+  return(decisions)
+}
+
+#' Prompt User for Class Decision for a Single Variable
+#'
+#' Prompts the user to resolve a class conflict for a single variable.
+#'
+#' @param var The variable name with a class conflict.
+#' @param all_servers The names of all servers.
+#' @param all_classes The classes of the variable across servers.
+#' @importFrom cli cli_alert_warning cli_alert_danger
+#' @return A decision for the variable's class.
+#' @noRd
+prompt_user_class_decision <- function(var, servers, classes, newobj, datasources) {
+  cli_alert_warning("`ds.dataFrameFill` requires that all columns have the same class.")
+  cli_alert_danger("Column {.strong {var}} has following classes:")
+  print_all_classes(servers, classes)
+  cli_text("")
+  return(ask_question_wait_response_class(var, newobj, datasources))
+}
+
+#' Print All Server-Class Pairs
+#'
+#' This function prints out a list of server names along with their corresponding
+#' class types. It formats the output with a bullet-point list using the `cli` package.
+#'
+#' @param all_servers A character vector containing the names of servers.
+#' @param all_classes A character vector containing the class types corresponding
+#' to each server.
+#' @return This function does not return a value. It prints the server-class pairs
+#' to the console as a bulleted list.
+#' @importFrom cli cli_ul cli_li cli_end
+#' @noRd
+print_all_classes <- function(all_servers, all_classes) {
+  combined <- paste(all_servers, all_classes, sep = ": ")
+  cli_ul()
+  for (i in 1:length(combined)) {
+    cli_li("{combined[i]}")
+  }
+  cli_end()
+}
+
+#' Ask Question and Wait for Class Response
+#'
+#' Prompts the user with a question and waits for a response related to class decisions.
+#'
+#' @param question The question to ask the user.
+#' @return The user's decision.
+#' @importFrom cli cli_text cli_alert_warning cli_abort
+#' @noRd
+ask_question_wait_response_class <- function(var, newobj, datasources) {
+  readline <- NULL
+  ask_question_class(var)
+  answer <- readline()
+  if (answer == "6") {
+    DSI::datashield.aggregate(datasources, call("rmDS", newobj))
+    cli_abort("Aborted `ds.dataFrameFill`", .call = NULL)
+  } else if (!answer %in% as.character(1:5)) {
+    cli_text("")
+    cli_alert_warning("Invalid input. Please try again.")
+    cli_text("")
+    ask_question_wait_response_class(var, newobj, datasources)
+  } else {
+    return(answer)
+  }
+}
+
+#' Prompt User for Class Conversion Options
+#'
+#' This function prompts the user with options to convert a variable to a specific class (e.g., factor, integer, numeric, character, or logical).
+#' The function provides a list of class conversion options for the specified variable and includes an option to cancel the operation.
+#'
+#' @param var The name of the variable for which the user is prompted to select a class conversion option.
+#'
+#' @importFrom cli cli_alert_info cli_ol
+#' @return None. This function is used for prompting the user and does not return a value.
+#' @examples
+#' ask_question("variable_name")
+#' @noRd
+ask_question_class <- function(var) {
+  cli_alert_info("Would you like to:")
+  class_options <- c("a factor", "an integer", "numeric", "a character", "a logical vector")
+  class_message <- paste0("Convert `{var}` to ", class_options, " in all studies")
+  cli_ol(
+    c(class_message, "Cancel `ds.dataFrameFill` operation")
+  )
+}
+
+#' Fix Variable Classes
+#'
+#' Applies the user's class decisions to fix the classes of variables across different data sources.
+#'
+#' @param df.name The name of the DataFrame.
+#' @param different_classes A list of variables with class conflicts.
+#' @param class_decisions The decisions made by the user.
+#' @param newobj The name of the new DataFrame.
+#' @param datasources Data sources from which to aggregate data.
+#' @return None. Updates the DataFrame with consistent variable classes.
+#' @noRd
+.fix_classes <- function(df.name, different_classes, class_decisions, newobj, datasources) {
+  cally <- call("fixClassDS", df.name, different_classes, class_decisions)
+  datashield.assign(datasources, newobj, cally)
+}
+
+#' Get Unique Columns from Data Sources
+#'
+#' Retrieves all unique columns from the data sources.
+#'
+#' @param col_names A list of column names.
+#' @return A vector of unique column names.
+#' @noRd
+.get_unique_cols <- function(col_names) {
+  return(
+    unique(
+      unlist(col_names)
+    )
+  )
+}
+
+#' Add Missing Columns to DataFrame
+#'
+#' Adds any missing columns to the DataFrame to ensure all columns are present across data sources.
+#'
+#' @param df.name The name of the DataFrame.
+#' @param unique_cols A vector of unique column names.
+#' @param newobj The name of the new DataFrame.
+#' @param datasources Data sources from which to aggregate data.
+#' @return None. Updates the DataFrame with added columns.
+#' @noRd
+.add_missing_cols_to_df <- function(df.name, cols_to_add_if_missing, newobj, datasources) {
+  cally <- call("fixColsDS", df.name, cols_to_add_if_missing)
+  datashield.assign(datasources, newobj, cally)
+}
+
+#' Get Added Columns
+#'
+#' Compares the old and new column names and identifies newly added columns.
+#'
+#' @param old_names A list of old column names.
+#' @param new_names A list of new column names.
+#' @importFrom purrr pmap
+#' @return A list of added column names.
+#' @noRd
+.get_added_cols <- function(old_names, new_names) {
+  list(old_names, new_names) %>%
+    pmap(function(.x, .y) {
+      .y[!.y %in% .x]
+    })
+}
+
+#' Identify Factor Variables
+#'
+#' Identifies which variables are factors in the DataFrame.
+#'
+#' @param var_classes A DataFrame containing variable classes.
+#' @return A vector of factor variables.
+#' @noRd
+.identify_factor_vars <- function(var_classes) {
+  return(
+    var_classes %>%
+      dplyr::filter(row_number() == 1) %>%
+      dplyr::select(where(~ . == "factor"))
+  )
+}
+
+#' Get Factor Levels from Data Sources
+#'
+#' Retrieves the levels of factor variables from different data sources.
+#'
+#' @param factor_vars A vector of factor variables.
+#' @param newobj The name of the new DataFrame.
+#' @param datasources Data sources from which to aggregate data.
+#' @return A list of factor levels.
+#' @noRd
+.get_factor_levels <- function(factor_vars, df, datasources) {
+  cally <- call("getAllLevelsDS", df, names(factor_vars))
+  return(datashield.aggregate(datasources, cally))
+}
+
+#' Identify Factor Level Conflicts
+#'
+#' Identifies conflicts in factor levels across different data sources.
+#'
+#' @param factor_levels A list of factor levels.
+#' @return A list of variables with level conflicts.
+#' @importFrom purrr map_lgl pmap_lgl
+#' @noRd
+.identify_level_conflicts <- function(factor_levels) {
+  levels <- factor_levels %>%
+    pmap_lgl(function(...) {
+      args <- list(...)
+      !all(map_lgl(args[-1], ~ identical(.x, args[[1]])))
+    })
+
+  return(names(levels[levels == TRUE]))
+}
+
+#' Ask Question and Wait for Response on Factor Levels
+#'
+#' Prompts the user with options for resolving factor level conflicts and waits for a response.
+#'
+#' @param level_conflicts A list of variables with factor level conflicts.
+#' @return The user's decision.
+#' @noRd
+ask_question_wait_response_levels <- function(level_conflicts, newobj, datasources) {
+  .make_levels_message(level_conflicts)
+  answer <- readline()
+  if (answer == "3") {
+    DSI::datashield.aggregate(datasources, call("rmDS", newobj))
+    cli_abort("Aborted `ds.dataFrameFill`", .call = NULL)
+  } else if (!answer %in% as.character(1:2)) {
+    cli_alert_warning("Invalid input. Please try again.")
+    cli_alert_info("")
+    .make_levels_message(level_conflicts)
+    return(ask_question_wait_response_levels(level_conflicts, newobj, datasources))
+  } else {
+    return(answer)
+  }
+}
+
+#' Make Factor Level Conflict Message
+#'
+#' Creates a message to alert the user about factor level conflicts and prompt for action.
+#'
+#' @param level_conflicts A list of variables with factor level conflicts.
+#' @importFrom cli cli_alert_warning cli_alert_info cli_ol
+#' @return None. Prints the message to the console.
+#' @noRd
+.make_levels_message <- function(level_conflicts) {
+  cli_alert_warning("Warning: factor variables {level_conflicts} do not have the same levels in all studies")
+  cli_alert_info("Would you like to:")
+  cli_ol(c("Create the missing levels where they are not present", "Do nothing", "Cancel `ds.dataFrameFill` operation"))
+}
+
+#' Get Unique Factor Levels
+#'
+#' Retrieves the unique factor levels for variables with conflicts.
+#'
+#' @param factor_levels A list of factor levels.
+#' @param level_conflicts A list of variables with level conflicts.
+#' @importFrom purrr pmap
+#' @return A list of unique factor levels.
+#' @noRd
+.get_unique_levels <- function(factor_levels, level_conflicts) {
+  unique_levels <- factor_levels %>%
+    map(~ .[level_conflicts]) %>%
+    pmap(function(...) {
+      as.character(c(...))
+    }) %>%
+    map(~ unique(.))
+  return(unique_levels)
+}
+
+#' Set Factor Levels in DataFrame
+#'
+#' Applies the unique factor levels to the DataFrame.
+#'
+#' @param newobj The name of the new DataFrame.
+#' @param unique_levels A list of unique factor levels.
+#' @param datasources Data sources from which to aggregate data.
+#' @return None. Updates the DataFrame with the new factor levels.
+#' @noRd
+.set_factor_levels <- function(df, unique_levels, datasources) {
+  cally <- call("fixLevelsDS", df, names(unique_levels), unique_levels)
+  datashield.assign(datasources, df, cally)
+}
+
+#' Print Out Summary Messages
+#'
+#' Prints summary messages regarding the filled DataFrame, including added columns, class decisions, and factor level adjustments.
+#'
+#' @param added_cols A list of added columns.
+#' @param class_decisions A vector of class decisions.
+#' @param different_classes A list of variables with class conflicts.
+#' @param unique_levels A list of unique factor levels.
+#' @param level_conflicts A list of variables with level conflicts.
+#' @param levels_decision The decision made regarding factor levels.
+#' @param newobj The name of the new DataFrame.
+#' @importFrom cli cli_text
+#' @return None. Prints messages to the console.
+#' @noRd
+.print_out_messages <- function(added_cols, class_decisions, different_classes, unique_levels,
+                                level_conflicts, levels_decision, newobj) {
+  .print_var_recode_message(added_cols, newobj)
+
+  if (length(different_classes) > 0) {
+    .print_class_recode_message(class_decisions, different_classes, newobj)
+    cli_text("")
+  }
+
+  if (length(level_conflicts) > 0 & levels_decision == "1") {
+    .print_levels_recode_message(unique_levels, newobj)
+  }
+}
+
+#' Print Variable Recode Message
+#'
+#' Prints a message summarizing the columns that were added to the DataFrame.
+#'
+#' @param added_cols A list of added columns.
+#' @param newobj The name of the new DataFrame.
+#' @importFrom cli cli_text
+#' @return None. Prints the message to the console.
+#' @noRd
+.print_var_recode_message <- function(added_cols, newobj) {
+  cli_alert_success("The following variables have been added to {newobj}:")
+  added_cols_neat <- added_cols %>% map(~ ifelse(length(.) == 0, "", .))
+  var_message <- paste0(names(added_cols), " --> ", added_cols_neat)
+  for (i in 1:length(var_message)) {
+    cli_alert_info("{var_message[[i]]}")
+  }
+  cli_text("")
+}
+
+#' Print Class Recode Message
+#'
+#' Prints a message summarizing the class decisions that were made for variables with conflicts.
+#'
+#' @param class_decisions A vector of class decisions.
+#' @param different_classes A list of variables with class conflicts.
+#' @param newobj The name of the new DataFrame.
+#' @importFrom cli cli_alert_info cli_alert_success
+#' @return None. Prints the message to the console.
+#' @noRd
+.print_class_recode_message <- function(class_decisions, different_classes, newobj) {
+  choice_neat <- .change_choice_to_string(class_decisions)
+  class_message <- paste0(different_classes, " --> ", choice_neat)
+  cli_alert_success("The following classes have been set for all datasources in {newobj}: ")
+  for (i in 1:length(class_message)) {
+    cli_alert_info("{class_message[[i]]}")
+  }
+}
+
+#' Convert Class Decision Code to String
+#'
+#' This function converts a numeric class decision input (represented as a string)
+#' into the corresponding class type string (e.g., "factor", "integer", "numeric", etc.).
+#' @param class_decision A string representing the class decision. It should be
+#' one of the following values: "1", "2", "3", "4", or "5".
+#' @return A string representing the class type corresponding to the input:
+#' "factor", "integer", "numeric", "character", or "logical".
+#' @noRd
+.change_choice_to_string <- function(class_decision) {
+  case_when(
+    class_decision == "1" ~ "factor",
+    class_decision == "2" ~ "integer",
+    class_decision == "3" ~ "numeric",
+    class_decision == "4" ~ "character",
+    class_decision == "5" ~ "logical"
+  )
+}
+
+#' Print Factor Levels Recode Message
+#'
+#' Prints a message summarizing the factor level decisions that were made for variables with conflicts.
+#'
+#' @param unique_levels A list of unique factor levels.
+#' @param newobj The name of the new DataFrame.
+#' @importFrom cli cli_alert_success cli_alert_info
+#' @return None. Prints the message to the console.
+#' @noRd
+.print_levels_recode_message <- function(unique_levels, newobj) {
+  levels_message <- .make_levels_recode_message(unique_levels)
+  cli_alert_success("The following levels have been set for all datasources in {newobj}: ")
+  for (i in 1:length(levels_message)) {
+    cli_alert_info("{levels_message[[i]]}")
+  }
+}
+
+#' Make Levels Recode Message
+#'
+#' Creates a message to alert the user about factor level recoding.
+#'
+#' @param unique_levels A list of unique factor levels.
+#' @return A formatted string summarizing the level recoding.
+#' @importFrom purrr pmap
+#' @noRd
+.make_levels_recode_message <- function(unique_levels) {
+  return(
+    list(names(unique_levels), unique_levels) %>%
+      pmap(function(.x, .y) {
+        paste0(.x, " --> ", paste0(.y, collapse = ", "))
+      })
+  )
+}
+
+#' Handle Warnings for Class Conversion Issues
+#'
+#' This function iterates through a list of warnings generated during class conversion and
+#' triggers a danger alert if any warnings indicate that the conversion has resulted in `NA` values.
+#'
+#' @param fill_warnings A list or vector of warning messages generated during class conversion.
+#'   If any warnings indicate that `NA` values were introduced, a danger alert will be displayed.
+#' @return NULL. This function is used for its side effects of printing alerts.
+#' @importFrom cli cli_alert_danger
+#' @importFrom stringr str_detect
+#' @noRd
+.handle_warnings <- function(fill_warnings) {
+  if(length(fill_warnings) > 0) {
+    for(i in 1:length(fill_warnings)) {
+      if(str_detect(fill_warnings[[i]], "NAs introduced by coercion")){
+        cli_alert_danger("Class conversion resulted in the creation of NA values.")
+      } else {
+        cli_alert_danger(fill_warnings[[i]])
+      }
+    }
+  }
+}
+
+#' Print Warning for Class Conflicts in Data Conversion
+#'
+#' This function displays a warning when there are class conflicts in a dataset that may have resulted
+#' from incompatible class changes during data conversion. It alerts users to verify column classes,
+#' as incompatible changes could corrupt the data.
+#'
+#' @param class_conflicts A list or vector of conflicting classes identified during conversion.
+#' @param fix_class A string indicating the user's choice for fixing class conflicts. Typically,
+#'   this is "ask" if the user is prompted to confirm class changes.
+#' @param class_decisions A vector of decisions made for class conversions. When any value is not
+#'   "6", it indicates unresolved class conflicts.
+#' @return NULL. This function is used for its side effects of printing alerts.
+#' @importFrom cli cli_alert_warning
+#' @noRd
+.print_class_warning <- function(class_conflicts, fix_class, class_decisions) {
+  if(length(class_conflicts) > 0 & fix_class == "ask" & all(!class_decisions == "6")) {
+    cli_alert_warning("Please check all columns that have changed class. Not all class changes
+    are compatible with all data types, so this could have corrupted the data.")
+  }
+}
+
+readline <- NULL

From 2d251dd706390e32dc929c620dd9e27e1df21901 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Mon, 21 Jul 2025 16:50:48 +0200
Subject: [PATCH 02/18] test: added tests for standardise function

---
 tests/testthat/test-smk-standardiseDf.R | 743 ++++++++++++++++++++++++
 1 file changed, 743 insertions(+)
 create mode 100644 tests/testthat/test-smk-standardiseDf.R

diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R
new file mode 100644
index 00000000..92ebd7b1
--- /dev/null
+++ b/tests/testthat/test-smk-standardiseDf.R
@@ -0,0 +1,743 @@
+suppressWarnings(library(DSLite))
+library(purrr)
+library(dplyr)
+library(dsBase)
+library(dsBaseClient)
+library(purrr)
+library(dsTidyverse)
+# devtools::load_all("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsTidyverse")
+options("datashield.return_errors" = TRUE)
+
+df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 10)
+
+df_1 <- df %>% select(1:5, 6, 9, 12, 15, 18) %>%
+  mutate(
+    fac_col2 = factor(fac_col2, levels = c("Blue", "Green")),
+    fac_col4 = as.numeric(fac_col4),
+    fac_col5 = as.logical(fac_col5))
+
+df_2 <- df %>% select(1:5, 7, 10, 13, 16, 19) %>%
+  mutate(
+    fac_col2 = factor(fac_col2, levels = c("Green", "Red")),
+    fac_col3 = factor(fac_col3, levels = "No"),
+    fac_col4 = as.character(fac_col4),
+    fac_col5 = as.integer(fac_col5))
+
+df_3 <- df %>% select(1:5, 11, 14, 17, 20) %>%
+  mutate(
+    fac_col2 = factor(fac_col2, levels = "Blue"),
+    fac_col3 = factor(fac_col3, levels = "Yes"))
+
+options(datashield.env = environment())
+
+dslite.server <- newDSLiteServer(
+  tables = list(
+    df_1 = df_1,
+    df_2 = df_2,
+    df_3 = df_3
+  )
+)
+
+dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse", "dsDanger")))
+dslite.server$aggregateMethod("getClassAllColsDS", "getClassAllColsDS")
+dslite.server$assignMethod("fixClassDS", "fixClassDS")
+dslite.server$assignMethod("fixColsDS", "fixColsDS")
+dslite.server$aggregateMethod("getAllLevelsDS", "getAllLevelsDS")
+dslite.server$assignMethod("fixLevelsDS", "fixLevelsDS")
+
+builder <- DSI::newDSLoginBuilder()
+
+builder$append(
+  server = "server_1",
+  url = "dslite.server",
+  driver = "DSLiteDriver"
+)
+
+builder$append(
+  server = "server_2",
+  url = "dslite.server",
+  driver = "DSLiteDriver"
+)
+
+builder$append(
+  server = "server_3",
+  url = "dslite.server",
+  driver = "DSLiteDriver"
+)
+
+logindata <- builder$build()
+conns <- DSI::datashield.login(logins = logindata, assign = FALSE)
+
+datashield.assign.table(conns["server_1"], "df", "df_1")
+datashield.assign.table(conns["server_2"], "df", "df_2")
+datashield.assign.table(conns["server_3"], "df", "df_3")
+
+datashield.assign.table(conns["server_1"], "df_ident", "df_1")
+datashield.assign.table(conns["server_2"], "df_ident", "df_1")
+datashield.assign.table(conns["server_3"], "df_ident", "df_1")
+
+####################################################################################################
+# Code that will be used in multiple tests
+####################################################################################################
+var_class <- .get_var_classes("df", datasources = conns)
+
+class_conflicts <- .identify_class_conflicts(var_class)
+
+different_classes <- c("fac_col4", "fac_col5")
+
+class_decisions <- c("1", "5")
+
+.fix_classes(
+  df.name = "df",
+  different_classes = different_classes,
+  class_decisions = class_decisions,
+  newobj = "new_classes",
+  datasources = conns)
+
+cols_to_set <- c(
+  "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col9", "col12",
+  "col15", "col18", "fac_col7", "fac_col10", "col13", "col16", "col19", "col11", "col14", "col17",
+  "col20")
+
+.add_missing_cols_to_df(
+  df.name = "df",
+  cols_to_add_if_missing = cols_to_set,
+  newobj = "with_new_cols",
+  datasources = conns)
+
+old_cols <- ds.colnames("df")
+
+new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
+              "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
+              "fac_col6", "fac_col7", "fac_col9")
+
+new_cols_servers <- list(
+  server_1 = new_cols,
+  server_2 = new_cols,
+  server_3 = new_cols
+)
+
+added_cols <- .get_added_cols(old_cols, new_cols_servers)
+
+var_class_fact <- .get_var_classes("with_new_cols", datasources = conns)
+
+fac_vars <- .identify_factor_vars(var_class_fact)
+
+fac_levels <- .get_factor_levels(fac_vars, "with_new_cols", conns)
+
+level_conflicts <- .identify_level_conflicts(fac_levels)
+
+unique_levs <- .get_unique_levels(fac_levels, level_conflicts)
+
+####################################################################################################
+# Tests
+####################################################################################################
+test_that(".stop_if_cols_identical throws error if columns are identical", {
+
+  identical_cols <- list(
+    c("col1", "col2", "col3"),
+    c("col1", "col2", "col3"),
+    c("col1", "col2", "col3")
+  )
+
+  expect_error(
+    .stop_if_cols_identical(identical_cols),
+    "Columns are identical in all data frames: nothing to fill"
+  )
+
+})
+
+test_that(".stop_if_cols_identical doesn't throw error if data frames have different columns", {
+
+  different_cols <- list(
+    c("col1", "col2", "col3"),
+    c("col1", "col2", "col4"),
+    c("col1", "col5", "col3")
+  )
+
+  expect_silent(
+    .stop_if_cols_identical(different_cols)
+  )
+
+})
+
+test_that(".get_var_classes returns correct output", {
+
+  expected <- tibble(
+    server = c("server_1", "server_2", "server_3"),
+    fac_col1 = c("factor", "factor", "factor"),
+    fac_col2 = c("factor", "factor", "factor"),
+    fac_col3 = c("factor", "factor", "factor"),
+    fac_col4 = c("numeric", "character", "factor"),
+    fac_col5 = c("logical", "integer", "factor"),
+    fac_col6 = c("factor", NA, NA),
+    fac_col9 = c("factor", NA, NA),
+    col12 = c("numeric", NA, NA),
+    col15 = c("integer", NA, NA),
+    col18 = c("logical", NA, NA),
+    fac_col7 = c(NA, "factor", NA),
+    fac_col10 = c(NA, "factor", NA),
+    col13 = c(NA, "character", NA),
+    col16 = c(NA, "numeric", NA),
+    col19 = c(NA, "integer", NA),
+    col11 = c(NA, NA, "integer"),
+    col14 = c(NA, NA, "logical"),
+    col17 = c(NA, NA, "character"),
+    col20 = c(NA, NA, "numeric")
+  )
+
+  expect_equal(var_class, expected)
+
+})
+
+test_that(".identify_class_conflicts returns correct output", {
+  expected <- list(
+    fac_col4 = c("numeric", "character", "factor"),
+    fac_col5 = c("logical", "integer", "factor")
+  )
+
+  expect_equal(class_conflicts, expected)
+
+})
+
+test_that("ask_question displays the correct prompt", {
+  expect_snapshot(ask_question_class("my_var"))
+})
+
+test_that("ask_question_wait_response_class continues with valid response", {
+  expect_equal(
+    with_mocked_bindings(
+      ask_question_wait_response_class("a variable"),
+      ask_question_class = function(var) "A question",
+      readline = function() "1"
+    ), "1"
+  )
+})
+
+test_that("ask_question_wait_response_class throws error if option 6 selected", {
+  expect_error(
+    with_mocked_bindings(
+      ask_question_wait_response_class("a variable"),
+      ask_question_class = function(var) "A question",
+      readline = function() "6")
+  )
+})
+
+test_that("print_all_classes prints the correct message", {
+  expect_snapshot(
+    print_all_classes(
+      c("server_1", "server_2", "server_3"),
+      c("numeric", "factor", "integer")
+    )
+  )
+})
+
+test_that("prompt_user_class_decision function properly", {
+  expect_message(
+    with_mocked_bindings(
+      prompt_user_class_decision(
+        var = "test_col",
+        servers = c("server_1", "server_2", "server_3"),
+        classes = c("numeric", "character", "factor"),
+        newobj = "test_df",
+        datasources = datasources),
+      ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
+    )
+  )
+
+  expect_equal(
+    with_mocked_bindings(
+      prompt_user_class_decision(
+        var = "test_col",
+        servers = c("server_1", "server_2", "server_3"),
+        classes = c("numeric", "character", "factor"),
+        newobj = "test_df",
+        datasources = datasources),
+      ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
+    ),
+    "test_col"
+  )
+})
+
+test_that("prompt_user_class_decision_all_vars returns correct value", {
+  expect_equal(
+    with_mocked_bindings(
+      prompt_user_class_decision_all_vars(
+        vars = c("test_var_1", "test_var_2"),
+        all_servers = c("server_1", "server_2", "server_3"),
+        all_classes = tibble(
+          test_var_1 = c("numeric", "character", "factor"),
+          test_var_2 = c("logical", "integer", "factor")
+        ),
+        "test_df",
+        conns),
+      prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1"
+    ),
+    c("1", "1")
+  )
+})
+
+test_that(".fix_classes sets the correct classes in serverside data frame", {
+
+  expect_equal(
+    unname(unlist(ds.class("df$fac_col4"))),
+    c("numeric", "character", "factor")
+  )
+
+  expect_equal(
+    unname(unlist(ds.class("df$fac_col5"))),
+    c("logical", "integer", "factor")
+  )
+
+  expect_equal(
+    unname(unlist(ds.class("new_classes$fac_col4"))),
+    rep("factor", 3)
+  )
+
+  expect_equal(
+    unname(unlist(ds.class("new_classes$fac_col5"))),
+    rep("logical", 3)
+  )
+
+})
+
+test_that(".get_unique_cols extracts unique names from a list", {
+  expect_equal(
+    .get_unique_cols(
+      list(
+        server_1 = c("col_1", "col_2", "col_3"),
+        server_1 = c("col_1", "col_2", "col_4"),
+        server_1 = c("col_2", "col_3", "col_3", "col_5")
+      )
+    ),
+    c("col_1", "col_2", "col_3", "col_4", "col_5")
+  )
+})
+
+test_that(".add_missing_cols_to_df correctly creates missing columns", {
+
+  new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
+                "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
+                "fac_col6", "fac_col7", "fac_col9")
+
+  observed <- ds.colnames("with_new_cols")
+
+  expected <- list(
+    server_1 = new_cols,
+    server_2 = new_cols,
+    server_3 = new_cols
+  )
+
+  expect_equal(observed, expected)
+})
+
+test_that(".get_added_cols correctly identifies newly added columns", {
+
+  expect_equal(
+    added_cols,
+    list(
+      server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"),
+      server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"),
+      server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9")
+    )
+  )
+})
+
+test_that(".identify_factor_vars correctly identifies factor variables", {
+
+
+
+  var_class_fact <- var_class |> dplyr::select(server: col18)
+  expect_equal(
+    names(fac_vars),
+    c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9")
+  )
+})
+
+test_that(".get_factor_levels correctly identifies factor levels", {
+  expected <- list(
+    server_1 = list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Blue", "Green"),
+      fac_col3 = c("No", "Yes"),
+      fac_col6 = c("Bird", "Cat", "Dog"),
+      fac_col9 = c("False", "True")
+    ),
+    server_2 = list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Green", "Red"),
+      fac_col3 = c("No"),
+      fac_col6 = NULL,
+      fac_col9 = NULL
+    ),
+    server_3 = list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Blue"),
+      fac_col3 = c("Yes"),
+      fac_col6 = NULL,
+      fac_col9 = NULL
+    )
+  )
+
+  expect_equal(fac_levels, expected)
+})
+
+test_that(".identify_level_conflicts correctly factor columns with different levels", {
+  expect_equal(
+    .identify_level_conflicts(fac_levels),
+    c("fac_col2", "fac_col3", "fac_col6", "fac_col9")
+  )
+
+})
+
+test_that("ask_question_wait_response_levels continues with valid response", {
+  expect_equal(
+    with_mocked_bindings(
+      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+      readline = function() "1"
+    ), "1"
+  )
+
+  expect_equal(
+    with_mocked_bindings(
+      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+      readline = function() "1"
+    ), "1"
+  )
+
+})
+
+test_that("ask_question_wait_response_levels aborts with response of 3", {
+  expect_error(
+    with_mocked_bindings(
+      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+      readline = function() "3")
+  )
+})
+
+test_that(".make_levels_message makes correct message", {
+  expect_snapshot(.make_levels_message(level_conflicts))
+})
+
+test_that(".get_unique_levels extracts all possible levels", {
+
+  expected <- list(
+    fac_col2 = c("Blue", "Green", "Red"),
+    fac_col3 = c("No", "Yes"),
+    fac_col6 = c("Bird", "Cat", "Dog"),
+    fac_col9 = c("False", "True")
+  )
+
+  expect_equal(unique_levs, expected)
+
+})
+
+test_that(".set_factor_levels sets levels correctly", {
+  .set_factor_levels("with_new_cols", unique_levs,  conns)
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]),
+    list(
+      server_1 = c("Blue", "Green", "Red"),
+      server_2 = c("Blue", "Green", "Red"),
+      server_3 = c("Blue", "Green", "Red")
+    )
+  )
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]),
+    list(
+      server_1 = c("No", "Yes"),
+      server_2 = c("No", "Yes"),
+      server_3 = c("No", "Yes")
+    )
+  )
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]),
+    list(
+      server_1 = c("Bird", "Cat", "Dog"),
+      server_2 = c("Bird", "Cat", "Dog"),
+      server_3 = c("Bird", "Cat", "Dog")
+    )
+  )
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]),
+    list(
+      server_1 = c("False", "True"),
+      server_2 = c("False", "True"),
+      server_3 = c("False", "True")
+    )
+  )
+
+})
+
+test_that(".print_var_recode_message prints the correct message", {
+  expect_snapshot(.print_var_recode_message(added_cols, "test_df"))
+})
+
+test_that(".print_class_recode_message prints the correct message", {
+  expect_snapshot(
+    .print_class_recode_message(class_decisions, different_classes, "test_df")
+  )
+})
+
+test_that(".print_levels_recode_message prints the correct message", {
+  expect_snapshot(
+    .print_levels_recode_message(unique_levs, "test_df")
+  )
+})
+
+test_that(".make_levels_recode_message prints the correct message", {
+  expect_equal(
+    .make_levels_recode_message(unique_levs),
+    list(
+      "fac_col2 --> Blue, Green, Red",
+      "fac_col3 --> No, Yes",
+      "fac_col6 --> Bird, Cat, Dog",
+      "fac_col9 --> False, True"
+    )
+  )
+})
+
+test_that(".print_out_messages prints the correct messages", {
+  expect_snapshot(
+    .print_out_messages(
+      added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df"
+    )
+  )
+})
+
+test_that(".change_choice_to_string converts numeric class codes to strings correctly", {
+  expect_equal(.change_choice_to_string("1"), "factor")
+  expect_equal(.change_choice_to_string("2"), "integer")
+  expect_equal(.change_choice_to_string("3"), "numeric")
+  expect_equal(.change_choice_to_string("4"), "character")
+  expect_equal(.change_choice_to_string("5"), "logical")
+})
+
+test_that("ds.standardiseDf doesn't run if dataframes are identical", {
+  expect_error(
+    ds.standardiseDf(
+      df = "df_ident",
+      newobj = "test_fill"
+    ),
+    "Columns are identical"
+  )
+})
+
+test_that("ds.standardiseDf works when called directly and class conversion is factor", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "factor"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "integer"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "integer"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "numeric"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "numeric"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is character", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "character"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "character"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "logical"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "logical"
+  )
+})
+
+test_that("ds.standardiseDf changes levels if this option is selected", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1"
+  )
+
+  levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels)
+  levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels)
+  levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels)
+  levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels)
+  levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels)
+  levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels)
+
+  expect_equal(
+    levels_2,
+    list(
+      server_1 = c("Blue", "Green", "Red"),
+      server_2 = c("Blue", "Green", "Red"),
+      server_3 = c("Blue", "Green", "Red")
+    )
+  )
+
+  expect_equal(
+    levels_3,
+    list(
+      server_1 = c("No", "Yes"),
+      server_2 = c("No", "Yes"),
+      server_3 = c("No", "Yes")
+    )
+  )
+
+  expect_equal(
+    levels_4,
+    list(
+      server_1 = c("1", "2", "3", "A", "B", "C"),
+      server_2 = c("1", "2", "3", "A", "B", "C"),
+      server_3 = c("1", "2", "3", "A", "B", "C")
+    )
+  )
+
+  expect_equal(
+    levels_5,
+    list(
+      server_1 = c("1", "2", "3", "One", "Three", "Two"),
+      server_2 = c("1", "2", "3", "One", "Three", "Two"),
+      server_3 = c("1", "2", "3", "One", "Three", "Two")
+    )
+  )
+
+  expect_equal(
+    levels_6,
+    list(
+      server_1 = c("Bird", "Cat", "Dog"),
+      server_2 = c("Bird", "Cat", "Dog"),
+      server_3 = c("Bird", "Cat", "Dog")
+    )
+  )
+
+  expect_equal(
+    levels_9,
+    list(
+      server_1 = c("False", "True"),
+      server_2 = c("False", "True"),
+      server_3 = c("False", "True")
+    )
+  )
+
+})
+
+test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", {
+  expect_error(
+    ds.standardiseDf(
+      df = "df",
+      newobj = "shouldnt_exist",
+      fix_class = "no"
+    ),
+    "Variables do not have the same class in all studies"
+  )
+
+  expect_equal(
+    ds.exists("shouldnt_exist")[[1]],
+    FALSE
+  )
+})
+
+test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", {
+  expect_error(
+    with_mocked_bindings(
+      ds.standardiseDf(
+        df = "df",
+        newobj = "shouldnt_exist",
+        fix_levels = "no"
+      ),
+      prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1")
+    ),
+    "Factor variables do not have the same levels in all studies"
+  )
+
+  expect_equal(
+    ds.exists("shouldnt_exist")[[1]],
+    FALSE
+  )
+})
+
+
+## 9. Handle incorrect response for level fix
+
+
+
+

From 5fac73bdbe9836a20a2066e1483d6c35f9ffca26 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Mon, 21 Jul 2025 16:56:02 +0200
Subject: [PATCH 03/18] docs: updated following check

---
 DESCRIPTION             |  9 +++++++--
 NAMESPACE               | 20 ++++++++++++++++++++
 man/ds.standardiseDf.Rd | 37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 man/ds.standardiseDf.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 882df32c..102604cd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -54,7 +54,11 @@ Imports:
     gridExtra,
     data.table,
     methods,
-    dplyr
+    dplyr,
+    cli,
+    purrr,
+    stringr, 
+    assertthat
 Suggests:
     lme4,
     httr,
@@ -64,6 +68,7 @@ Suggests:
     DescTools,
     DSOpal,
     DSMolgenisArmadillo,
-    DSLite
+    DSLite,
+    assertthat
 RoxygenNote: 7.3.2
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
index d737d5e6..7919f665 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -104,6 +104,7 @@ export(ds.seq)
 export(ds.setSeed)
 export(ds.skewness)
 export(ds.sqrt)
+export(ds.standardiseDf)
 export(ds.summary)
 export(ds.table)
 export(ds.table1D)
@@ -117,7 +118,26 @@ export(ds.var)
 export(ds.vectorCalc)
 import(DSI)
 import(data.table)
+import(dplyr)
+importFrom(DSI,datashield.aggregate)
+importFrom(DSI,datashield.assign)
+importFrom(assertthat,assert_that)
+importFrom(cli,cli_abort)
+importFrom(cli,cli_alert_danger)
+importFrom(cli,cli_alert_info)
+importFrom(cli,cli_alert_success)
+importFrom(cli,cli_alert_warning)
+importFrom(cli,cli_end)
+importFrom(cli,cli_li)
+importFrom(cli,cli_ol)
+importFrom(cli,cli_text)
+importFrom(cli,cli_ul)
+importFrom(purrr,map)
+importFrom(purrr,map_lgl)
+importFrom(purrr,pmap)
+importFrom(purrr,pmap_lgl)
 importFrom(stats,as.formula)
 importFrom(stats,na.omit)
 importFrom(stats,ts)
 importFrom(stats,weighted.mean)
+importFrom(stringr,str_detect)
diff --git a/man/ds.standardiseDf.Rd b/man/ds.standardiseDf.Rd
new file mode 100644
index 00000000..4f544f1d
--- /dev/null
+++ b/man/ds.standardiseDf.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ds.standardiseDf.R
+\name{ds.standardiseDf}
+\alias{ds.standardiseDf}
+\title{Fill DataFrame with Missing Columns and Adjust Classes}
+\usage{
+ds.standardiseDf(
+  df.name = NULL,
+  newobj = NULL,
+  fix_class = "ask",
+  fix_levels = "ask",
+  datasources = NULL
+)
+}
+\arguments{
+\item{df.name}{Name of the input DataFrame to fill.}
+
+\item{newobj}{Name of the new DataFrame object created after filling.}
+
+\item{fix_class}{Character, determines behaviour if class of variables is not the same in all
+studies. Option "ask" (default) provides the user with a prompt asking if they want to set the
+class across all studies, option "no" will throw an error if class conflicts are present.}
+
+\item{fix_levels}{Character, determines behaviour if levels of factor variables is not the same
+in all studies. Option "ask" (default) provides the user with a prompt asking if they want to set
+the levels of factor variables to be the same across all studies, whilst option "no" will throw
+an error if factor variables do not have the same class.}
+
+\item{datasources}{Data sources from which to aggregate data. Default is `NULL`.}
+}
+\value{
+The filled DataFrame with added columns and adjusted classes or factor levels.
+}
+\description{
+This function fills a given DataFrame by adding missing columns, ensuring consistent column classes, and adjusting factor levels where necessary.
+It performs checks to detect class and factor level conflicts and prompts the user for decisions to resolve these conflicts.
+}

From c2f6c427d6f0f9289101270d8ba8860e7b9e3d58 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 22 Jul 2025 09:58:11 +0200
Subject: [PATCH 04/18] removed dependency on stringr

---
 DESCRIPTION          | 3 +--
 NAMESPACE            | 1 -
 R/ds.standardiseDf.R | 7 +++----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 102604cd..a3a4ceaf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -57,13 +57,12 @@ Imports:
     dplyr,
     cli,
     purrr,
-    stringr, 
     assertthat
 Suggests:
     lme4,
     httr,
     tibble,
-    testthat,
+    testthat (>= 3.0.0),
     e1071,
     DescTools,
     DSOpal,
diff --git a/NAMESPACE b/NAMESPACE
index 7919f665..59da737b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -140,4 +140,3 @@ importFrom(stats,as.formula)
 importFrom(stats,na.omit)
 importFrom(stats,ts)
 importFrom(stats,weighted.mean)
-importFrom(stringr,str_detect)
diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R
index 206375cb..4ba24f90 100644
--- a/R/ds.standardiseDf.R
+++ b/R/ds.standardiseDf.R
@@ -565,12 +565,11 @@ ask_question_wait_response_levels <- function(level_conflicts, newobj, datasourc
 #'   If any warnings indicate that `NA` values were introduced, a danger alert will be displayed.
 #' @return NULL. This function is used for its side effects of printing alerts.
 #' @importFrom cli cli_alert_danger
-#' @importFrom stringr str_detect
 #' @noRd
 .handle_warnings <- function(fill_warnings) {
-  if(length(fill_warnings) > 0) {
-    for(i in 1:length(fill_warnings)) {
-      if(str_detect(fill_warnings[[i]], "NAs introduced by coercion")){
+  if (length(fill_warnings) > 0) {
+    for (i in seq_along(fill_warnings)) {
+      if (grepl("NAs introduced by coercion", fill_warnings[[i]])) {
         cli_alert_danger("Class conversion resulted in the creation of NA values.")
       } else {
         cli_alert_danger(fill_warnings[[i]])

From 574e54d9b4e780fda08d24f16cd01f3148443c81 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 22 Jul 2025 09:58:47 +0200
Subject: [PATCH 05/18] added helper file, saved snapshots

---
 tests/testthat/_snaps/smk-standardiseDf.md |  86 ++++++++
 tests/testthat/helpers.R                   | 218 +++++++++++++++++++++
 tests/testthat/test-smk-standardiseDf.R    |   3 +
 3 files changed, 307 insertions(+)
 create mode 100644 tests/testthat/_snaps/smk-standardiseDf.md
 create mode 100644 tests/testthat/helpers.R

diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md
new file mode 100644
index 00000000..ac1558bd
--- /dev/null
+++ b/tests/testthat/_snaps/smk-standardiseDf.md
@@ -0,0 +1,86 @@
+# ask_question displays the correct prompt
+
+    Code
+      ask_question_class("my_var")
+    Message
+      i Would you like to:
+      1. Convert `my_var` to a factor in all studies
+      2. Convert `my_var` to an integer in all studies
+      3. Convert `my_var` to numeric in all studies
+      4. Convert `my_var` to a character in all studies
+      5. Convert `my_var` to a logical vector in all studies
+      6. Cancel `ds.dataFrameFill` operation
+
+# print_all_classes prints the correct message
+
+    Code
+      print_all_classes(c("server_1", "server_2", "server_3"), c("numeric", "factor",
+        "integer"))
+    Message
+      * server_1: numeric
+      * server_2: factor
+      * server_3: integer
+
+# .make_levels_message makes correct message
+
+    Code
+      .make_levels_message(level_conflicts)
+    Message
+      ! Warning: factor variables fac_col2, fac_col3, fac_col6, and fac_col9 do not have the same levels in all studies
+      i Would you like to:
+      1. Create the missing levels where they are not present
+      2. Do nothing
+      3. Cancel `ds.dataFrameFill` operation
+
+# .print_var_recode_message prints the correct message
+
+    Code
+      .print_var_recode_message(added_cols, "test_df")
+    Message
+      v The following variables have been added to test_df:
+      i server_1 --> col11
+      i server_2 --> col11
+      i server_3 --> col12
+      
+
+# .print_class_recode_message prints the correct message
+
+    Code
+      .print_class_recode_message(class_decisions, different_classes, "test_df")
+    Message
+      v The following classes have been set for all datasources in test_df: 
+      i fac_col4 --> factor
+      i fac_col5 --> logical
+
+# .print_levels_recode_message prints the correct message
+
+    Code
+      .print_levels_recode_message(unique_levs, "test_df")
+    Message
+      v The following levels have been set for all datasources in test_df: 
+      i fac_col2 --> Blue, Green, Red
+      i fac_col3 --> No, Yes
+      i fac_col6 --> Bird, Cat, Dog
+      i fac_col9 --> False, True
+
+# .print_out_messages prints the correct messages
+
+    Code
+      .print_out_messages(added_cols, class_decisions, different_classes, unique_levs,
+        level_conflicts, "1", "test_df")
+    Message
+      v The following variables have been added to test_df:
+      i server_1 --> col11
+      i server_2 --> col11
+      i server_3 --> col12
+      
+      v The following classes have been set for all datasources in test_df: 
+      i fac_col4 --> factor
+      i fac_col5 --> logical
+      
+      v The following levels have been set for all datasources in test_df: 
+      i fac_col2 --> Blue, Green, Red
+      i fac_col3 --> No, Yes
+      i fac_col6 --> Bird, Cat, Dog
+      i fac_col9 --> False, True
+
diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R
new file mode 100644
index 00000000..919df923
--- /dev/null
+++ b/tests/testthat/helpers.R
@@ -0,0 +1,218 @@
+#' Create a DSLite login object that can be used for testing
+#'
+#' @param assign_method A string specifying the name of the custom assign method to be added
+#'   to the DSLite server. If `NULL`, no additional assign method is added. Default is `NULL`.
+#' @param aggregate_method A string specifying the name of the custom aggregate method to be
+#'   added to the DSLite server. If `NULL`, no additional aggregate method is added. Default is `NULL`.
+#' @param tables A named list of tables to be made available on the DSLite server. Default is `NULL`.
+#'
+#' @return A DataSHIELD login object containing the necessary connection information for the DSLite server.
+#'
+#' @examples
+#' \dontrun{
+#' # Prepare a DSLite server with default methods and custom assign/aggregate methods
+#' login_data <- .prepare_dslite(
+#'   assign_method = "customAssign",
+#'   aggregate_method = "customAggregate",
+#'   tables = list(mtcars = mtcars, mtcars_group = mtcars_group)
+#'   )
+#'
+#' @importFrom DSLite newDSLiteServer
+#' @importFrom DSI newDSLoginBuilder
+#' @export
+.prepare_dslite <- function(assign_method = NULL, aggregate_method = NULL, tables = NULL) {
+
+  options(datashield.env = environment())
+  dslite.server <- DSLite::newDSLiteServer(tables = tables)
+  dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse")))
+  dslite.server$aggregateMethod("exists", "base::exists")
+  dslite.server$aggregateMethod("classDS", "dsBase::classDS")
+  dslite.server$aggregateMethod("lsDS", "dsBase::lsDS")
+  dslite.server$aggregateMethod("dsListDisclosureSettings", "dsTidyverse::dsListDisclosureSettings")
+
+  if (!is.null(assign_method)) {
+    dslite.server$assignMethod(assign_method, paste0("dsTidyverse::", assign_method))
+  }
+
+  if (!is.null(aggregate_method)) {
+    dslite.server$aggregateMethod(assign_method, paste0("dsTidyverse::", assign_method))
+  }
+
+  builder <- DSI::newDSLoginBuilder()
+  builder$append(server = "server_1", url = "dslite.server", driver = "DSLiteDriver")
+  builder$append(server = "server_2", url = "dslite.server", driver = "DSLiteDriver")
+  builder$append(server = "server_3", url = "dslite.server", driver = "DSLiteDriver")
+  login_data <- builder$build()
+  return(login_data)
+}
+
+#' Create a mixed dataframe with factor and other types of columns
+#'
+#' This function generates a dataframe with a specified number of rows,
+#' factor columns, and other columns (integer, numeric, and string).
+#'
+#' @param n_rows Number of rows in the dataframe. Default is 10,000.
+#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15.
+#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15.
+#'
+#' @return A dataframe with the specified number of rows and columns, containing mixed data types.
+#' @importFrom dplyr bind_cols
+#' @importFrom purrr map_dfc
+#' @examples
+#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
+create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) {
+
+  # Function to create a factor column with defined levels
+  create_factor_column <- function(levels, n = n_rows) {
+    set.seed(123)  # Set seed before sample for reproducibility
+    factor(sample(levels, n, replace = TRUE))
+  }
+
+  # Define factor levels for different columns
+  factor_levels <- list(
+    c("Low", "Medium", "High"),
+    c("Red", "Green", "Blue"),
+    c("Yes", "No"),
+    c("A", "B", "C"),
+    c("One", "Two", "Three"),
+    c("Cat", "Dog", "Bird"),
+    c("Small", "Medium", "Large"),
+    c("Alpha", "Beta", "Gamma"),
+    c("True", "False"),
+    c("Left", "Right"),
+    c("North", "South", "East", "West"),
+    c("Day", "Night"),
+    c("Up", "Down"),
+    c("Male", "Female"),
+    c("Summer", "Winter", "Spring", "Fall")
+  )
+
+  # Create factor columns
+  factor_columns <- map_dfc(factor_levels[1:n_factor_cols], create_factor_column)
+  colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols)
+
+  # Function to create other types of columns
+  create_other_column <- function(type, n = n_rows) {
+    set.seed(123)  # Set seed before sample for reproducibility
+    switch(type,
+           "int" = sample(1:100, n, replace = TRUE),        # Integer column
+           "num" = runif(n, 0, 100),                       # Numeric column
+           "str" = sample(letters, n, replace = TRUE),      # Character column
+           "log" = sample(c(TRUE, FALSE), n, replace = TRUE) # Logical column
+    )
+  }
+
+  # Ensure that each data type is included
+  column_types <- c(
+    "int", "num", "str", "log", "int",
+    "num", "str", "log", "int", "num",
+    "str", "int", "num", "log", "str"
+  )
+
+  # Create other columns with specified types
+  other_columns <- map_dfc(column_types[1:n_other_cols], create_other_column)
+  colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols))
+
+  # Combine factor and other columns into a single dataframe
+  df <- bind_cols(factor_columns, other_columns)
+
+  return(df)
+}
+
+
+#' Modify factor levels for partial overlap
+#'
+#' This function takes two sets of factor levels, computes the common and unique levels,
+#' and returns a new set of levels with partial overlap.
+#'
+#' @param levels1 First set of factor levels.
+#' @param levels2 Second set of factor levels.
+#'
+#' @return A character vector of new factor levels with partial overlap.
+#' @examples
+#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D"))
+partial_overlap_levels <- function(levels1, levels2) {
+  common <- intersect(levels1, levels2)
+  unique1 <- setdiff(levels1, common)
+  unique2 <- setdiff(levels2, common)
+
+  # Set seed before each sample call
+  set.seed(123)
+  sampled_unique1 <- sample(unique1, length(unique1) * 0.5)
+
+  set.seed(123)
+  sampled_unique2 <- sample(unique2, length(unique2) * 0.5)
+
+  new_levels <- c(common, sampled_unique1, sampled_unique2)
+  return(new_levels)
+}
+
+
+#' Create additional dataframes with specific conditions
+#'
+#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels,
+#' and adding new columns with unique names. Different seeds are used for each iteration of the loop,
+#' ensuring reproducibility of the generated dataframes.
+#'
+#' @param base_df The base dataframe used to create the additional dataframes.
+#' @param n_rows Number of rows in the additional dataframes. Default is 10,000.
+#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3").
+#'
+#' @return A list of dataframes with the specified modifications.
+#' @importFrom dplyr bind_cols
+#' @examples
+#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
+#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2"))
+create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) {
+
+  # Define a fixed sequence of seeds, one for each dataframe to be created
+  seeds <- c(123, 456, 789, 101112)
+
+  df_list <- list()
+
+  for (i in seq_along(df_names)) {
+    # Set the seed for this iteration based on the pre-defined seeds
+    set.seed(seeds[i])
+
+    # Proceed with the dataframe generation process
+    overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df)))
+    df <- base_df
+    cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols)))
+
+    # Modify columns to have different data types
+    for (col in cols_to_modify_class) {
+      current_class <- class(df[[col]])
+      new_class <- switch(current_class,
+                          "factor" = as.character(df[[col]]),
+                          "character" = as.factor(df[[col]]),
+                          "numeric" = as.integer(df[[col]]),
+                          "integer" = as.numeric(df[[col]]),
+                          df[[col]])
+      df[[col]] <- new_class
+    }
+
+    # Modify factor levels for partial overlap
+    factor_cols <- colnames(base_df)[sapply(base_df, is.factor)]
+    overlap_factor_cols <- intersect(overlap_cols, factor_cols)
+    cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols)))
+
+    for (col in cols_to_modify_levels) {
+      original_levels <- levels(base_df[[col]])
+      new_levels <- partial_overlap_levels(original_levels, original_levels)
+      df[[col]] <- factor(df[[col]], levels = new_levels)
+    }
+
+    # Create new random columns for each dataframe (these will vary by seed)
+    set.seed(seeds[i])  # Set the seed again for generating new columns
+    n_new_cols <- round(0.2 * ncol(base_df))
+    new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols)
+    new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols))
+    colnames(new_cols) <- new_col_names
+
+    # Bind new columns to the dataframe
+    df <- bind_cols(df, new_cols)
+    df_list[[df_names[i]]] <- df
+  }
+
+  return(df_list)
+}
diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R
index 92ebd7b1..fca136ac 100644
--- a/tests/testthat/test-smk-standardiseDf.R
+++ b/tests/testthat/test-smk-standardiseDf.R
@@ -1,12 +1,15 @@
 suppressWarnings(library(DSLite))
 library(purrr)
 library(dplyr)
+# devtools::install_github("datashield/dsBase", ref = "v6.4.0-dev")
 library(dsBase)
 library(dsBaseClient)
 library(purrr)
 library(dsTidyverse)
+source("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsBaseClient/tests/testthat/helpers.R")
 # devtools::load_all("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsTidyverse")
 options("datashield.return_errors" = TRUE)
+testthat::local_edition(3)
 
 df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 10)
 

From de444d043eec5ff99ae22b807902795f2ddff1b7 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 22 Jul 2025 09:59:40 +0200
Subject: [PATCH 06/18] removed duplicate dependency

---
 DESCRIPTION | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a3a4ceaf..b0779e7f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -67,7 +67,6 @@ Suggests:
     DescTools,
     DSOpal,
     DSMolgenisArmadillo,
-    DSLite,
-    assertthat
+    DSLite
 RoxygenNote: 7.3.2
 Encoding: UTF-8

From f664e047bbabc6f7230de643dbe29e1bdcff13a5 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Mon, 6 Oct 2025 16:38:24 +0200
Subject: [PATCH 07/18] fix: corrected how to pass variable names through the
 parser

---
 R/ds.standardiseDf.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/ds.standardiseDf.R b/R/ds.standardiseDf.R
index 4ba24f90..daaadbd6 100644
--- a/R/ds.standardiseDf.R
+++ b/R/ds.standardiseDf.R
@@ -62,7 +62,7 @@ ds.standardiseDf <- function(df.name = NULL, newobj = NULL, fix_class = "ask", f
 
   new_classes <- .get_var_classes(newobj, datasources)
   factor_vars <- .identify_factor_vars(new_classes)
-  factor_levels <- .get_factor_levels(factor_vars, newobj, datasources)
+  factor_levels <- .get_factor_levels(newobj, factor_vars, datasources)
   level_conflicts <- .identify_level_conflicts(factor_levels)
 
   if (length(level_conflicts) > 0 & fix_levels == "no") {
@@ -344,8 +344,9 @@ ask_question_class <- function(var) {
 #' @param datasources Data sources from which to aggregate data.
 #' @return A list of factor levels.
 #' @noRd
-.get_factor_levels <- function(factor_vars, df, datasources) {
-  cally <- call("getAllLevelsDS", df, names(factor_vars))
+.get_factor_levels <- function(df, factor_vars, datasources) {
+  factor_vars <- paste(names(factor_vars), collapse = ",")
+  cally <- call("getAllLevelsDS", df, factor_vars)
   return(datashield.aggregate(datasources, cally))
 }
 

From eb97f82534c12400405901583925f2ee88469787 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Mon, 6 Oct 2025 16:41:27 +0200
Subject: [PATCH 08/18] put test file in correct format

---
 tests/testthat/test-smk-standardiseDf.R | 1201 +++++++++++------------
 1 file changed, 570 insertions(+), 631 deletions(-)

diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R
index fca136ac..d3ceb647 100644
--- a/tests/testthat/test-smk-standardiseDf.R
+++ b/tests/testthat/test-smk-standardiseDf.R
@@ -1,88 +1,25 @@
-suppressWarnings(library(DSLite))
-library(purrr)
-library(dplyr)
-# devtools::install_github("datashield/dsBase", ref = "v6.4.0-dev")
-library(dsBase)
-library(dsBaseClient)
-library(purrr)
-library(dsTidyverse)
-source("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsBaseClient/tests/testthat/helpers.R")
-# devtools::load_all("~/Library/Mobile Documents/com~apple~CloudDocs/work/repos/dsTidyverse")
-options("datashield.return_errors" = TRUE)
-testthat::local_edition(3)
-
-df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 10)
-
-df_1 <- df %>% select(1:5, 6, 9, 12, 15, 18) %>%
-  mutate(
-    fac_col2 = factor(fac_col2, levels = c("Blue", "Green")),
-    fac_col4 = as.numeric(fac_col4),
-    fac_col5 = as.logical(fac_col5))
-
-df_2 <- df %>% select(1:5, 7, 10, 13, 16, 19) %>%
-  mutate(
-    fac_col2 = factor(fac_col2, levels = c("Green", "Red")),
-    fac_col3 = factor(fac_col3, levels = "No"),
-    fac_col4 = as.character(fac_col4),
-    fac_col5 = as.integer(fac_col5))
-
-df_3 <- df %>% select(1:5, 11, 14, 17, 20) %>%
-  mutate(
-    fac_col2 = factor(fac_col2, levels = "Blue"),
-    fac_col3 = factor(fac_col3, levels = "Yes"))
-
-options(datashield.env = environment())
-
-dslite.server <- newDSLiteServer(
-  tables = list(
-    df_1 = df_1,
-    df_2 = df_2,
-    df_3 = df_3
-  )
-)
-
-dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse", "dsDanger")))
-dslite.server$aggregateMethod("getClassAllColsDS", "getClassAllColsDS")
-dslite.server$assignMethod("fixClassDS", "fixClassDS")
-dslite.server$assignMethod("fixColsDS", "fixColsDS")
-dslite.server$aggregateMethod("getAllLevelsDS", "getAllLevelsDS")
-dslite.server$assignMethod("fixLevelsDS", "fixLevelsDS")
-
-builder <- DSI::newDSLoginBuilder()
-
-builder$append(
-  server = "server_1",
-  url = "dslite.server",
-  driver = "DSLiteDriver"
-)
+#
+# Set up
+#
 
-builder$append(
-  server = "server_2",
-  url = "dslite.server",
-  driver = "DSLiteDriver"
-)
-
-builder$append(
-  server = "server_3",
-  url = "dslite.server",
-  driver = "DSLiteDriver"
-)
+context("ds.standardiseDf::smk::setup")
+options(datashield.errors.print = TRUE)
 
-logindata <- builder$build()
-conns <- DSI::datashield.login(logins = logindata, assign = FALSE)
+connect.studies.dataset.stand(c("fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
+                                "fac_col6", "fac_col9", "col12", "col15", "col18"))
 
-datashield.assign.table(conns["server_1"], "df", "df_1")
-datashield.assign.table(conns["server_2"], "df", "df_2")
-datashield.assign.table(conns["server_3"], "df", "df_3")
+test_that("setup", {
+  ds_expect_variables(c("D"))
+})
 
-datashield.assign.table(conns["server_1"], "df_ident", "df_1")
-datashield.assign.table(conns["server_2"], "df_ident", "df_1")
-datashield.assign.table(conns["server_3"], "df_ident", "df_1")
+#
+# Tests
+#
 
 ####################################################################################################
 # Code that will be used in multiple tests
 ####################################################################################################
-var_class <- .get_var_classes("df", datasources = conns)
+var_class <- .get_var_classes("D", datasources = ds.test_env$connections)
 
 class_conflicts <- .identify_class_conflicts(var_class)
 
@@ -91,11 +28,11 @@ different_classes <- c("fac_col4", "fac_col5")
 class_decisions <- c("1", "5")
 
 .fix_classes(
-  df.name = "df",
+  df.name = "D",
   different_classes = different_classes,
   class_decisions = class_decisions,
   newobj = "new_classes",
-  datasources = conns)
+  datasources = ds.test_env$connections)
 
 cols_to_set <- c(
   "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col9", "col12",
@@ -103,12 +40,12 @@ cols_to_set <- c(
   "col20")
 
 .add_missing_cols_to_df(
-  df.name = "df",
+  df.name = "D",
   cols_to_add_if_missing = cols_to_set,
   newobj = "with_new_cols",
-  datasources = conns)
+  datasources = ds.test_env$connections)
 
-old_cols <- ds.colnames("df")
+old_cols <- ds.colnames("D")
 
 new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
               "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
@@ -122,11 +59,11 @@ new_cols_servers <- list(
 
 added_cols <- .get_added_cols(old_cols, new_cols_servers)
 
-var_class_fact <- .get_var_classes("with_new_cols", datasources = conns)
+var_class_fact <- .get_var_classes("with_new_cols", datasources = ds.test_env$connections)
 
 fac_vars <- .identify_factor_vars(var_class_fact)
 
-fac_levels <- .get_factor_levels(fac_vars, "with_new_cols", conns)
+fac_levels <- .get_factor_levels("with_new_cols", fac_vars, ds.test_env$connections)
 
 level_conflicts <- .identify_level_conflicts(fac_levels)
 
@@ -192,554 +129,556 @@ test_that(".get_var_classes returns correct output", {
   expect_equal(var_class, expected)
 
 })
+#
+# test_that(".identify_class_conflicts returns correct output", {
+#   expected <- list(
+#     fac_col4 = c("numeric", "character", "factor"),
+#     fac_col5 = c("logical", "integer", "factor")
+#   )
+#
+#   expect_equal(class_conflicts, expected)
+#
+# })
+#
+# test_that("ask_question displays the correct prompt", {
+#   expect_snapshot(ask_question_class("my_var"))
+# })
+#
+# test_that("ask_question_wait_response_class continues with valid response", {
+#   expect_equal(
+#     with_mocked_bindings(
+#       ask_question_wait_response_class("a variable"),
+#       ask_question_class = function(var) "A question",
+#       readline = function() "1"
+#     ), "1"
+#   )
+# })
+#
+# test_that("ask_question_wait_response_class throws error if option 6 selected", {
+#   expect_error(
+#     with_mocked_bindings(
+#       ask_question_wait_response_class("a variable"),
+#       ask_question_class = function(var) "A question",
+#       readline = function() "6")
+#   )
+# })
+#
+# test_that("print_all_classes prints the correct message", {
+#   expect_snapshot(
+#     print_all_classes(
+#       c("server_1", "server_2", "server_3"),
+#       c("numeric", "factor", "integer")
+#     )
+#   )
+# })
+#
+# test_that("prompt_user_class_decision function properly", {
+#   expect_message(
+#     with_mocked_bindings(
+#       prompt_user_class_decision(
+#         var = "test_col",
+#         servers = c("server_1", "server_2", "server_3"),
+#         classes = c("numeric", "character", "factor"),
+#         newobj = "test_df",
+#         datasources = datasources),
+#       ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
+#     )
+#   )
+#
+#   expect_equal(
+#     with_mocked_bindings(
+#       prompt_user_class_decision(
+#         var = "test_col",
+#         servers = c("server_1", "server_2", "server_3"),
+#         classes = c("numeric", "character", "factor"),
+#         newobj = "test_df",
+#         datasources = datasources),
+#       ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
+#     ),
+#     "test_col"
+#   )
+# })
+#
+# test_that("prompt_user_class_decision_all_vars returns correct value", {
+#   expect_equal(
+#     with_mocked_bindings(
+#       prompt_user_class_decision_all_vars(
+#         vars = c("test_var_1", "test_var_2"),
+#         all_servers = c("server_1", "server_2", "server_3"),
+#         all_classes = tibble(
+#           test_var_1 = c("numeric", "character", "factor"),
+#           test_var_2 = c("logical", "integer", "factor")
+#         ),
+#         "test_df",
+#         conns),
+#       prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1"
+#     ),
+#     c("1", "1")
+#   )
+# })
+#
+# test_that(".fix_classes sets the correct classes in serverside data frame", {
+#
+#   expect_equal(
+#     unname(unlist(ds.class("df$fac_col4"))),
+#     c("numeric", "character", "factor")
+#   )
+#
+#   expect_equal(
+#     unname(unlist(ds.class("df$fac_col5"))),
+#     c("logical", "integer", "factor")
+#   )
+#
+#   expect_equal(
+#     unname(unlist(ds.class("new_classes$fac_col4"))),
+#     rep("factor", 3)
+#   )
+#
+#   expect_equal(
+#     unname(unlist(ds.class("new_classes$fac_col5"))),
+#     rep("logical", 3)
+#   )
+#
+# })
+#
+# test_that(".get_unique_cols extracts unique names from a list", {
+#   expect_equal(
+#     .get_unique_cols(
+#       list(
+#         server_1 = c("col_1", "col_2", "col_3"),
+#         server_1 = c("col_1", "col_2", "col_4"),
+#         server_1 = c("col_2", "col_3", "col_3", "col_5")
+#       )
+#     ),
+#     c("col_1", "col_2", "col_3", "col_4", "col_5")
+#   )
+# })
+#
+# test_that(".add_missing_cols_to_df correctly creates missing columns", {
+#
+#   new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
+#                 "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
+#                 "fac_col6", "fac_col7", "fac_col9")
+#
+#   observed <- ds.colnames("with_new_cols")
+#
+#   expected <- list(
+#     server_1 = new_cols,
+#     server_2 = new_cols,
+#     server_3 = new_cols
+#   )
+#
+#   expect_equal(observed, expected)
+# })
+#
+# test_that(".get_added_cols correctly identifies newly added columns", {
+#
+#   expect_equal(
+#     added_cols,
+#     list(
+#       server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"),
+#       server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"),
+#       server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9")
+#     )
+#   )
+# })
+#
+# test_that(".identify_factor_vars correctly identifies factor variables", {
+#
+#
+#
+#   var_class_fact <- var_class |> dplyr::select(server: col18)
+#   expect_equal(
+#     names(fac_vars),
+#     c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9")
+#   )
+# })
+#
+# test_that(".get_factor_levels correctly identifies factor levels", {
+#   expected <- list(
+#     server_1 = list(
+#       fac_col1 = c("High", "Low", "Medium"),
+#       fac_col2 = c("Blue", "Green"),
+#       fac_col3 = c("No", "Yes"),
+#       fac_col6 = c("Bird", "Cat", "Dog"),
+#       fac_col9 = c("False", "True")
+#     ),
+#     server_2 = list(
+#       fac_col1 = c("High", "Low", "Medium"),
+#       fac_col2 = c("Green", "Red"),
+#       fac_col3 = c("No"),
+#       fac_col6 = NULL,
+#       fac_col9 = NULL
+#     ),
+#     server_3 = list(
+#       fac_col1 = c("High", "Low", "Medium"),
+#       fac_col2 = c("Blue"),
+#       fac_col3 = c("Yes"),
+#       fac_col6 = NULL,
+#       fac_col9 = NULL
+#     )
+#   )
+#
+#   expect_equal(fac_levels, expected)
+# })
+#
+# test_that(".identify_level_conflicts correctly factor columns with different levels", {
+#   expect_equal(
+#     .identify_level_conflicts(fac_levels),
+#     c("fac_col2", "fac_col3", "fac_col6", "fac_col9")
+#   )
+#
+# })
+#
+# test_that("ask_question_wait_response_levels continues with valid response", {
+#   expect_equal(
+#     with_mocked_bindings(
+#       suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+#       readline = function() "1"
+#     ), "1"
+#   )
+#
+#   expect_equal(
+#     with_mocked_bindings(
+#       suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+#       readline = function() "1"
+#     ), "1"
+#   )
+#
+# })
+#
+# test_that("ask_question_wait_response_levels aborts with response of 3", {
+#   expect_error(
+#     with_mocked_bindings(
+#       suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+#       readline = function() "3")
+#   )
+# })
+#
+# test_that(".make_levels_message makes correct message", {
+#   expect_snapshot(.make_levels_message(level_conflicts))
+# })
+#
+# test_that(".get_unique_levels extracts all possible levels", {
+#
+#   expected <- list(
+#     fac_col2 = c("Blue", "Green", "Red"),
+#     fac_col3 = c("No", "Yes"),
+#     fac_col6 = c("Bird", "Cat", "Dog"),
+#     fac_col9 = c("False", "True")
+#   )
+#
+#   expect_equal(unique_levs, expected)
+#
+# })
+#
+# test_that(".set_factor_levels sets levels correctly", {
+#   .set_factor_levels("with_new_cols", unique_levs,  conns)
+#
+#   expect_equal(
+#     ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]),
+#     list(
+#       server_1 = c("Blue", "Green", "Red"),
+#       server_2 = c("Blue", "Green", "Red"),
+#       server_3 = c("Blue", "Green", "Red")
+#     )
+#   )
+#
+#   expect_equal(
+#     ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]),
+#     list(
+#       server_1 = c("No", "Yes"),
+#       server_2 = c("No", "Yes"),
+#       server_3 = c("No", "Yes")
+#     )
+#   )
+#
+#   expect_equal(
+#     ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]),
+#     list(
+#       server_1 = c("Bird", "Cat", "Dog"),
+#       server_2 = c("Bird", "Cat", "Dog"),
+#       server_3 = c("Bird", "Cat", "Dog")
+#     )
+#   )
+#
+#   expect_equal(
+#     ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]),
+#     list(
+#       server_1 = c("False", "True"),
+#       server_2 = c("False", "True"),
+#       server_3 = c("False", "True")
+#     )
+#   )
+#
+# })
+#
+# test_that(".print_var_recode_message prints the correct message", {
+#   expect_snapshot(.print_var_recode_message(added_cols, "test_df"))
+# })
+#
+# test_that(".print_class_recode_message prints the correct message", {
+#   expect_snapshot(
+#     .print_class_recode_message(class_decisions, different_classes, "test_df")
+#   )
+# })
+#
+# test_that(".print_levels_recode_message prints the correct message", {
+#   expect_snapshot(
+#     .print_levels_recode_message(unique_levs, "test_df")
+#   )
+# })
+#
+# test_that(".make_levels_recode_message prints the correct message", {
+#   expect_equal(
+#     .make_levels_recode_message(unique_levs),
+#     list(
+#       "fac_col2 --> Blue, Green, Red",
+#       "fac_col3 --> No, Yes",
+#       "fac_col6 --> Bird, Cat, Dog",
+#       "fac_col9 --> False, True"
+#     )
+#   )
+# })
+#
+# test_that(".print_out_messages prints the correct messages", {
+#   expect_snapshot(
+#     .print_out_messages(
+#       added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df"
+#     )
+#   )
+# })
+#
+# test_that(".change_choice_to_string converts numeric class codes to strings correctly", {
+#   expect_equal(.change_choice_to_string("1"), "factor")
+#   expect_equal(.change_choice_to_string("2"), "integer")
+#   expect_equal(.change_choice_to_string("3"), "numeric")
+#   expect_equal(.change_choice_to_string("4"), "character")
+#   expect_equal(.change_choice_to_string("5"), "logical")
+# })
+#
+# test_that("ds.standardiseDf doesn't run if dataframes are identical", {
+#   expect_error(
+#     ds.standardiseDf(
+#       df = "df_ident",
+#       newobj = "test_fill"
+#     ),
+#     "Columns are identical"
+#   )
+# })
+#
+# test_that("ds.standardiseDf works when called directly and class conversion is factor", {
+#   with_mocked_bindings(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "test_fill"
+#     ),
+#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
+#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col4")[[1]],
+#     "factor"
+#   )
+# })
+#
+# test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", {
+#   with_mocked_bindings(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "test_fill"
+#     ),
+#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"),
+#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col4")[[1]],
+#     "integer"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col5")[[1]],
+#     "integer"
+#   )
+# })
+#
+# test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", {
+#   with_mocked_bindings(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "test_fill"
+#     ),
+#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"),
+#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col4")[[1]],
+#     "numeric"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col5")[[1]],
+#     "numeric"
+#   )
+# })
+#
+# test_that("ds.standardiseDf returns warning when called directly and class conversion is character", {
+#   with_mocked_bindings(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "test_fill"
+#     ),
+#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"),
+#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col4")[[1]],
+#     "character"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col5")[[1]],
+#     "character"
+#   )
+# })
+#
+# test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", {
+#   with_mocked_bindings(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "test_fill"
+#     ),
+#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"),
+#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col4")[[1]],
+#     "logical"
+#   )
+#
+#   expect_equal(
+#     ds.class("test_fill$fac_col5")[[1]],
+#     "logical"
+#   )
+# })
+#
+# test_that("ds.standardiseDf changes levels if this option is selected", {
+#   with_mocked_bindings(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "test_fill"
+#     ),
+#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"),
+#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1"
+#   )
+#
+#   levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels)
+#   levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels)
+#   levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels)
+#   levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels)
+#   levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels)
+#   levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels)
+#
+#   expect_equal(
+#     levels_2,
+#     list(
+#       server_1 = c("Blue", "Green", "Red"),
+#       server_2 = c("Blue", "Green", "Red"),
+#       server_3 = c("Blue", "Green", "Red")
+#     )
+#   )
+#
+#   expect_equal(
+#     levels_3,
+#     list(
+#       server_1 = c("No", "Yes"),
+#       server_2 = c("No", "Yes"),
+#       server_3 = c("No", "Yes")
+#     )
+#   )
+#
+#   expect_equal(
+#     levels_4,
+#     list(
+#       server_1 = c("1", "2", "3", "A", "B", "C"),
+#       server_2 = c("1", "2", "3", "A", "B", "C"),
+#       server_3 = c("1", "2", "3", "A", "B", "C")
+#     )
+#   )
+#
+#   expect_equal(
+#     levels_5,
+#     list(
+#       server_1 = c("1", "2", "3", "One", "Three", "Two"),
+#       server_2 = c("1", "2", "3", "One", "Three", "Two"),
+#       server_3 = c("1", "2", "3", "One", "Three", "Two")
+#     )
+#   )
+#
+#   expect_equal(
+#     levels_6,
+#     list(
+#       server_1 = c("Bird", "Cat", "Dog"),
+#       server_2 = c("Bird", "Cat", "Dog"),
+#       server_3 = c("Bird", "Cat", "Dog")
+#     )
+#   )
+#
+#   expect_equal(
+#     levels_9,
+#     list(
+#       server_1 = c("False", "True"),
+#       server_2 = c("False", "True"),
+#       server_3 = c("False", "True")
+#     )
+#   )
+#
+# })
+#
+# test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", {
+#   expect_error(
+#     ds.standardiseDf(
+#       df = "df",
+#       newobj = "shouldnt_exist",
+#       fix_class = "no"
+#     ),
+#     "Variables do not have the same class in all studies"
+#   )
+#
+#   expect_equal(
+#     ds.exists("shouldnt_exist")[[1]],
+#     FALSE
+#   )
+# })
+#
+# test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", {
+#   expect_error(
+#     with_mocked_bindings(
+#       ds.standardiseDf(
+#         df = "df",
+#         newobj = "shouldnt_exist",
+#         fix_levels = "no"
+#       ),
+#       prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1")
+#     ),
+#     "Factor variables do not have the same levels in all studies"
+#   )
+#
+#   expect_equal(
+#     ds.exists("shouldnt_exist")[[1]],
+#     FALSE
+#   )
+# })
+
+
+## Add disclosure check levels
+## Push change to dsBase
 
-test_that(".identify_class_conflicts returns correct output", {
-  expected <- list(
-    fac_col4 = c("numeric", "character", "factor"),
-    fac_col5 = c("logical", "integer", "factor")
-  )
-
-  expect_equal(class_conflicts, expected)
-
-})
-
-test_that("ask_question displays the correct prompt", {
-  expect_snapshot(ask_question_class("my_var"))
-})
-
-test_that("ask_question_wait_response_class continues with valid response", {
-  expect_equal(
-    with_mocked_bindings(
-      ask_question_wait_response_class("a variable"),
-      ask_question_class = function(var) "A question",
-      readline = function() "1"
-    ), "1"
-  )
-})
-
-test_that("ask_question_wait_response_class throws error if option 6 selected", {
-  expect_error(
-    with_mocked_bindings(
-      ask_question_wait_response_class("a variable"),
-      ask_question_class = function(var) "A question",
-      readline = function() "6")
-  )
-})
-
-test_that("print_all_classes prints the correct message", {
-  expect_snapshot(
-    print_all_classes(
-      c("server_1", "server_2", "server_3"),
-      c("numeric", "factor", "integer")
-    )
-  )
-})
-
-test_that("prompt_user_class_decision function properly", {
-  expect_message(
-    with_mocked_bindings(
-      prompt_user_class_decision(
-        var = "test_col",
-        servers = c("server_1", "server_2", "server_3"),
-        classes = c("numeric", "character", "factor"),
-        newobj = "test_df",
-        datasources = datasources),
-      ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
-    )
-  )
-
-  expect_equal(
-    with_mocked_bindings(
-      prompt_user_class_decision(
-        var = "test_col",
-        servers = c("server_1", "server_2", "server_3"),
-        classes = c("numeric", "character", "factor"),
-        newobj = "test_df",
-        datasources = datasources),
-      ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
-    ),
-    "test_col"
-  )
-})
-
-test_that("prompt_user_class_decision_all_vars returns correct value", {
-  expect_equal(
-    with_mocked_bindings(
-      prompt_user_class_decision_all_vars(
-        vars = c("test_var_1", "test_var_2"),
-        all_servers = c("server_1", "server_2", "server_3"),
-        all_classes = tibble(
-          test_var_1 = c("numeric", "character", "factor"),
-          test_var_2 = c("logical", "integer", "factor")
-        ),
-        "test_df",
-        conns),
-      prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1"
-    ),
-    c("1", "1")
-  )
-})
-
-test_that(".fix_classes sets the correct classes in serverside data frame", {
-
-  expect_equal(
-    unname(unlist(ds.class("df$fac_col4"))),
-    c("numeric", "character", "factor")
-  )
-
-  expect_equal(
-    unname(unlist(ds.class("df$fac_col5"))),
-    c("logical", "integer", "factor")
-  )
-
-  expect_equal(
-    unname(unlist(ds.class("new_classes$fac_col4"))),
-    rep("factor", 3)
-  )
-
-  expect_equal(
-    unname(unlist(ds.class("new_classes$fac_col5"))),
-    rep("logical", 3)
-  )
-
-})
-
-test_that(".get_unique_cols extracts unique names from a list", {
-  expect_equal(
-    .get_unique_cols(
-      list(
-        server_1 = c("col_1", "col_2", "col_3"),
-        server_1 = c("col_1", "col_2", "col_4"),
-        server_1 = c("col_2", "col_3", "col_3", "col_5")
-      )
-    ),
-    c("col_1", "col_2", "col_3", "col_4", "col_5")
-  )
-})
-
-test_that(".add_missing_cols_to_df correctly creates missing columns", {
-
-  new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
-                "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
-                "fac_col6", "fac_col7", "fac_col9")
-
-  observed <- ds.colnames("with_new_cols")
-
-  expected <- list(
-    server_1 = new_cols,
-    server_2 = new_cols,
-    server_3 = new_cols
-  )
-
-  expect_equal(observed, expected)
-})
-
-test_that(".get_added_cols correctly identifies newly added columns", {
-
-  expect_equal(
-    added_cols,
-    list(
-      server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"),
-      server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"),
-      server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9")
-    )
-  )
-})
-
-test_that(".identify_factor_vars correctly identifies factor variables", {
-
-
-
-  var_class_fact <- var_class |> dplyr::select(server: col18)
-  expect_equal(
-    names(fac_vars),
-    c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9")
-  )
-})
-
-test_that(".get_factor_levels correctly identifies factor levels", {
-  expected <- list(
-    server_1 = list(
-      fac_col1 = c("High", "Low", "Medium"),
-      fac_col2 = c("Blue", "Green"),
-      fac_col3 = c("No", "Yes"),
-      fac_col6 = c("Bird", "Cat", "Dog"),
-      fac_col9 = c("False", "True")
-    ),
-    server_2 = list(
-      fac_col1 = c("High", "Low", "Medium"),
-      fac_col2 = c("Green", "Red"),
-      fac_col3 = c("No"),
-      fac_col6 = NULL,
-      fac_col9 = NULL
-    ),
-    server_3 = list(
-      fac_col1 = c("High", "Low", "Medium"),
-      fac_col2 = c("Blue"),
-      fac_col3 = c("Yes"),
-      fac_col6 = NULL,
-      fac_col9 = NULL
-    )
-  )
-
-  expect_equal(fac_levels, expected)
-})
-
-test_that(".identify_level_conflicts correctly factor columns with different levels", {
-  expect_equal(
-    .identify_level_conflicts(fac_levels),
-    c("fac_col2", "fac_col3", "fac_col6", "fac_col9")
-  )
-
-})
-
-test_that("ask_question_wait_response_levels continues with valid response", {
-  expect_equal(
-    with_mocked_bindings(
-      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
-      readline = function() "1"
-    ), "1"
-  )
-
-  expect_equal(
-    with_mocked_bindings(
-      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
-      readline = function() "1"
-    ), "1"
-  )
-
-})
-
-test_that("ask_question_wait_response_levels aborts with response of 3", {
-  expect_error(
-    with_mocked_bindings(
-      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
-      readline = function() "3")
-  )
-})
-
-test_that(".make_levels_message makes correct message", {
-  expect_snapshot(.make_levels_message(level_conflicts))
-})
-
-test_that(".get_unique_levels extracts all possible levels", {
-
-  expected <- list(
-    fac_col2 = c("Blue", "Green", "Red"),
-    fac_col3 = c("No", "Yes"),
-    fac_col6 = c("Bird", "Cat", "Dog"),
-    fac_col9 = c("False", "True")
-  )
-
-  expect_equal(unique_levs, expected)
-
-})
-
-test_that(".set_factor_levels sets levels correctly", {
-  .set_factor_levels("with_new_cols", unique_levs,  conns)
-
-  expect_equal(
-    ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]),
-    list(
-      server_1 = c("Blue", "Green", "Red"),
-      server_2 = c("Blue", "Green", "Red"),
-      server_3 = c("Blue", "Green", "Red")
-    )
-  )
-
-  expect_equal(
-    ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]),
-    list(
-      server_1 = c("No", "Yes"),
-      server_2 = c("No", "Yes"),
-      server_3 = c("No", "Yes")
-    )
-  )
-
-  expect_equal(
-    ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]),
-    list(
-      server_1 = c("Bird", "Cat", "Dog"),
-      server_2 = c("Bird", "Cat", "Dog"),
-      server_3 = c("Bird", "Cat", "Dog")
-    )
-  )
-
-  expect_equal(
-    ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]),
-    list(
-      server_1 = c("False", "True"),
-      server_2 = c("False", "True"),
-      server_3 = c("False", "True")
-    )
-  )
-
-})
-
-test_that(".print_var_recode_message prints the correct message", {
-  expect_snapshot(.print_var_recode_message(added_cols, "test_df"))
-})
-
-test_that(".print_class_recode_message prints the correct message", {
-  expect_snapshot(
-    .print_class_recode_message(class_decisions, different_classes, "test_df")
-  )
-})
-
-test_that(".print_levels_recode_message prints the correct message", {
-  expect_snapshot(
-    .print_levels_recode_message(unique_levs, "test_df")
-  )
-})
-
-test_that(".make_levels_recode_message prints the correct message", {
-  expect_equal(
-    .make_levels_recode_message(unique_levs),
-    list(
-      "fac_col2 --> Blue, Green, Red",
-      "fac_col3 --> No, Yes",
-      "fac_col6 --> Bird, Cat, Dog",
-      "fac_col9 --> False, True"
-    )
-  )
-})
-
-test_that(".print_out_messages prints the correct messages", {
-  expect_snapshot(
-    .print_out_messages(
-      added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df"
-    )
-  )
-})
-
-test_that(".change_choice_to_string converts numeric class codes to strings correctly", {
-  expect_equal(.change_choice_to_string("1"), "factor")
-  expect_equal(.change_choice_to_string("2"), "integer")
-  expect_equal(.change_choice_to_string("3"), "numeric")
-  expect_equal(.change_choice_to_string("4"), "character")
-  expect_equal(.change_choice_to_string("5"), "logical")
-})
-
-test_that("ds.standardiseDf doesn't run if dataframes are identical", {
-  expect_error(
-    ds.standardiseDf(
-      df = "df_ident",
-      newobj = "test_fill"
-    ),
-    "Columns are identical"
-  )
-})
-
-test_that("ds.standardiseDf works when called directly and class conversion is factor", {
-  with_mocked_bindings(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "test_fill"
-    ),
-    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
-    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col4")[[1]],
-    "factor"
-  )
-})
-
-test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", {
-  with_mocked_bindings(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "test_fill"
-    ),
-    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"),
-    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col4")[[1]],
-    "integer"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col5")[[1]],
-    "integer"
-  )
-})
-
-test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", {
-  with_mocked_bindings(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "test_fill"
-    ),
-    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"),
-    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col4")[[1]],
-    "numeric"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col5")[[1]],
-    "numeric"
-  )
-})
-
-test_that("ds.standardiseDf returns warning when called directly and class conversion is character", {
-  with_mocked_bindings(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "test_fill"
-    ),
-    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"),
-    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col4")[[1]],
-    "character"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col5")[[1]],
-    "character"
-  )
-})
-
-test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", {
-  with_mocked_bindings(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "test_fill"
-    ),
-    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"),
-    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col4")[[1]],
-    "logical"
-  )
-
-  expect_equal(
-    ds.class("test_fill$fac_col5")[[1]],
-    "logical"
-  )
-})
-
-test_that("ds.standardiseDf changes levels if this option is selected", {
-  with_mocked_bindings(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "test_fill"
-    ),
-    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"),
-    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1"
-  )
-
-  levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels)
-  levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels)
-  levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels)
-  levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels)
-  levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels)
-  levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels)
-
-  expect_equal(
-    levels_2,
-    list(
-      server_1 = c("Blue", "Green", "Red"),
-      server_2 = c("Blue", "Green", "Red"),
-      server_3 = c("Blue", "Green", "Red")
-    )
-  )
-
-  expect_equal(
-    levels_3,
-    list(
-      server_1 = c("No", "Yes"),
-      server_2 = c("No", "Yes"),
-      server_3 = c("No", "Yes")
-    )
-  )
-
-  expect_equal(
-    levels_4,
-    list(
-      server_1 = c("1", "2", "3", "A", "B", "C"),
-      server_2 = c("1", "2", "3", "A", "B", "C"),
-      server_3 = c("1", "2", "3", "A", "B", "C")
-    )
-  )
-
-  expect_equal(
-    levels_5,
-    list(
-      server_1 = c("1", "2", "3", "One", "Three", "Two"),
-      server_2 = c("1", "2", "3", "One", "Three", "Two"),
-      server_3 = c("1", "2", "3", "One", "Three", "Two")
-    )
-  )
-
-  expect_equal(
-    levels_6,
-    list(
-      server_1 = c("Bird", "Cat", "Dog"),
-      server_2 = c("Bird", "Cat", "Dog"),
-      server_3 = c("Bird", "Cat", "Dog")
-    )
-  )
-
-  expect_equal(
-    levels_9,
-    list(
-      server_1 = c("False", "True"),
-      server_2 = c("False", "True"),
-      server_3 = c("False", "True")
-    )
-  )
-
-})
-
-test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", {
-  expect_error(
-    ds.standardiseDf(
-      df = "df",
-      newobj = "shouldnt_exist",
-      fix_class = "no"
-    ),
-    "Variables do not have the same class in all studies"
-  )
-
-  expect_equal(
-    ds.exists("shouldnt_exist")[[1]],
-    FALSE
-  )
-})
-
-test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", {
-  expect_error(
-    with_mocked_bindings(
-      ds.standardiseDf(
-        df = "df",
-        newobj = "shouldnt_exist",
-        fix_levels = "no"
-      ),
-      prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1")
-    ),
-    "Factor variables do not have the same levels in all studies"
-  )
-
-  expect_equal(
-    ds.exists("shouldnt_exist")[[1]],
-    FALSE
-  )
-})
-
-
-## 9. Handle incorrect response for level fix
 
 
From 3d29e879ae054b60bcfa614f8c875cde60678adb Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:11:25 +0200
Subject: [PATCH 09/18] test: all tests working

---
 tests/testthat/test-smk-standardiseDf.R | 1109 ++++++++++++-----------
 1 file changed, 560 insertions(+), 549 deletions(-)

diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R
index d3ceb647..765d655c 100644
--- a/tests/testthat/test-smk-standardiseDf.R
+++ b/tests/testthat/test-smk-standardiseDf.R
@@ -1,12 +1,15 @@
 #
 # Set up
 #
-
 context("ds.standardiseDf::smk::setup")
 options(datashield.errors.print = TRUE)
 
-connect.studies.dataset.stand(c("fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
-                                "fac_col6", "fac_col9", "col12", "col15", "col18"))
+connect.studies.dataset.stand(
+  c(
+    "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9",
+    "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
+    "col20")
+  )
 
 test_that("setup", {
   ds_expect_variables(c("D"))
@@ -104,7 +107,7 @@ test_that(".stop_if_cols_identical doesn't throw error if data frames have diffe
 test_that(".get_var_classes returns correct output", {
 
   expected <- tibble(
-    server = c("server_1", "server_2", "server_3"),
+    server = c("sim1", "sim2", "sim3"),
     fac_col1 = c("factor", "factor", "factor"),
     fac_col2 = c("factor", "factor", "factor"),
     fac_col3 = c("factor", "factor", "factor"),
@@ -129,551 +132,559 @@ test_that(".get_var_classes returns correct output", {
   expect_equal(var_class, expected)
 
 })
-#
-# test_that(".identify_class_conflicts returns correct output", {
-#   expected <- list(
-#     fac_col4 = c("numeric", "character", "factor"),
-#     fac_col5 = c("logical", "integer", "factor")
-#   )
-#
-#   expect_equal(class_conflicts, expected)
-#
-# })
-#
-# test_that("ask_question displays the correct prompt", {
-#   expect_snapshot(ask_question_class("my_var"))
-# })
-#
-# test_that("ask_question_wait_response_class continues with valid response", {
-#   expect_equal(
-#     with_mocked_bindings(
-#       ask_question_wait_response_class("a variable"),
-#       ask_question_class = function(var) "A question",
-#       readline = function() "1"
-#     ), "1"
-#   )
-# })
-#
-# test_that("ask_question_wait_response_class throws error if option 6 selected", {
-#   expect_error(
-#     with_mocked_bindings(
-#       ask_question_wait_response_class("a variable"),
-#       ask_question_class = function(var) "A question",
-#       readline = function() "6")
-#   )
-# })
-#
-# test_that("print_all_classes prints the correct message", {
-#   expect_snapshot(
-#     print_all_classes(
-#       c("server_1", "server_2", "server_3"),
-#       c("numeric", "factor", "integer")
-#     )
-#   )
-# })
-#
-# test_that("prompt_user_class_decision function properly", {
-#   expect_message(
-#     with_mocked_bindings(
-#       prompt_user_class_decision(
-#         var = "test_col",
-#         servers = c("server_1", "server_2", "server_3"),
-#         classes = c("numeric", "character", "factor"),
-#         newobj = "test_df",
-#         datasources = datasources),
-#       ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
-#     )
-#   )
-#
-#   expect_equal(
-#     with_mocked_bindings(
-#       prompt_user_class_decision(
-#         var = "test_col",
-#         servers = c("server_1", "server_2", "server_3"),
-#         classes = c("numeric", "character", "factor"),
-#         newobj = "test_df",
-#         datasources = datasources),
-#       ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
-#     ),
-#     "test_col"
-#   )
-# })
-#
-# test_that("prompt_user_class_decision_all_vars returns correct value", {
-#   expect_equal(
-#     with_mocked_bindings(
-#       prompt_user_class_decision_all_vars(
-#         vars = c("test_var_1", "test_var_2"),
-#         all_servers = c("server_1", "server_2", "server_3"),
-#         all_classes = tibble(
-#           test_var_1 = c("numeric", "character", "factor"),
-#           test_var_2 = c("logical", "integer", "factor")
-#         ),
-#         "test_df",
-#         conns),
-#       prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1"
-#     ),
-#     c("1", "1")
-#   )
-# })
-#
-# test_that(".fix_classes sets the correct classes in serverside data frame", {
-#
-#   expect_equal(
-#     unname(unlist(ds.class("df$fac_col4"))),
-#     c("numeric", "character", "factor")
-#   )
-#
-#   expect_equal(
-#     unname(unlist(ds.class("df$fac_col5"))),
-#     c("logical", "integer", "factor")
-#   )
-#
-#   expect_equal(
-#     unname(unlist(ds.class("new_classes$fac_col4"))),
-#     rep("factor", 3)
-#   )
-#
-#   expect_equal(
-#     unname(unlist(ds.class("new_classes$fac_col5"))),
-#     rep("logical", 3)
-#   )
-#
-# })
-#
-# test_that(".get_unique_cols extracts unique names from a list", {
-#   expect_equal(
-#     .get_unique_cols(
-#       list(
-#         server_1 = c("col_1", "col_2", "col_3"),
-#         server_1 = c("col_1", "col_2", "col_4"),
-#         server_1 = c("col_2", "col_3", "col_3", "col_5")
-#       )
-#     ),
-#     c("col_1", "col_2", "col_3", "col_4", "col_5")
-#   )
-# })
-#
-# test_that(".add_missing_cols_to_df correctly creates missing columns", {
-#
-#   new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
-#                 "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
-#                 "fac_col6", "fac_col7", "fac_col9")
-#
-#   observed <- ds.colnames("with_new_cols")
-#
-#   expected <- list(
-#     server_1 = new_cols,
-#     server_2 = new_cols,
-#     server_3 = new_cols
-#   )
-#
-#   expect_equal(observed, expected)
-# })
-#
-# test_that(".get_added_cols correctly identifies newly added columns", {
-#
-#   expect_equal(
-#     added_cols,
-#     list(
-#       server_1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"),
-#       server_2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"),
-#       server_3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9")
-#     )
-#   )
-# })
-#
-# test_that(".identify_factor_vars correctly identifies factor variables", {
-#
-#
-#
-#   var_class_fact <- var_class |> dplyr::select(server: col18)
-#   expect_equal(
-#     names(fac_vars),
-#     c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9")
-#   )
-# })
-#
-# test_that(".get_factor_levels correctly identifies factor levels", {
-#   expected <- list(
-#     server_1 = list(
-#       fac_col1 = c("High", "Low", "Medium"),
-#       fac_col2 = c("Blue", "Green"),
-#       fac_col3 = c("No", "Yes"),
-#       fac_col6 = c("Bird", "Cat", "Dog"),
-#       fac_col9 = c("False", "True")
-#     ),
-#     server_2 = list(
-#       fac_col1 = c("High", "Low", "Medium"),
-#       fac_col2 = c("Green", "Red"),
-#       fac_col3 = c("No"),
-#       fac_col6 = NULL,
-#       fac_col9 = NULL
-#     ),
-#     server_3 = list(
-#       fac_col1 = c("High", "Low", "Medium"),
-#       fac_col2 = c("Blue"),
-#       fac_col3 = c("Yes"),
-#       fac_col6 = NULL,
-#       fac_col9 = NULL
-#     )
-#   )
-#
-#   expect_equal(fac_levels, expected)
-# })
-#
-# test_that(".identify_level_conflicts correctly factor columns with different levels", {
-#   expect_equal(
-#     .identify_level_conflicts(fac_levels),
-#     c("fac_col2", "fac_col3", "fac_col6", "fac_col9")
-#   )
-#
-# })
-#
-# test_that("ask_question_wait_response_levels continues with valid response", {
-#   expect_equal(
-#     with_mocked_bindings(
-#       suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
-#       readline = function() "1"
-#     ), "1"
-#   )
-#
-#   expect_equal(
-#     with_mocked_bindings(
-#       suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
-#       readline = function() "1"
-#     ), "1"
-#   )
-#
-# })
-#
-# test_that("ask_question_wait_response_levels aborts with response of 3", {
-#   expect_error(
-#     with_mocked_bindings(
-#       suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
-#       readline = function() "3")
-#   )
-# })
-#
-# test_that(".make_levels_message makes correct message", {
-#   expect_snapshot(.make_levels_message(level_conflicts))
-# })
-#
-# test_that(".get_unique_levels extracts all possible levels", {
-#
-#   expected <- list(
-#     fac_col2 = c("Blue", "Green", "Red"),
-#     fac_col3 = c("No", "Yes"),
-#     fac_col6 = c("Bird", "Cat", "Dog"),
-#     fac_col9 = c("False", "True")
-#   )
-#
-#   expect_equal(unique_levs, expected)
-#
-# })
-#
-# test_that(".set_factor_levels sets levels correctly", {
-#   .set_factor_levels("with_new_cols", unique_levs,  conns)
-#
-#   expect_equal(
-#     ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]),
-#     list(
-#       server_1 = c("Blue", "Green", "Red"),
-#       server_2 = c("Blue", "Green", "Red"),
-#       server_3 = c("Blue", "Green", "Red")
-#     )
-#   )
-#
-#   expect_equal(
-#     ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]),
-#     list(
-#       server_1 = c("No", "Yes"),
-#       server_2 = c("No", "Yes"),
-#       server_3 = c("No", "Yes")
-#     )
-#   )
-#
-#   expect_equal(
-#     ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]),
-#     list(
-#       server_1 = c("Bird", "Cat", "Dog"),
-#       server_2 = c("Bird", "Cat", "Dog"),
-#       server_3 = c("Bird", "Cat", "Dog")
-#     )
-#   )
-#
-#   expect_equal(
-#     ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]),
-#     list(
-#       server_1 = c("False", "True"),
-#       server_2 = c("False", "True"),
-#       server_3 = c("False", "True")
-#     )
-#   )
-#
-# })
-#
-# test_that(".print_var_recode_message prints the correct message", {
-#   expect_snapshot(.print_var_recode_message(added_cols, "test_df"))
-# })
-#
-# test_that(".print_class_recode_message prints the correct message", {
-#   expect_snapshot(
-#     .print_class_recode_message(class_decisions, different_classes, "test_df")
-#   )
-# })
-#
-# test_that(".print_levels_recode_message prints the correct message", {
-#   expect_snapshot(
-#     .print_levels_recode_message(unique_levs, "test_df")
-#   )
-# })
-#
-# test_that(".make_levels_recode_message prints the correct message", {
-#   expect_equal(
-#     .make_levels_recode_message(unique_levs),
-#     list(
-#       "fac_col2 --> Blue, Green, Red",
-#       "fac_col3 --> No, Yes",
-#       "fac_col6 --> Bird, Cat, Dog",
-#       "fac_col9 --> False, True"
-#     )
-#   )
-# })
-#
-# test_that(".print_out_messages prints the correct messages", {
-#   expect_snapshot(
-#     .print_out_messages(
-#       added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df"
-#     )
-#   )
-# })
-#
-# test_that(".change_choice_to_string converts numeric class codes to strings correctly", {
-#   expect_equal(.change_choice_to_string("1"), "factor")
-#   expect_equal(.change_choice_to_string("2"), "integer")
-#   expect_equal(.change_choice_to_string("3"), "numeric")
-#   expect_equal(.change_choice_to_string("4"), "character")
-#   expect_equal(.change_choice_to_string("5"), "logical")
-# })
-#
-# test_that("ds.standardiseDf doesn't run if dataframes are identical", {
-#   expect_error(
-#     ds.standardiseDf(
-#       df = "df_ident",
-#       newobj = "test_fill"
-#     ),
-#     "Columns are identical"
-#   )
-# })
-#
-# test_that("ds.standardiseDf works when called directly and class conversion is factor", {
-#   with_mocked_bindings(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "test_fill"
-#     ),
-#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
-#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col4")[[1]],
-#     "factor"
-#   )
-# })
-#
-# test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", {
-#   with_mocked_bindings(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "test_fill"
-#     ),
-#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"),
-#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col4")[[1]],
-#     "integer"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col5")[[1]],
-#     "integer"
-#   )
-# })
-#
-# test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", {
-#   with_mocked_bindings(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "test_fill"
-#     ),
-#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"),
-#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col4")[[1]],
-#     "numeric"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col5")[[1]],
-#     "numeric"
-#   )
-# })
-#
-# test_that("ds.standardiseDf returns warning when called directly and class conversion is character", {
-#   with_mocked_bindings(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "test_fill"
-#     ),
-#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"),
-#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col4")[[1]],
-#     "character"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col5")[[1]],
-#     "character"
-#   )
-# })
-#
-# test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", {
-#   with_mocked_bindings(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "test_fill"
-#     ),
-#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"),
-#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col4")[[1]],
-#     "logical"
-#   )
-#
-#   expect_equal(
-#     ds.class("test_fill$fac_col5")[[1]],
-#     "logical"
-#   )
-# })
-#
-# test_that("ds.standardiseDf changes levels if this option is selected", {
-#   with_mocked_bindings(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "test_fill"
-#     ),
-#     prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"),
-#     ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1"
-#   )
-#
-#   levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels)
-#   levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels)
-#   levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels)
-#   levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels)
-#   levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels)
-#   levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels)
-#
-#   expect_equal(
-#     levels_2,
-#     list(
-#       server_1 = c("Blue", "Green", "Red"),
-#       server_2 = c("Blue", "Green", "Red"),
-#       server_3 = c("Blue", "Green", "Red")
-#     )
-#   )
-#
-#   expect_equal(
-#     levels_3,
-#     list(
-#       server_1 = c("No", "Yes"),
-#       server_2 = c("No", "Yes"),
-#       server_3 = c("No", "Yes")
-#     )
-#   )
-#
-#   expect_equal(
-#     levels_4,
-#     list(
-#       server_1 = c("1", "2", "3", "A", "B", "C"),
-#       server_2 = c("1", "2", "3", "A", "B", "C"),
-#       server_3 = c("1", "2", "3", "A", "B", "C")
-#     )
-#   )
-#
-#   expect_equal(
-#     levels_5,
-#     list(
-#       server_1 = c("1", "2", "3", "One", "Three", "Two"),
-#       server_2 = c("1", "2", "3", "One", "Three", "Two"),
-#       server_3 = c("1", "2", "3", "One", "Three", "Two")
-#     )
-#   )
-#
-#   expect_equal(
-#     levels_6,
-#     list(
-#       server_1 = c("Bird", "Cat", "Dog"),
-#       server_2 = c("Bird", "Cat", "Dog"),
-#       server_3 = c("Bird", "Cat", "Dog")
-#     )
-#   )
-#
-#   expect_equal(
-#     levels_9,
-#     list(
-#       server_1 = c("False", "True"),
-#       server_2 = c("False", "True"),
-#       server_3 = c("False", "True")
-#     )
-#   )
-#
-# })
-#
-# test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", {
-#   expect_error(
-#     ds.standardiseDf(
-#       df = "df",
-#       newobj = "shouldnt_exist",
-#       fix_class = "no"
-#     ),
-#     "Variables do not have the same class in all studies"
-#   )
-#
-#   expect_equal(
-#     ds.exists("shouldnt_exist")[[1]],
-#     FALSE
-#   )
-# })
-#
-# test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", {
-#   expect_error(
-#     with_mocked_bindings(
-#       ds.standardiseDf(
-#         df = "df",
-#         newobj = "shouldnt_exist",
-#         fix_levels = "no"
-#       ),
-#       prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1")
-#     ),
-#     "Factor variables do not have the same levels in all studies"
-#   )
-#
-#   expect_equal(
-#     ds.exists("shouldnt_exist")[[1]],
-#     FALSE
-#   )
-# })
+
+test_that(".identify_class_conflicts returns correct output", {
+  expected <- list(
+    fac_col4 = c("numeric", "character", "factor"),
+    fac_col5 = c("logical", "integer", "factor")
+  )
+
+  expect_equal(class_conflicts, expected)
+
+})
+
+test_that("ask_question displays the correct prompt", {
+  expect_snapshot(ask_question_class("my_var"))
+})
+
+test_that("ask_question_wait_response_class continues with valid response", {
+  expect_equal(
+    with_mocked_bindings(
+      ask_question_wait_response_class("a variable"),
+      ask_question_class = function(var) "A question",
+      readline = function() "1"
+    ), "1"
+  )
+})
+
+test_that("ask_question_wait_response_class throws error if option 6 selected", {
+  expect_error(
+    with_mocked_bindings(
+      ask_question_wait_response_class("a variable"),
+      ask_question_class = function(var) "A question",
+      readline = function() "6")
+  )
+})
+
+test_that("print_all_classes prints the correct message", {
+  expect_snapshot(
+    print_all_classes(
+      c("server_1", "server_2", "server_3"),
+      c("numeric", "factor", "integer")
+    )
+  )
+})
+
+test_that("prompt_user_class_decision function properly", {
+  expect_message(
+    with_mocked_bindings(
+      prompt_user_class_decision(
+        var = "test_col",
+        servers = c("sim2", "sim2", "sim3"),
+        classes = c("numeric", "character", "factor"),
+        newobj = "test_df",
+        datasources = datasources),
+      ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
+    )
+  )
+
+  expect_equal(
+    with_mocked_bindings(
+      prompt_user_class_decision(
+        var = "test_col",
+        servers = c("sim2", "sim2", "sim3"),
+        classes = c("numeric", "character", "factor"),
+        newobj = "test_df",
+        datasources = datasources),
+      ask_question_wait_response_class = function(var, newobj, datasources) "test_col"
+    ),
+    "test_col"
+  )
+})
+
+test_that("prompt_user_class_decision_all_vars returns correct value", {
+  expect_equal(
+    with_mocked_bindings(
+      prompt_user_class_decision_all_vars(
+        vars = c("test_var_1", "test_var_2"),
+        all_servers = c("sim2", "sim2", "sim3"),
+        all_classes = tibble(
+          test_var_1 = c("numeric", "character", "factor"),
+          test_var_2 = c("logical", "integer", "factor")
+        ),
+        "test_df",
+        conns),
+      prompt_user_class_decision = function(var, server, classes, newobj, datasources) "1"
+    ),
+    c("1", "1")
+  )
+})
+
+test_that(".fix_classes sets the correct classes in serverside data frame", {
+
+  expect_equal(
+    unname(unlist(ds.class("D$fac_col4"))),
+    c("numeric", "character", "factor")
+  )
+
+  expect_equal(
+    unname(unlist(ds.class("D$fac_col5"))),
+    c("logical", "integer", "factor")
+  )
+
+  expect_equal(
+    unname(unlist(ds.class("new_classes$fac_col4"))),
+    rep("factor", 3)
+  )
+
+  expect_equal(
+    unname(unlist(ds.class("new_classes$fac_col5"))),
+    rep("logical", 3)
+  )
+
+})
+
+test_that(".get_unique_cols extracts unique names from a list", {
+  expect_equal(
+    .get_unique_cols(
+      list(
+        server_1 = c("col_1", "col_2", "col_3"),
+        server_1 = c("col_1", "col_2", "col_4"),
+        server_1 = c("col_2", "col_3", "col_3", "col_5")
+      )
+    ),
+    c("col_1", "col_2", "col_3", "col_4", "col_5")
+  )
+})
+
+test_that(".add_missing_cols_to_df correctly creates missing columns", {
+
+  new_cols <- c("col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
+                "col20", "fac_col1", "fac_col10", "fac_col2", "fac_col3", "fac_col4", "fac_col5",
+                "fac_col6", "fac_col7", "fac_col9")
+
+  observed <- ds.colnames("with_new_cols")
+
+  expected <- list(
+    sim1 = new_cols,
+    sim2 = new_cols,
+    sim3 = new_cols
+  )
+
+  expect_equal(observed, expected)
+})
+
+test_that(".get_added_cols correctly identifies newly added columns", {
+
+  expect_equal(
+    added_cols,
+    list(
+      sim1 = c("col11", "col13", "col14", "col16", "col17", "col19", "col20", "fac_col10", "fac_col7"),
+      sim2 = c("col11", "col12", "col14", "col15", "col17", "col18", "col20", "fac_col6", "fac_col9"),
+      sim3 = c("col12", "col13", "col15", "col16", "col18", "col19", "fac_col10", "fac_col6", "fac_col7", "fac_col9")
+    )
+  )
+})
+
+test_that(".identify_factor_vars correctly identifies factor variables", {
+
+
+
+  var_class_fact <- var_class |> dplyr::select(server: col18)
+  expect_equal(
+    names(fac_vars),
+    c("fac_col1", "fac_col2", "fac_col3", "fac_col6", "fac_col9")
+  )
+})
+
+test_that(".get_factor_levels correctly identifies factor levels", {
+  expected <- list(
+    sim1 = list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Blue", "Green"),
+      fac_col3 = c("No", "Yes"),
+      fac_col6 = c("Bird", "Cat", "Dog"),
+      fac_col9 = c("False", "True")
+    ),
+    sim2 = list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Green", "Red"),
+      fac_col3 = c("No"),
+      fac_col6 = NULL,
+      fac_col9 = NULL
+    ),
+    sim3 = list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Blue"),
+      fac_col3 = c("Yes"),
+      fac_col6 = NULL,
+      fac_col9 = NULL
+    )
+  )
+
+  expect_equal(fac_levels, expected)
+})
+
+test_that(".identify_level_conflicts correctly factor columns with different levels", {
+  expect_equal(
+    .identify_level_conflicts(fac_levels),
+    c("fac_col2", "fac_col3", "fac_col6", "fac_col9")
+  )
+
+})
+
+test_that("ask_question_wait_response_levels continues with valid response", {
+  expect_equal(
+    with_mocked_bindings(
+      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+      readline = function() "1"
+    ), "1"
+  )
+
+  expect_equal(
+    with_mocked_bindings(
+      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+      readline = function() "1"
+    ), "1"
+  )
+
+})
+
+test_that("ask_question_wait_response_levels aborts with response of 3", {
+  expect_error(
+    with_mocked_bindings(
+      suppressWarnings(ask_question_wait_response_levels("test variable", "test_obj", conns)),
+      readline = function() "3")
+  )
+})
+
+test_that(".make_levels_message makes correct message", {
+  expect_snapshot(.make_levels_message(level_conflicts))
+})
+
+test_that(".get_unique_levels extracts all possible levels", {
+
+  expected <- list(
+    fac_col2 = c("Blue", "Green", "Red"),
+    fac_col3 = c("No", "Yes"),
+    fac_col6 = c("Bird", "Cat", "Dog"),
+    fac_col9 = c("False", "True")
+  )
+
+  expect_equal(unique_levs, expected)
+
+})
+
+test_that(".set_factor_levels sets levels correctly", {
+  .set_factor_levels("with_new_cols", unique_levs,  ds.test_env$connections)
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col2") |> map(~.x[[1]]),
+    list(
+      sim1 = c("Blue", "Green", "Red"),
+      sim2 = c("Blue", "Green", "Red"),
+      sim3 = c("Blue", "Green", "Red")
+    )
+  )
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col3") |> map(~.x[[1]]),
+    list(
+      sim1 = c("No", "Yes"),
+      sim2 = c("No", "Yes"),
+      sim3 = c("No", "Yes")
+    )
+  )
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col6") |> map(~.x[[1]]),
+    list(
+      sim1 = c("Bird", "Cat", "Dog"),
+      sim2 = c("Bird", "Cat", "Dog"),
+      sim3 = c("Bird", "Cat", "Dog")
+    )
+  )
+
+  expect_equal(
+    ds.levels("with_new_cols$fac_col9") |> map(~.x[[1]]),
+    list(
+      sim1 = c("False", "True"),
+      sim2 = c("False", "True"),
+      sim3 = c("False", "True")
+    )
+  )
+
+})
+
+test_that(".print_var_recode_message prints the correct message", {
+  expect_snapshot(.print_var_recode_message(added_cols, "test_df"))
+})
+
+test_that(".print_class_recode_message prints the correct message", {
+  expect_snapshot(
+    .print_class_recode_message(class_decisions, different_classes, "test_df")
+  )
+})
+
+test_that(".print_levels_recode_message prints the correct message", {
+  expect_snapshot(
+    .print_levels_recode_message(unique_levs, "test_df")
+  )
+})
+
+test_that(".make_levels_recode_message prints the correct message", {
+  expect_equal(
+    .make_levels_recode_message(unique_levs),
+    list(
+      "fac_col2 --> Blue, Green, Red",
+      "fac_col3 --> No, Yes",
+      "fac_col6 --> Bird, Cat, Dog",
+      "fac_col9 --> False, True"
+    )
+  )
+})
+
+test_that(".print_out_messages prints the correct messages", {
+  expect_snapshot(
+    .print_out_messages(
+      added_cols, class_decisions, different_classes, unique_levs, level_conflicts, "1", "test_df"
+    )
+  )
+})
+
+test_that(".change_choice_to_string converts numeric class codes to strings correctly", {
+  expect_equal(.change_choice_to_string("1"), "factor")
+  expect_equal(.change_choice_to_string("2"), "integer")
+  expect_equal(.change_choice_to_string("3"), "numeric")
+  expect_equal(.change_choice_to_string("4"), "character")
+  expect_equal(.change_choice_to_string("5"), "logical")
+})
+
+test_that("ds.standardiseDf doesn't run if dataframes are identical", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+      ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+    )
+
+  expect_error(
+    ds.standardiseDf(
+      df = "test_fill",
+      newobj = "shouldn't_exist"),
+    "Columns are identical"
+    )
+  })
+
+test_that("ds.standardiseDf works when called directly and class conversion is factor", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "factor"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is integer", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("2", "2"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "integer"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "integer"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is numeric", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("3", "3"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "numeric"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "numeric"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is character", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("4", "4"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "character"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "character"
+  )
+})
+
+test_that("ds.standardiseDf returns warning when called directly and class conversion is logical", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("5", "5"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col4")[[1]],
+    "logical"
+  )
+
+  expect_equal(
+    ds.class("test_fill$fac_col5")[[1]],
+    "logical"
+  )
+})
+
+test_that("ds.standardiseDf changes levels if this option is selected", {
+  with_mocked_bindings(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "test_fill"
+    ),
+    prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1"),
+    ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "1"
+  )
+
+  levels_2 <- ds.levels("test_fill$fac_col2") %>% map(~.$Levels)
+  levels_3 <- ds.levels("test_fill$fac_col3") %>% map(~.$Levels)
+  levels_4 <- ds.levels("test_fill$fac_col4") %>% map(~.$Levels)
+  levels_5 <- ds.levels("test_fill$fac_col5") %>% map(~.$Levels)
+  levels_6 <- ds.levels("test_fill$fac_col6") %>% map(~.$Levels)
+  levels_9 <- ds.levels("test_fill$fac_col9") %>% map(~.$Levels)
+
+  expect_equal(
+    levels_2,
+    list(
+      sim1 = c("Blue", "Green", "Red"),
+      sim2 = c("Blue", "Green", "Red"),
+      sim3 = c("Blue", "Green", "Red")
+    )
+  )
+
+  expect_equal(
+    levels_3,
+    list(
+      sim1 = c("No", "Yes"),
+      sim2 = c("No", "Yes"),
+      sim3 = c("No", "Yes")
+    )
+  )
+
+  expect_equal(
+    levels_4,
+    list(
+      sim1 = c("1", "2", "3", "A", "B", "C"),
+      sim2 = c("1", "2", "3", "A", "B", "C"),
+      sim3 = c("1", "2", "3", "A", "B", "C")
+    )
+  )
+
+  expect_equal(
+    levels_5,
+    list(
+      sim1 = c("1", "2", "3", "One", "Three", "Two"),
+      sim2 = c("1", "2", "3", "One", "Three", "Two"),
+      sim3 = c("1", "2", "3", "One", "Three", "Two")
+    )
+  )
+
+  expect_equal(
+    levels_6,
+    list(
+      sim1 = c("Bird", "Cat", "Dog"),
+      sim2 = c("Bird", "Cat", "Dog"),
+      sim3 = c("Bird", "Cat", "Dog")
+    )
+  )
+
+  expect_equal(
+    levels_9,
+    list(
+      sim1 = c("False", "True"),
+      sim2 = c("False", "True"),
+      sim3 = c("False", "True")
+    )
+  )
+
+})
+
+test_that("ds.standardiseDf doesn't run if classes are not identical and fix_class is no", {
+  expect_error(
+    ds.standardiseDf(
+      df = "D",
+      newobj = "shouldnt_exist",
+      fix_class = "no"
+    ),
+    "Variables do not have the same class in all studies"
+  )
+
+  expect_equal(
+    ds.exists("shouldnt_exist")[[1]],
+    FALSE
+  )
+})
+
+test_that("ds.standardiseDf doesn't run if levels are not identical and fix_class is no", {
+  expect_error(
+    with_mocked_bindings(
+      ds.standardiseDf(
+        df = "D",
+        newobj = "shouldnt_exist",
+        fix_levels = "no"
+      ),
+      prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) c("1", "1")
+    ),
+    "Factor variables do not have the same levels in all studies"
+  )
+
+  expect_equal(
+    ds.exists("shouldnt_exist")[[1]],
+    FALSE
+  )
+})
 
 
 ## Add disclosure check levels

From de99b8ffb43adee5e244968ce6a890d3db3d08d7 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:13:05 +0200
Subject: [PATCH 10/18] test: added snapshots

---
 tests/testthat/_snaps/smk-standardiseDf.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/_snaps/smk-standardiseDf.md b/tests/testthat/_snaps/smk-standardiseDf.md
index ac1558bd..4fa52eca 100644
--- a/tests/testthat/_snaps/smk-standardiseDf.md
+++ b/tests/testthat/_snaps/smk-standardiseDf.md
@@ -38,9 +38,9 @@
       .print_var_recode_message(added_cols, "test_df")
     Message
       v The following variables have been added to test_df:
-      i server_1 --> col11
-      i server_2 --> col11
-      i server_3 --> col12
+      i sim1 --> col11
+      i sim2 --> col11
+      i sim3 --> col12
       
 
 # .print_class_recode_message prints the correct message
@@ -70,9 +70,9 @@
         level_conflicts, "1", "test_df")
     Message
       v The following variables have been added to test_df:
-      i server_1 --> col11
-      i server_2 --> col11
-      i server_3 --> col12
+      i sim1 --> col11
+      i sim2 --> col11
+      i sim3 --> col12
       
       v The following classes have been set for all datasources in test_df: 
       i fac_col4 --> factor

From 7c49bab4c9a353c034d60ebc71e6efffef14561a Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:13:47 +0200
Subject: [PATCH 11/18] require dsbase

---
 tests/testthat/setup.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R
index 4c55c6e7..1b0f9b8a 100644
--- a/tests/testthat/setup.R
+++ b/tests/testthat/setup.R
@@ -24,6 +24,7 @@ library(DescTools)
 library(DSOpal)
 library(DSMolgenisArmadillo)
 library(DSLite)
+library(dsBase)
 
 source("dstest_functions/ds_expect_variables.R")
 source("perf_tests/perf_rate.R")

From 02ef0aa4b9cecff43ad7abf59111d5146f011821 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:14:16 +0200
Subject: [PATCH 12/18] set minimum testthat edition as 3

---
 DESCRIPTION | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b0779e7f..d5dc606a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -67,6 +67,8 @@ Suggests:
     DescTools,
     DSOpal,
     DSMolgenisArmadillo,
-    DSLite
+    DSLite,
+    dsBase
 RoxygenNote: 7.3.2
 Encoding: UTF-8
+Config/testthat/edition: 3

From 4c419b3c592839943ba48a04e0d419ba800dd762 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:14:36 +0200
Subject: [PATCH 13/18] test: define new dataset for standardiseDf testing

---
 .../init_studies_datasets.R                   | 213 +++++++++++-------
 1 file changed, 127 insertions(+), 86 deletions(-)

diff --git a/tests/testthat/connection_to_datasets/init_studies_datasets.R b/tests/testthat/connection_to_datasets/init_studies_datasets.R
index 0639aac6..e7e412ab 100644
--- a/tests/testthat/connection_to_datasets/init_studies_datasets.R
+++ b/tests/testthat/connection_to_datasets/init_studies_datasets.R
@@ -1,85 +1,85 @@
 init.studies.dataset.cnsim <- function(variables)
 {
-    if (ds.test_env$secure_login_details)
+  if (ds.test_env$secure_login_details)
+  {
+    if (ds.test_env$driver == "OpalDriver")
     {
-      if (ds.test_env$driver == "OpalDriver")
-      {
-        builder <- DSI::newDSLoginBuilder(.silent = TRUE)
-        builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "CNSIM.CNSIM1", options=ds.test_env$options_1)
-        builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "CNSIM.CNSIM2", options=ds.test_env$options_2)
-        builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "CNSIM.CNSIM3", options=ds.test_env$options_3)
-        ds.test_env$login.data <- builder$build()
-      }
-      else if (ds.test_env$driver == "ArmadilloDriver")
-      {
-        builder <- DSI::newDSLoginBuilder(.silent = TRUE)
-        builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/cnsim/CNSIM1", driver = ds.test_env$driver)
-        builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/cnsim/CNSIM2", driver = ds.test_env$driver)
-        builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cnsim/CNSIM3", driver = ds.test_env$driver)
-        ds.test_env$login.data <- builder$build()
-      }
-      else 
-      {
-         ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env)
-      }
-      ds.test_env$stats.var <- variables
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "CNSIM.CNSIM1", options=ds.test_env$options_1)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "CNSIM.CNSIM2", options=ds.test_env$options_2)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "CNSIM.CNSIM3", options=ds.test_env$options_3)
+      ds.test_env$login.data <- builder$build()
     }
+    else if (ds.test_env$driver == "ArmadilloDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/cnsim/CNSIM1", driver = ds.test_env$driver)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/cnsim/CNSIM2", driver = ds.test_env$driver)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cnsim/CNSIM3", driver = ds.test_env$driver)
+      ds.test_env$login.data <- builder$build()
+    }
+    else
+    {
+      ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env)
+    }
+    ds.test_env$stats.var <- variables
+  }
 }
 
 init.studies.dataset.dasim <- function(variables)
 {
-    if (ds.test_env$secure_login_details)
+  if (ds.test_env$secure_login_details)
+  {
+    if (ds.test_env$driver == "OpalDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "DASIM.DASIM1", options=ds.test_env$options_1)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "DASIM.DASIM2", options=ds.test_env$options_2)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "DASIM.DASIM3", options=ds.test_env$options_3)
+      ds.test_env$login.data <- builder$build()
+    }
+    else if (ds.test_env$driver == "ArmadilloDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/dasim/DASIM1", driver = ds.test_env$driver)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/dasim/DASIM2", driver = ds.test_env$driver)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/dasim/DASIM3", driver = ds.test_env$driver)
+      ds.test_env$login.data <- builder$build()
+    }
+    else
     {
-      if (ds.test_env$driver == "OpalDriver")
-      {
-        builder <- DSI::newDSLoginBuilder(.silent = TRUE)
-        builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "DASIM.DASIM1", options=ds.test_env$options_1)
-        builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "DASIM.DASIM2", options=ds.test_env$options_2)
-        builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "DASIM.DASIM3", options=ds.test_env$options_3)
-        ds.test_env$login.data <- builder$build()
-      }
-      else if (ds.test_env$driver == "ArmadilloDriver")
-      {
-        builder <- DSI::newDSLoginBuilder(.silent = TRUE)
-        builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/dasim/DASIM1", driver = ds.test_env$driver)
-        builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/dasim/DASIM2", driver = ds.test_env$driver)
-        builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/dasim/DASIM3", driver = ds.test_env$driver)
-        ds.test_env$login.data <- builder$build()
-      }
-      else 
-      {
-        ds.test_env$login.data <- DSLite::setupDASIMTest("dsBase", env = ds.test_env)
-      }
-      ds.test_env$stats.var <- variables
+      ds.test_env$login.data <- DSLite::setupDASIMTest("dsBase", env = ds.test_env)
     }
+    ds.test_env$stats.var <- variables
+  }
 }
 
 init.studies.dataset.survival <- function(variables)
 {
-    if (ds.test_env$secure_login_details)
+  if (ds.test_env$secure_login_details)
+  {
+    if (ds.test_env$driver == "OpalDriver")
     {
-      if (ds.test_env$driver == "OpalDriver")
-      {
-        builder <- DSI::newDSLoginBuilder(.silent = TRUE)
-        builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "SURVIVAL.EXPAND_WITH_MISSING1", options=ds.test_env$options_1)
-        builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "SURVIVAL.EXPAND_WITH_MISSING2", options=ds.test_env$options_2)
-        builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "SURVIVAL.EXPAND_WITH_MISSING3", options=ds.test_env$options_3)
-        ds.test_env$login.data <- builder$build()
-      }
-      else if (ds.test_env$driver == "ArmadilloDriver")
-      {
-        builder <- DSI::newDSLoginBuilder(.silent = TRUE)
-        builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/survival/EXPAND_WITH_MISSING1", driver = ds.test_env$driver)
-        builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/survival/EXPAND_WITH_MISSING2", driver = ds.test_env$driver)
-        builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/survival/EXPAND_WITH_MISSING3", driver = ds.test_env$driver)
-        ds.test_env$login.data <- builder$build()
-      }
-      else 
-      {
-        ds.test_env$login.data <- DSLite::setupSURVIVALTest("dsBase", env = ds.test_env)
-      }
-      ds.test_env$stats.var <- variables
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "SURVIVAL.EXPAND_WITH_MISSING1", options=ds.test_env$options_1)
+      builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "SURVIVAL.EXPAND_WITH_MISSING2", options=ds.test_env$options_2)
+      builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "SURVIVAL.EXPAND_WITH_MISSING3", options=ds.test_env$options_3)
+      ds.test_env$login.data <- builder$build()
+    }
+    else if (ds.test_env$driver == "ArmadilloDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "survival1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/survival/EXPAND_WITH_MISSING1", driver = ds.test_env$driver)
+      builder$append(server = "survival2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/survival/EXPAND_WITH_MISSING2", driver = ds.test_env$driver)
+      builder$append(server = "survival3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/survival/EXPAND_WITH_MISSING3", driver = ds.test_env$driver)
+      ds.test_env$login.data <- builder$build()
+    }
+    else
+    {
+      ds.test_env$login.data <- DSLite::setupSURVIVALTest("dsBase", env = ds.test_env)
     }
+    ds.test_env$stats.var <- variables
+  }
 }
 
 init.studies.dataset.cluster.int <- function(variables)
@@ -107,7 +107,7 @@ init.studies.dataset.cluster.int <- function(variables)
       builder$append(server = "cluster.int3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cluster/CLUSTER_INT3", driver = ds.test_env$driver)
       ds.test_env$login.data <- builder$build()
     }
-    else 
+    else
     {
       #to do
       #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env)
@@ -141,7 +141,7 @@ init.studies.dataset.cluster.slo <- function(variables)
       builder$append(server = "cluster.slo3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/cluster/CLUSTER_SLO3", driver = ds.test_env$driver)
       ds.test_env$login.data <- builder$build()
     }
-    else 
+    else
     {
       #to do
       #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env)
@@ -176,7 +176,7 @@ init.studies.dataset.anthro <- function(variables)
       builder$append(server = "study3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/anthro/anthro3", driver = ds.test_env$driver)
       ds.test_env$login.data <- builder$build()
     }
-    else 
+    else
     {
       #to do
       #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env)
@@ -211,39 +211,67 @@ init.studies.dataset.gamlss <- function(variables)
       builder$append(server = "study3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/gamlss/gamlss3", driver = ds.test_env$driver)
       ds.test_env$login.data <- builder$build()
     }
-    else 
+    else
     {
       #to do
       #ds.test_env$login.data <- DSLite::setupCLUSTERTest("dsBase", env = ds.test_env)
     }
     ds.test_env$stats.var <- variables
-    
+
+  }
+}
+
+init.studies.dataset.stand <- function(variables)
+{
+  if (ds.test_env$secure_login_details)
+  {
+    if (ds.test_env$driver == "OpalDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1", options=ds.test_env$options_1)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_1", options=ds.test_env$options_2)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_1", options=ds.test_env$options_3)
+      ds.test_env$login.data <- builder$build()
+    }
+    else if (ds.test_env$driver == "ArmadilloDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1", driver = ds.test_env$driver)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2", driver = ds.test_env$driver)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3", driver = ds.test_env$driver)
+      ds.test_env$login.data <- builder$build()
+    }
+    else
+    {
+      ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env)
+    }
+    ds.test_env$stats.var <- variables
   }
 }
 
 
 connect.studies.dataset.cnsim <- function(variables)
 {
-    log.out.data.server()
-    source("connection_to_datasets/login_details.R")
-    init.studies.dataset.cnsim(variables)
-    log.in.data.server()
+  log.out.data.server()
+  source("connection_to_datasets/login_details.R")
+  init.studies.dataset.cnsim(variables)
+  log.in.data.server()
 }
 
 connect.studies.dataset.dasim <- function(variables)
 {
-    log.out.data.server()
-    source("connection_to_datasets/login_details.R")
-    init.studies.dataset.dasim(variables)
-    log.in.data.server()
+  log.out.data.server()
+  source("connection_to_datasets/login_details.R")
+  init.studies.dataset.dasim(variables)
+  log.in.data.server()
 }
 
 connect.studies.dataset.survival <- function(variables)
 {
-    log.out.data.server()
-    source("connection_to_datasets/login_details.R")
-    init.studies.dataset.survival(variables)
-    log.in.data.server()
+  log.out.data.server()
+  source("connection_to_datasets/login_details.R")
+  init.studies.dataset.survival(variables)
+  log.in.data.server()
 }
 
 connect.studies.dataset.cluster.int <- function(variables)
@@ -278,19 +306,27 @@ connect.studies.dataset.gamlss <- function(variables)
   log.in.data.server()
 }
 
+connect.studies.dataset.stand <- function(variables)
+{
+  log.out.data.server()
+  source("connection_to_datasets/login_details.R")
+  init.studies.dataset.stand(variables)
+  log.in.data.server()
+}
+
 disconnect.studies.dataset.cnsim <- function()
 {
-    log.out.data.server()
+  log.out.data.server()
 }
 
 disconnect.studies.dataset.dasim <- function()
 {
-    log.out.data.server()
+  log.out.data.server()
 }
 
 disconnect.studies.dataset.survival <- function()
 {
-    log.out.data.server()
+  log.out.data.server()
 }
 
 disconnect.studies.dataset.cluster.int <- function()
@@ -312,3 +348,8 @@ disconnect.studies.dataset.gamlss <- function()
 {
   log.out.data.server()
 }
+
+disconnect.studies.dataset.stand <- function()
+{
+  log.out.data.server()
+}

From b237582c56bc2bbc07a7f50b547cbe445601096a Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:16:34 +0200
Subject: [PATCH 14/18] remove old helper file

---
 tests/testthat/helpers.R | 218 ---------------------------------------
 1 file changed, 218 deletions(-)
 delete mode 100644 tests/testthat/helpers.R

diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R
deleted file mode 100644
index 919df923..00000000
--- a/tests/testthat/helpers.R
+++ /dev/null
@@ -1,218 +0,0 @@
-#' Create a DSLite login object that can be used for testing
-#'
-#' @param assign_method A string specifying the name of the custom assign method to be added
-#'   to the DSLite server. If `NULL`, no additional assign method is added. Default is `NULL`.
-#' @param aggregate_method A string specifying the name of the custom aggregate method to be
-#'   added to the DSLite server. If `NULL`, no additional aggregate method is added. Default is `NULL`.
-#' @param tables A named list of tables to be made available on the DSLite server. Default is `NULL`.
-#'
-#' @return A DataSHIELD login object containing the necessary connection information for the DSLite server.
-#'
-#' @examples
-#' \dontrun{
-#' # Prepare a DSLite server with default methods and custom assign/aggregate methods
-#' login_data <- .prepare_dslite(
-#'   assign_method = "customAssign",
-#'   aggregate_method = "customAggregate",
-#'   tables = list(mtcars = mtcars, mtcars_group = mtcars_group)
-#'   )
-#'
-#' @importFrom DSLite newDSLiteServer
-#' @importFrom DSI newDSLoginBuilder
-#' @export
-.prepare_dslite <- function(assign_method = NULL, aggregate_method = NULL, tables = NULL) {
-
-  options(datashield.env = environment())
-  dslite.server <- DSLite::newDSLiteServer(tables = tables)
-  dslite.server$config(defaultDSConfiguration(include = c("dsBase", "dsTidyverse")))
-  dslite.server$aggregateMethod("exists", "base::exists")
-  dslite.server$aggregateMethod("classDS", "dsBase::classDS")
-  dslite.server$aggregateMethod("lsDS", "dsBase::lsDS")
-  dslite.server$aggregateMethod("dsListDisclosureSettings", "dsTidyverse::dsListDisclosureSettings")
-
-  if (!is.null(assign_method)) {
-    dslite.server$assignMethod(assign_method, paste0("dsTidyverse::", assign_method))
-  }
-
-  if (!is.null(aggregate_method)) {
-    dslite.server$aggregateMethod(assign_method, paste0("dsTidyverse::", assign_method))
-  }
-
-  builder <- DSI::newDSLoginBuilder()
-  builder$append(server = "server_1", url = "dslite.server", driver = "DSLiteDriver")
-  builder$append(server = "server_2", url = "dslite.server", driver = "DSLiteDriver")
-  builder$append(server = "server_3", url = "dslite.server", driver = "DSLiteDriver")
-  login_data <- builder$build()
-  return(login_data)
-}
-
-#' Create a mixed dataframe with factor and other types of columns
-#'
-#' This function generates a dataframe with a specified number of rows,
-#' factor columns, and other columns (integer, numeric, and string).
-#'
-#' @param n_rows Number of rows in the dataframe. Default is 10,000.
-#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15.
-#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15.
-#'
-#' @return A dataframe with the specified number of rows and columns, containing mixed data types.
-#' @importFrom dplyr bind_cols
-#' @importFrom purrr map_dfc
-#' @examples
-#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
-create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) {
-
-  # Function to create a factor column with defined levels
-  create_factor_column <- function(levels, n = n_rows) {
-    set.seed(123)  # Set seed before sample for reproducibility
-    factor(sample(levels, n, replace = TRUE))
-  }
-
-  # Define factor levels for different columns
-  factor_levels <- list(
-    c("Low", "Medium", "High"),
-    c("Red", "Green", "Blue"),
-    c("Yes", "No"),
-    c("A", "B", "C"),
-    c("One", "Two", "Three"),
-    c("Cat", "Dog", "Bird"),
-    c("Small", "Medium", "Large"),
-    c("Alpha", "Beta", "Gamma"),
-    c("True", "False"),
-    c("Left", "Right"),
-    c("North", "South", "East", "West"),
-    c("Day", "Night"),
-    c("Up", "Down"),
-    c("Male", "Female"),
-    c("Summer", "Winter", "Spring", "Fall")
-  )
-
-  # Create factor columns
-  factor_columns <- map_dfc(factor_levels[1:n_factor_cols], create_factor_column)
-  colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols)
-
-  # Function to create other types of columns
-  create_other_column <- function(type, n = n_rows) {
-    set.seed(123)  # Set seed before sample for reproducibility
-    switch(type,
-           "int" = sample(1:100, n, replace = TRUE),        # Integer column
-           "num" = runif(n, 0, 100),                       # Numeric column
-           "str" = sample(letters, n, replace = TRUE),      # Character column
-           "log" = sample(c(TRUE, FALSE), n, replace = TRUE) # Logical column
-    )
-  }
-
-  # Ensure that each data type is included
-  column_types <- c(
-    "int", "num", "str", "log", "int",
-    "num", "str", "log", "int", "num",
-    "str", "int", "num", "log", "str"
-  )
-
-  # Create other columns with specified types
-  other_columns <- map_dfc(column_types[1:n_other_cols], create_other_column)
-  colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols))
-
-  # Combine factor and other columns into a single dataframe
-  df <- bind_cols(factor_columns, other_columns)
-
-  return(df)
-}
-
-
-#' Modify factor levels for partial overlap
-#'
-#' This function takes two sets of factor levels, computes the common and unique levels,
-#' and returns a new set of levels with partial overlap.
-#'
-#' @param levels1 First set of factor levels.
-#' @param levels2 Second set of factor levels.
-#'
-#' @return A character vector of new factor levels with partial overlap.
-#' @examples
-#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D"))
-partial_overlap_levels <- function(levels1, levels2) {
-  common <- intersect(levels1, levels2)
-  unique1 <- setdiff(levels1, common)
-  unique2 <- setdiff(levels2, common)
-
-  # Set seed before each sample call
-  set.seed(123)
-  sampled_unique1 <- sample(unique1, length(unique1) * 0.5)
-
-  set.seed(123)
-  sampled_unique2 <- sample(unique2, length(unique2) * 0.5)
-
-  new_levels <- c(common, sampled_unique1, sampled_unique2)
-  return(new_levels)
-}
-
-
-#' Create additional dataframes with specific conditions
-#'
-#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels,
-#' and adding new columns with unique names. Different seeds are used for each iteration of the loop,
-#' ensuring reproducibility of the generated dataframes.
-#'
-#' @param base_df The base dataframe used to create the additional dataframes.
-#' @param n_rows Number of rows in the additional dataframes. Default is 10,000.
-#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3").
-#'
-#' @return A list of dataframes with the specified modifications.
-#' @importFrom dplyr bind_cols
-#' @examples
-#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
-#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2"))
-create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) {
-
-  # Define a fixed sequence of seeds, one for each dataframe to be created
-  seeds <- c(123, 456, 789, 101112)
-
-  df_list <- list()
-
-  for (i in seq_along(df_names)) {
-    # Set the seed for this iteration based on the pre-defined seeds
-    set.seed(seeds[i])
-
-    # Proceed with the dataframe generation process
-    overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df)))
-    df <- base_df
-    cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols)))
-
-    # Modify columns to have different data types
-    for (col in cols_to_modify_class) {
-      current_class <- class(df[[col]])
-      new_class <- switch(current_class,
-                          "factor" = as.character(df[[col]]),
-                          "character" = as.factor(df[[col]]),
-                          "numeric" = as.integer(df[[col]]),
-                          "integer" = as.numeric(df[[col]]),
-                          df[[col]])
-      df[[col]] <- new_class
-    }
-
-    # Modify factor levels for partial overlap
-    factor_cols <- colnames(base_df)[sapply(base_df, is.factor)]
-    overlap_factor_cols <- intersect(overlap_cols, factor_cols)
-    cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols)))
-
-    for (col in cols_to_modify_levels) {
-      original_levels <- levels(base_df[[col]])
-      new_levels <- partial_overlap_levels(original_levels, original_levels)
-      df[[col]] <- factor(df[[col]], levels = new_levels)
-    }
-
-    # Create new random columns for each dataframe (these will vary by seed)
-    set.seed(seeds[i])  # Set the seed again for generating new columns
-    n_new_cols <- round(0.2 * ncol(base_df))
-    new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols)
-    new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols))
-    colnames(new_cols) <- new_col_names
-
-    # Bind new columns to the dataframe
-    df <- bind_cols(df, new_cols)
-    df_list[[df_names[i]]] <- df
-  }
-
-  return(df_list)
-}

From ea62922988afa61df72fac6f74e50fad7cf1d1d6 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:16:45 +0200
Subject: [PATCH 15/18] add test data

---
 .../testthat/data_files/STANDARDISE/std_1.csv | 101 ++++++++++++++++++
 .../testthat/data_files/STANDARDISE/std_1.rda | Bin 0 -> 1474 bytes
 .../testthat/data_files/STANDARDISE/std_2.csv | 101 ++++++++++++++++++
 .../testthat/data_files/STANDARDISE/std_2.rda | Bin 0 -> 1706 bytes
 .../testthat/data_files/STANDARDISE/std_3.csv | 101 ++++++++++++++++++
 .../testthat/data_files/STANDARDISE/std_3.rda | Bin 0 -> 1597 bytes
 6 files changed, 303 insertions(+)
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_1.csv
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_1.rda
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_2.csv
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_2.rda
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_3.csv
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_3.rda

diff --git a/tests/testthat/data_files/STANDARDISE/std_1.csv b/tests/testthat/data_files/STANDARDISE/std_1.csv
new file mode 100644
index 00000000..32890959
--- /dev/null
+++ b/tests/testthat/data_files/STANDARDISE/std_1.csv
@@ -0,0 +1,101 @@
+fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18
+High,Blue,Yes,3,NA,Bird,True,28.757752012461424,31,TRUE
+High,Blue,Yes,3,NA,Bird,True,78.83051354438066,79,TRUE
+High,Blue,Yes,3,NA,Bird,True,40.89769218116999,51,TRUE
+Medium,Green,No,2,NA,Dog,False,88.301740400493145,14,FALSE
+High,Blue,Yes,3,NA,Bird,True,94.04672842938453,67,TRUE
+Medium,Green,No,2,NA,Dog,False,4.555649938993156,42,FALSE
+Medium,Green,No,2,NA,Dog,False,52.810548804700375,50,FALSE
+Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE
+High,Blue,Yes,3,NA,Bird,True,55.14350144658238,14,TRUE
+Low,NA,Yes,1,NA,Cat,True,45.661473530344665,25,TRUE
+Medium,Green,No,2,NA,Dog,False,95.68333453498781,90,FALSE
+Medium,Green,No,2,NA,Dog,False,45.33341561909765,91,FALSE
+Low,NA,No,1,NA,Cat,False,67.75706354528666,69,FALSE
+Medium,Green,Yes,2,NA,Dog,True,57.26334019564092,91,TRUE
+High,Blue,No,3,NA,Bird,False,10.292468266561627,57,FALSE
+Low,NA,Yes,1,NA,Cat,True,89.98249704018235,92,TRUE
+High,Blue,No,3,NA,Bird,False,24.60877343546599,9,FALSE
+High,Blue,Yes,3,NA,Bird,True,4.205953353084624,93,TRUE
+Low,NA,Yes,1,NA,Cat,True,32.79207192827016,99,TRUE
+Low,NA,Yes,1,NA,Cat,True,95.45036491472274,72,TRUE
+Low,NA,Yes,1,NA,Cat,True,88.95393160637468,26,TRUE
+Low,NA,No,1,NA,Cat,False,69.28034061565995,7,FALSE
+High,Blue,Yes,3,NA,Bird,True,64.05068137682974,42,TRUE
+Medium,Green,Yes,2,NA,Dog,True,99.42697766236961,9,TRUE
+High,Blue,Yes,3,NA,Bird,True,65.57057991158217,83,TRUE
+Medium,Green,Yes,2,NA,Dog,True,70.85304681677371,36,TRUE
+Low,NA,No,1,NA,Cat,False,54.40660247113556,78,FALSE
+Medium,Green,No,2,NA,Dog,False,59.41420204471797,81,FALSE
+High,Blue,Yes,3,NA,Bird,True,28.91597372945398,43,TRUE
+Medium,Green,No,2,NA,Dog,False,14.711364731192589,76,FALSE
+Low,NA,Yes,1,NA,Cat,True,96.30242325365543,15,TRUE
+High,Blue,No,3,NA,Bird,False,90.22990451194346,32,FALSE
+High,Blue,Yes,3,NA,Bird,True,69.07052784226835,7,TRUE
+Low,NA,No,1,NA,Cat,False,79.54674176871777,9,FALSE
+High,Blue,No,3,NA,Bird,False,2.461368450894952,41,FALSE
+Medium,Green,Yes,2,NA,Dog,True,47.77959710918367,74,TRUE
+Low,NA,Yes,1,NA,Cat,True,75.84595375228673,23,TRUE
+High,Blue,Yes,3,NA,Bird,True,21.640793583355844,27,TRUE
+Low,NA,Yes,1,NA,Cat,True,31.818100763484836,60,TRUE
+Low,NA,No,1,NA,Cat,False,23.16257853526622,53,FALSE
+Medium,Green,Yes,2,NA,Dog,True,14.280002238228917,7,TRUE
+High,Blue,No,3,NA,Bird,False,41.45463358145207,53,FALSE
+High,Blue,No,3,NA,Bird,False,41.372432629577816,27,FALSE
+Low,NA,Yes,1,NA,Cat,True,36.884545092470944,96,TRUE
+High,Blue,Yes,3,NA,Bird,True,15.244474774226546,38,TRUE
+Low,NA,Yes,1,NA,Cat,True,13.880606344901025,89,TRUE
+High,Blue,Yes,3,NA,Bird,True,23.303409945219755,34,TRUE
+Medium,Green,No,2,NA,Dog,False,46.59624502528459,93,FALSE
+Low,NA,Yes,1,NA,Cat,True,26.597264036536217,69,TRUE
+Medium,Green,Yes,2,NA,Dog,True,85.78277153428644,72,TRUE
+Low,NA,No,1,NA,Cat,False,4.583116667345166,76,FALSE
+Low,NA,Yes,1,NA,Cat,True,44.220007420517504,63,TRUE
+High,Blue,Yes,3,NA,Bird,True,79.89248456433415,13,TRUE
+Low,NA,Yes,1,NA,Cat,True,12.189925997518003,82,TRUE
+Medium,Green,Yes,2,NA,Dog,True,56.094798375852406,97,TRUE
+Low,NA,No,1,NA,Cat,False,20.65313896164298,91,FALSE
+Low,NA,No,1,NA,Cat,False,12.753165024332702,25,FALSE
+High,Blue,Yes,3,NA,Bird,True,75.33078643027693,38,TRUE
+Low,NA,No,1,NA,Cat,False,89.50453591533005,21,FALSE
+Medium,Green,Yes,2,NA,Dog,True,37.44627758860588,79,TRUE
+Low,NA,Yes,1,NA,Cat,True,66.51151946280152,41,TRUE
+High,Blue,No,3,NA,Bird,False,9.484066092409194,47,FALSE
+Low,NA,No,1,NA,Cat,False,38.39696377981454,90,FALSE
+High,Blue,Yes,3,NA,Bird,True,27.43836445733905,60,TRUE
+Medium,Green,Yes,2,NA,Dog,True,81.46400388795882,95,TRUE
+High,Blue,No,3,NA,Bird,False,44.851634139195085,16,FALSE
+Medium,Green,Yes,2,NA,Dog,True,81.00643530488014,94,TRUE
+Medium,Green,Yes,2,NA,Dog,True,81.23895095195621,6,TRUE
+High,Blue,Yes,3,NA,Bird,True,79.43423211108893,72,TRUE
+Medium,Green,Yes,2,NA,Dog,True,43.983168760314584,86,TRUE
+Medium,Green,No,2,NA,Dog,False,75.44751586392522,86,FALSE
+High,Blue,Yes,3,NA,Bird,True,62.922113155946136,39,TRUE
+High,Blue,Yes,3,NA,Bird,True,71.01824013516307,31,TRUE
+Low,NA,Yes,1,NA,Cat,True,0.062477332539856434,81,TRUE
+Medium,Green,Yes,2,NA,Dog,True,47.53165740985423,50,TRUE
+Medium,Green,No,2,NA,Dog,False,22.011888516135514,34,FALSE
+Low,NA,No,1,NA,Cat,False,37.98165377229452,4,FALSE
+Medium,Green,Yes,2,NA,Dog,True,61.277100327424705,13,TRUE
+Low,NA,No,1,NA,Cat,False,35.179790924303234,69,FALSE
+Low,NA,No,1,NA,Cat,False,11.113542434759438,25,FALSE
+Medium,Green,No,2,NA,Dog,False,24.361947271972895,52,FALSE
+High,Blue,No,3,NA,Bird,False,66.80555874481797,22,FALSE
+High,Blue,Yes,3,NA,Bird,True,41.764677967876196,89,TRUE
+Low,NA,No,1,NA,Cat,False,78.81958340294659,32,FALSE
+Medium,Green,No,2,NA,Dog,False,10.286464425735176,25,FALSE
+Low,NA,No,1,NA,Cat,False,43.489274149760604,87,FALSE
+Medium,Green,Yes,2,NA,Dog,True,98.49569799844176,35,TRUE
+Low,NA,Yes,1,NA,Cat,True,89.30511143989861,40,TRUE
+High,Blue,No,3,NA,Bird,False,88.64690607879311,30,FALSE
+High,Blue,Yes,3,NA,Bird,True,17.505265027284622,12,TRUE
+Medium,Green,No,2,NA,Dog,False,13.069569156505167,31,FALSE
+High,Blue,No,3,NA,Bird,False,65.31019250396639,30,FALSE
+Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE
+Medium,Green,No,2,NA,Dog,False,65.67581279668957,99,FALSE
+Medium,Green,No,2,NA,Dog,False,32.03732424881309,14,FALSE
+High,Blue,Yes,3,NA,Bird,True,18.769111926667392,93,TRUE
+Medium,Green,Yes,2,NA,Dog,True,78.22943013161421,96,TRUE
+Low,NA,No,1,NA,Cat,False,9.359498671256006,71,FALSE
+High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE
+High,Blue,Yes,3,NA,Bird,True,51.15054599009454,23,TRUE
diff --git a/tests/testthat/data_files/STANDARDISE/std_1.rda b/tests/testthat/data_files/STANDARDISE/std_1.rda
new file mode 100644
index 0000000000000000000000000000000000000000..df5a8854fcbee0ae5fbf212f51ae0b47675348b7
GIT binary patch
literal 1474
zcmV;z1wHy7iwFP!000001MOIQP*hbIKipm3U`$LYNs22Y0zvUuK!I3b0YhHP0s?`!
zunSTPYYRw?ql3m&HVzr|Aasx>CoMFc3Pz0~a0tq=DIG0Jr=o{C_EM)wE0Ml$_guOh
z?%f6c$v@ngkKg&e^PTT`&b^15t1gUiD|91-$cS9d5t$<liDQ0VYDf$rARxqn$cZbU
zTwhrh272Bwh6OTDK)q%DKavShCg3>%&oOcb!B|MFJx+voVC)Fx9J9uOnPcs8%)Bib
zYeSebJC@2{Dn3?+tt}_wOQ1KUXHG`LMuV{qMqbtyTRXX`x@tWg%Q8%jymF>NU)@l{
zm~gBx>gsq4lXfe074;@FjBZKn1hxwnzKK|V7G5ys<qrH<NUS~KIsreqmX!<aaIBt)
zJ}2~RPsZ9HQ*&m=qWRm1k7IC3ZH2aoNG+^w<R*x<a~$9MBx8etZ*7{{V5qe!%6LWF
zEoKj>aDz@uSEMXb2VG1iwbs@HV<z*n@hN6+t~rR+M9Q|9o`^5e7IRr9{%{u=>Soej
zG7sCUa<OioX-Fh^7U_J#74i9La#r6(TrP$7;*;_>ZC=8!J^g9%uzn=$*|pE`lI!QF
zXD<%PKHHPevj4NLpB+2)_N`Q0Gu;=&a_=GfCNlL6b%*|6x^ov5jqx_Qi%!0tCU2if
z)n+~af=bra^Fy_%>dsZfhEFkjxBFGZF;!=*<FA|YJTqK9XMP=t8@%cI5IlahJpqbU
zfNJx_w$A|zyt8|60Iu0K{#rcX-7z;aP%ph|zzeY;dsE4Az}2m56UzZp&vXYZL;asW
z7#K$W?Sm!10p?t(JGLJ%<Mo~;2LSzF?yua5>$i->ssUqmjkXy9y-xpRLb=LNzWXxT
zIe&Vj6!qdgqXPgnnyWrTXixTV>3x8O=d>M*kk>cscHw+)yXGLq6Y0LAA272=-uen)
z+~v)QzJMXd=!qs==Usf!3+=Y_4nBtK)K5Pfh53I!ehTxF9Ui-?0!;hilP`7v=Kkrw
zPzk8q>+Vqkm~hJE3IDXKq60fWj0Ak(g_3%-r#==|djauK_qPRrNl(NK!;4H6x;gkH
z+6_Mb&DkTkF7rSt@@W~G!4rTh+b^sh0L(Wp_;m#9obS8c59{X}7|CH=e=TZ!32?bC
z{%`C@hKHgU<Br@HdH)8$z^$F5?HK3T;Et~_uXX7fT$glo@gB6B)796v1W<AP%WWS6
zCWn94^E_behM0XAM_%c>@mS~78m{mU<W<$CF2wBg8^5Ffb0-=$0j7+PK9z@l&hPcX
zc(OM-e|i$@HyAVueN8wt?!2C0UPGQm9e`0XcR#F8()gQa4g+Sr@uZUzVB)K?3hc|W
zm5SI)*#DCKZP@pm_gYeMKd8sPyN-3zzF+To4fjiHcjE%|-`};}1N*n<XtNLIseR^}
z0x_`i(jZ{i)*=t=Z_v`t7oS6W+ONajLVqFLa3AuItC}^qzvLrsTL8m{er)~){j|LE
z5BgIa@lqnD6@0Wa4)Bq-l=&G1fBd6xF;F&81U);+KtVv^K*3Z7k&$AcM}bl(t^k6a
zV>y9PX;J|MCyl@^CeG9b&S`=CfwF)!)Fwu77my#-MZG|vbfEb_3xOb7qNMXmDq9Bx
zr;!u^DX8BR>KAP$(7bYibTm#My5=65C;EAi=7T=U=+k5^K1ryp0!jh2J{npN^eLx4
zQmAd%4dh-LlOL6>0a^;Q90*P`@t}5atfIA=N8`tw(rDc=cQ-QqIzj|U1iYAxMKL?9
zOlmFbODY~)erD=Z%-&pcAU0&Sa<Jil>UXVLU5$bNuH{CBF7O&Et1ubE7(SfgBN#rC
z;gt*@#qcX8KdynVa3P}<GVCW<SF@=xl#d%XFxLcsCMG6!Gp|+tuH{r;ZY<MR@>eY2
c=+I5CtJj5An!y&azH@K=3;o3R#|st!04bEq-v9sr

literal 0
HcmV?d00001

diff --git a/tests/testthat/data_files/STANDARDISE/std_2.csv b/tests/testthat/data_files/STANDARDISE/std_2.csv
new file mode 100644
index 00000000..9c20866c
--- /dev/null
+++ b/tests/testthat/data_files/STANDARDISE/std_2.csv
@@ -0,0 +1,101 @@
+fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19
+High,NA,NA,C,2,Large,Left,o,28.757752012461424,31
+High,NA,NA,C,2,Large,Left,s,78.83051354438066,79
+High,NA,NA,C,2,Large,Left,n,40.89769218116999,51
+Medium,Green,No,B,3,Medium,Right,c,88.301740400493145,14
+High,NA,NA,C,2,Large,Left,j,94.04672842938453,67
+Medium,Green,No,B,3,Medium,Right,r,4.555649938993156,42
+Medium,Green,No,B,3,Medium,Right,v,52.810548804700375,50
+Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43
+High,NA,NA,C,2,Large,Left,e,55.14350144658238,14
+Low,Red,NA,A,1,Small,Left,t,45.661473530344665,25
+Medium,Green,No,B,3,Medium,Right,n,95.68333453498781,90
+Medium,Green,No,B,3,Medium,Right,v,45.33341561909765,91
+Low,Red,No,A,1,Small,Right,y,67.75706354528666,69
+Medium,Green,NA,B,3,Medium,Left,z,57.26334019564092,91
+High,NA,No,C,2,Large,Right,e,10.292468266561627,57
+Low,Red,NA,A,1,Small,Left,s,89.98249704018235,92
+High,NA,No,C,2,Large,Right,y,24.60877343546599,9
+High,NA,NA,C,2,Large,Left,y,4.205953353084624,93
+Low,Red,NA,A,1,Small,Left,i,32.79207192827016,99
+Low,Red,NA,A,1,Small,Left,c,95.45036491472274,72
+Low,Red,NA,A,1,Small,Left,h,88.95393160637468,26
+Low,Red,No,A,1,Small,Right,z,69.28034061565995,7
+High,NA,NA,C,2,Large,Left,g,64.05068137682974,42
+Medium,Green,NA,B,3,Medium,Left,j,99.42697766236961,9
+High,NA,NA,C,2,Large,Left,i,65.57057991158217,83
+Medium,Green,NA,B,3,Medium,Left,s,70.85304681677371,36
+Low,Red,No,A,1,Small,Right,d,54.40660247113556,78
+Medium,Green,No,B,3,Medium,Right,n,59.41420204471797,81
+High,NA,NA,C,2,Large,Left,q,28.91597372945398,43
+Medium,Green,No,B,3,Medium,Right,k,14.711364731192589,76
+Low,Red,NA,A,1,Small,Left,g,96.30242325365543,15
+High,NA,No,C,2,Large,Right,u,90.22990451194346,32
+High,NA,NA,C,2,Large,Left,l,69.07052784226835,7
+Low,Red,No,A,1,Small,Right,o,79.54674176871777,9
+High,NA,No,C,2,Large,Right,j,2.461368450894952,41
+Medium,Green,NA,B,3,Medium,Left,m,47.77959710918367,74
+Low,Red,NA,A,1,Small,Left,g,75.84595375228673,23
+High,NA,NA,C,2,Large,Left,i,21.640793583355844,27
+Low,Red,NA,A,1,Small,Left,i,31.818100763484836,60
+Low,Red,No,A,1,Small,Right,j,23.16257853526622,53
+Medium,Green,NA,B,3,Medium,Left,w,14.280002238228917,7
+High,NA,No,C,2,Large,Right,u,41.45463358145207,53
+High,NA,No,C,2,Large,Right,g,41.372432629577816,27
+Low,Red,NA,A,1,Small,Left,u,36.884545092470944,96
+High,NA,NA,C,2,Large,Left,f,15.244474774226546,38
+Low,Red,NA,A,1,Small,Left,y,13.880606344901025,89
+High,NA,NA,C,2,Large,Left,b,23.303409945219755,34
+Medium,Green,No,B,3,Medium,Right,e,46.59624502528459,93
+Low,Red,NA,A,1,Small,Left,h,26.597264036536217,69
+Medium,Green,NA,B,3,Medium,Left,l,85.78277153428644,72
+Low,Red,No,A,1,Small,Right,m,4.583116667345166,76
+Low,Red,NA,A,1,Small,Left,r,44.220007420517504,63
+High,NA,NA,C,2,Large,Left,a,79.89248456433415,13
+Low,Red,NA,A,1,Small,Left,y,12.189925997518003,82
+Medium,Green,NA,B,3,Medium,Left,y,56.094798375852406,97
+Low,Red,No,A,1,Small,Right,f,20.65313896164298,91
+Low,Red,No,A,1,Small,Right,u,12.753165024332702,25
+High,NA,NA,C,2,Large,Left,o,75.33078643027693,38
+Low,Red,No,A,1,Small,Right,i,89.50453591533005,21
+Medium,Green,NA,B,3,Medium,Left,o,37.44627758860588,79
+Low,Red,NA,A,1,Small,Left,z,66.51151946280152,41
+High,NA,No,C,2,Large,Right,p,9.484066092409194,47
+Low,Red,No,A,1,Small,Right,t,38.39696377981454,90
+High,NA,NA,C,2,Large,Left,f,27.43836445733905,60
+Medium,Green,NA,B,3,Medium,Left,k,81.46400388795882,95
+High,NA,No,C,2,Large,Right,h,44.851634139195085,16
+Medium,Green,NA,B,3,Medium,Left,v,81.00643530488014,94
+Medium,Green,NA,B,3,Medium,Left,v,81.23895095195621,6
+High,NA,NA,C,2,Large,Left,g,79.43423211108893,72
+Medium,Green,NA,B,3,Medium,Left,p,43.983168760314584,86
+Medium,Green,No,B,3,Medium,Right,q,75.44751586392522,86
+High,NA,NA,C,2,Large,Left,v,62.922113155946136,39
+High,NA,NA,C,2,Large,Left,r,71.01824013516307,31
+Low,Red,NA,A,1,Small,Left,q,0.062477332539856434,81
+Medium,Green,NA,B,3,Medium,Left,b,47.53165740985423,50
+Medium,Green,No,B,3,Medium,Right,d,22.011888516135514,34
+Low,Red,No,A,1,Small,Right,m,37.98165377229452,4
+Medium,Green,NA,B,3,Medium,Left,e,61.277100327424705,13
+Low,Red,No,A,1,Small,Right,v,35.179790924303234,69
+Low,Red,No,A,1,Small,Right,s,11.113542434759438,25
+Medium,Green,No,B,3,Medium,Right,y,24.361947271972895,52
+High,NA,No,C,2,Large,Right,t,66.80555874481797,22
+High,NA,NA,C,2,Large,Left,v,41.764677967876196,89
+Low,Red,No,A,1,Small,Right,y,78.81958340294659,32
+Medium,Green,No,B,3,Medium,Right,n,10.286464425735176,25
+Low,Red,No,A,1,Small,Right,y,43.489274149760604,87
+Medium,Green,NA,B,3,Medium,Left,w,98.49569799844176,35
+Low,Red,NA,A,1,Small,Left,c,89.30511143989861,40
+High,NA,No,C,2,Large,Right,h,88.64690607879311,30
+High,NA,NA,C,2,Large,Left,p,17.505265027284622,12
+Medium,Green,No,B,3,Medium,Right,l,13.069569156505167,31
+High,NA,No,C,2,Large,Right,y,65.31019250396639,30
+Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64
+Medium,Green,No,B,3,Medium,Right,c,65.67581279668957,99
+Medium,Green,No,B,3,Medium,Right,n,32.03732424881309,14
+High,NA,NA,C,2,Large,Left,g,18.769111926667392,93
+Medium,Green,NA,B,3,Medium,Left,c,78.22943013161421,96
+Low,Red,No,A,1,Small,Right,w,9.359498671256006,71
+High,NA,NA,C,2,Large,Left,v,46.677904156968,67
+High,NA,NA,C,2,Large,Left,z,51.15054599009454,23
diff --git a/tests/testthat/data_files/STANDARDISE/std_2.rda b/tests/testthat/data_files/STANDARDISE/std_2.rda
new file mode 100644
index 0000000000000000000000000000000000000000..cbe3d2791b8807083caa46ecb97bb452bf1a303f
GIT binary patch
literal 1706
zcmV;b237eViwFP!000001Ep9CY?MV1-uv%bY-zEPC{z&2L4dY`^hL{~axHyeIVjhI
zQm|02_h<{(9$b%BAP~St#Sld51AzcWA}An4EdeA*34#)gLO`n!6(2zG2{D37sWaT}
z@88}1FGp!I{bpxpznPuc%S~I7SC$rC7EK5-5|c?G#t2YJ#I&ge!?Fkg10e=tB2j=Q
zm!~2X{Ca{82aK_Rt~>kxi&29{&05i{70zzZ=vv|V6v^7a@o4P|cgDc&@w^JRAIipa
zXz$GHdiU4MkN4qgt4LyL{5th4((7(;d+R}q3${4e$q6-8)l@fHeT~d{lG|0Yu$H5U
zaC#l}j6vn~IGh2WAGF)-B`k+uOwf}d6S@|<#_XE2onC^3PvO_vv%FoyWsvxF^4FmD
ztgemc&~OPlcUFj}<JTz`7f0pd?;KoX8<h?c?3}{dS?G7W>sa$k+^(RXk@?+Yhh;V%
zWjwr1?<^lnFP~6x!i_28>*^hdq$YaYGC%n}I(3+JPMPcrr>A#zTy)1UUSG-8w}^`B
zlcVn=Zi%PFG;+dF!pS*AbkQe78sVH0GZce{U=AmiXbHh{kIvyF7UGeFv(>ft)3rK>
zLm1A-Aw;U-JuO1mET#SC)6N0f*+DjI2;U@}@O-kFN;d28Gn#BB;{9dwRw$cnzT#&k
zJ}!TMz5I9|zP1KQERCOZ#^r0x&yq5J0~d>*jrHbB{;aIldUGQ0uQxa9+4Op(_ts?h
z*UL}#9%Xa6PAxip@3E;i!s_r>xw%=EZrN)cUT^r5PM95+*?5%k@HV}(p+3#w?B7jH
z8DCfLAghTUcc>So(WSGj$?En5Sc^;GZ<@f}4?aGo)H8(*#|9@;<}$_4lm@2EV+wnC
zhJe7MIxk|%V&PQ1;G-%vLO#_3PnD2rjlk>@5?dhLLgiV=6t7SyHAJm&szy*$=SG29
zjO7vTUMUo+T1ZSyL+y@3znC5&s87gQwW#HtFEm%>nI|OHAShJ=&jP{6FKnrIcL_Ij
z3&+Lx6A*mVtvbP?c1L_;YR+DL9;aZb61EzJd=|s69{5?bjBULYHG&X}y=91b0$`s1
za`Wear9F%H-2|Mzvi-Gezz4E!S<$bk>PQ@7Y4MWsqkt3F&CaO=EI7L}X)yZ#{K1jq
zxPE(g`EP(z{-{5=1JL?<%Ya>ggI+%DS&QeFT^y4Kn6<I38NMDYac6$=q21!H+;kOr
zE}S_r3;nWVGlu}$Y}ewCA)oQ_Sq}r2owsl3hig}pV<YbGTW{Nq`J{JWeHd_3i)q~}
zfMc&N&FKp`%$wP<9MAQfc{vVw*X(PZi|6v5es(0@|NHjScrWALi`OiGg+F}q#cIHk
zzXtWm0L<9dJ*FIR+-YBI7GP%L<`2^WAAO-bfP8reQ|c}u?%DZmDPZn`tmDyu!<QzX
zLf+(~-<;cz=O*ncz;&V3)_M|f^!iH^j{r{d_x|++)_GdrRS8(XzKQ7y=Jj{K#+Lwx
zI<o)4epqA7GcoV<?dgxq2TWYCxoth>c`kXwS9q^EMK(N_d!YXo<ejqh(4hf<<{MwG
z{1|Y2>Sry_1CE)WwH@=AI_uqRtaCxFQuZgVEj7Nah{Z)Wf5-UePcB>nnBU&EXe!3J
zuq__*DQ@WY=_#yVYf>BZb=;ozZq)?ubu4zq2EdWV?g?0*-1axm?ggCu##51zfH|)k
zo!FPbqs?QkVE@Z^G-KbVytk$R-$UNT?`~k7?C%F+uj6}Jx3jS~#y`AuRSfoT%YmkN
zyr=z{>t@76&y`ldloc~#u)j$IH^2BC^4Y&mc?;tWQ;r|P^*l?H4d0jPMD#Mi)MGz3
z{ep4Uyz?){Gw+YfKrAf%Xzf_QCz|tnSqc8Iv7|ppF-RKy?BE{{Ng$~p$<zj!k(nS*
zg5*;i1p+VU&Ib=FO`IU`Nh9!zNjJ)Ydv=gPAd^9CloK<!A0&bLqF*9N5lAnPJ|K`S
z$)Nig)HVkMK8<7sh?&OCr*V;U9KBZwh=b-CPtQC^?}>39qxZoW6(somJsTvFa+OjU
zLhECr^}v`W8Y7=_!)qW9(VP;fZ92$6kf9*(X(ln02XzarRS%j!-l>q*y%+zV(mF@2
zoBgLWni}te(WAoY^QLg?RBoNdt<$-62Di?n>nO%Qf@Oh=H0@%fb}@=eI?C^B9L^HQ
zN-4|XS4T(3CT^BuV|HYq(p%wT-+x9}Q8n7-2snm&{J=$mgVyc;04=|izFrsr0J@22
A82|tP

literal 0
HcmV?d00001

diff --git a/tests/testthat/data_files/STANDARDISE/std_3.csv b/tests/testthat/data_files/STANDARDISE/std_3.csv
new file mode 100644
index 00000000..583c9a22
--- /dev/null
+++ b/tests/testthat/data_files/STANDARDISE/std_3.csv
@@ -0,0 +1,101 @@
+fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20
+High,Blue,Yes,C,Three,31,TRUE,o,28.757752012461424
+High,Blue,Yes,C,Three,79,TRUE,s,78.83051354438066
+High,Blue,Yes,C,Three,51,TRUE,n,40.89769218116999
+Medium,NA,NA,B,Two,14,FALSE,c,88.301740400493145
+High,Blue,Yes,C,Three,67,TRUE,j,94.04672842938453
+Medium,NA,NA,B,Two,42,FALSE,r,4.555649938993156
+Medium,NA,NA,B,Two,50,FALSE,v,52.810548804700375
+Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046
+High,Blue,Yes,C,Three,14,TRUE,e,55.14350144658238
+Low,NA,Yes,A,One,25,TRUE,t,45.661473530344665
+Medium,NA,NA,B,Two,90,FALSE,n,95.68333453498781
+Medium,NA,NA,B,Two,91,FALSE,v,45.33341561909765
+Low,NA,NA,A,One,69,FALSE,y,67.75706354528666
+Medium,NA,Yes,B,Two,91,TRUE,z,57.26334019564092
+High,Blue,NA,C,Three,57,FALSE,e,10.292468266561627
+Low,NA,Yes,A,One,92,TRUE,s,89.98249704018235
+High,Blue,NA,C,Three,9,FALSE,y,24.60877343546599
+High,Blue,Yes,C,Three,93,TRUE,y,4.205953353084624
+Low,NA,Yes,A,One,99,TRUE,i,32.79207192827016
+Low,NA,Yes,A,One,72,TRUE,c,95.45036491472274
+Low,NA,Yes,A,One,26,TRUE,h,88.95393160637468
+Low,NA,NA,A,One,7,FALSE,z,69.28034061565995
+High,Blue,Yes,C,Three,42,TRUE,g,64.05068137682974
+Medium,NA,Yes,B,Two,9,TRUE,j,99.42697766236961
+High,Blue,Yes,C,Three,83,TRUE,i,65.57057991158217
+Medium,NA,Yes,B,Two,36,TRUE,s,70.85304681677371
+Low,NA,NA,A,One,78,FALSE,d,54.40660247113556
+Medium,NA,NA,B,Two,81,FALSE,n,59.41420204471797
+High,Blue,Yes,C,Three,43,TRUE,q,28.91597372945398
+Medium,NA,NA,B,Two,76,FALSE,k,14.711364731192589
+Low,NA,Yes,A,One,15,TRUE,g,96.30242325365543
+High,Blue,NA,C,Three,32,FALSE,u,90.22990451194346
+High,Blue,Yes,C,Three,7,TRUE,l,69.07052784226835
+Low,NA,NA,A,One,9,FALSE,o,79.54674176871777
+High,Blue,NA,C,Three,41,FALSE,j,2.461368450894952
+Medium,NA,Yes,B,Two,74,TRUE,m,47.77959710918367
+Low,NA,Yes,A,One,23,TRUE,g,75.84595375228673
+High,Blue,Yes,C,Three,27,TRUE,i,21.640793583355844
+Low,NA,Yes,A,One,60,TRUE,i,31.818100763484836
+Low,NA,NA,A,One,53,FALSE,j,23.16257853526622
+Medium,NA,Yes,B,Two,7,TRUE,w,14.280002238228917
+High,Blue,NA,C,Three,53,FALSE,u,41.45463358145207
+High,Blue,NA,C,Three,27,FALSE,g,41.372432629577816
+Low,NA,Yes,A,One,96,TRUE,u,36.884545092470944
+High,Blue,Yes,C,Three,38,TRUE,f,15.244474774226546
+Low,NA,Yes,A,One,89,TRUE,y,13.880606344901025
+High,Blue,Yes,C,Three,34,TRUE,b,23.303409945219755
+Medium,NA,NA,B,Two,93,FALSE,e,46.59624502528459
+Low,NA,Yes,A,One,69,TRUE,h,26.597264036536217
+Medium,NA,Yes,B,Two,72,TRUE,l,85.78277153428644
+Low,NA,NA,A,One,76,FALSE,m,4.583116667345166
+Low,NA,Yes,A,One,63,TRUE,r,44.220007420517504
+High,Blue,Yes,C,Three,13,TRUE,a,79.89248456433415
+Low,NA,Yes,A,One,82,TRUE,y,12.189925997518003
+Medium,NA,Yes,B,Two,97,TRUE,y,56.094798375852406
+Low,NA,NA,A,One,91,FALSE,f,20.65313896164298
+Low,NA,NA,A,One,25,FALSE,u,12.753165024332702
+High,Blue,Yes,C,Three,38,TRUE,o,75.33078643027693
+Low,NA,NA,A,One,21,FALSE,i,89.50453591533005
+Medium,NA,Yes,B,Two,79,TRUE,o,37.44627758860588
+Low,NA,Yes,A,One,41,TRUE,z,66.51151946280152
+High,Blue,NA,C,Three,47,FALSE,p,9.484066092409194
+Low,NA,NA,A,One,90,FALSE,t,38.39696377981454
+High,Blue,Yes,C,Three,60,TRUE,f,27.43836445733905
+Medium,NA,Yes,B,Two,95,TRUE,k,81.46400388795882
+High,Blue,NA,C,Three,16,FALSE,h,44.851634139195085
+Medium,NA,Yes,B,Two,94,TRUE,v,81.00643530488014
+Medium,NA,Yes,B,Two,6,TRUE,v,81.23895095195621
+High,Blue,Yes,C,Three,72,TRUE,g,79.43423211108893
+Medium,NA,Yes,B,Two,86,TRUE,p,43.983168760314584
+Medium,NA,NA,B,Two,86,FALSE,q,75.44751586392522
+High,Blue,Yes,C,Three,39,TRUE,v,62.922113155946136
+High,Blue,Yes,C,Three,31,TRUE,r,71.01824013516307
+Low,NA,Yes,A,One,81,TRUE,q,0.062477332539856434
+Medium,NA,Yes,B,Two,50,TRUE,b,47.53165740985423
+Medium,NA,NA,B,Two,34,FALSE,d,22.011888516135514
+Low,NA,NA,A,One,4,FALSE,m,37.98165377229452
+Medium,NA,Yes,B,Two,13,TRUE,e,61.277100327424705
+Low,NA,NA,A,One,69,FALSE,v,35.179790924303234
+Low,NA,NA,A,One,25,FALSE,s,11.113542434759438
+Medium,NA,NA,B,Two,52,FALSE,y,24.361947271972895
+High,Blue,NA,C,Three,22,FALSE,t,66.80555874481797
+High,Blue,Yes,C,Three,89,TRUE,v,41.764677967876196
+Low,NA,NA,A,One,32,FALSE,y,78.81958340294659
+Medium,NA,NA,B,Two,25,FALSE,n,10.286464425735176
+Low,NA,NA,A,One,87,FALSE,y,43.489274149760604
+Medium,NA,Yes,B,Two,35,TRUE,w,98.49569799844176
+Low,NA,Yes,A,One,40,TRUE,c,89.30511143989861
+High,Blue,NA,C,Three,30,FALSE,h,88.64690607879311
+High,Blue,Yes,C,Three,12,TRUE,p,17.505265027284622
+Medium,NA,NA,B,Two,31,FALSE,l,13.069569156505167
+High,Blue,NA,C,Three,30,FALSE,y,65.31019250396639
+Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399
+Medium,NA,NA,B,Two,99,FALSE,c,65.67581279668957
+Medium,NA,NA,B,Two,14,FALSE,n,32.03732424881309
+High,Blue,Yes,C,Three,93,TRUE,g,18.769111926667392
+Medium,NA,Yes,B,Two,96,TRUE,c,78.22943013161421
+Low,NA,NA,A,One,71,FALSE,w,9.359498671256006
+High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968
+High,Blue,Yes,C,Three,23,TRUE,z,51.15054599009454
diff --git a/tests/testthat/data_files/STANDARDISE/std_3.rda b/tests/testthat/data_files/STANDARDISE/std_3.rda
new file mode 100644
index 0000000000000000000000000000000000000000..8036267480506dd3af4291f4e142386a083abad1
GIT binary patch
literal 1597
zcmV-D2EzFtiwFP!000001D#k4P*hbIKHR%XAQ+QVN}A%zpg@|yBBYWR<Y9>70wNfQ
z%RT^ISX~x2b#&C2%Elp+9;A-i<fMhBQ{kvNCXQ)2Hl?FQ=~VVm$DTS(T8Z@kaPIAT
z&bb@P%s1cvpU3|^oO5RjGE0&|OF{`DGBKGXBKHH4^qXCjJvNmP7$78wm`Dh)$>FL<
zf^~yH23-yZcJ$2u2U*9mPM37L#Eb{&WIb4Wl0Xk)?C8fOW={|^$J&*c`9Nc=4Smk6
zY?QxIeC!&wwvxb?ZryD?>+g0pI^FdkMfA3~x0BOqs;a3h=Xw0<;u%gyO+zhXqMzMu
zt5+>(c3n2R&*KH@mgpp`Bt3sUtb*0qas6JSc_Vv9enorOb^jMvps_%<jN%J~K<kWh
zH1gHkMyZ?J-Z@i-yTREzC$n>R+10*OMP!E^Guq2%X54KfKYjHnvUj&R=)sN!$|10S
zVOQl+XZ;;DRewRV7ZQ6xvlpP~g4178qKi_m8(m&T7pCYTU>^_BgTju99uvtxHp<^9
zKGCBfdh85paa&W>YdR=h6*5#|>K!$iZKyuzzBELiydJDQ(F<*KaM%Wk3e4ISz4At5
zqC>|vgY_#qXrlPoH31qbs)ex0a(<mteQArTy-rn;i~OFxG@Gc+9s$S)Bmo8j;HZ!|
zKq4TXjzt3I0-gY5QGOIqMtVN!NIAd`m<D(d5KL|0Tp=JDFcV;<HuC`Y0;1?zTo((-
z1q=oZ1xyAc)A?jNHXjfJC<U0Q-z@4EZBC(i6##5B&PclFewruxd4%SJJ}PLH=3#}X
ztzwGNv_4i^5A<oGKC-B73*Z47QxqMW0~iGu0~ihnqjpeMXsrg)_%Wv(T6fGnl<=R&
zM8`y@i^f<M*<r_w_OiZ=;tAxZuRcZg?luP!U@TA$f&51h><hh5J*rTz3U#VrSA_+t
z;8lf2RamGB>VIvJkF%pKUZM(1xn1?#8cnF-;;H8BRB@r!aF!ihV2imdG&>Ed;N}XY
zxv1rK)o_Bg*w0z!eYv>YE4e~dbAf4LXx*_H7t_T#^>8t3BU*VEan052EaU=f<b*2D
z&SGwjmz&aZcW{|Hx$XS<@o{UksXA^%>kfa$w3yw-cI@0p6*uMQ;#o?1?@os@x#MQY
zc(_mt+ee$>)>bmTms&mpE*_M>_XhBs)m^Wq0pFK;GY{A0RvnH&F3w+8b_96(`g!S<
zz}aWo;$m?9PwyW-hWcB(%6<i&^?Ut+9l&|7wU68h9R1QE*E-z4;=-g%;M9$sEpFh5
z(?5D}Tya)zx`KAjpFUoW>(atg-~~~v)~k_6(VqNRIlPWa$+^M}!%=rM**4<*-d5`_
zjK?xy?IGY9?WXlF15duPJbf7OSa(YIO58VS?xhH{+q}190q)Cu>X`|c|94%dFfVz}
zg{um1&i5aGz81LP&*-7>aw^H&286*&txP%P2~P!1iQW7Gyjse`&zJeoUgm*>x{JuW
z+rEKUPsv!EdJJA6W!&=klV~^o$k%80;l3F=vr*5<vv!;Sp4fVE`eESN-XXsn$2!j*
zwk8VeH!Rj7VO)O=_rC}{#+LRs_9HLMJQw4(Y_~kL2sn1t=FV1(^KATvFEOwAxmMhl
zv46xCv^#6-!Gj}#&DXzJ{Solg#82Cw1D>=fbvwpURQ^sH);YUYD)|F-rN*-rIY0Nt
zZ|MKriH2psSzVn=iqOybZIKvHeq-<_C$WAVah=fDDZ9IZs|n_HG`w^J@C1246xJuB
z>y0yefM>q`WPkWFP|{zK?bw%?iRMX{vHxW|TCnf4-fhmt^N@Mr+v`}T!uNdP*YLco
zZ}Sg9|A)4&3B&$v+20h2c@{o>&5RuDy4(Sru&Oi+`x`fE^9#?Sy~3{&-b8<6rDF$C
zU#K)$@qC$%hpqrlJo-b^&*-Q5?SIgpd0#{_a!&Dw>)?w<dAucSFn$d&zsJ<sYMtuu
zF(GvD9{41zuzTDIjGoBoNsMk`^knTvAF!36S1fuZMW-Z=X95iIdi>+mAn^-Jf})V_
v?(R*@_pJH}+}~H}u5hS#stg(xLmf7sZJf&swn*>C?yY|Tz&gqhuoM6QKWHU0

literal 0
HcmV?d00001


From 254c57d25ce7c128336dc7fd95d7fbeebbde3db3 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:24:56 +0200
Subject: [PATCH 16/18] added extra data to test levels disclosure

---
 .../data_files/STANDARDISE/std_1_d.csv        | 101 ++++++++++++++++++
 .../data_files/STANDARDISE/std_1_d.rda        | Bin 0 -> 2228 bytes
 .../data_files/STANDARDISE/std_2_d.csv        | 101 ++++++++++++++++++
 .../data_files/STANDARDISE/std_2_d.rda        | Bin 0 -> 2461 bytes
 .../data_files/STANDARDISE/std_3_d.csv        | 101 ++++++++++++++++++
 .../data_files/STANDARDISE/std_3_d.rda        | Bin 0 -> 2343 bytes
 ...lgenis_armadillo-upload_testing_datasets.R |  12 ++-
 .../obiba_opal-upload_testing_datasets.R      |  16 ++-
 8 files changed, 326 insertions(+), 5 deletions(-)
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_1_d.csv
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_1_d.rda
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_2_d.csv
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_2_d.rda
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_3_d.csv
 create mode 100644 tests/testthat/data_files/STANDARDISE/std_3_d.rda

diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.csv b/tests/testthat/data_files/STANDARDISE/std_1_d.csv
new file mode 100644
index 00000000..9fe7462f
--- /dev/null
+++ b/tests/testthat/data_files/STANDARDISE/std_1_d.csv
@@ -0,0 +1,101 @@
+fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col6,fac_col9,col12,col15,col18
+High,Blue,Yes,3,NA,Bird,True,28.7577520124614,31,TRUE
+High,Blue,Yes,3,NA,Bird,True,78.8305135443807,79,TRUE
+High,Blue,Yes,3,NA,Bird,True,40.89769218117,51,TRUE
+Medium,Green,No,2,NA,Dog,False,88.3017404004931,14,FALSE
+High,Blue,Yes,3,NA,Bird,True,94.0467284293845,67,TRUE
+Medium,Green,No,2,NA,Dog,False,4.55564993899316,42,FALSE
+Medium,Green,No,2,NA,Dog,False,52.8105488047004,50,FALSE
+Medium,Green,No,2,NA,Dog,False,89.2419044394046,43,FALSE
+High,Blue,Yes,3,NA,Bird,True,55.1435014465824,14,TRUE
+Low,NA,Yes,1,NA,Cat,True,45.6614735303447,25,TRUE
+Medium,Green,No,2,NA,Dog,False,95.6833345349878,90,FALSE
+Medium,Green,No,2,NA,Dog,False,45.3334156190977,91,FALSE
+Low,NA,No,1,NA,Cat,False,67.7570635452867,69,FALSE
+Medium,Green,Yes,2,NA,Dog,True,57.2633401956409,91,TRUE
+High,Blue,No,3,NA,Bird,False,10.2924682665616,57,FALSE
+Low,NA,Yes,1,NA,Cat,True,89.9824970401824,92,TRUE
+High,Blue,No,3,NA,Bird,False,24.608773435466,9,FALSE
+High,Blue,Yes,3,NA,Bird,True,4.20595335308462,93,TRUE
+Low,NA,Yes,1,NA,Cat,True,32.7920719282702,99,TRUE
+Low,NA,Yes,1,NA,Cat,True,95.4503649147227,72,TRUE
+Low,NA,Yes,1,NA,Cat,True,88.9539316063747,26,TRUE
+Low,NA,No,1,NA,Cat,False,69.28034061566,7,FALSE
+High,Blue,Yes,3,NA,Bird,True,64.0506813768297,42,TRUE
+Medium,Green,Yes,2,NA,Dog,True,99.4269776623696,9,TRUE
+High,Blue,Yes,3,NA,Bird,True,65.5705799115822,83,TRUE
+Medium,Green,Yes,2,NA,Dog,True,70.8530468167737,36,TRUE
+Low,NA,No,1,NA,Cat,False,54.4066024711356,78,FALSE
+Medium,Green,No,2,NA,Dog,False,59.414202044718,81,FALSE
+High,Blue,Yes,3,NA,Bird,True,28.915973729454,43,TRUE
+Medium,Green,No,2,NA,Dog,False,14.7113647311926,76,FALSE
+Low,NA,Yes,1,NA,Cat,True,96.3024232536554,15,TRUE
+High,Blue,No,3,NA,Bird,False,90.2299045119435,32,FALSE
+High,Blue,Yes,3,NA,Bird,True,69.0705278422683,7,TRUE
+Low,NA,No,1,NA,Cat,False,79.5467417687178,9,FALSE
+High,Blue,No,3,NA,Bird,False,2.46136845089495,41,FALSE
+Medium,Green,Yes,2,NA,Dog,True,47.7795971091837,74,TRUE
+Low,NA,Yes,1,NA,Cat,True,75.8459537522867,23,TRUE
+High,Blue,Yes,3,NA,Bird,True,21.6407935833558,27,TRUE
+Low,NA,Yes,1,NA,Cat,True,31.8181007634848,60,TRUE
+Low,NA,No,1,NA,Cat,False,23.1625785352662,53,FALSE
+Medium,Green,Yes,2,NA,Dog,True,14.2800022382289,7,TRUE
+High,Blue,No,3,NA,Bird,False,41.4546335814521,53,FALSE
+High,Blue,No,3,NA,Bird,False,41.3724326295778,27,FALSE
+Low,NA,Yes,1,NA,Cat,True,36.8845450924709,96,TRUE
+High,Blue,Yes,3,NA,Bird,True,15.2444747742265,38,TRUE
+Low,NA,Yes,1,NA,Cat,True,13.880606344901,89,TRUE
+High,Blue,Yes,3,NA,Bird,True,23.3034099452198,34,TRUE
+Medium,Green,No,2,NA,Dog,False,46.5962450252846,93,FALSE
+Low,NA,Yes,1,NA,Cat,True,26.5972640365362,69,TRUE
+Medium,Green,Yes,2,NA,Dog,True,85.7827715342864,72,TRUE
+Low,NA,No,1,NA,Cat,False,4.58311666734517,76,FALSE
+Low,NA,Yes,1,NA,Cat,True,44.2200074205175,63,TRUE
+High,Blue,Yes,3,NA,Bird,True,79.8924845643342,13,TRUE
+Low,NA,Yes,1,NA,Cat,True,12.189925997518,82,TRUE
+Medium,Green,Yes,2,NA,Dog,True,56.0947983758524,97,TRUE
+Low,NA,No,1,NA,Cat,False,20.653138961643,91,FALSE
+Low,NA,No,1,NA,Cat,False,12.7531650243327,25,FALSE
+High,Blue,Yes,3,NA,Bird,True,75.3307864302769,38,TRUE
+Low,NA,No,1,NA,Cat,False,89.50453591533,21,FALSE
+Medium,Green,Yes,2,NA,Dog,True,37.4462775886059,79,TRUE
+Low,NA,Yes,1,NA,Cat,True,66.5115194628015,41,TRUE
+High,Blue,No,3,NA,Bird,False,9.48406609240919,47,FALSE
+Low,NA,No,1,NA,Cat,False,38.3969637798145,90,FALSE
+High,Blue,Yes,3,NA,Bird,True,27.438364457339,60,TRUE
+Medium,Green,Yes,2,NA,Dog,True,81.4640038879588,95,TRUE
+High,Blue,No,3,NA,Bird,False,44.8516341391951,16,FALSE
+Medium,Green,Yes,2,NA,Dog,True,81.0064353048801,94,TRUE
+Medium,Green,Yes,2,NA,Dog,True,81.2389509519562,6,TRUE
+High,Blue,Yes,3,NA,Bird,True,79.4342321110889,72,TRUE
+Medium,Green,Yes,2,NA,Dog,True,43.9831687603146,86,TRUE
+Medium,Green,No,2,NA,Dog,False,75.4475158639252,86,FALSE
+High,Blue,Yes,3,NA,Bird,True,62.9221131559461,39,TRUE
+High,Blue,Yes,3,NA,Bird,True,71.0182401351631,31,TRUE
+Low,NA,Yes,1,NA,Cat,True,0.0624773325398564,81,TRUE
+Medium,Green,Yes,2,NA,Dog,True,47.5316574098542,50,TRUE
+Medium,Green,No,2,NA,Dog,False,22.0118885161355,34,FALSE
+Low,NA,No,1,NA,Cat,False,37.9816537722945,4,FALSE
+Medium,Green,Yes,2,NA,Dog,True,61.2771003274247,13,TRUE
+Low,NA,No,1,NA,Cat,False,35.1797909243032,69,FALSE
+Low,NA,No,1,NA,Cat,False,11.1135424347594,25,FALSE
+Medium,Green,No,2,NA,Dog,False,24.3619472719729,52,FALSE
+High,Blue,No,3,NA,Bird,False,66.805558744818,22,FALSE
+High,Blue,Yes,3,NA,Bird,True,41.7646779678762,89,TRUE
+Low,NA,No,1,NA,Cat,False,78.8195834029466,32,FALSE
+Medium,Green,No,2,NA,Dog,False,10.2864644257352,25,FALSE
+Low,NA,No,1,NA,Cat,False,43.4892741497606,87,FALSE
+Medium,Green,Yes,2,NA,Dog,True,98.4956979984418,35,TRUE
+Low,NA,Yes,1,NA,Cat,True,89.3051114398986,40,TRUE
+High,Blue,No,3,NA,Bird,False,88.6469060787931,30,FALSE
+High,Blue,Yes,3,NA,Bird,True,17.5052650272846,12,TRUE
+Medium,Green,No,2,NA,Dog,False,13.0695691565052,31,FALSE
+High,Blue,No,3,NA,Bird,False,65.3101925039664,30,FALSE
+Low,NA,Yes,1,NA,Cat,True,34.3516472261399,64,TRUE
+Medium,Green,No,2,NA,Dog,False,65.6758127966896,99,FALSE
+Medium,Green,No,2,NA,Dog,False,32.0373242488131,14,FALSE
+High,Blue,Yes,3,NA,Bird,True,18.7691119266674,93,TRUE
+Medium,Green,Yes,2,NA,Dog,True,78.2294301316142,96,TRUE
+Low,NA,No,1,NA,Cat,False,9.35949867125601,71,FALSE
+High,Blue,Yes,3,NA,Bird,True,46.677904156968,67,TRUE
+High,Blue,Yes,3,NA,Bird,True,51.1505459900945,23,TRUE
diff --git a/tests/testthat/data_files/STANDARDISE/std_1_d.rda b/tests/testthat/data_files/STANDARDISE/std_1_d.rda
new file mode 100644
index 0000000000000000000000000000000000000000..4387f72b8b0e93973132f43c9ddd0f762f606166
GIT binary patch
literal 2228
zcmV;l2ut@LiwFP!000001MQk=a~;JMhDTSDZ5+pPAjE+z5JEx{!qEHTEOv|+LReyA
zh@DMbTNg)FLQY9CRKD>;@*7k61t?GJ_RO?Kcdle%@E5!4t-13~&pD^h)@M|H|H>_W
z`qt^HsuruIrBSuG%+G51gPYf03snVCRSVTp^%U2W+dFHka&?WR&v45-3;*R>E;;kr
z*7#qm>zg~1dy}o+v!y%Z?MddoxyMhhUrf0JvYr`lZQcLu!UZnlf3JQ#zPoe3FLHTr
z$$-psUCUg9#p1eH+>eU;QIB6JewXOW8_l?1=*cPhQSY6F-m|{kQSbRYt}mn9bN~0y
z{)g=Auj%`CG(%UhZeL2B*qD4e+1Tb+Kl;z_i~jb`*81kX)lVl~5-jib;(jj2{oJ0;
zoO5rk-}`6&z4$h%lGVO5SzCYb$@GPN1})#+7;kT9DXo=l@-=xi`n+oXrPojFb~`w^
z`xV%CYImMVP3erX%Ei9w8D+LKWahl`>8I}AeRjpDa7)QAG`~yq<(2Oh_wBoVzK}EO
zubZ)cRO&s9>&vM2@45eb=Kkl{H|kyY`L>i*&G5xB({0qG+vSZ1lP1%zZ%rnf)e%$d
zk!1asH{&bK^nP<Mo7;h?R|m6W=#ZGj4ZS<>usO=}htWAp-f{OZ%O^v!XRVu|>r18B
z=-vBGTfIHmt{z90AJc^XL3n=OLAZ8jWBw_4Hj7EuwM(a_`#5l|2JZXU{QTnoW<Ni`
z_t&4A%@17<vu{Y>f#(By4_kl0p1z&|d56mH-4DKh%yoy^F&O8=_=k-DL;LR#Ifv!#
zkX=X3i)vac>Fa0h^J@Qy@&u0;U*lN&<g4)D@1gw<+4l&(3XlI4b$NYjtvPGo81FPc
zuiU>^eVr%&BedtqpnC*8PX=8UT^ny~Hy=|=H@6;4s>d<x9^Z_`CM$mc-UB}ae+H{b
z67b+JAOZ=-K!B^@S0G9JJ9xgDc9kE2kHBqk9bC-ve+S+K0sI>LFk5v8`~zGAH}Y03
zcpE$mUI6F83it*1Id~a-tC~*P-vtVM4}6$h0c!9*_y)KDz75WRx4_@PPrw`CBzOgU
zAAA5_2Y&)L!5_hE;1A%p;P=`627Cwn6#NGKxSCEd&w(r8Rq!rIGtMu;UGP_M3p|~}
zcnQ1&E`t|A`g`s>DKbC<SER$aJ^xK3_q3>mQ^tF(mDMp=XY!MCsy0q)7mO7lnrIu|
z)JoWBwYFLW<5b66%j+@f0HiDUprv%qd2Oup4IgW5ksF+s$~qxC-UK6@6HyrvWo&pU
zY89Mu#whExRVB|-7nF0>b$X?&rPS6aqm4&X=2xVuB^D|hqqj1&%T?_yW?R7vTD4|M
zjfbLjQCgn0YFFSx5KcI4Ozf<exF-l9l+r<|5DVU8jx!!#MAcboYh{e_gup1}S}#jq
zCvC|buW)nAhuUMyo(w$N?n;T;VLrac1xB|)l(jR$M@<a0we4<|s)dvx1S<)$ZPhAW
zONW1h#lx-(F1!|+ctm5BjGY`)YbT@e%FF0gY|SyX6M?icM8LJyD_2{VVUot$R{V<c
zwb23Z7(%OCrztjTy|+q80xY}DhMI6huhD31o3UEfK?W&=Cn&+RMrt;zy;h_`2vW;}
z_fZKfS3!BvZdS7?{KBRb&WWu#+SbxXA5&)Hjc&ao6|lpm2VQjR*%bxC210PuOu@%M
zrfH7`Hqi2+*3m@=&-0c)YeXF)QCDoPs~;n4_HVRuD&nM2@Yv!AcaouG!F$RF+0F-F
z)UBrAqVh%>ln7Vwk-~MX6<T%~XvlCyhLf*?a<&DZvPm0UCZp~2+FEN^Y7@_SU+|8Y
z`qW3Lp^i^ELQf*|ZISsa@X2_QBt_S4mqJNp>q*W)wHG|ANfJpD3Az<;X$w@Aq5Op?
zoNpD$15Z&CL(wLcHT7OMBC@HSv~I8@1IaBUA+|wv>s^gl0;mbQEqGrmf?@=|A^+P-
zvGhBME0w?pT|08JCUMARS`m5Mn(tH{mBJEf>1nR@vNN@?!jaydP8hrOcweGZSwU`f
z-5uU~VSS{RV7@ANhv8C@CXQy(@<fG|l)9t8$;NFf@Ilb{gEuA==KEB+G)Ejw*Y3=T
zwLq=%A$7aZ^r^G(h{ZdO=(b8diBERW+a;mzV(2B^3!e*_>a;6ppHfYDP6<uhbg|%Z
zu4SW6;|W#p)GcQjEvB^6hdLFeMn3Ja%PIIk_yTtFxUdsmQw;2uHYPeb#8w-niI)_i
zI}G5dP%4gCdD)dNJe5f6B11@(R&QXx0xJ1}{FDXHF%#)KHf2@0o^z7i34sy_t^8nX
zA5vObgoiE}0*yyVDu{PX?EDE&$B3MAbd3ExPD_V85%yhj2BK)Fa=Z``<_bQ!M^Z2`
zaN;*nhquCT)X>efE>=f^8$IU8qOzh$Q9(MA2u@qMk<`bV^a{o7(N%OL>SR45iA=5B
z7d)AVfAK;Lo%f@|N|nOJ(PX=0C5GCh0f1Tw#&kIziLQe7oRgZC^a%bMyuP_J|F6OQ
zFTXU)zL#gbG`pr5G|jYWetk7_X^u?ucA6Ql=4+b0)7;w3+G(atvviudPi7hES(;tb
zY@FuL8(C(WqtopCVqTlpr5QTS!fCEfb90)<ZGLVu_fa*Sanl_CQr3Gl>rFB*=6$6(
zdz|fjF2D0^-e=PDdfrdcv6`Ko=KT{{)`uCdWdGdA{zy8PvW}}+b~Ah5&ihGR`c_FZ
zc=~Qh-xO!Eob>xr_SM;Jf7;Xa?Dz9k@z=*{|CeM{T&v=K7T3SdlGEpg-tFrevS*&2
zqw1U`@3?!Y=5X`&Fh~FX{}!-0{$$epw}8``JVm9h-o3ved-tk$uY32VcW-<5u6N&@
z1ItO5DsOCg)Bo=}Pthpq#_sl8e|LBHKLefT^xAl5Tz|a9E7deLzxY2L7VDxOBme-K
C!h~@E

literal 0
HcmV?d00001

diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.csv b/tests/testthat/data_files/STANDARDISE/std_2_d.csv
new file mode 100644
index 00000000..0a5a46eb
--- /dev/null
+++ b/tests/testthat/data_files/STANDARDISE/std_2_d.csv
@@ -0,0 +1,101 @@
+fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,fac_col7,fac_col10,col13,col16,col19
+High,NA,NA,C,2,Large,Left,o,28.7577520124614,31
+High,NA,NA,C,2,Large,Left,s,78.8305135443807,79
+High,NA,NA,C,2,Large,Left,n,40.89769218117,51
+Medium,Green,No,B,3,Medium,Right,c,88.3017404004931,14
+High,NA,NA,C,2,Large,Left,j,94.0467284293845,67
+Medium,Green,No,B,3,Medium,Right,r,4.55564993899316,42
+Medium,Green,No,B,3,Medium,Right,v,52.8105488047004,50
+Medium,Green,No,B,3,Medium,Right,k,89.2419044394046,43
+High,NA,NA,C,2,Large,Left,e,55.1435014465824,14
+Low,Red,NA,A,1,Small,Left,t,45.6614735303447,25
+Medium,Green,No,B,3,Medium,Right,n,95.6833345349878,90
+Medium,Green,No,B,3,Medium,Right,v,45.3334156190977,91
+Low,Red,No,A,1,Small,Right,y,67.7570635452867,69
+Medium,Green,NA,B,3,Medium,Left,z,57.2633401956409,91
+High,NA,No,C,2,Large,Right,e,10.2924682665616,57
+Low,Red,NA,A,1,Small,Left,s,89.9824970401824,92
+High,NA,No,C,2,Large,Right,y,24.608773435466,9
+High,NA,NA,C,2,Large,Left,y,4.20595335308462,93
+Low,Red,NA,A,1,Small,Left,i,32.7920719282702,99
+Low,Red,NA,A,1,Small,Left,c,95.4503649147227,72
+Low,Red,NA,A,1,Small,Left,h,88.9539316063747,26
+Low,Red,No,A,1,Small,Right,z,69.28034061566,7
+High,NA,NA,C,2,Large,Left,g,64.0506813768297,42
+Medium,Green,NA,B,3,Medium,Left,j,99.4269776623696,9
+High,NA,NA,C,2,Large,Left,i,65.5705799115822,83
+Medium,Green,NA,B,3,Medium,Left,s,70.8530468167737,36
+Low,Red,No,A,1,Small,Right,d,54.4066024711356,78
+Medium,Green,No,B,3,Medium,Right,n,59.414202044718,81
+High,NA,NA,C,2,Large,Left,q,28.915973729454,43
+Medium,Green,No,B,3,Medium,Right,k,14.7113647311926,76
+Low,Red,NA,A,1,Small,Left,g,96.3024232536554,15
+High,NA,No,C,2,Large,Right,u,90.2299045119435,32
+High,NA,NA,C,2,Large,Left,l,69.0705278422683,7
+Low,Red,No,A,1,Small,Right,o,79.5467417687178,9
+High,NA,No,C,2,Large,Right,j,2.46136845089495,41
+Medium,Green,NA,B,3,Medium,Left,m,47.7795971091837,74
+Low,Red,NA,A,1,Small,Left,g,75.8459537522867,23
+High,NA,NA,C,2,Large,Left,i,21.6407935833558,27
+Low,Red,NA,A,1,Small,Left,i,31.8181007634848,60
+Low,Red,No,A,1,Small,Right,j,23.1625785352662,53
+Medium,Green,NA,B,3,Medium,Left,w,14.2800022382289,7
+High,NA,No,C,2,Large,Right,u,41.4546335814521,53
+High,NA,No,C,2,Large,Right,g,41.3724326295778,27
+Low,Red,NA,A,1,Small,Left,u,36.8845450924709,96
+High,NA,NA,C,2,Large,Left,f,15.2444747742265,38
+Low,Red,NA,A,1,Small,Left,y,13.880606344901,89
+High,NA,NA,C,2,Large,Left,b,23.3034099452198,34
+Medium,Green,No,B,3,Medium,Right,e,46.5962450252846,93
+Low,Red,NA,A,1,Small,Left,h,26.5972640365362,69
+Medium,Green,NA,B,3,Medium,Left,l,85.7827715342864,72
+Low,Red,No,A,1,Small,Right,m,4.58311666734517,76
+Low,Red,NA,A,1,Small,Left,r,44.2200074205175,63
+High,NA,NA,C,2,Large,Left,a,79.8924845643342,13
+Low,Red,NA,A,1,Small,Left,y,12.189925997518,82
+Medium,Green,NA,B,3,Medium,Left,y,56.0947983758524,97
+Low,Red,No,A,1,Small,Right,f,20.653138961643,91
+Low,Red,No,A,1,Small,Right,u,12.7531650243327,25
+High,NA,NA,C,2,Large,Left,o,75.3307864302769,38
+Low,Red,No,A,1,Small,Right,i,89.50453591533,21
+Medium,Green,NA,B,3,Medium,Left,o,37.4462775886059,79
+Low,Red,NA,A,1,Small,Left,z,66.5115194628015,41
+High,NA,No,C,2,Large,Right,p,9.48406609240919,47
+Low,Red,No,A,1,Small,Right,t,38.3969637798145,90
+High,NA,NA,C,2,Large,Left,f,27.438364457339,60
+Medium,Green,NA,B,3,Medium,Left,k,81.4640038879588,95
+High,NA,No,C,2,Large,Right,h,44.8516341391951,16
+Medium,Green,NA,B,3,Medium,Left,v,81.0064353048801,94
+Medium,Green,NA,B,3,Medium,Left,v,81.2389509519562,6
+High,NA,NA,C,2,Large,Left,g,79.4342321110889,72
+Medium,Green,NA,B,3,Medium,Left,p,43.9831687603146,86
+Medium,Green,No,B,3,Medium,Right,q,75.4475158639252,86
+High,NA,NA,C,2,Large,Left,v,62.9221131559461,39
+High,NA,NA,C,2,Large,Left,r,71.0182401351631,31
+Low,Red,NA,A,1,Small,Left,q,0.0624773325398564,81
+Medium,Green,NA,B,3,Medium,Left,b,47.5316574098542,50
+Medium,Green,No,B,3,Medium,Right,d,22.0118885161355,34
+Low,Red,No,A,1,Small,Right,m,37.9816537722945,4
+Medium,Green,NA,B,3,Medium,Left,e,61.2771003274247,13
+Low,Red,No,A,1,Small,Right,v,35.1797909243032,69
+Low,Red,No,A,1,Small,Right,s,11.1135424347594,25
+Medium,Green,No,B,3,Medium,Right,y,24.3619472719729,52
+High,NA,No,C,2,Large,Right,t,66.805558744818,22
+High,NA,NA,C,2,Large,Left,v,41.7646779678762,89
+Low,Red,No,A,1,Small,Right,y,78.8195834029466,32
+Medium,Green,No,B,3,Medium,Right,n,10.2864644257352,25
+Low,Red,No,A,1,Small,Right,y,43.4892741497606,87
+Medium,Green,NA,B,3,Medium,Left,w,98.4956979984418,35
+Low,Red,NA,A,1,Small,Left,c,89.3051114398986,40
+High,NA,No,C,2,Large,Right,h,88.6469060787931,30
+High,NA,NA,C,2,Large,Left,p,17.5052650272846,12
+Medium,Green,No,B,3,Medium,Right,l,13.0695691565052,31
+High,NA,No,C,2,Large,Right,y,65.3101925039664,30
+Low,Red,NA,A,1,Small,Left,n,34.3516472261399,64
+Medium,Green,No,B,3,Medium,Right,c,65.6758127966896,99
+Medium,Green,No,B,3,Medium,Right,n,32.0373242488131,14
+High,NA,NA,C,2,Large,Left,g,18.7691119266674,93
+Medium,Green,NA,B,3,Medium,Left,c,78.2294301316142,96
+Low,Red,No,A,1,Small,Right,w,9.35949867125601,71
+High,NA,NA,C,2,Large,Left,v,46.677904156968,67
+High,NA,NA,C,2,Large,Left,z,51.1505459900945,23
diff --git a/tests/testthat/data_files/STANDARDISE/std_2_d.rda b/tests/testthat/data_files/STANDARDISE/std_2_d.rda
new file mode 100644
index 0000000000000000000000000000000000000000..cad2198a28fd31f5f70c1df6d297389e536bef15
GIT binary patch
literal 2461
zcmV;O31apiiwFP!000001I1eDb{xeO?ioqm;#dw2aUdIkBqSjewJ**BCKy9vAT~=(
zHZhh)vVttZNLUAc<w5d{c?6WNG*#6#Ro!#O5@4V6ovFTEb@zMwmhC&Q-4z$^UTB(T
zsaakgHcKm5HY@MH_xg*rX+UV2L9^VP$M4zwgN-%5wt=lz@Q5>m|KT^^a`A97`ebc;
z_h7s=o)qtv?~V4yf%ffV`t<k9DXrplWi*-W9bUVJpV8-Qo1^uEy^`h1@s=K*{&yqL
zB9^k>rR;f_Jr4`|AY11s`3*as2L+#;J}k}*igzX7Vex(*t>ls0T(0}<@6)f`Q~Gw;
zAuHQAlTv4P#t+9k`&gCHe|jeR`v;Tl-L18U<2DIaj*7Tn4spMHEHmWX8{1p=!+Pl!
zQYDCeYrL`j*@NkU8HHBXcSigBflJadC11>|;eVR=#~(j))LL-%=xfmL+|fLhV(AQn
z$fZ(rN0}}6sX0?V-pmO9KK@38=PK1BJv%#7J{|mq<#T?vq-VSa9s6eXXTSN`URuc`
z<5jJTvz5G_eKWEOIf++k^Yr;d={AhneRDD%@5X$)Gu~(x%&@N{=})c2<IVFfTKU_>
zr=<0rEoJrTGf&Q<GQ0Sl)P|<RCw;uT_I2BFq6~{!Gkklm`6e^_Byt~x|3=_2_+$U#
z)qYC7NPDjL@2Owar)o8>FR)ld=Q+~rJ#*Ap<W#L;Ep!g&MP${KIebpJc@f3tB*`ML
zPL@9>^YbDM7b%B1sxFpVGj*G)`s&P}imT83DYd+v<YQ4jS?AMu*YsYE83jsyUAL5B
z!Kdri({;-z_jlc3=Fv(Xxy|Le&;CCB$~~oT2OYBVeP8RC(GjEaW?9-)>Ydx&b&Ku0
z36^KOV%T@1EB6nZIv(?AeX;M$;V%38^y|v=GUxj8d~nL!+sducWNTd9*}|)LAB=W(
zns0Il`O39=>~+vZD}THAl(fFH^LpnZ?;k~G7r&F*(B+dp-t+1#Tc7Gew|r~7c@R_V
z&i2;*gXVGED^H=uiH7mLxY&=2c&Qz%$Hl+nViFe*<KpAEh_yO6sQ9GKpU1@))v5hz
zPg-nO`rNPhY*ngmSDZI0#Xha9NPIqvi=E0*NyLNd)ONK<n-43_wXDs`_Is71?pKN>
zHBxj&{m$I1gzi;3CrWbfCslA0pN}iW9#)I3iqEIjo=Np8*}hR(I<Ah_`EyY1Nw0P*
zN{UXMV@c<oem?6JWvhC1Sn2b{v>un@*W^dQJHRi2zXEH%9RLRS8{hy2i~tJU0DcR2
zX8#19Zl+VgPl1ns4}qJ&%YpxO;0?e6zXN{UOnbpS;9tP&!0l$*GZb(OcoO&ta1~eu
zehvHz_!;mW;1ckCfCE1OJ^=hbz7XIJ@GamP@Lk{{@F(COz|R4H8956)5Bv~#A9x9P
z7kCf&3-BWFXW$ROo6WS3N#J|HFM!_z&o#&Yj?+8^yav1gybYWKegmune+TXYPc+kh
z_zLhW@G9^Oa0Pe^I0FbkXKnm))Aq|JD5085V}#&JIIFY_Z!U8pH51xOMXi%gMY<%M
zDyM`{f?BD0OIHfVof8&dI-^^`nAX||shEj$M}z`js}18yQ`XX@l~il$xTKD`NN1FA
ztEHBbD<c%o>Da0**IMaTE@Ol-p(K|=8b}KCRmKT}hFm#k6tihNCq_YSMR7pzq$U&a
zkW<<*g?CBpD(KcyO|_8Hwc0bd$5Ki;7nXDDGP;2}S{nGm@m5PkxRlZ$1d?-|yvz*o
z(!4r`!_A3qiGeQ1JmAq}6{kc)efS<OkRk<<5iO~40x=LuC2Kh+lrd|qVhFNIV!0qp
z!@pL+!@3PF=z<Ew!%4-NYxzhbG;`8$!<^wRsUwM|7HK6B0iCqxnka08N&?y@_N&|w
zDJ;Ar5n7R)#$l{7MsdmzVAdLI3BuuwfJCWM#tKF(vy4)Mpjerd6fjm8!I28qGQl#s
zahyVPZn>e!SOKHp7Z}Ci9GBD)iZJ7h^O*^6wEHzu0eUF^!q9d<tU`gn0BbdBCZjux
zOcMqYU_hc<BAj*_bi7aWOOg`n5OogY+WL`<z<w#X<_=D>86CQ4gxmArETbEg4|r>1
zO;)!81?RYtOhO3N8Qr09HTF`$+6<J)aE=T|zEYI4%IH3ugoMkOQ>|P@ga8$_3C|gm
z(KTXfd>=s#wRE2&=t;<Ym1X`a=w5m1IXT^$i$d{bYml54)t=F@8%e^@M65{c71{zS
zOQQTK&77||k_U7YHDX9pQYlG}yAdMm+evbRLNXw^DMN^r<?Vh=z!m`%2)oMYhHwN$
zQuqe>pDIP6-!Zt7Q~025N172N4ssc-2zi^-*PJ-cp$SvyX*zjXOF|XZNN<Bq=-U18
zK7&lIC~~Xq?w~6}m2v1LP@iXX4aFHpnrJkWL`PJx6Q!=v-&l0pD(IG?@mnLM&D1x(
za?u>&XmssX9Y-j{a$|kBv$0QI0v&AO9gXNFp$3VM>_Bg42z?tv!_d9pbIN?3wgqi`
zs!@Zlgh1QWE~CS_3WjO{Pw<S6y2ZB)Ehg8}SmFy)fIr$}n^T}$gwH}xyw3ClU7#3X
zE!r4u`5?AJa)Ed;YTJ(i=%`Rs9AafyTe_g564APlAxM=Z*TP;3lJ^DjlVx;#W*qvC
z@>!Md$9EFBV=YR+CjM3tV|`jFFt=?oSTr8WP(e5&UF%QK(J>spa_AVdbU4j_<Pl-h
zCZ|OdC8``=aA2l0x_1vl!8nUAevCTkib{NH(9MO&b~^;u8K}du%5v041>C_2!AX_t
zkoxc@dIiVq(N?rW)RFZVNtkctCZi+s;9q#b+1C3`Lrd<%g-?@eKP%1>=?4JRij}g>
zafj$~kPW_*0xjuLF16<u{VN$itiBV5I6s{FA=3}7en5R8(EOn22WmeQ-Uz>bxb}l-
z9H#xy=!a!LRG$qz{5wCK`r+3Pn70E@KPda*_?fWR@AE^lA9nrV><42%aI5fM1=>f=
zbcpqX`m;gqjUd<ad^y<V2kBAJ^Qmy=$zZ3K^HQ+K%UBEE^aK5w!0Us6=YxN42Y+~(
z%R$DCz&j4t9|n6o&0j|RknJxf{?c$U@bT+cg0C(G{k=^$gWs<<9sf6Fcl2N!|8L5L
zKs<+#udVOxu;Q5)&!Tvi#j`4&V+x;-{Atv}KZyK6=MSz>`ut??kVM7P-}W3G9sRc_
bhc9f54n|~if+JHOG++K7j6ZI{q$U6WOC;Vg

literal 0
HcmV?d00001

diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.csv b/tests/testthat/data_files/STANDARDISE/std_3_d.csv
new file mode 100644
index 00000000..5dfce08f
--- /dev/null
+++ b/tests/testthat/data_files/STANDARDISE/std_3_d.csv
@@ -0,0 +1,101 @@
+fac_col1,fac_col2,fac_col3,fac_col4,fac_col5,col11,col14,col17,col20
+High,Blue,Yes,C,Three,31,TRUE,o,28.7577520124614
+High,Blue,Yes,C,Three,79,TRUE,s,78.8305135443807
+High,Blue,Yes,C,Three,51,TRUE,n,40.89769218117
+Medium,NA,NA,B,Two,14,FALSE,c,88.3017404004931
+High,Blue,Yes,C,Three,67,TRUE,j,94.0467284293845
+Medium,NA,NA,B,Two,42,FALSE,r,4.55564993899316
+Medium,NA,NA,B,Two,50,FALSE,v,52.8105488047004
+Medium,NA,NA,B,Two,43,FALSE,k,89.2419044394046
+High,Blue,Yes,C,Three,14,TRUE,e,55.1435014465824
+Low,NA,Yes,A,One,25,TRUE,t,45.6614735303447
+Medium,NA,NA,B,Two,90,FALSE,n,95.6833345349878
+Medium,NA,NA,B,Two,91,FALSE,v,45.3334156190977
+Low,NA,NA,A,One,69,FALSE,y,67.7570635452867
+Medium,NA,Yes,B,Two,91,TRUE,z,57.2633401956409
+High,Blue,NA,C,Three,57,FALSE,e,10.2924682665616
+Low,NA,Yes,A,One,92,TRUE,s,89.9824970401824
+High,Blue,NA,C,Three,9,FALSE,y,24.608773435466
+High,Blue,Yes,C,Three,93,TRUE,y,4.20595335308462
+Low,NA,Yes,A,One,99,TRUE,i,32.7920719282702
+Low,NA,Yes,A,One,72,TRUE,c,95.4503649147227
+Low,NA,Yes,A,One,26,TRUE,h,88.9539316063747
+Low,NA,NA,A,One,7,FALSE,z,69.28034061566
+High,Blue,Yes,C,Three,42,TRUE,g,64.0506813768297
+Medium,NA,Yes,B,Two,9,TRUE,j,99.4269776623696
+High,Blue,Yes,C,Three,83,TRUE,i,65.5705799115822
+Medium,NA,Yes,B,Two,36,TRUE,s,70.8530468167737
+Low,NA,NA,A,One,78,FALSE,d,54.4066024711356
+Medium,NA,NA,B,Two,81,FALSE,n,59.414202044718
+High,Blue,Yes,C,Three,43,TRUE,q,28.915973729454
+Medium,NA,NA,B,Two,76,FALSE,k,14.7113647311926
+Low,NA,Yes,A,One,15,TRUE,g,96.3024232536554
+High,Blue,NA,C,Three,32,FALSE,u,90.2299045119435
+High,Blue,Yes,C,Three,7,TRUE,l,69.0705278422683
+Low,NA,NA,A,One,9,FALSE,o,79.5467417687178
+High,Blue,NA,C,Three,41,FALSE,j,2.46136845089495
+Medium,NA,Yes,B,Two,74,TRUE,m,47.7795971091837
+Low,NA,Yes,A,One,23,TRUE,g,75.8459537522867
+High,Blue,Yes,C,Three,27,TRUE,i,21.6407935833558
+Low,NA,Yes,A,One,60,TRUE,i,31.8181007634848
+Low,NA,NA,A,One,53,FALSE,j,23.1625785352662
+Medium,NA,Yes,B,Two,7,TRUE,w,14.2800022382289
+High,Blue,NA,C,Three,53,FALSE,u,41.4546335814521
+High,Blue,NA,C,Three,27,FALSE,g,41.3724326295778
+Low,NA,Yes,A,One,96,TRUE,u,36.8845450924709
+High,Blue,Yes,C,Three,38,TRUE,f,15.2444747742265
+Low,NA,Yes,A,One,89,TRUE,y,13.880606344901
+High,Blue,Yes,C,Three,34,TRUE,b,23.3034099452198
+Medium,NA,NA,B,Two,93,FALSE,e,46.5962450252846
+Low,NA,Yes,A,One,69,TRUE,h,26.5972640365362
+Medium,NA,Yes,B,Two,72,TRUE,l,85.7827715342864
+Low,NA,NA,A,One,76,FALSE,m,4.58311666734517
+Low,NA,Yes,A,One,63,TRUE,r,44.2200074205175
+High,Blue,Yes,C,Three,13,TRUE,a,79.8924845643342
+Low,NA,Yes,A,One,82,TRUE,y,12.189925997518
+Medium,NA,Yes,B,Two,97,TRUE,y,56.0947983758524
+Low,NA,NA,A,One,91,FALSE,f,20.653138961643
+Low,NA,NA,A,One,25,FALSE,u,12.7531650243327
+High,Blue,Yes,C,Three,38,TRUE,o,75.3307864302769
+Low,NA,NA,A,One,21,FALSE,i,89.50453591533
+Medium,NA,Yes,B,Two,79,TRUE,o,37.4462775886059
+Low,NA,Yes,A,One,41,TRUE,z,66.5115194628015
+High,Blue,NA,C,Three,47,FALSE,p,9.48406609240919
+Low,NA,NA,A,One,90,FALSE,t,38.3969637798145
+High,Blue,Yes,C,Three,60,TRUE,f,27.438364457339
+Medium,NA,Yes,B,Two,95,TRUE,k,81.4640038879588
+High,Blue,NA,C,Three,16,FALSE,h,44.8516341391951
+Medium,NA,Yes,B,Two,94,TRUE,v,81.0064353048801
+Medium,NA,Yes,B,Two,6,TRUE,v,81.2389509519562
+High,Blue,Yes,C,Three,72,TRUE,g,79.4342321110889
+Medium,NA,Yes,B,Two,86,TRUE,p,43.9831687603146
+Medium,NA,NA,B,Two,86,FALSE,q,75.4475158639252
+High,Blue,Yes,C,Three,39,TRUE,v,62.9221131559461
+High,Blue,Yes,C,Three,31,TRUE,r,71.0182401351631
+Low,NA,Yes,A,One,81,TRUE,q,0.0624773325398564
+Medium,NA,Yes,B,Two,50,TRUE,b,47.5316574098542
+Medium,NA,NA,B,Two,34,FALSE,d,22.0118885161355
+Low,NA,NA,A,One,4,FALSE,m,37.9816537722945
+Medium,NA,Yes,B,Two,13,TRUE,e,61.2771003274247
+Low,NA,NA,A,One,69,FALSE,v,35.1797909243032
+Low,NA,NA,A,One,25,FALSE,s,11.1135424347594
+Medium,NA,NA,B,Two,52,FALSE,y,24.3619472719729
+High,Blue,NA,C,Three,22,FALSE,t,66.805558744818
+High,Blue,Yes,C,Three,89,TRUE,v,41.7646779678762
+Low,NA,NA,A,One,32,FALSE,y,78.8195834029466
+Medium,NA,NA,B,Two,25,FALSE,n,10.2864644257352
+Low,NA,NA,A,One,87,FALSE,y,43.4892741497606
+Medium,NA,Yes,B,Two,35,TRUE,w,98.4956979984418
+Low,NA,Yes,A,One,40,TRUE,c,89.3051114398986
+High,Blue,NA,C,Three,30,FALSE,h,88.6469060787931
+High,Blue,Yes,C,Three,12,TRUE,p,17.5052650272846
+Medium,NA,NA,B,Two,31,FALSE,l,13.0695691565052
+High,Blue,NA,C,Three,30,FALSE,y,65.3101925039664
+Low,NA,Yes,A,One,64,TRUE,n,34.3516472261399
+Medium,NA,NA,B,Two,99,FALSE,c,65.6758127966896
+Medium,NA,NA,B,Two,14,FALSE,n,32.0373242488131
+High,Blue,Yes,C,Three,93,TRUE,g,18.7691119266674
+Medium,NA,Yes,B,Two,96,TRUE,c,78.2294301316142
+Low,NA,NA,A,One,71,FALSE,w,9.35949867125601
+High,Blue,Yes,C,Three,67,TRUE,v,46.677904156968
+High,Blue,Yes,C,Three,23,TRUE,z,51.1505459900945
diff --git a/tests/testthat/data_files/STANDARDISE/std_3_d.rda b/tests/testthat/data_files/STANDARDISE/std_3_d.rda
new file mode 100644
index 0000000000000000000000000000000000000000..4165f09836d3a23ed62cd6ad8e3179416d1a16d5
GIT binary patch
literal 2343
zcmV+?3E1`@iwFP!000001I1bCb{t0)F3m{sLa;;(goG_DAz?{T`(l<PCJq6zkl=vB
zCdSfO7Llc7Mz&7!iwEHud4!1Xs;jF@)zgFG1^b+Py6WrdyDv5Jd#~Nl7jIl-jE&jq
z>WGckuw-lRUw{3jz!(6=R@f@LfakgS;r6E9+{V$%*aBMl51!?bCy!>6k2ZJr52riR
zS@mx9=43ui*cVUm=J{EJ^>|&L%w`8iSFYeO`DF9fWb5#tX1R89WPm3<w-eUVxOk3>
z{ixWFDtx6_msIl`E!eMAd`f&&eX~-%tND(q_sg)FM`?4t9<qN(zxqt=+tC79#ko@{
z^~B!v-gIw{RUQ3jPep%zINROd*}OOHl3?w)iu*>2`^JgPlyh(F?%Yo6@ta7MB=)W8
z_U_#~%?GCxTHD&2%;$+q)-oqwn^&WMvGyN*{={)-!MWo{px^o9Wh%9$GfE=Iwde(9
zwmPKdVEH7IPo6E+`x-C59eO{+Z%FQ<Z|ZaZORr_vGG0UaEu&@UhHM;?b)V^$TkzGr
zyHj??ta>PkeqWLwZ$1yjp?oglQ)5Hl*0P55StjSKG8gf?UmHrQv1K+am;XRnHU3~a
zXOAPDKab!?n$6`!rXJko@NV}j6<%Ki%3)p)`FcMz<cBU6i!My{C2P^etHu^xx<&)*
zA^V5)tIrHw3I}}gK$n4)SKGzacJV9r|IF@(p~xf6_Byl6=$$1q%Pc6V=C|ldKU6_=
zrY_2~ns0sGUxqEJzB(&wxr-`pk$&}=C7Hd*7Y~$V<9GI_?WOzr?b)>5jISRZu*Z@0
z4@IH+XUx;5e#YR3@$%m=b;5re^bSaaE`g{Fe*{vXA1CY?(4Rqn0bNVa2>L5K`Hjy0
z2HFC>0eTj6F7cpue*k?S^fu_-#PdziI_SA{md^bU^jk6q^mWkBK{ma&3Ht}=2cQo?
z-%WC_CAq}&<z&};ph?p6ne@$9lAR>yr^z0Yv6-CxcXD*%^~XdnB>%jd{6R8TlZ<PL
zw*h@M>GWK}{sj6S=ta=CKu;z<w0<S|>Qd65Z2EQb`_m~l7g_)B3aogtV!sHhpNsg^
z*wDANtRa1t$vLacMf~p92DSuSX2UZ1pQq-x+qrhoF6QlGzg=v#i+{F@S-ZH`E<S7*
zZL?T8?D5HmKWP`A_P(0;&g8{zPoLX8K07^CcYB<-dy0MBvm*1k+b;Hcj>;nL^uF5d
zE%M=`9_PNSTRq!v_8fJ)r&v}aM`tqZ%v(L72R)rLlHL1JFSwb{hdssa^%grlJ|Fka
z%zCe~?b|&|r@hbn`E%GilV9!kkQ|+Uj%A(qhWTvuke%MEqn<vWHuX4eXWchhGXtnE
zQ;(+JPyO7pW**Q?ldPE?f;Bz<XRMoXG~4Kc@*URnZR+{dyI)THUuDhw2&|bc)bBSy
zH(4`ZUT4iLp`JlAjb;|jC7KP?`)MZ9+@u*rGxJNVnG;`Q&HSMmNOOt)fX-Pn=e_{C
z0{Sv*<_yhHn#<3#W~R{mq&fBt(EF^JeKeD=gMJTs3G_SAFG0UyO&@D#8qLjLux6Ig
zOrqKL8f#`a&GYl1pRi`0(X71zdWtpu@D<j~%d4!Ji<d!fv1Z27UBeZ3sSVck%clev
zP8sjDR#wMgok?#t6gN(47mO7lnrK_xa3yTCT3aoGajL_u#pf~V0Fti2gO<`c=e4oY
zw|L~*f^TqMD(i&oa1)GhPDEuyl(EI7;3_!dj8WEWt4bV4T~N+h*U6QVOR23<MjH=F
z3163rOK7NUjNZzSk1Osi)V2a2XqDBJ8Xk()MQQOai(LmEf^foVV`8VhgnNPzLMa`T
z3bDXF)N#hc7g2RuTCR*S9w9JFx$I@>IY}dRyn>rE9=L}tCp_TMY?Tt+L4EihE-*R=
zQF3R5j~X%1)@Ey!;zG(0f|Ure&0>}2(!syM!o#i$E^sY0;t`EiGIo3n*G@*`m6y@0
znAI`diGZ{+h=9x5E5|L4K_v}sGy8SrxzPdMF$k^BPE#<}dT*7G2(avo4IJT!UPGd>
zIb*ftK?W&=M^J*vN@^IZy;ewt5Tup`?xPaWTm|JtHdezZ_ytBOI45RxwB^!AA1O28
zjqbcdDnJiQFTCi^!zvUA3<$xYW(qt8WSaJn00S}}xQ;G5;CP?$OGfYz5p@OQy81Da
z!+xWcQxQ%I1rA*t!c9DsEO3wV0dMDnFX~pK;G*(I8VC`tz#|IR;jGZI%Rqw+SIBVW
zt3Wy10;g=!1}>A)c5*G(8YpTLp7XxI9b)RKkD!J+oN@#`37Kz;%wGpi$_wHYU1u%|
zh04|=IRmP_z;PN$BGE*G&g?DP0xHX({Dml-ZxxaUIEorE6gjD^;V0b)k)?K$-C&Un
zNNyn!VjEO<-f^%+05!sH3*2*spcnz)ApdivSoAvyS1JJ?bnVDVj>JJOqZJ`<v-(c)
zs1!7j7Cp^nFFV796%Og`(FtRB9^RLbsjNV5b=@7f^}_myUIO)1fjcNJ71G3^nPePM
z!AX?5Lw}R4+tz^xfyN)aF`-c3Q{|#L!qMp3ojQ>Vh*dsNw+n5bx&$0-;T?zQW}zO5
zkL*Bimk50qLod<2;Bz6VPP>Bklxo6bN@%oA7YiKDwJ_9actRC8>K3yMEvD4c2Tp~l
z!5{6h%PHUi;S10cuM0haYZL>lMH>?xAH-H0r4cVFLN^S6qe4+}h?SRJ=>kV3qIDrd
zkSbYjfV~0``2zVV3mju6qVHJBs`5PMByuMNlt9S*gXKO@T3Ij;T`~kT9wAXd_{PM}
zpTN;EBBmTV#%UZ*qalw7`z|>HqG(X%@InMLSK#CxiGqm%6Tgi*a4QT(4Z6A3#p#IP
zMh|slQCU$?RKOja5S(1O5vdPvqE{%~J-UjHh&r+!R}x9B+!r`95B`N0V(7dd9kf&w
zE{rDIjg=U<p$h<NB^cA?ctms+V2?Se(UQ*c_oUPRH@!c(bMoH~bo@dB&*S*!*1?{v
z_Nv<JYHzB2>xMPrCCiN|H@?^?Q7L#~c5uX7CG(%=j*pN3Jz(p_?aASU-<si*#$NW>
Ne*r@x227A5001-BpXmSq

literal 0
HcmV?d00001

diff --git a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R
index 9e129971..e6dac369 100644
--- a/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R
+++ b/tests/testthat/data_files/molgenis_armadillo-upload_testing_datasets.R
@@ -1,4 +1,4 @@
-# 
+#
 # Molgenis' Armadillo - Upload Testing Datasets
 #
 
@@ -15,7 +15,7 @@ upload_testing_dataset_table <- function(project_name, folder_name, table_name,
 MolgenisArmadillo::armadillo.login_basic(armadillo = 'http://127.0.0.1:8080', username = "admin", password = "admin")
 
 if (! 'datashield' %in% MolgenisArmadillo::armadillo.list_projects())
-    MolgenisArmadillo::armadillo.create_project('datashield')
+    MolgenisArmadillo::armadillo.create_project('datashield', overwrite_existing = "no")
 
 upload_testing_dataset_table('datashield', 'anthro', 'anthro1', 'ANTHRO/anthro1.rda')
 upload_testing_dataset_table('datashield', 'anthro', 'anthro2', 'ANTHRO/anthro2.rda')
@@ -69,4 +69,12 @@ upload_testing_dataset_table('datashield', 'testing', 'DATASET1', 'TESTING/DATAS
 upload_testing_dataset_table('datashield', 'testing', 'DATASET2', 'TESTING/DATASET2.rda')
 upload_testing_dataset_table('datashield', 'testing', 'DATASET3', 'TESTING/DATASET3.rda')
 
+upload_testing_dataset_table('datashield', 'standardise', 'std_1', 'STANDARDISE/std_1.rda')
+upload_testing_dataset_table('datashield', 'standardise', 'std_2', 'STANDARDISE/std_2.rda')
+upload_testing_dataset_table('datashield', 'standardise', 'std_3', 'STANDARDISE/std_3.rda')
+
+upload_testing_dataset_table('datashield', 'standardise', 'std_1_d', 'STANDARDISE/std_1.rda')
+upload_testing_dataset_table('datashield', 'standardise', 'std_2_d', 'STANDARDISE/std_2.rda')
+upload_testing_dataset_table('datashield', 'standardise', 'std_3_d', 'STANDARDISE/std_3.rda')
+
 print(MolgenisArmadillo::armadillo.list_tables('datashield'))
diff --git a/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R b/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R
index ae79d2e6..1fce513b 100644
--- a/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R
+++ b/tests/testthat/data_files/obiba_opal-upload_testing_datasets.R
@@ -1,4 +1,4 @@
-# 
+#
 # Obiba's Opal - Upload Testing Datasets
 #
 
@@ -9,11 +9,11 @@ library(tibble)
 upload_testing_dataset_table <- function(opal, project_name, table_name, local_file_path) {
     if (! opal.project_exists(opal, project_name))
         opal.project_create(opal, project_name, database = "mongodb")
-  
+
     dataset_name <- load(file = local_file_path)
     dataset      <- eval(as.symbol(dataset_name))
     data         <- as_tibble(dataset, rownames = '_row_id_')
-  
+
     opal.table_save(opal, data, project_name, table_name, id.name = "_row_id_", force = TRUE)
 }
 
@@ -72,4 +72,14 @@ upload_testing_dataset_table(opal, 'TESTING', 'DATASET1', 'TESTING/DATASET1.rda'
 upload_testing_dataset_table(opal, 'TESTING', 'DATASET2', 'TESTING/DATASET2.rda')
 upload_testing_dataset_table(opal, 'TESTING', 'DATASET3', 'TESTING/DATASET3.rda')
 
+upload_testing_dataset_table('STANDARDISE', 'std_1', 'STANDARDISE/std_1.rda')
+upload_testing_dataset_table('STANDARDISE', 'std_2', 'STANDARDISE/std_2.rda')
+upload_testing_dataset_table('STANDARDISE', 'std_3', 'STANDARDISE/std_3.rda')
+
+upload_testing_dataset_table('STANDARDISE', 'std_1_d', 'STANDARDISE/std_1_d.rda')
+upload_testing_dataset_table('STANDARDISE', 'std_2_d', 'STANDARDISE/std_2_d.rda')
+upload_testing_dataset_table('STANDARDISE', 'std_3_d', 'STANDARDISE/std_3_d.rda')
+
+
+
 opal.logout(opal)

From d6153ec7b5da5ea1c799f68d50871e2db5df354c Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:25:14 +0200
Subject: [PATCH 17/18] helper function to load extra data

---
 .../init_studies_datasets.R                   | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tests/testthat/connection_to_datasets/init_studies_datasets.R b/tests/testthat/connection_to_datasets/init_studies_datasets.R
index e7e412ab..b95ed48a 100644
--- a/tests/testthat/connection_to_datasets/init_studies_datasets.R
+++ b/tests/testthat/connection_to_datasets/init_studies_datasets.R
@@ -249,6 +249,34 @@ init.studies.dataset.stand <- function(variables)
   }
 }
 
+init.studies.dataset.stand_disclosure <- function(variables)
+{
+  if (ds.test_env$secure_login_details)
+  {
+    if (ds.test_env$driver == "OpalDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "STANDARDISE.std_1_d", options=ds.test_env$options_1)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "STANDARDISE.std_2_d", options=ds.test_env$options_2)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "STANDARDISE.std_3_d", options=ds.test_env$options_3)
+      ds.test_env$login.data <- builder$build()
+    }
+    else if (ds.test_env$driver == "ArmadilloDriver")
+    {
+      builder <- DSI::newDSLoginBuilder(.silent = TRUE)
+      builder$append(server = "sim1", url = ds.test_env$ip_address_1, user = ds.test_env$user_1, password = ds.test_env$password_1, table = "datashield/standardise/std_1_d", driver = ds.test_env$driver)
+      builder$append(server = "sim2", url = ds.test_env$ip_address_2, user = ds.test_env$user_2, password = ds.test_env$password_2, table = "datashield/standardise/std_2_d", driver = ds.test_env$driver)
+      builder$append(server = "sim3", url = ds.test_env$ip_address_3, user = ds.test_env$user_3, password = ds.test_env$password_3, table = "datashield/standardise/std_3_d", driver = ds.test_env$driver)
+      ds.test_env$login.data <- builder$build()
+    }
+    else
+    {
+      ds.test_env$login.data <- DSLite::setupCNSIMTest("dsBase", env = ds.test_env)
+    }
+    ds.test_env$stats.var <- variables
+  }
+}
+
 
 connect.studies.dataset.cnsim <- function(variables)
 {
@@ -314,6 +342,14 @@ connect.studies.dataset.stand <- function(variables)
   log.in.data.server()
 }
 
+connect.studies.dataset.stand_disclosure <- function(variables)
+{
+  log.out.data.server()
+  source("connection_to_datasets/login_details.R")
+  init.studies.dataset.stand_disclosure(variables)
+  log.in.data.server()
+}
+
 disconnect.studies.dataset.cnsim <- function()
 {
   log.out.data.server()

From a046eb4a0fb47aeb780f1f884f02cc603fff2349 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 15:25:37 +0200
Subject: [PATCH 18/18] test: test that disclosive levels are not returned

---
 tests/testthat/test-smk-standardiseDf.R | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-smk-standardiseDf.R b/tests/testthat/test-smk-standardiseDf.R
index 765d655c..d557c325 100644
--- a/tests/testthat/test-smk-standardiseDf.R
+++ b/tests/testthat/test-smk-standardiseDf.R
@@ -686,11 +686,27 @@ test_that("ds.standardiseDf doesn't run if levels are not identical and fix_clas
   )
 })
 
+test_that("ds.standardiseDf doesn't run if a factor variable has too many levels", {
+  connect.studies.dataset.stand_disclosure(
+    c(
+      "fac_col1", "fac_col2", "fac_col3", "fac_col4", "fac_col5", "fac_col6", "fac_col7", "fac_col9",
+      "fac_col10", "col11", "col12", "col13", "col14", "col15", "col16", "col17", "col18", "col19",
+      "col20")
+  )
 
-## Add disclosure check levels
-## Push change to dsBase
-
-
+  expect_error(
+    with_mocked_bindings(
+      ds.standardiseDf(
+        df = "D",
+        newobj = "test_fill"
+        ),
+      prompt_user_class_decision_all_vars = function(var, server, classes, newobj, datasources) "1",
+      ask_question_wait_response_levels = function(levels_conflict, newobj, datasources) "2"
+      )
+  )
 
+})
 
+disconnect.studies.dataset.stand()
 
+context("ds.standardiseDf::smk::done")