From 17cebc056909e7c715cf7243b8704ac762a45ef8 Mon Sep 17 00:00:00 2001
From: Roberto Villegas-Diaz <villegar@users.noreply.github.com>
Date: Sat, 20 Sep 2025 18:30:10 +0100
Subject: [PATCH 1/8] Update workflow to avoid relying on external PAT
 (secrets.GH_TOKEN) and create an artifact with test suite ouputs, instead of
 pushing to testStatus. Additionally, a new workflow was added to testStatus
 to 'consume' these artifacts

---
 .github/workflows/dsBase_test_suite.yaml | 48 ++++++++++--------------
 1 file changed, 20 insertions(+), 28 deletions(-)
 mode change 100644 => 100755 .github/workflows/dsBase_test_suite.yaml

diff --git a/.github/workflows/dsBase_test_suite.yaml b/.github/workflows/dsBase_test_suite.yaml
old mode 100644
new mode 100755
index 44020e83..8f03b10f
--- a/.github/workflows/dsBase_test_suite.yaml
+++ b/.github/workflows/dsBase_test_suite.yaml
@@ -37,6 +37,7 @@ jobs:
       BRANCH_NAME: ${{ github.ref_name }}
       REPO_OWNER: ${{ github.repository_owner }}
       R_KEEP_PKG_SOURCE: yes
+      GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }}
 
     steps:
       - name: Checkout dsBase
@@ -45,12 +46,14 @@ jobs:
           path: dsBase
 
       - name: Checkout testStatus
+        if: ${{ github.actor != 'nektos/act' }} # for local deployment only
         uses: actions/checkout@v4
         with:
           repository: ${{ env.REPO_OWNER }}/testStatus
-          token: ${{ secrets.GH_TOKEN }}
           ref: master
           path: testStatus
+          persist-credentials: false
+          token: ${{ env.GITHUB_TOKEN }}
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -156,44 +159,33 @@ jobs:
         run: |
           Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/
         working-directory: dsBase
-
-      - name: Commit results to testStatus
+        
+      - name: Render report
         run: |
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-          git config --global user.name "github-actions[bot]"
           cd testStatus
           
-          # Reconfigure remote to use GitHub token for authentication
-          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ env.REPO_OWNER }}/testStatus.git
-          git checkout master
-          git pull origin master
-
-          mkdir -p logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
-          mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
-          mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
-          # clear the latest directory 
-          rm -rf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/*
+          mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+          mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+          mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
           
           # Copy logs to new logs directory location
-          cp -rv ../dsBase/logs/* logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
-          cp -rv ../dsBase/logs/${{ env.WORKFLOW_ID }}.txt logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+          cp -rv ../${{ env.PROJECT_NAME }}/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
+          cp -rv ../${{ env.PROJECT_NAME }}/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
           
-          # Create symbolic links
-          ln -sf ${{ env.WORKFLOW_ID }}/ logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/.LATEST
-          # ln -sf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/latest
-          
-          R -e 'input_dir <- file.path("../logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
-          mv source/test_report.html docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
-          cp -r docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest
-
-          git add .
-          git commit -m "Auto test for ${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }} @ ${{ env.WORKFLOW_ID }}" || echo "No changes to commit"
-          git push origin master
+          R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
+          mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
+          cp -r new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest
           
         env:
           PROJECT_NAME: ${{ env.PROJECT_NAME }}
           BRANCH_NAME: ${{ env.BRANCH_NAME }}
           WORKFLOW_ID: ${{ env.WORKFLOW_ID }}
+        
+      - name: Upload test logs
+        uses: actions/upload-artifact@v4
+        with:
+          name: dsbase-logs
+          path: testStatus/new
 
       - name: Dump environment info
         run: |

From f49bc73aa72a386b1eda946b1aeb4c3741ef8fac Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:33:19 +0200
Subject: [PATCH 2/8] added privacy level check

---
 R/standardiseDfDS.R | 103 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 R/standardiseDfDS.R

diff --git a/R/standardiseDfDS.R b/R/standardiseDfDS.R
new file mode 100644
index 00000000..1a8b79fe
--- /dev/null
+++ b/R/standardiseDfDS.R
@@ -0,0 +1,103 @@
+#' Get the Class of All Columns in a Data Frame
+#' @param df.name A string representing the name of the data frame.
+#' @return A tibble with the class of each column in the data frame.
+#' @importFrom dplyr %>%
+#' @importFrom tibble as_tibble
+#' @importFrom purrr map
+#' @export
+getClassAllColsDS <- function(df.name){
+  dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
+  
+  df.name <- eval(parse(text = df.name), envir = parent.frame())
+  all_classes <- map(df.name, class) %>% as_tibble()
+  return(all_classes)
+}
+
+#' Change Class of Target Variables in a Data Frame
+#' @param df.name A string representing the name of the data frame.
+#' @param target_vars A character vector specifying the columns to be modified.
+#' @param target_class A character vector specifying the new classes for each column (1 = factor,
+#' 2 = integer, 3 = numeric, 4 = character, 5 = logical).
+#' @return A modified data frame with the specified columns converted to the target classes.
+#' @importFrom dplyr mutate across
+#' @importFrom tidyselect all_of
+#' @export
+fixClassDS <- function(df.name, target_vars, target_class) {
+  dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
+  
+  df <- eval(parse(text = df.name), envir = parent.frame())
+  df_transformed <- df %>%
+    mutate(
+      across(all_of(target_vars),
+             ~ .convertClass(.x, target_class[which(target_vars == cur_column())])))
+  return(df_transformed)
+}
+
+#' Convert a Vector to a Specified Class
+#' @param x The vector to be converted.
+#' @param class_name A string indicating the target class (1 = factor, 2 = integer, 3 = numeric,
+#' 4 = character, 5 = logical).
+#' @return The converted vector.
+#' @noRd
+.convertClass <- function(target_var, target_class_code) {
+  switch(target_class_code,
+         "1" = as.factor(target_var),
+         "2" = as.integer(target_var),
+         "3" = as.numeric(target_var),
+         "4" = as.character(target_var),
+         "5" = as.logical(target_var)
+  )
+}
+
+#' Add Missing Columns with NA Values
+#' @param .data A string representing the name of the data frame.
+#' @param cols A character vector specifying the columns to be added if missing.
+#' @return A modified data frame with missing columns added and filled with NA.
+#' @importFrom dplyr mutate select
+#' @importFrom tidyselect peek_vars
+#' @importFrom purrr set_names
+#' @export
+fixColsDS <- function(.data, cols) {
+  dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
+  
+  .data <- eval(parse(text = .data), envir = parent.frame())
+  missing <- setdiff(cols, colnames(.data))
+  out <- .data %>%
+    mutate(!!!set_names(rep(list(NA), length(missing)), missing)) %>%
+    select(sort(peek_vars()))
+  return(out)
+}
+
+#' Retrieve Factor Levels for Specific Columns
+#' @param df.name A string representing the name of the data frame.
+#' @param factor_vars A character vector specifying the factor columns.
+#' @return A list of factor levels for the specified columns.
+#' @importFrom tidyselect all_of
+#' @importFrom purrr map
+#' @export
+getAllLevelsDS <- function(df.name, factor_vars) {
+  dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
+  
+  df <- eval(parse(text = df.name), envir = parent.frame())
+  factor_vars_split <- strsplit(factor_vars, ",\\s*")[[1]]
+  levels <- df %>% dplyr::select(all_of(factor_vars_split)) %>% map(levels)
+  browser()
+  return()
+}
+
+
+
+
+#' Set Factor Levels for Specific Columns in a Data Frame
+#' @param df.name A string representing the name of the data frame to modify.
+#' @param vars A character vector specifying the columns to be modified.
+#' @param levels A named list where each element contains the levels for the corresponding factor variable.
+#' @return A modified data frame with the specified columns converted to factors with the provided levels.
+#' @export
+fixLevelsDS <- function(df.name, vars, levels) {
+  dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
+  
+  df.name <- eval(parse(text = df.name), envir = parent.frame())
+  out <- df.name %>%
+    mutate(across(all_of(vars), ~factor(., levels = levels[[dplyr::cur_column()]])))
+}

From 9d82aaec380a91d44aa2209e89950d0bb41d3264 Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 12:54:47 +0200
Subject: [PATCH 3/8] add disclosure check when levels are returned

---
 R/standardiseDfDS.R | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/R/standardiseDfDS.R b/R/standardiseDfDS.R
index 1a8b79fe..1a8dab72 100644
--- a/R/standardiseDfDS.R
+++ b/R/standardiseDfDS.R
@@ -81,12 +81,29 @@ getAllLevelsDS <- function(df.name, factor_vars) {
   df <- eval(parse(text = df.name), envir = parent.frame())
   factor_vars_split <- strsplit(factor_vars, ",\\s*")[[1]]
   levels <- df %>% dplyr::select(all_of(factor_vars_split)) %>% map(levels)
-  browser()
-  return()
+  
+  disclosure_check <- imap(levels, function(lvls, var) {
+    .checkLevelsDisclosure(df.name = df.name, var = var, levels = lvls)
+  })
+  
+  failed <- paste(names(disclosure_check)[unlist(disclosure_check)], collapse = ", ")
+  
+  if(length(failed) > 0) {
+  stop("Based on the value of nfilter.levels.density, these factor variables", " {", failed, "} ", "have too many levels compared to the length of the variable. Please reduce the numnber of levels or change the variable type and try again")
+  } else {
+    return(levels)  
+  }
 }
 
-
-
+.checkLevelsDisclosure <- function(df.name, var, levels) {
+  
+  thr <- dsBase::listDisclosureSettingsDS()                        
+  nfilter.levels.density <- as.numeric(thr$nfilter.levels.density)
+  n_levels <- length(levels)
+  length_var <- length(get(df.name)[[var]])
+  fail <- (length_var * nfilter.levels.density) < n_levels
+  return(fail)
+}
 
 #' Set Factor Levels for Specific Columns in a Data Frame
 #' @param df.name A string representing the name of the data frame to modify.

From 5dd562152e9d4fd72b8539b62077c638031bb71d Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 13:36:48 +0200
Subject: [PATCH 4/8] fixed environment scoping issue

---
 R/standardiseDfDS.R | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/R/standardiseDfDS.R b/R/standardiseDfDS.R
index 1a8dab72..b3ffcd1e 100644
--- a/R/standardiseDfDS.R
+++ b/R/standardiseDfDS.R
@@ -73,7 +73,7 @@ fixColsDS <- function(.data, cols) {
 #' @param factor_vars A character vector specifying the factor columns.
 #' @return A list of factor levels for the specified columns.
 #' @importFrom tidyselect all_of
-#' @importFrom purrr map
+#' @importFrom purrr map imap
 #' @export
 getAllLevelsDS <- function(df.name, factor_vars) {
   dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))
@@ -83,24 +83,37 @@ getAllLevelsDS <- function(df.name, factor_vars) {
   levels <- df %>% dplyr::select(all_of(factor_vars_split)) %>% map(levels)
   
   disclosure_check <- imap(levels, function(lvls, var) {
-    .checkLevelsDisclosure(df.name = df.name, var = var, levels = lvls)
+    .checkLevelsDisclosure(df = df, var = var, levels = lvls)
   })
   
-  failed <- paste(names(disclosure_check)[unlist(disclosure_check)], collapse = ", ")
+  failed_vars <- names(disclosure_check)[unlist(disclosure_check)]
   
-  if(length(failed) > 0) {
-  stop("Based on the value of nfilter.levels.density, these factor variables", " {", failed, "} ", "have too many levels compared to the length of the variable. Please reduce the numnber of levels or change the variable type and try again")
+  if(length(failed_vars) > 0) {
+  stop("Based on the value of nfilter.levels.density, these factor variables", " {", failed_vars, "} ", "have too many levels compared to the length of the variable. Please reduce the numnber of levels or change the variable type and try again")
   } else {
     return(levels)  
   }
 }
 
-.checkLevelsDisclosure <- function(df.name, var, levels) {
-  
-  thr <- dsBase::listDisclosureSettingsDS()                        
+#' Check variable levels against disclosure thresholds
+#'
+#' Internal helper function to verify whether the number of levels in a variable
+#' exceeds the allowed density threshold defined by `dsBase::listDisclosureSettingsDS()`.
+#'
+#' @param df A data frame containing the variable.
+#' @param var Character string. Name of the variable to check.
+#' @param levels Character vector. Levels of the variable.
+#'
+#' @return Logical. `TRUE` if the check fails (i.e., disclosure threshold is violated),
+#'   otherwise `FALSE`.
+#'
+#' @keywords internal
+#' @noRd
+.checkLevelsDisclosure <- function(df, var, levels) {
+  thr <- dsBase::listDisclosureSettingsDS()
   nfilter.levels.density <- as.numeric(thr$nfilter.levels.density)
   n_levels <- length(levels)
-  length_var <- length(get(df.name)[[var]])
+  length_var <- length(df[[var]])
   fail <- (length_var * nfilter.levels.density) < n_levels
   return(fail)
 }

From 7a4d418d51863b52fca08ad04dacb8f864b76eab Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:18:54 +0200
Subject: [PATCH 5/8] explitly use base levels

---
 R/standardiseDfDS.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/standardiseDfDS.R b/R/standardiseDfDS.R
index b3ffcd1e..24e3eecf 100644
--- a/R/standardiseDfDS.R
+++ b/R/standardiseDfDS.R
@@ -80,7 +80,7 @@ getAllLevelsDS <- function(df.name, factor_vars) {
   
   df <- eval(parse(text = df.name), envir = parent.frame())
   factor_vars_split <- strsplit(factor_vars, ",\\s*")[[1]]
-  levels <- df %>% dplyr::select(all_of(factor_vars_split)) %>% map(levels)
+  levels <- purrr::map(df[factor_vars_split], base::levels)
   
   disclosure_check <- imap(levels, function(lvls, var) {
     .checkLevelsDisclosure(df = df, var = var, levels = lvls)

From 5aad33006456c2a1f5050a100079c086f81a834b Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:19:10 +0200
Subject: [PATCH 6/8] add test for disclosure check

---
 tests/testthat/test-smk-standardiseDfDS.R | 171 ++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 tests/testthat/test-smk-standardiseDfDS.R

diff --git a/tests/testthat/test-smk-standardiseDfDS.R b/tests/testthat/test-smk-standardiseDfDS.R
new file mode 100644
index 00000000..4397fe65
--- /dev/null
+++ b/tests/testthat/test-smk-standardiseDfDS.R
@@ -0,0 +1,171 @@
+#-------------------------------------------------------------------------------
+# Copyright (c) 2025 University Medical Center Groningen (UCMG), Netherlands. All rights reserved.
+#
+# This program and the accompanying materials
+# are made available under the terms of the GNU Public License v3.0.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#-------------------------------------------------------------------------------
+
+#
+# Set up
+#
+
+context("standardiseDfDS::smk::setup")
+
+df <- create_mixed_dataframe()
+df_list <- create_additional_dataframes(df)
+
+df_1 <- df
+df_2 <- df_list[[1]]
+df_3 <- df_list[[2]]
+df_4 <- df_list[[3]]
+
+context("standardiseDfDS::smk")
+test_that("getClassAllColsDS returns correct classes", {
+  expect_equal(
+    getClassAllColsDS("df_1"),
+    tibble(
+      fac_col1 = "factor", fac_col2 = "factor", fac_col3 = "factor", fac_col4 = "factor", fac_col5 = "factor",
+      fac_col6 = "factor", fac_col7 = "factor", fac_col8 = "factor", fac_col9 = "factor", fac_col10 = "factor",
+      fac_col11 = "factor", fac_col12 = "factor", fac_col13 = "factor", fac_col14 = "factor", fac_col15 = "factor",
+      col16 = "integer", col17 = "integer", col18 = "numeric", col19 = "numeric", col20 = "character",
+      col21 = "character", col22 = "integer", col23 = "numeric", col24 = "character", col25 = "integer",
+      col26 = "numeric", col27 = "character", col28 = "integer", col29 = "numeric", col30 = "character"
+    )
+  )
+})
+
+test_that("fixClassDS sets classes correctly", {
+
+  cols_to_set <- c("fac_col13", "fac_col5", "col22", "col19", "col25", "col20", "col28",
+                   "fac_col14", "fac_col3", "fac_col8")
+
+  classes_to_set <- c("4", "1", "3", "5", "3", "2", "5", "5", "3", "2")
+
+  expect_warning(
+    classes_changed_df <- fixClassDS("df_1", cols_to_set, classes_to_set)
+  )
+
+  expect_equal(
+    classes_changed_df %>% purrr::map_chr(class) %>% unname(),
+    c("factor", "factor", "numeric", "factor", "factor", "factor", "factor", "integer", "factor",
+      "factor", "factor", "factor", "character", "logical", "factor", "integer", "integer",
+      "numeric", "logical", "integer", "character", "numeric", "numeric", "character", "numeric",
+      "numeric", "character", "logical", "numeric", "character")
+  )
+})
+
+test_that("convert_class calls the correct function", {
+
+  result <- .convertClass(c(1, 2, 3), "1")
+  expect_true(is.factor(result))
+
+  result <- .convertClass(c(1.5, 2.5, 3.7), "2")
+  expect_true(is.integer(result))
+
+  result <- .convertClass(c("1", "2", "3"), "3")
+  expect_true(is.numeric(result))
+
+  result <- .convertClass(c(1, 2, 3), "4")
+  expect_true(is.character(result))
+
+  result <- .convertClass(c(0, 1, 0), "5")
+  expect_true(is.logical(result))
+
+})
+
+test_that("fixColsDS correctly adds missing columns", {
+
+  all_cols <- unique(c(colnames(df_1), colnames(df_2), colnames(df_3), colnames(df_4)))
+  out <- fixColsDS("df_3", all_cols)
+
+  expect_equal(
+    colnames(out),
+    sort(all_cols))
+
+})
+
+test_that("getAllLevelsDS correctly retrieves the levels of specified factor columns", {
+
+  factor_vars <- "fac_col1, fac_col2, fac_col3, fac_col4, fac_col5, fac_col6, fac_col7, fac_col8, fac_col9, fac_col10, fac_col11, fac_col12, fac_col14, fac_col15, col27"
+
+  observed <- getAllLevelsDS("df_3", factor_vars)
+
+    expected <- list(
+      fac_col1 = c("High", "Low", "Medium"),
+      fac_col2 = c("Blue", "Green", "Red"),
+      fac_col3 = c("No", "Yes"),
+      fac_col4 = c("A", "B", "C"),
+      fac_col5 = c("One", "Three", "Two"),
+      fac_col6 = c("Bird", "Cat", "Dog"),
+      fac_col7 = c("Large", "Medium", "Small"),
+      fac_col8 = c("Alpha", "Beta", "Gamma"),
+      fac_col9 = c("False", "True"),
+      fac_col10 = c("Left", "Right"),
+      fac_col11 = c("East", "North", "South", "West"),
+      fac_col12 = c("Day", "Night"),
+      fac_col14 = c("Female", "Male"),
+      fac_col15 = c("Fall", "Spring", "Summer", "Winter"),
+      col27 = letters
+    )
+
+  expect_equal(expected, observed)
+
+})
+
+example_df <- data.frame(
+  col1 = c("A", "B", "A", "C"),
+  col2 = c("X", "Y", "X", "Z"),
+  col3 = c("Yes", "No", "Yes", "No"),
+  stringsAsFactors = FALSE
+)
+
+test_that("fixLevelsDS sets factor levels correctly", {
+
+  levels <- list(
+    col1 = c("A", "B", "C"),
+    col2 = c("X", "Y", "Z"),
+    col3 = c("Yes", "No")
+  )
+
+  modified_df <- fixLevelsDS("example_df", c("col1", "col2", "col3"), levels)
+
+  expect_s3_class(modified_df$col1, "factor")
+  expect_s3_class(modified_df$col2, "factor")
+  expect_s3_class(modified_df$col3, "factor")
+
+  expect_equal(levels(modified_df$col1), levels$col1)
+  expect_equal(levels(modified_df$col2), levels$col2)
+  expect_equal(levels(modified_df$col3), levels$col3)
+
+})
+
+test_that("fixLevelsDS throws an error for invalid input", {
+
+  levels <- list(
+    col1 = c("A", "B", "C"),
+    col2 = c("X", "Y", "Z")
+  )
+
+  expect_error(fixLevelsDS("example_df", c("col1", "non_existent_col"), levels))
+})
+
+test_that("getAllLevelsDS stops when a variable exceeds disclosure threshold", {
+  
+  mtcars_bad_group <- mtcars %>% mutate(qsec = as.factor(qsec))
+
+  expect_error(
+    getAllLevelsDS("mtcars_bad_group", "qsec"),
+    regexp = "Based on the value of nfilter.levels.density"
+  )
+})
+
+#
+# Done
+#
+
+context("standardiseDfDS::smk::shutdown")
+
+context("standardiseDfDS::smk::done")

From cd876c6f8cb7e50282249a97f0dae2b0b9d0398c Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:19:42 +0200
Subject: [PATCH 7/8] export functions

---
 NAMESPACE       |  9 +++++++++
 inst/DATASHIELD | 12 +++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 897148d1..09705258 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -140,3 +140,12 @@ import(gamlss.dist)
 import(mice)
 importFrom(gamlss.dist,pST3)
 importFrom(gamlss.dist,qST3)
+<<<<<<< HEAD
+=======
+importFrom(purrr,imap)
+importFrom(purrr,map)
+importFrom(purrr,set_names)
+importFrom(tibble,as_tibble)
+importFrom(tidyselect,all_of)
+importFrom(tidyselect,peek_vars)
+>>>>>>> acb2b95 (export functions)
diff --git a/inst/DATASHIELD b/inst/DATASHIELD
index c9dd9390..804f4780 100644
--- a/inst/DATASHIELD
+++ b/inst/DATASHIELD
@@ -69,7 +69,9 @@ AggregateMethods:
     is.null=base::is.null,
     is.numeric=base::is.numeric,
     NROW=base::NROW,
-    t.test=stats::t.test
+    t.test=stats::t.test, 
+    getClassAllColsDS,
+    getAllLevelsDS
 AssignMethods:
     absDS,
     asCharacterDS,
@@ -160,6 +162,7 @@ AssignMethods:
     acos=base::acos,
     atan=base::atan,
     sum=base::sum,
+<<<<<<< HEAD
     unlist=base::unlist
 Options:
     datashield.privacyLevel=5,
@@ -173,3 +176,10 @@ Options:
     default.nfilter.noise=0.25,
     default.nfilter.levels.density=0.33,
     default.nfilter.levels.max=40
+=======
+    unlist=base::unlist, 
+    fixClassDS,
+    fixColsDS,
+    fixLevelsDS
+    
+>>>>>>> acb2b95 (export functions)

From 54150425de192c42688d80d66e49b6aff160493f Mon Sep 17 00:00:00 2001
From: Tim Cadman <41470917+timcadman@users.noreply.github.com>
Date: Tue, 7 Oct 2025 14:20:06 +0200
Subject: [PATCH 8/8] test: explicitly use namespace

---
 tests/testthat/helper.R | 162 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)
 create mode 100644 tests/testthat/helper.R

diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
new file mode 100644
index 00000000..1a51bbe4
--- /dev/null
+++ b/tests/testthat/helper.R
@@ -0,0 +1,162 @@
+#' Create a mixed dataframe with factor and other types of columns
+#'
+#' This function generates a dataframe with a specified number of rows,
+#' factor columns, and other columns (integer, numeric, and string).
+#'
+#' @param n_rows Number of rows in the dataframe. Default is 10,000.
+#' @param n_factor_cols Number of factor columns in the dataframe. Default is 15.
+#' @param n_other_cols Number of other columns (integer, numeric, and string) in the dataframe. Default is 15.
+#'
+#' @return A dataframe with the specified number of rows and columns, containing mixed data types.
+#' @importFrom dplyr bind_cols
+#' @importFrom purrr purrr::map_dfc
+#' @examples
+#' df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
+create_mixed_dataframe <- function(n_rows = 10000, n_factor_cols = 15, n_other_cols = 15) {
+
+  create_factor_column <- function(levels, n = n_rows) {
+    set.seed(123)  # Set seed before sample
+    factor(sample(levels, n, replace = TRUE))
+  }
+
+  factor_levels <- list(
+    c("Low", "Medium", "High"),
+    c("Red", "Green", "Blue"),
+    c("Yes", "No"),
+    c("A", "B", "C"),
+    c("One", "Two", "Three"),
+    c("Cat", "Dog", "Bird"),
+    c("Small", "Medium", "Large"),
+    c("Alpha", "Beta", "Gamma"),
+    c("True", "False"),
+    c("Left", "Right"),
+    c("North", "South", "East", "West"),
+    c("Day", "Night"),
+    c("Up", "Down"),
+    c("Male", "Female"),
+    c("Summer", "Winter", "Spring", "Fall")
+  )
+
+  factor_columns <- purrr::map_dfc(factor_levels[1:n_factor_cols], create_factor_column)
+  colnames(factor_columns) <- paste0("fac_col", 1:n_factor_cols)
+
+  create_other_column <- function(type, n = n_rows) {
+    set.seed(123)  # Set seed before sample
+    switch(type,
+           "int" = sample(1:100, n, replace = TRUE),
+           "num" = runif(n, 0, 100),
+           "str" = sample(letters, n, replace = TRUE)
+    )
+  }
+
+  column_types <- c(
+    "int", "int", "num", "num", "str",
+    "str", "int", "num", "str", "int",
+    "num", "str", "int", "num", "str"
+  )
+
+  other_columns <- purrr::map_dfc(column_types[1:n_other_cols], create_other_column)
+  colnames(other_columns) <- paste0("col", (n_factor_cols + 1):(n_factor_cols + n_other_cols))
+  df <- bind_cols(factor_columns, other_columns)
+
+  return(df)
+}
+
+
+#' Modify factor levels for partial overlap
+#'
+#' This function takes two sets of factor levels, computes the common and unique levels,
+#' and returns a new set of levels with partial overlap.
+#'
+#' @param levels1 First set of factor levels.
+#' @param levels2 Second set of factor levels.
+#'
+#' @return A character vector of new factor levels with partial overlap.
+#' @examples
+#' new_levels <- partial_overlap_levels(c("A", "B", "C"), c("B", "C", "D"))
+partial_overlap_levels <- function(levels1, levels2) {
+  common <- intersect(levels1, levels2)
+  unique1 <- setdiff(levels1, common)
+  unique2 <- setdiff(levels2, common)
+
+  # Set seed before each sample call
+  set.seed(123)
+  sampled_unique1 <- sample(unique1, length(unique1) * 0.5)
+
+  set.seed(123)
+  sampled_unique2 <- sample(unique2, length(unique2) * 0.5)
+
+  new_levels <- c(common, sampled_unique1, sampled_unique2)
+  return(new_levels)
+}
+
+
+#' Create additional dataframes with specific conditions
+#'
+#' This function generates additional dataframes based on an input dataframe, modifying column classes and levels,
+#' and adding new columns with unique names. Different seeds are used for each iteration of the loop,
+#' ensuring reproducibility of the generated dataframes.
+#'
+#' @param base_df The base dataframe used to create the additional dataframes.
+#' @param n_rows Number of rows in the additional dataframes. Default is 10,000.
+#' @param df_names Names of the additional dataframes to be created. Default is c("df1", "df2", "df3").
+#'
+#' @return A list of dataframes with the specified modifications.
+#' @importFrom dplyr bind_cols
+#' @examples
+#' base_df <- create_mixed_dataframe(n_rows = 100, n_factor_cols = 10, n_other_cols = 5)
+#' additional_dfs <- create_additional_dataframes(base_df, n_rows = 1000, df_names = c("df1", "df2"))
+create_additional_dataframes <- function(base_df, n_rows = 10000, df_names = c("df1", "df2", "df3")) {
+
+  # Define a fixed sequence of seeds, one for each dataframe to be created
+  seeds <- c(123, 456, 789, 101112)
+
+  df_list <- list()
+
+  for (i in seq_along(df_names)) {
+    # Set the seed for this iteration based on the pre-defined seeds
+    set.seed(seeds[i])
+
+    # Proceed with the dataframe generation process
+    overlap_cols <- sample(colnames(base_df), size = round(0.8 * ncol(base_df)))
+    df <- base_df
+    cols_to_modify_class <- sample(overlap_cols, size = round(0.2 * length(overlap_cols)))
+
+    # Modify columns to have different data types
+    for (col in cols_to_modify_class) {
+      current_class <- class(df[[col]])
+      new_class <- switch(current_class,
+                          "factor" = as.character(df[[col]]),
+                          "character" = as.factor(df[[col]]),
+                          "numeric" = as.integer(df[[col]]),
+                          "integer" = as.numeric(df[[col]]),
+                          df[[col]])
+      df[[col]] <- new_class
+    }
+
+    # Modify factor levels for partial overlap
+    factor_cols <- colnames(base_df)[sapply(base_df, is.factor)]
+    overlap_factor_cols <- intersect(overlap_cols, factor_cols)
+    cols_to_modify_levels <- sample(overlap_factor_cols, size = round(0.5 * length(overlap_factor_cols)))
+
+    for (col in cols_to_modify_levels) {
+      original_levels <- levels(base_df[[col]])
+      new_levels <- partial_overlap_levels(original_levels, original_levels)
+      df[[col]] <- factor(df[[col]], levels = new_levels)
+    }
+
+    # Create new random columns for each dataframe (these will vary by seed)
+    set.seed(seeds[i])  # Set the seed again for generating new columns
+    n_new_cols <- round(0.2 * ncol(base_df))
+    new_col_names <- paste0(df_names[i], "_new_col_", 1:n_new_cols)
+    new_cols <- data.frame(matrix(runif(n_rows * n_new_cols), ncol = n_new_cols))
+    colnames(new_cols) <- new_col_names
+
+    # Bind new columns to the dataframe
+    df <- bind_cols(df, new_cols)
+    df_list[[df_names[i]]] <- df
+  }
+
+  return(df_list)
+}
+