Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 20 additions & 28 deletions .github/workflows/dsBase_test_suite.yaml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
BRANCH_NAME: ${{ github.ref_name }}
REPO_OWNER: ${{ github.repository_owner }}
R_KEEP_PKG_SOURCE: yes
GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }}

steps:
- name: Checkout dsBase
Expand All @@ -45,12 +46,14 @@ jobs:
path: dsBase

- name: Checkout testStatus
if: ${{ github.actor != 'nektos/act' }} # for local deployment only
uses: actions/checkout@v4
with:
repository: ${{ env.REPO_OWNER }}/testStatus
token: ${{ secrets.GH_TOKEN }}
ref: master
path: testStatus
persist-credentials: false
token: ${{ env.GITHUB_TOKEN }}

- uses: r-lib/actions/setup-pandoc@v2

Expand Down Expand Up @@ -156,44 +159,33 @@ jobs:
run: |
Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/
working-directory: dsBase

- name: Commit results to testStatus
- name: Render report
run: |
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git config --global user.name "github-actions[bot]"
cd testStatus

# Reconfigure remote to use GitHub token for authentication
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ env.REPO_OWNER }}/testStatus.git
git checkout master
git pull origin master

mkdir -p logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
# clear the latest directory
rm -rf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/*
mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/

# Copy logs to new logs directory location
cp -rv ../dsBase/logs/* logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
cp -rv ../dsBase/logs/${{ env.WORKFLOW_ID }}.txt logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
cp -rv ../${{ env.PROJECT_NAME }}/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
cp -rv ../${{ env.PROJECT_NAME }}/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/

# Create symbolic links
ln -sf ${{ env.WORKFLOW_ID }}/ logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/.LATEST
# ln -sf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/latest

R -e 'input_dir <- file.path("../logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
mv source/test_report.html docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
cp -r docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest

git add .
git commit -m "Auto test for ${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }} @ ${{ env.WORKFLOW_ID }}" || echo "No changes to commit"
git push origin master
R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
cp -r new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest

env:
PROJECT_NAME: ${{ env.PROJECT_NAME }}
BRANCH_NAME: ${{ env.BRANCH_NAME }}
WORKFLOW_ID: ${{ env.WORKFLOW_ID }}

- name: Upload test logs
uses: actions/upload-artifact@v4
with:
name: dsbase-logs
path: testStatus/new

- name: Dump environment info
run: |
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,12 @@ import(gamlss.dist)
import(mice)
importFrom(gamlss.dist,pST3)
importFrom(gamlss.dist,qST3)
<<<<<<< HEAD
=======
importFrom(purrr,imap)
importFrom(purrr,map)
importFrom(purrr,set_names)
importFrom(tibble,as_tibble)
importFrom(tidyselect,all_of)
importFrom(tidyselect,peek_vars)
>>>>>>> acb2b95 (export functions)
133 changes: 133 additions & 0 deletions R/standardiseDfDS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' Get the Class of All Columns in a Data Frame
#' @param df.name A string representing the name of the data frame.
#' @return A tibble with the class of each column in the data frame.
#' @importFrom dplyr %>%
#' @importFrom tibble as_tibble
#' @importFrom purrr map
#' @export
getClassAllColsDS <- function(df.name){
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df.name <- eval(parse(text = df.name), envir = parent.frame())
all_classes <- map(df.name, class) %>% as_tibble()
return(all_classes)
}

#' Change Class of Target Variables in a Data Frame
#' @param df.name A string representing the name of the data frame.
#' @param target_vars A character vector specifying the columns to be modified.
#' @param target_class A character vector specifying the new classes for each column (1 = factor,
#' 2 = integer, 3 = numeric, 4 = character, 5 = logical).
#' @return A modified data frame with the specified columns converted to the target classes.
#' @importFrom dplyr mutate across
#' @importFrom tidyselect all_of
#' @export
fixClassDS <- function(df.name, target_vars, target_class) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df <- eval(parse(text = df.name), envir = parent.frame())
df_transformed <- df %>%
mutate(
across(all_of(target_vars),
~ .convertClass(.x, target_class[which(target_vars == cur_column())])))
return(df_transformed)
}

#' Convert a Vector to a Specified Class
#' @param x The vector to be converted.
#' @param class_name A string indicating the target class (1 = factor, 2 = integer, 3 = numeric,
#' 4 = character, 5 = logical).
#' @return The converted vector.
#' @noRd
.convertClass <- function(target_var, target_class_code) {
switch(target_class_code,
"1" = as.factor(target_var),
"2" = as.integer(target_var),
"3" = as.numeric(target_var),
"4" = as.character(target_var),
"5" = as.logical(target_var)
)
}

#' Add Missing Columns with NA Values
#' @param .data A string representing the name of the data frame.
#' @param cols A character vector specifying the columns to be added if missing.
#' @return A modified data frame with missing columns added and filled with NA.
#' @importFrom dplyr mutate select
#' @importFrom tidyselect peek_vars
#' @importFrom purrr set_names
#' @export
fixColsDS <- function(.data, cols) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

.data <- eval(parse(text = .data), envir = parent.frame())
missing <- setdiff(cols, colnames(.data))
out <- .data %>%
mutate(!!!set_names(rep(list(NA), length(missing)), missing)) %>%
select(sort(peek_vars()))
return(out)
}

#' Retrieve Factor Levels for Specific Columns
#' @param df.name A string representing the name of the data frame.
#' @param factor_vars A character vector specifying the factor columns.
#' @return A list of factor levels for the specified columns.
#' @importFrom tidyselect all_of
#' @importFrom purrr map imap
#' @export
getAllLevelsDS <- function(df.name, factor_vars) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df <- eval(parse(text = df.name), envir = parent.frame())
factor_vars_split <- strsplit(factor_vars, ",\\s*")[[1]]
levels <- purrr::map(df[factor_vars_split], base::levels)

disclosure_check <- imap(levels, function(lvls, var) {
.checkLevelsDisclosure(df = df, var = var, levels = lvls)
})

failed_vars <- names(disclosure_check)[unlist(disclosure_check)]

if(length(failed_vars) > 0) {
stop("Based on the value of nfilter.levels.density, these factor variables", " {", failed_vars, "} ", "have too many levels compared to the length of the variable. Please reduce the numnber of levels or change the variable type and try again")
} else {
return(levels)
}
}

#' Check variable levels against disclosure thresholds
#'
#' Internal helper function to verify whether the number of levels in a variable
#' exceeds the allowed density threshold defined by `dsBase::listDisclosureSettingsDS()`.
#'
#' @param df A data frame containing the variable.
#' @param var Character string. Name of the variable to check.
#' @param levels Character vector. Levels of the variable.
#'
#' @return Logical. `TRUE` if the check fails (i.e., disclosure threshold is violated),
#' otherwise `FALSE`.
#'
#' @keywords internal
#' @noRd
.checkLevelsDisclosure <- function(df, var, levels) {
thr <- dsBase::listDisclosureSettingsDS()
nfilter.levels.density <- as.numeric(thr$nfilter.levels.density)
n_levels <- length(levels)
length_var <- length(df[[var]])
fail <- (length_var * nfilter.levels.density) < n_levels
return(fail)
}

#' Set Factor Levels for Specific Columns in a Data Frame
#' @param df.name A string representing the name of the data frame to modify.
#' @param vars A character vector specifying the columns to be modified.
#' @param levels A named list where each element contains the levels for the corresponding factor variable.
#' @return A modified data frame with the specified columns converted to factors with the provided levels.
#' @export
fixLevelsDS <- function(df.name, vars, levels) {
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana', 'carrot'))

df.name <- eval(parse(text = df.name), envir = parent.frame())
out <- df.name %>%
mutate(across(all_of(vars), ~factor(., levels = levels[[dplyr::cur_column()]])))
}
12 changes: 11 additions & 1 deletion inst/DATASHIELD
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ AggregateMethods:
is.null=base::is.null,
is.numeric=base::is.numeric,
NROW=base::NROW,
t.test=stats::t.test
t.test=stats::t.test,
getClassAllColsDS,
getAllLevelsDS
AssignMethods:
absDS,
asCharacterDS,
Expand Down Expand Up @@ -160,6 +162,7 @@ AssignMethods:
acos=base::acos,
atan=base::atan,
sum=base::sum,
<<<<<<< HEAD
unlist=base::unlist
Options:
datashield.privacyLevel=5,
Expand All @@ -173,3 +176,10 @@ Options:
default.nfilter.noise=0.25,
default.nfilter.levels.density=0.33,
default.nfilter.levels.max=40
=======
unlist=base::unlist,
fixClassDS,
fixColsDS,
fixLevelsDS

>>>>>>> acb2b95 (export functions)
Loading
Loading