diff --git a/.Rbuildignore b/.Rbuildignore
index 0bfe703..8995890 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,4 +1,4 @@
-^veupathUtils\.Rproj$
+^mbioUtils\.Rproj$
 ^\.Rproj\.user$
 ^\.github$
 ^codecov\.yml$
diff --git a/.dev/megastudy-helpers.R b/.dev/megastudy-helpers.R
deleted file mode 100644
index 7b9e094..0000000
--- a/.dev/megastudy-helpers.R
+++ /dev/null
@@ -1,208 +0,0 @@
-## ignore these. they are helpers i use during testing, to recreate what java does outside the data service
-## ive just had to do this too many times to not have them in git right next to the thing im testing now
-
-# to read in root-vocab endpoint results
-# rootVocabHandle is a file handle to a two column data.table w no header
-# first column is internal study id, second is a variable value from the vocab
-# it is produced by the root-vocab endpoint in the subsetting service
-getStudySpecificVocabularyByVariable <- function(rootVocabHandle, entityId, variableId, studyIdColName) {
-  rootVocab <- readRootVocab(rootVocabHandle)
-  tbl <- getStudyVocabTibble(rootVocab, entityId, variableId, studyIdColName)
-  
-  veupathUtils::StudySpecificVocabulariesByVariable(S4Vectors::SimpleList(eval(parse(text=paste0('c(',paste(tbl$values, collapse=','),')')))))
-}
-
-readRootVocab <- function(rootVocabHandle) {
-  rootVocab <- data.table::fread(rootVocabHandle, header=FALSE)
-  names(rootVocab) <- c('studyId', 'value')
-
-  if (nrow(rootVocab) == 0) {
-    return(veupathUtils::StudySpecificVocabulariesByVariable())
-  }
-
-  return(rootVocab)
-}
-
-getStudyVocabTibble <- function(rootVocab, entityId, variableId, studyIdColName) {
-  varSpecString <- getVarSpecAsString(entityId, variableId)
-
-  tbl <- 
-  dplyr::reframe(
-    dplyr::group_by(
-      rootVocab, 
-      studyId
-    ), 
-    values=paste0(
-      "veupathUtils::StudySpecificVocabulary(variableSpec=", 
-      varSpecString, 
-      ", vocabulary=c('",
-      paste(value, collapse='\',\''),
-      "'),study='",
-      studyId, 
-      "',studyIdColumnName='",
-      studyIdColName,
-      "')"
-    )
-  )
-
-  return(unique(tbl))
-}
-
-getVarSpecAsString <- function(entityId, variableId) {
-  paste0("veupathUtils::VariableSpec(entityId='", entityId, "', variableId='", variableId, "')")
-}
-
-# an example output from the `values` column of the output of getStudyVocabTibble function
-#veupathUtils::StudySpecificVocabulary(
-#  variableSpec=veupathUtils::VariableSpec(
-#    entityId='EUPATH_0000609', 
-#    variableId='PATO_0000047'
-#  ), 
-#  vocabulary=c('female,male'),
-#  study='1969-Iowa-surveillance',
-#  studyIdColumnName='EUPATH_0000605.Study_stable_id'
-#)
-
-## for reading in some vocabs
-sexVocab <- getStudySpecificVocabularyByVariable(
-  '../rootVocab_bobs_analysis_sex.tsv',
-  'EUPATH_0000609',
-  'PATO_0000047',
-  'EUPATH_0000605.Study_stable_id'
-)
-
-speciesVocab <- getStudySpecificVocabularyByVariable(
-  '../rootVocab_bobs_analysis_species.tsv',
-  'EUPATH_0000609',
-  'OBI_0001909',
-  'EUPATH_0000605.Study_stable_id'
-)
-
-lifeStageVocab <- getStudySpecificVocabularyByVariable(
-  '../rootVocab_bobs_analysis_lifeStage.tsv',
-  'EUPATH_0000609',
-  'UBERON_0000105',
-  'EUPATH_0000605.Study_stable_id'
-)
-
-feedingStatusVocab <- getStudySpecificVocabularyByVariable(
-  '../rootVocab_bobs_analysis_feedingStatus.tsv',
-  'EUPATH_0000609',
-  'EUPATH_0043227',
-  'EUPATH_0000605.Study_stable_id'
-)
-
-## some 'real life' test data, though minified for brevity
-# some stuff for bobs analysis
-
-sexVocabReal.mini <- 
-  StudySpecificVocabulariesByVariable(S4Vectors::SimpleList(StudySpecificVocabulary(
-    studyIdColumnName='EUPATH_0000605.Study_stable_id', 
-    study='2023-abundance-SLCMCD-2022', 
-    variableSpec=VariableSpec(
-      entityId='EUPATH_0000609',
-      variableId='PATO_0000047'
-    ), 
-    vocabulary=c('male','mixed sex')
-  )))
-
-speciesVocabReal.mini <- 
-  StudySpecificVocabulariesByVariable(S4Vectors::SimpleList(StudySpecificVocabulary(
-    studyIdColumnName='EUPATH_0000605.Study_stable_id', 
-    study='2023-abundance-SLCMCD-2022', 
-    variableSpec=VariableSpec(
-      entityId='EUPATH_0000609',
-      variableId='OBI_0001909'
-    ), 
-    vocabulary=c(
-      'Aedes vexans nipponii',
-      'Anopheles freeborni',
-      'Coquillettidia perturbans',
-      'Culex erythrothorax',
-      'Culex pipiens',
-      'Culex salinarius',
-      'Culex tarsalis',
-      'Culiseta incidens',
-      'Culiseta inornata',
-      'Ochlerotatus dorsalis',
-      'Ochlerotatus increpitus',
-      'Ochlerotatus nigromaculis',
-      'Ochlerotatus sierrensis'
-    )
-  )))
-
-  lifeStageVocabReal.mini <- 
-  StudySpecificVocabulariesByVariable(S4Vectors::SimpleList(StudySpecificVocabulary(
-    studyIdColumnName='EUPATH_0000605.Study_stable_id', 
-    study='2023-abundance-SLCMCD-2022', 
-    variableSpec=VariableSpec(
-      entityId='EUPATH_0000609',
-      variableId='UBERON_0000105'
-    ), 
-    vocabulary=c('prime adult stage')
-  )))
-
-  feedingStatusVocabReal.mini <- 
-  StudySpecificVocabulariesByVariable(S4Vectors::SimpleList(StudySpecificVocabulary(
-    studyIdColumnName='EUPATH_0000605.Study_stable_id', 
-    study='2023-abundance-SLCMCD-2022', 
-    variableSpec=VariableSpec(
-      entityId='EUPATH_0000609',
-      variableId='EUPATH_0043227'
-    ), 
-    vocabulary=c('')
-  )))
-
-  megastudyVariablesReal <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'POPBIO_8000017', entityId = 'EUPATH_0000609'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS'),
-      hasStudyDependentVocabulary = FALSE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'PATO_0000047', entityId = 'EUPATH_0000609'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'OBI_0001909', entityId = 'EUPATH_0000609'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'UBERON_0000105', entityId = 'EUPATH_0000609'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'EUPATH_0043227', entityId = 'EUPATH_0000609'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  ## mini vocabs
-  megastudyReal.mini <- Megastudy(
-    data=megastudyDataReal,
-    ancestorIdColumns=c('EUPATH_0000605.Study_stable_id', 'GAZ_00000448.GeographicLocation_stable_id','OBI_0000659.ParentOfSample_stable_id','EUPATH_0000609.Sample_stable_id'),
-    studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(sexVocabReal.mini, speciesVocabReal.mini, lifeStageVocabReal.mini, feedingStatusVocabReal.mini))
-  )
-
-  ## full vocabs
-  megastudyReal <- Megastudy(
-    data=megastudyDataReal,
-    ancestorIdColumns=c('EUPATH_0000605.Study_stable_id', 'GAZ_00000448.GeographicLocation_stable_id','OBI_0000659.ParentOfSample_stable_id','EUPATH_0000609.Sample_stable_id'),
-    studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(sexVocab, speciesVocab, lifeStageVocab, feedingStatusVocab))
-  )
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index 77c09f5..4b9ab5c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,39 +1,39 @@
-Package: veupathUtils
-Title: General Helper Functions for MicrobiomeDB Projects
-Version: 2.6.7
+Package: mbioUtils
+Title: Utility Functions for MicrobiomeDB R Packages
+Version: 0.1.0
 Authors@R: 
     c(person(given = "Danielle",
            family = "Callan",
-           role = c("aut"),
+           role = c("aut", "cre"),
            email = "dcallan@upenn.edu"),
       person(given = "Ann",
            family = "Blevins",
-           role = c("aut", "cre"),
-           email = "annsize@upenn.edu"))
+           role = c("aut"),
+           email = "annsize@upenn.edu",
+           comment = "Original veupathUtils author"))
 biocViews: 
 Imports:
     boot,
     data.table,
-    digest,
     Hmisc,
-    microbenchmark,
+    methods,
+    purrr,
     S4Vectors,
-    SpiecEasi (>= 1.1.1),
-    stringi,
-    purrr
+    SpiecEasi (>= 1.0.7),
+    stringi
 Depends:
     R (>= 2.10),
     jsonlite
 Remotes:
-    zdk123/SpiecEasi
-URL: https://github.com/microbiomeDB/veupathUtils
-BugReports: https://github.com/microbiomeDB/veupathUtils/issues
-Description: veupathUtils contains various R helper functions intended to be useful across a variety of projects, including the plot.data and microbiomeComputations packages.
+    zdk123/SpiecEasi@v1.0.7
+URL: https://github.com/microbiomeDB/mbioUtils
+BugReports: https://github.com/microbiomeDB/mbioUtils/issues
+Description: mbioUtils contains utility functions and data structures for microbiomeDB R packages, including microbiomeComputations, MicrobiomeDB, and microbiomeData. Forked from veupathUtils 2.6.7.
 License: Apache License (>= 2)
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.3
 Suggests: 
     testthat (>= 3.0.0)
 Config/testthat/edition: 3
@@ -46,22 +46,16 @@ Collate:
     'class-VariableMetadata.R'
     'class-ComputeResult.R'
     'class-CorrelationResult.R'
-    'class-Megastudy.R'
-    'class-Range.R'
-    'class-Statistic.R'
-    'data.R'
+    'mbioUtils-package.R'
     'method-correlation.R'
     'methods-Bin.R'
     'methods-CollectionWithMetadata.R'
     'methods-Collections.R'
     'methods-VariableMetadata.R'
     'methods-ComputeResult.R'
-    'methods-Megastudy.R'
     'methods-SampleMetadata.R'
-    'methods-Statistic.R'
     'utils-classes.R'
     'utils-cut.R'
     'utils-numeric.R'
     'utils-string.R'
     'utils.R'
-    'veupathUtils-package.R'
diff --git a/NAMESPACE b/NAMESPACE
index b8c035b..d7e9bab 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,12 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(naToZero,data.frame)
-S3method(naToZero,data.table)
-S3method(naToZero,default)
-S3method(naToZero,list)
-S3method(validateNumericCols,data.table)
-S3method(validateNumericCols,default)
-S3method(validateNumericCols,list)
 export("name<-")
 export(Bin)
 export(BinList)
@@ -17,15 +10,9 @@ export(ComputeResult)
 export(CorrelationResult)
 export(DataShape)
 export(DataType)
-export(Megastudy)
 export(PlotReference)
-export(Range)
 export(S4SimpleListToJSON)
 export(SampleMetadata)
-export(Statistic)
-export(StatisticList)
-export(StudySpecificVocabulariesByVariable)
-export(StudySpecificVocabulariesByVariableList)
 export(VariableClass)
 export(VariableMetadata)
 export(VariableMetadataList)
@@ -68,37 +55,27 @@ export(getCollectionName)
 export(getCollectionNames)
 export(getCollectionVariableNames)
 export(getCollectionsList)
-export(getDTWithImputedZeroes)
 export(getDataFromSource)
-export(getDataTable)
-export(getDiscretizedBins)
-export(getEntityId)
 export(getHasStudyDependentVocabulary)
 export(getIdColumns)
 export(getMetadataVariableNames)
 export(getMetadataVariableSummary)
 export(getSampleMetadata)
 export(getSampleMetadataIdColumns)
-export(getStudyIdColumnName)
-export(getVariableSpec)
-export(getVariableSpecColumnName)
 export(is.POSIXct)
 export(is.error)
 export(isOneToManyWithAncestor)
 export(logWithTime)
 export(matchArg)
 export(merge)
-export(naToZero)
 export(name)
 export(new_data_frame)
-export(nonZeroRound)
 export(predicateFactory)
 export(pruneFeatures)
 export(removeAttr)
 export(removeIncompleteRecords)
 export(selfCorrelation)
 export(setAttrFromList)
-export(setNaToZero)
 export(setroworder)
 export(shiftToNonNeg)
 export(strSplit)
@@ -108,7 +85,6 @@ export(toStringOrNull)
 export(toStringOrPoint)
 export(trim)
 export(updateAttrById)
-export(validateNumericCols)
 export(whichValuesInBin)
 export(whichValuesInBinList)
 export(writeData)
@@ -122,14 +98,8 @@ exportClasses(ComputeResult)
 exportClasses(CorrelationResult)
 exportClasses(DataShape)
 exportClasses(DataType)
-exportClasses(Megastudy)
 exportClasses(PlotReference)
-exportClasses(Range)
 exportClasses(SampleMetadata)
-exportClasses(Statistic)
-exportClasses(StatisticList)
-exportClasses(StudySpecificVocabulariesByVariable)
-exportClasses(StudySpecificVocabulariesByVariableList)
 exportClasses(VariableClass)
 exportClasses(VariableMetadata)
 exportClasses(VariableMetadataList)
@@ -152,13 +122,7 @@ exportMethods(findVariablesNeedingWeightingVariableMetadata)
 exportMethods(findWeightingVariableSpecs)
 exportMethods(findWeightingVariablesMetadata)
 exportMethods(getColName)
-exportMethods(getDTWithImputedZeroes)
-exportMethods(getDataTable)
-exportMethods(getEntityId)
 exportMethods(getHasStudyDependentVocabulary)
-exportMethods(getStudyIdColumnName)
-exportMethods(getVariableSpec)
-exportMethods(getVariableSpecColumnName)
 exportMethods(merge)
 exportMethods(predicateFactory)
 exportMethods(toJSON)
@@ -173,8 +137,13 @@ importFrom(S4Vectors,SimpleList)
 importFrom(SpiecEasi,pval.sparccboot)
 importFrom(SpiecEasi,sparcc)
 importFrom(SpiecEasi,sparccboot)
-importFrom(digest,digest)
-importFrom(microbenchmark,microbenchmark)
+importFrom(methods,new)
+importFrom(methods,slotNames)
+importFrom(methods,validObject)
 importFrom(purrr,map)
-importFrom(purrr,map_lgl)
+importFrom(stats,complete.cases)
+importFrom(stats,median)
+importFrom(stats,sd)
+importFrom(stats,var)
 importFrom(stringi,stri_detect_regex)
+importFrom(utils,tail)
diff --git a/R/class-Collections.R b/R/class-Collections.R
index 9ee578e..4c8a01b 100644
--- a/R/class-Collections.R
+++ b/R/class-Collections.R
@@ -23,7 +23,7 @@ check_collection <- function(object) {
     # collection data should all come from the same entity
     # using the presence of the period to indicate eda services formatted data
     if (all(grepl(".", names(df), fixed = TRUE))) {
-        if (uniqueN(veupathUtils::strSplit(names(df)[!names(df) %in% object@ancestorIdColumns], ".", ncol=2, index=1)) > 1) {
+        if (uniqueN(mbioUtils::strSplit(names(df)[!names(df) %in% object@ancestorIdColumns], ".", ncol=2, index=1)) > 1) {
             msg <- paste("All columns must belong to the same entity.")
             errors <- c(errors, msg)
         }
diff --git a/R/class-ComputeResult.R b/R/class-ComputeResult.R
index 38c7054..25e9bae 100644
--- a/R/class-ComputeResult.R
+++ b/R/class-ComputeResult.R
@@ -17,7 +17,7 @@ check_compute_result <- function(object) {
     # the variable classes are correct.
     if (!!length(object@computedVariableMetadata)) {
       variables <- object@computedVariableMetadata
-      col_names <- stripEntityIdFromColumnHeader(veupathUtils::findAllColNames(variables))
+      col_names <- stripEntityIdFromColumnHeader(mbioUtils::findAllColNames(variables))
 
       if (!all(col_names %in% names(object@data))) {
         msg <- paste("Some specified computed variables are not present in compute result data.frame")
@@ -64,7 +64,7 @@ check_compute_result <- function(object) {
 #' @slot name The name of the compute, ex: 'alphaDiv'.
 #' @slot recordIdColumn The name of the column containing IDs for the samples. All other columns will be treated as computed values.
 #' @slot ancestorIdColumns A character vector of column names representing parent entities of the recordIdColumn.
-#' @slot computedVariableMetadata veupathUtils::VariableMetadataList detailing the computed variables.
+#' @slot computedVariableMetadata mbioUtils::VariableMetadataList detailing the computed variables.
 #' @slot statistics An optional slot of any values. List or data.frame are recommended. It is not required to have rows or cols map to samples.
 #' @slot computationDetails An optional message about the computed results.
 #' @slot parameters A record of the input parameters used to generate the computed results.
diff --git a/R/class-Megastudy.R b/R/class-Megastudy.R
deleted file mode 100644
index 4ff2df8..0000000
--- a/R/class-Megastudy.R
+++ /dev/null
@@ -1,108 +0,0 @@
-check_study_vocabulary <- function(object) {
-  errors <- character()
-
-  # the column names should be in the vocabulary
-  if (!object@studyIdColumnName %in% names(object@studyVocab)) {
-    msg <- paste0("Study ID column '", object@getStudyIdColumnName, "' not found in vocabulary.")
-    errors <- c(errors, msg)
-  }
-
-  if (!veupathUtils::getColName(object@variableSpec) %in% names(object@studyVocab)) {
-    msg <- paste0("Variable spec column '", veupathUtils::getColName(object@variableSpec), "' not found in vocabulary.")
-    errors <- c(errors, msg)
-  }
-
-  return(if (length(errors) == 0) TRUE else errors)
-}
-
-#' Study Specific Vocabularies By Variable
-#' 
-#' A class to specify expected values per study for some variable
-#' of interest.
-#' 
-#' @slot studyIdColumnName A string specifying the name of the column in the vocab data table that contains the study id
-#' @slot variableSpecColumnName A string specifying the name of the column in the vocab data table that contains the variable vocabulary values
-#' @slot studyVocab A data.table with columns studyIdColumnName and variableSpecColumnName that specifies expected vocabularies for each study
-#' @name StudySpecificVocabulariesByVariable-class
-#' @rdname StudySpecificVocabulariesByVariable-class
-#' @include class-VariableMetadata.R
-#' @export
-StudySpecificVocabulariesByVariable <- setClass("StudySpecificVocabulariesByVariable",
-  representation = representation(
-    studyIdColumnName = 'character',
-    variableSpec = 'VariableSpec',
-    studyVocab = 'data.table'
-  ),
-  validity = check_study_vocabulary
-)
-
-check_multiple_study_vocabularies_on_same_entity <- function(object) {
-  errors <- character()
-
-  if (length(unique(unlist(lapply(as.list(object), getStudyIdColumnName)))) != 1) {
-    errors <- c(errors, paste0("All study vocabularies must be able to be identified by the same study entity. Found the following study entities: ", paste(unique(unlist(lapply(as.list(object), getStudyIdColumnName))), collapse = ", ")))
-  }
-
-  if (length(unique(unlist(lapply(as.list(object), getEntityId)))) != 1) {
-    errors <- c(errors, paste0("All study vocabularies must belong to the same entity. Found the following entities: ", paste(unique(unlist(lapply(as.list(object), getEntityId))), collapse = ", ")))
-  }
-
-  return(if (length(errors) == 0) TRUE else errors)
-}
-
-#' @export
-StudySpecificVocabulariesByVariableList <- setClass("StudySpecificVocabulariesByVariableList",
-  contains = "SimpleList",
-  prototype = prototype(elementType = "StudySpecificVocabulariesByVariable"),
-  validity = check_multiple_study_vocabularies_on_same_entity
-)
-
-#this also sets us up for megastudy specific methods in plot.data if it turns out we need them
-
-check_megastudy <- function(object) {
-  errors <- character()
-  df <- object@data
-  ancestor_id_cols <- object@ancestorIdColumns
-
-  if (!!length(ancestor_id_cols)) {
-    if (!all(ancestor_id_cols %in% names(df))) {
-      msg <- paste("Not all ancestor ID columns are present in data.frame")
-      errors <- c(errors, msg)
-    }
-  } else {
-    msg <- paste("Ancestor ID columns are required but not provided.")
-    errors <- c(errors, msg)
-  }
-
-  if (!!length(object@collectionsDT)) {
-    if (!all(ancestor_id_cols[1:length(ancestor_id_cols)-1] %in% names(object@collectionsDT))) {
-      msg <- paste("Not all ancestor ID columns are present in collection data.frame")
-      errors <- c(errors, msg)
-    }
-  }
-
-  return(if (length(errors) == 0) TRUE else errors)
-}
-
-#' Megastudy
-#' 
-#' A class to encapsulate everything we need for our special handling 
-#' of 'megastudies' in EDA. Currently that is imputing zeroes on tall data
-#' given that each (sub-)study has different expected vocabularies for
-#' that data.
-#' 
-#' @slot data A data.table
-#' @slot ancestorIdColumns A character vector of column names representing parent entities of the recordIdColumn.
-#' @slot studySpecificVocabularies veupathUtils::StudySpecificVocabulariesByVariableList
-#' @slot collectionIds A data.table including collection ids and any variables of interest for the collection entity.
-#' If none provided, the collection ids will be inferred from those present in `data`.
-#' 
-#' @name Megastudy-class
-#' @rdname Megastudy-class
-#' @export 
-Megastudy <- setClass("Megastudy", representation(
-    data = 'data.table',
-    ancestorIdColumns = 'character',
-    studySpecificVocabularies = 'StudySpecificVocabulariesByVariableList',
-    collectionsDT = 'data.frame'
-), validity = check_megastudy)
\ No newline at end of file
diff --git a/R/class-Range.R b/R/class-Range.R
deleted file mode 100644
index c5db34d..0000000
--- a/R/class-Range.R
+++ /dev/null
@@ -1,36 +0,0 @@
-check_range <- function(object) {
-    errors <- character()
-    min <- object@minimum
-    max <- object@maximum
-
-    if (class(min) != class(max)) {
-       msg <- "Provided minimum and maximum values must be of the same base type (numeric, Date, Posix)."
-       errors <- c(errors, msg) 
-    }
-
-    if (!inherits(min, c('numeric', 'Date', 'Posix'))) {
-       msg <- "Provided minimum and maximum values must be numeric, Date, or Posix."
-       errors <- c(errors, msg) 
-    }
- 
-    return(if (length(errors) == 0) TRUE else errors)
-}
-
-#' Data Range
-#' 
-#' A class to a data range, represented as minimum and maximum values.
-#' The range could be numeric or Date/ Posix
-#' 
-#' @slot minimum A number, Date or Posix value
-#' @slot maximum A number, Date or Posix value
-#' 
-#' @name Range-class
-#' @rdname Range-class
-#' @export 
-Range <- setClass("Range", representation(
-    minimum = 'ANY',
-    maximum = 'ANY'
-), prototype = prototype(
-    minimum = NA_real_,
-    maximum = NA_real_
-), validity = check_range)
\ No newline at end of file
diff --git a/R/class-Statistic.R b/R/class-Statistic.R
deleted file mode 100644
index 5b5c898..0000000
--- a/R/class-Statistic.R
+++ /dev/null
@@ -1,103 +0,0 @@
-check_statistic <- function(object) {
-    errors <- character()
-    name <- object@name
-    value <- object@value
-    pvalue <- object@pvalue
-    ciMin <- object@confidenceInterval@minimum
-    ciMax <- object@confidenceInterval@maximum
-    ciLevel <- object@confidenceLevel
-
-    if (length(name) != 1  || is.na(name)) {
-        msg <- "The slot `name` must have a single value."
-        errors <- c(errors, msg)
-    }
-
-    if (length(value) != 1) {
-        msg <- "The slot `value` must have a single value."
-        errors <- c(errors, msg)
-    }
-
-    if (!is.na(pvalue)) {
-      if (length(pvalue) != 1) {
-        msg <- "The slot `pvalue` must have a single value."
-        errors <- c(errors, msg)
-      } else {
-        numericPVal <- suppressWarnings(as.numeric(pvalue))
-        if (is.na(numericPVal)) {
-          if (pvalue != '<0.0001') {
-            msg <- "Provided p-value is invalid. It must either be coercible to a number or be the string '<0.0001'."
-            errors <- c(errors, msg)
-          }
-        } else if (numericPVal < 0 || numericPVal > 1) {
-          msg <- "Provided p-value is invalid. It is not between 0 and 1."
-          errors <- c(errors, msg)
-        }
-      }
-    }  
-
-    # false for NaN, while is.na is TRUE for NaN
-    is.NA <- function(object) { ifelse(is.na(object), ifelse(is.nan(object), FALSE, TRUE), FALSE) }
-
-    if (is.NA(ciMax) || is.NA(ciMin)) {
-      if (!is.na(ciLevel)) {
-        msg <- "A confidence level was provided without a confidence interval."
-        errors <- c(errors, msg)
-      }
-    } else {
-      #just to guarantee the below condition works
-      if (is.nan(ciMin)) ciMin <- -Inf
-      if (is.nan(ciMax)) ciMax <- Inf
-
-      if (!is.nan(value)) {
-        if (value < ciMin || value > ciMax) {
-          msg <- "Provided value is not within the specified confidence interval."
-          errors <- c(errors, msg)
-        }
-      }
-      
-      if (!is.na(ciLevel)) {
-        if (ciLevel < 0 || ciLevel > 1) {
-          msg <- "Provided confidence level is invalid."
-          errors <- c(errors, msg)
-        }
-      } else {
-        msg <- "A confidence interval was provided without a confidence level."
-        errors <- c(errors, msg)
-      }
-    }
-
-    
- 
-    return(if (length(errors) == 0) TRUE else errors)
-}
-
-#' Statistic
-#' 
-#' A class to specify a named statistic, its value, confidence interval and p-value.
-#' This is primarily to help maintain consistency in meeting the EDA API.
-#' 
-#' @slot name A string specifying what statistic was calculated
-#' @slot value A number
-#' @slot confidenceInterval An optional Range specifying minimum and maximum values for the confidence interval
-#' @slot confidenceLevel An optional decimal number indicating the degree of confidence represented by the confidence interval
-#' @slot pvalue An optional string representing the p-value associated with the statistic. The string should either be coercible to a number or be '<0.0001'.
-#' 
-#' @name Statistic-class
-#' @rdname Statistic-class
-#' @export 
-Statistic <- setClass("Statistic", representation(
-    name = 'character',
-    value = 'numeric',
-    confidenceInterval = 'Range',
-    confidenceLevel = 'numeric',
-    pvalue = 'character'
-), prototype = prototype(
-    confidenceLevel = NA_real_,
-    pvalue = NA_character_
-), validity = check_statistic)
-
-#' @export
-StatisticList <- setClass("StatisticList",
-  contains = "SimpleList",
-  prototype = prototype(elementType = "Statistic")
-)
diff --git a/R/class-VariableMetadata.R b/R/class-VariableMetadata.R
index 2db8f78..ac18e50 100644
--- a/R/class-VariableMetadata.R
+++ b/R/class-VariableMetadata.R
@@ -179,7 +179,7 @@ check_variable_metadata <- function(object) {
         errors <- c(errors, "Members must be non-empty for collection variables.")
       } else {
         memberEntityIds <- unlist(lapply(as.list(object@members), function(x) {return(x@entityId)}))
-        memberColNames <- unlist(lapply(as.list(object@members), function(x) {return(veupathUtils::getColName(x))}))
+        memberColNames <- unlist(lapply(as.list(object@members), function(x) {return(mbioUtils::getColName(x))}))
         
         # Require all members to have the same entity
         if (data.table::uniqueN(memberEntityIds) > 1) {
diff --git a/R/data.R b/R/data.R
deleted file mode 100644
index 129a79f..0000000
--- a/R/data.R
+++ /dev/null
@@ -1,61 +0,0 @@
-#' Some Real Life Popbio Megastudy Test Data
-#'
-#' It is the data used in the subset associated w this saved analysis:
-#' https://vectorbase.org/vectorbase/app/workspace/maps/A4nuqcm/import
-#' as of 9 Jan 2024.
-#' As of the time of writing the filters in this analysis are like this:
-#' "filters": [
-#'    {
-#'      "entityId": "EUPATH_0000605",
-#'      "variableId": "POPBIO_8000215",
-#'      "stringSet": [
-#'        "VBP0000844"
-#'      ],
-#'      "type": "stringSet"
-#'    },
-#'    {
-#'      "entityId": "GAZ_00000448",
-#'      "variableId": "EUPATH_0000542",
-#'      "stringSet": [
-#'        "Canyon Rim Church"
-#'      ],
-#'      "type": "stringSet"
-#'    },
-#'    {
-#'      "variableId": "EUPATH_0043256",
-#'      "entityId": "OBI_0000659",
-#'      "type": "dateRange",
-#'      "min": "2022-05-01T00:00:00Z",
-#'      "max": "2022-11-01T00:00:00Z"
-#'    },
-#'    {
-#'      "type": "numberRange",
-#'      "variableId": "OBI_0001620",
-#'      "entityId": "GAZ_00000448",
-#'      "min": 40.698307645373106,
-#'      "max": 40.71193901146775
-#'    },
-#'    {
-#'      "type": "longitudeRange",
-#'      "variableId": "OBI_0001621",
-#'      "entityId": "GAZ_00000448",
-#'      "left": -111.8212938308716,
-#'      "right": -111.79749727249147
-#'    }
-#'  ]
-#'
-#' @format ## `megastudyDataReal`
-#' A data frame with 14 rows and 9 columns:
-#' \describe{
-#'   \item{EUPATH_0000609.Sample_stable_id}{Stable ID for the `Sample` entity}
-#'   \item{OBI_0000659.ParentOfSample_stable_id}{Stable ID for the `Collections` entity}
-#'   \item{GAZ_00000448.GeographicLocation_stable_id}{Stable ID for the `Collection sites` entity}
-#'   \item{EUPATH_0000605.Study_stable_id}{Stable ID for the `Studies` entity}
-#'   \item{EUPATH_0000609.PATO_0000047}{Sample -> Sex}
-#'   \item{EUPATH_0000609.POPBIO_8000017}{Sample -> Unbiased Specimen Count}
-#'   \item{EUPATH_0000609.OBI_0001909}{Sample -> Species}
-#'   \item{EUPATH_0000609.EUPATH_0043227}{Sample -> Female Insect Feeding Status}
-#'   \item{EUPATH_0000609.UBERON_0000105}{Sample -> Life Cycle Stage}
-#' }
-#' @source <https://vectorbase.org/vectorbase/app/workspace/maps/A4nuqcm/import>
-"megastudyDataReal"
diff --git a/R/internal-utils.R b/R/internal-utils.R
index d093885..810ffb5 100644
--- a/R/internal-utils.R
+++ b/R/internal-utils.R
@@ -80,11 +80,11 @@ clean_names <- function(names, makeUnique = FALSE) {
 ## cleanColumnNames will clean up the column names to make them valid column names in R, and hopefully improve consistncy of labels as well
 #' @export
 getDataFromSource <- function(dataSource, keepIdsAndNumbersOnly = c(TRUE, FALSE), cleanColumnNames = c(FALSE, TRUE)) {
-    keepIdsAndNumbersOnly <- veupathUtils::matchArg(keepIdsAndNumbersOnly)
-    cleanColumnNames <- veupathUtils::matchArg(cleanColumnNames)
+    keepIdsAndNumbersOnly <- mbioUtils::matchArg(keepIdsAndNumbersOnly)
+    cleanColumnNames <- mbioUtils::matchArg(cleanColumnNames)
 
     if (inherits(dataSource, "character")) {
-        veupathUtils::logWithTime(sprintf("Attempting to read file: %s", dataSource), verbose = TRUE)
+        mbioUtils::logWithTime(sprintf("Attempting to read file: %s", dataSource), verbose = TRUE)
         dt <- data.table::fread(dataSource, na.strings=c(''))
     } else if (inherits(dataSource, "data.frame")) {
         dt <- data.table::as.data.table(dataSource)        
diff --git a/R/veupathUtils-package.R b/R/mbioUtils-package.R
similarity index 61%
rename from R/veupathUtils-package.R
rename to R/mbioUtils-package.R
index 018367d..369548d 100644
--- a/R/veupathUtils-package.R
+++ b/R/mbioUtils-package.R
@@ -1,8 +1,9 @@
 #' @keywords internal
+#' @importFrom methods new slotNames validObject
+#' @importFrom stats complete.cases median sd var
+#' @importFrom utils tail
 "_PACKAGE"
 
-#' @importFrom microbenchmark microbenchmark
-
 # The following block is used by usethis to automatically manage
 # roxygen namespace tags. Modify with care!
 ## usethis namespace: start
diff --git a/R/method-correlation.R b/R/method-correlation.R
index 5c19fc1..361d77e 100644
--- a/R/method-correlation.R
+++ b/R/method-correlation.R
@@ -31,7 +31,7 @@ function(
   predicateType = c('proportionNonZero', 'variance', 'sd'), 
   threshold = 0.5
 ) {
-  predicateType <- veupathUtils::matchArg(predicateType)
+  predicateType <- mbioUtils::matchArg(predicateType)
 
   if (predicateType == 'proportionNonZero') {
     if (threshold < 0 | threshold > 1) {
@@ -101,9 +101,9 @@ function(
   verbose = c(TRUE, FALSE)
 ) {
 
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
 
   # Check that the number of rows match.
   if (!identical(nrow(data1), nrow(data2))) {
@@ -111,17 +111,17 @@ function(
   }
 
   # Check that all values are numeric
-  if (!identical(veupathUtils::findNumericCols(data1), names(data1))) { 
+  if (!identical(mbioUtils::findNumericCols(data1), names(data1))) { 
     warning("All columns in data1 are not numeric. Only numeric columns will be used.")
-    keepCols <- veupathUtils::findNumericCols(data1)
+    keepCols <- mbioUtils::findNumericCols(data1)
     if (length(keepCols) == 0) {
       stop("No numeric columns found in data1.")
     }
     data1 <- data1[, ..keepCols]
   }
-  if (!identical(veupathUtils::findNumericCols(data2), names(data2))) { 
+  if (!identical(mbioUtils::findNumericCols(data2), names(data2))) { 
     warning("All columns in data2 are not numeric. Only numeric columns will be used.")
-    keepCols <- veupathUtils::findNumericCols(data2)
+    keepCols <- mbioUtils::findNumericCols(data2)
     if (length(keepCols) == 0) {
       stop("No numeric columns found in data2.")
     }
@@ -146,7 +146,7 @@ function(
     keep.rownames = T
   )
 
-  veupathUtils::logWithTime(paste0('Completed correlation with method=', method,'. Formatting results.'), verbose)
+  mbioUtils::logWithTime(paste0('Completed correlation with method=', method,'. Formatting results.'), verbose)
 
 
   ## Format results
@@ -184,14 +184,14 @@ function(
   verbose = c(TRUE, FALSE)
 ) {
 
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
 
   # Check that all values are numeric
-  if (!identical(veupathUtils::findNumericCols(data1), names(data1))) { 
+  if (!identical(mbioUtils::findNumericCols(data1), names(data1))) { 
     warning("All columns in data1 are not numeric. Only numeric columns will be used.")
-    keepCols <- veupathUtils::findNumericCols(data1)
+    keepCols <- mbioUtils::findNumericCols(data1)
     if (length(keepCols) == 0) {
       stop("No numeric columns found in data1.")
     }
@@ -232,7 +232,7 @@ function(
   
   }
 
-  veupathUtils::logWithTime(paste0('Completed correlation with method=', method,'. Formatting results.'), verbose)
+  mbioUtils::logWithTime(paste0('Completed correlation with method=', method,'. Formatting results.'), verbose)
 
   ## Format results
   rowAndColNames <- expand.grid(rownames(corrResult), colnames(corrResult))
@@ -270,8 +270,8 @@ buildCorrelationComputeResult <- function(
   method = c('spearman','pearson','sparcc'), 
   verbose = c(TRUE, FALSE)
 ) {
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
 
   # both AbundanceData and SampleMetadata have these slots
   recordIdColumn <- ifelse('recordIdColumn' %in% slotNames(data1), data1@recordIdColumn, NA_character_)
@@ -293,7 +293,7 @@ buildCorrelationComputeResult <- function(
   result@parameters <- paste0('method = ', method)
 
   validObject(result)
-  veupathUtils::logWithTime(
+  mbioUtils::logWithTime(
     paste(
       'Correlation computation completed with parameters recordIdColumn=', recordIdColumn, 
       ', method = ', method
@@ -339,9 +339,9 @@ function(
   verbose = c(TRUE, FALSE)
 ) {
 
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
   
   correlation(data, NULL, method = method, format = format, verbose = verbose)
 })
@@ -361,10 +361,10 @@ function(
   metadataIsFirst = c(FALSE,TRUE)
 ) {
   
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
-  metadataIsFirst <- veupathUtils::matchArg(metadataIsFirst)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
+  metadataIsFirst <- mbioUtils::matchArg(metadataIsFirst)
   
   #prefilters applied
   data1 <- pruneFeatures(data1, predicateFactory('proportionNonZero', proportionNonZeroThreshold), verbose)
@@ -390,7 +390,7 @@ function(
     )
   }
 
-  veupathUtils::logWithTime(
+  mbioUtils::logWithTime(
     paste(
       "Received df table with", 
       nrow(values), "samples and", 
@@ -425,9 +425,9 @@ function(
   stdDevThreshold = 0
 ) {
   
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
 
   #prefilters applied
   data <- pruneFeatures(data, predicateFactory('proportionNonZero', proportionNonZeroThreshold), verbose)
@@ -437,7 +437,7 @@ function(
   values <- getCollectionData(data, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = FALSE, verbose = verbose)
   corrResult <- correlation(values, NULL, method = method, format = 'data.table', verbose = verbose)
 
-  veupathUtils::logWithTime(
+  mbioUtils::logWithTime(
     paste(
       "Received df table with", 
       nrow(values), "samples and", 
@@ -464,9 +464,9 @@ function(
   format = c('ComputeResult', 'data.table'), 
   verbose = c(TRUE, FALSE)
 ) {
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
   
   corrResult <- correlation(
     getSampleMetadata(data, TRUE, FALSE), 
@@ -476,7 +476,7 @@ function(
     verbose = verbose
   )
 
-  veupathUtils::logWithTime(
+  mbioUtils::logWithTime(
     paste(
       "Received df table with", 
       nrow(data), "samples and", 
@@ -508,9 +508,9 @@ function(
   stdDevThreshold = 0
 ) {
   
-  format <- veupathUtils::matchArg(format)
-  method <- veupathUtils::matchArg(method)
-  verbose <- veupathUtils::matchArg(verbose)
+  format <- mbioUtils::matchArg(format)
+  method <- mbioUtils::matchArg(method)
+  verbose <- mbioUtils::matchArg(verbose)
   
   #prefilters applied
   data1 <- pruneFeatures(data1, predicateFactory('proportionNonZero', proportionNonZeroThreshold), verbose)
@@ -523,7 +523,7 @@ function(
   values1 <- getCollectionData(data1, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = TRUE, verbose = verbose)
   values2 <- getCollectionData(data2, variableNames = NULL, ignoreImputeZero = FALSE, includeIds = TRUE, verbose = verbose)
 
-  veupathUtils::logWithTime(
+  mbioUtils::logWithTime(
     paste(
       "Received first df table with", 
       nrow(values1), "samples and", 
@@ -531,7 +531,7 @@ function(
     ), 
     verbose
   )
-  veupathUtils::logWithTime(
+  mbioUtils::logWithTime(
     paste(
       "Received second df table with", 
       nrow(values2), "samples and", 
@@ -557,7 +557,7 @@ function(
   if (length(commonSamples) == 0) {
     stop('No samples in common between data1 and data2')
   } else {
-    veupathUtils::logWithTime(
+    mbioUtils::logWithTime(
       paste(
         "Found", length(commonSamples), "samples in common between data1 and data2. Only these samples will be used."
       ), 
@@ -572,7 +572,7 @@ function(
   values1 <- values1[, -..allIdColumns]
   values2 <- values2[, -..allIdColumns]  
 
-  corrResult <- veupathUtils::correlation(
+  corrResult <- mbioUtils::correlation(
     values1, 
     values2, 
     method = method, 
@@ -583,7 +583,7 @@ function(
   if (format == 'data.table') {
     return(corrResult)
   } else {
-    result <- veupathUtils::buildCorrelationComputeResult(corrResult, data1, data2, method, verbose)
+    result <- mbioUtils::buildCorrelationComputeResult(corrResult, data1, data2, method, verbose)
     result@computationDetails <- 'correlation'
     return(result)
   } 
diff --git a/R/methods-CollectionWithMetadata.R b/R/methods-CollectionWithMetadata.R
index 32b5dd9..3e6f1eb 100644
--- a/R/methods-CollectionWithMetadata.R
+++ b/R/methods-CollectionWithMetadata.R
@@ -86,7 +86,7 @@ setMethod("getSampleMetadataIdColumns", "CollectionWithMetadata", function(objec
 #' @param includeIds boolean indicating whether we should include recordIdColumn and ancestorIdColumns
 #' @param metadataVariables The metadata variables to include in the sample metadata. If NULL, all metadata variables will be included.
 #' @return data.table of sample metadata
-#' @import veupathUtils
+#' @import mbioUtils
 #' @import data.table
 #' @rdname getSampleMetadata
 #' @export
@@ -98,8 +98,8 @@ setGeneric("getSampleMetadata",
 #' @rdname getSampleMetadata
 #' @aliases getSampleMetadata,CollectionWithMetadata-method
 setMethod("getSampleMetadata", signature("CollectionWithMetadata"), function(object, asCopy = c(TRUE, FALSE), includeIds = c(TRUE, FALSE), metadataVariables = NULL) {
-    asCopy <- veupathUtils::matchArg(asCopy)
-    includeIds <- veupathUtils::matchArg(includeIds)
+    asCopy <- mbioUtils::matchArg(asCopy)
+    includeIds <- mbioUtils::matchArg(includeIds)
 
     dt <- object@sampleMetadata@data
     allIdColumns <- getSampleMetadataIdColumns(object)
@@ -152,7 +152,7 @@ setGeneric("removeIncompleteRecords",
 #' @rdname removeIncompleteRecords
 #' @aliases removeIncompleteRecords,CollectionWithMetadata-method
 setMethod("removeIncompleteRecords", signature("CollectionWithMetadata"), function(object, colName = character(), verbose = c(TRUE, FALSE)) {
-    verbose <- veupathUtils::matchArg(verbose)
+    verbose <- mbioUtils::matchArg(verbose)
     df <- getCollectionData(object, verbose = verbose)
     sampleMetadata <- getSampleMetadata(object)
     # df may have had rows removed due to getCollectionData behavior. Subset sampleMetadata to match
@@ -160,7 +160,7 @@ setMethod("removeIncompleteRecords", signature("CollectionWithMetadata"), functi
 
     # Remove Records with NA from data and metadata
     if (any(is.na(sampleMetadata[[colName]]))) {
-        veupathUtils::logWithTime("Found NAs in specified variable. Removing these records.", verbose)
+        mbioUtils::logWithTime("Found NAs in specified variable. Removing these records.", verbose)
         recordsWithData <- which(!is.na(sampleMetadata[[colName]]))
         # Keep records with data. Recall the CollectionWithMetadata object requires records to be in the same order
         # in both the data and metadata
diff --git a/R/methods-Collections.R b/R/methods-Collections.R
index 855b2df..7f51dfc 100644
--- a/R/methods-Collections.R
+++ b/R/methods-Collections.R
@@ -85,9 +85,9 @@ function(
     includeIds = c(TRUE, FALSE), 
     verbose = c(TRUE, FALSE)
 ) {
-    ignoreImputeZero <- veupathUtils::matchArg(ignoreImputeZero)
-    includeIds <- veupathUtils::matchArg(includeIds)
-    verbose <- veupathUtils::matchArg(verbose)
+    ignoreImputeZero <- mbioUtils::matchArg(ignoreImputeZero)
+    includeIds <- mbioUtils::matchArg(includeIds)
+    verbose <- mbioUtils::matchArg(verbose)
 
     allIdColumns <- getIdColumns(object)
     if (is.null(variableNames)) {
@@ -108,13 +108,21 @@ function(
         dt <- dt[rowSums(isNAorZero(dt.noIds)) != ncol(dt.noIds),]
         numRecordsRemoved <- nrow(dt.noIds) - nrow(dt)
         if (numRecordsRemoved > 0) {
-            veupathUtils::logWithTime(paste0("Removed ", numRecordsRemoved, " records with no data."), verbose)
+            mbioUtils::logWithTime(paste0("Removed ", numRecordsRemoved, " records with no data."), verbose)
         }
     }
 
     # Replace NA values with 0
     if (!ignoreImputeZero && object@imputeZero) {
-        veupathUtils::setNaToZero(dt)
+        # Find numeric columns and replace NAs with 0
+        # NOTE: This is a simplified inline version of veupathUtils::setNaToZero()
+        # which also supported column-specific targeting and validation.
+        # If edge cases arise, see veupathUtils/R/utils-numeric.R for the full
+        # implementation with setNaToZero(), findNumericCols(), and validateNumericCols().
+        numericCols <- names(dt)[sapply(dt, is.numeric)]
+        if (length(numericCols) > 0) {
+            data.table::setnafill(dt, fill = 0, cols = numericCols)
+        }
     }
 
     if (!includeIds) {
diff --git a/R/methods-ComputeResult.R b/R/methods-ComputeResult.R
index 4f0c0f8..ca474bf 100644
--- a/R/methods-ComputeResult.R
+++ b/R/methods-ComputeResult.R
@@ -16,9 +16,9 @@ setGeneric("writeMeta",
 
 #'@export 
 setMethod("writeMeta", signature("ComputeResult"), function(object, pattern = NULL, verbose = c(TRUE, FALSE)) {
-  verbose <- veupathUtils::matchArg(verbose)
+  verbose <- mbioUtils::matchArg(verbose)
 
-  outJson <- veupathUtils::toJSON(object@computedVariableMetadata)
+  outJson <- mbioUtils::toJSON(object@computedVariableMetadata)
 
   if (is.null(pattern)) { 
     pattern <- object@name
@@ -30,7 +30,7 @@ setMethod("writeMeta", signature("ComputeResult"), function(object, pattern = NU
 
   outFileName <- basename(tempfile(pattern = pattern, tmpdir = tempdir(), fileext = ".json"))
   write(outJson, outFileName)
-  veupathUtils::logWithTime(paste('New output file written:', outFileName), verbose)
+  mbioUtils::logWithTime(paste('New output file written:', outFileName), verbose)
 
   return(outFileName)
 })
@@ -52,7 +52,7 @@ setGeneric("writeData",
 
 #'@export 
 setMethod("writeData", signature("ComputeResult"), function(object, pattern = NULL, verbose = c(TRUE, FALSE)) {
-  verbose <- veupathUtils::matchArg(verbose)
+  verbose <- mbioUtils::matchArg(verbose)
 
   if (is.null(pattern)) { 
     pattern <- object@name
@@ -64,7 +64,7 @@ setMethod("writeData", signature("ComputeResult"), function(object, pattern = NU
 
   outFileName <- basename(tempfile(pattern = pattern, tmpdir = tempdir(), fileext = ".tab"))
   data.table::fwrite(object@data, outFileName, sep = '\t', quote = FALSE)
-  veupathUtils::logWithTime(paste('New output file written:', outFileName), verbose)
+  mbioUtils::logWithTime(paste('New output file written:', outFileName), verbose)
 
   return(outFileName)
 })
@@ -90,7 +90,7 @@ setGeneric("writeStatistics",
 
 #'@export 
 setMethod("writeStatistics", signature("ComputeResult"), function(object, pattern = NULL, verbose = c(TRUE, FALSE)) {
-  verbose <- veupathUtils::matchArg(verbose)
+  verbose <- mbioUtils::matchArg(verbose)
 
   # Convert all to character but maintain structure
   if (inherits(object@statistics, 'data.frame')) {
@@ -111,7 +111,7 @@ setMethod("writeStatistics", signature("ComputeResult"), function(object, patter
 
   outFileName <- basename(tempfile(pattern = pattern, tmpdir = tempdir(), fileext = ".json"))
   write(outJson, outFileName)
-  veupathUtils::logWithTime(paste('New output file written:', outFileName), verbose)
+  mbioUtils::logWithTime(paste('New output file written:', outFileName), verbose)
 
   return(outFileName)
 })
diff --git a/R/methods-Megastudy.R b/R/methods-Megastudy.R
deleted file mode 100644
index 06e80c0..0000000
--- a/R/methods-Megastudy.R
+++ /dev/null
@@ -1,313 +0,0 @@
-#' get a VariableSpec
-#' 
-#' This function returns a string representation of a VariableSpec. By
-#' default it assumes the VariableSpec is available in a slot called
-#' `variableSpec`.
-#' 
-#' @param object An object containing a veupathUtils::VariableSpec
-#' @return character
-#' @export
-setGeneric("getVariableSpec", 
-  function(object, ...) standardGeneric("getVariableSpec"),
-  signature = "object"
-)
-
-#' @export 
-setMethod('getVariableSpec', signature('StudySpecificVocabulariesByVariable'), function(object) {
-  return(object@variableSpec)
-})
-
-#' @export
-setMethod('getVariableSpec', signature('ANY'), function(object) {
-  return(object@variableSpec)
-})
-
-#' @export
-setMethod('getVariableSpec', signature('VariableMetadata'), function(object, getCollectionMemberVarSpecs = c("Dynamic", "Never", "Always")) {
-  getCollectionMemberVarSpecs <- veupathUtils::matchArg(getCollectionMemberVarSpecs)
-  varSpecs <- list(object@variableSpec)
-
-  #if the variable is a collection, then we want to return the member variable specs
-  if (object@isCollection && getCollectionMemberVarSpecs %in% c("Dynamic", "Always")) {
-    varSpecs <- as.list(object@members)
-  } else if (!object@isCollection && getCollectionMemberVarSpecs == "Always") {
-    varSpecs <- NULL
-  }
-
-  return(varSpecs)
-})
-
-#' @export 
-setMethod('getVariableSpec', signature('VariableMetadataList'), function(object, getCollectionMemberVarSpecs = c("Dynamic", "Never", "Always")) {
-  getCollectionMemberVarSpecs <- veupathUtils::matchArg(getCollectionMemberVarSpecs)
-  
-  varSpecs <- unlist(lapply(as.list(object), veupathUtils::getVariableSpec, getCollectionMemberVarSpecs))
-  if (all(unlist(lapply(varSpecs, is.null)))) {
-    return(NULL)
-  }
-
-  return(varSpecs)
-})
-
-#' StuydIdColName as String
-#' 
-#' This function returns the studyIdColName from an StudySpecificVocabulariesByVariable
-#' 
-#' @param object veupathUtils::StudySpecificVocabulariesByVariable
-#' @return character
-#' @export
-setGeneric("getStudyIdColumnName", 
-  function(object) standardGeneric("getStudyIdColumnName"),
-  signature = "object"
-)
-
-#' @export
-setMethod('getStudyIdColumnName', signature('StudySpecificVocabulariesByVariable'), function(object) {
-  return(object@studyIdColumnName)
-})
-
-#' @export 
-setMethod('getStudyIdColumnName', signature('StudySpecificVocabulariesByVariableList'), function(object) {
-  #works bc of class validation that theyre all the same
-  return(veupathUtils::getStudyIdColumnName(object[[1]]))
-})
-
-#' VarSpecColName as String
-#' 
-#' This function returns the variableSpec from an StudySpecificVocabulariesByVariable
-#' 
-#' @param object veupathUtils::StudySpecificVocabulariesByVariable
-#' @return character
-#' @export
-setGeneric("getVariableSpecColumnName", 
-  function(object) standardGeneric("getVariableSpecColumnName"),
-  signature = "object"
-)
-
-#' @export
-setMethod('getVariableSpecColumnName', signature('StudySpecificVocabulariesByVariable'), function(object) {
-  #since we validate theyre all the same, can just take the first
-  return(veupathUtils::getColName(object@variableSpec))
-})
-
-#' @export 
-setMethod('getVariableSpecColumnName', signature('StudySpecificVocabulariesByVariableList'), function(object) {
-  return(unlist(lapply(as.list(object), veupathUtils::getVariableSpecColumnName)))
-})
-
-#' @export
-setGeneric("getEntityId", 
-  function(object) standardGeneric("getEntityId"),
-  signature = "object"
-)
-
-#' @export
-setMethod('getEntityId', signature('VariableSpec'), function(object) {
-  return(object@entityId)
-})
-
-#' @export
-setMethod('getEntityId', signature('StudySpecificVocabulariesByVariable'), function(object) {
-  return(veupathUtils::getEntityId(object@variableSpec))
-})
-
-#should this be an s4 method?
-findEntityIdColumnNameForVariableSpec <- function(varSpec, entityIdColumns) {
-  if (!inherits(varSpec, 'VariableSpec')) stop("The first argument must be of the S4 class `VariableSpec`.")
-  
-  return(entityIdColumns[grepl(varSpec@entityId, entityIdColumns)])
-}
-
-findStudyVocabularyByVariableSpec <- function(vocabs, variables, variableSpec) {
-  if (!inherits(vocabs, 'StudySpecificVocabulariesByVariableList')) stop("The first argument must be of the S4 class `StudySpecificVocabulariesByVariableList`.")
-  if (!inherits(variables, 'VariableMetadataList')) stop("The second argument must be of the S4 class `VariableMetadataList`.")
-  if (!inherits(variableSpec, 'VariableSpec')) stop("The third argument must be of the S4 class `VariableSpec`.")
-
-  vocabVariableSpecs <- lapply(as.list(vocabs), veupathUtils::getVariableSpec)
-  vocabVariableMetadata <- veupathUtils::findVariableMetadataFromVariableSpec(variables, veupathUtils::VariableSpecList(S4Vectors::SimpleList(vocabVariableSpecs)))
-  vocabVariableSpecsAdjustedForVariableCollectionMembers <- veupathUtils::getVariableSpec(vocabVariableMetadata, "Always")
-  
-  # if we have found variable collection members in the VariableMetadata, need to check if the passed varspec was a member
-  # look through the list that includes the members, and if we match one, get the varspec of the parent/ collection
-  # use the varspec of the parent/ collection to get the VariableMetadata associated w the entire collection
-  # remember, individual members dont have their own VariableMetadata
-  if (length(vocabVariableSpecsAdjustedForVariableCollectionMembers) > 0) {
-    index <- which(purrr::map(vocabVariableSpecsAdjustedForVariableCollectionMembers, function(x) {veupathUtils::getColName(x)}) == veupathUtils::getColName(variableSpec))
-    variableCollectionSpecs <- vocabVariableSpecsAdjustedForVariableCollectionMembers[[index]]
-    index <- which(purrr::map(vocabVariableMetadata, function(x) {veupathUtils::getColName(variableCollectionSpecs) %in% unlist(veupathUtils::getColName(x@members))}) == TRUE)
-  } else {
-    index <- which(purrr::map(vocabVariableSpecs, function(x) {veupathUtils::getColName(x)}) == veupathUtils::getColName(variableSpec))
-  }
- 
-  return(vocabs[[index]])  
-}
-
-
-findVariableSpecsFromStudyVocabulary <- function(vocabs, variables, getCollectionMemberVarSpecs = c("Dynamic", "Never", "Always")) {
-  if (!inherits(vocabs, 'StudySpecificVocabulariesByVariableList')) stop("The first argument must be of the S4 class `StudySpecificVocabulariesByVariableList`.")
-  if (!inherits(variables, 'VariableMetadataList')) stop("The second argument must be of the S4 class `VariableMetadataList`.")
-  getCollectionMemberVarSpecs <- veupathUtils::matchArg(getCollectionMemberVarSpecs)
-
-  varSpecsWithVocabs <- VariableSpecList(S4Vectors::SimpleList(lapply(as.list(vocabs), getVariableSpec)))
-
-  if (getCollectionMemberVarSpecs != "Never") {
-    varMetadataWithVocabs <- findVariableMetadataFromVariableSpec(variables, varSpecsWithVocabs)
-    varSpecsWithVocabs <- getVariableSpec(varMetadataWithVocabs, getCollectionMemberVarSpecs)
-    if (is.null(varSpecsWithVocabs)) {
-      return(NULL)
-    }
-    varSpecsWithVocabs <- VariableSpecList(S4Vectors::SimpleList(varSpecsWithVocabs))
-  }
-
-  return(varSpecsWithVocabs)
-}
-
-getVariableColumnNames <- function(variableMetadata) {
-  if (!inherits(variableMetadata, 'VariableMetadata')) stop("The specified object must be of the S4 class `VariableMetadata`.")
-
-  colNames <- veupathUtils::getColName(variableMetadata@variableSpec)
-
-  if (variableMetadata@isCollection) {
-    colNames <- unlist(lapply(as.list(variableMetadata@members), veupathUtils::getColName))
-  }
-
-  return(colNames)
-}
-
-#' Impute Zeroes (on tall data)
-#' 
-#' This function returns a data.table which has explicit zero values
-#' for all expected categories of some variable of interest in a megastudy.
-#' 
-#' @param object veupathUtils::Megastudy
-#' @return data.table
-#' @include class-VariableMetadata.R
-#' @export
-setGeneric("getDTWithImputedZeroes", 
-  function(object, variables, verbose = c(TRUE, FALSE)) standardGeneric("getDTWithImputedZeroes"),
-  signature = c("object", "variables")
-)
-
-#' @importFrom digest digest
-#' @export
-setMethod('getDTWithImputedZeroes', signature = c('Megastudy', 'VariableMetadataList'), function (object, variables, verbose = c(TRUE, FALSE)) {
-  verbose <- veupathUtils::matchArg(verbose)
-  veupathUtils::logWithTime("Start imputing zeroes...", verbose)
-  
-  weightingVariablesMetadata <- findWeightingVariablesMetadata(variables)
-  if (is.null(weightingVariablesMetadata))  {
-    veupathUtils::logWithTime("No weighting variables present in the plot. No imputation will be done.", verbose)
-    return(object@data)
-  }
-
-  .dt <- object@data
-  veupathUtils::logWithTime(paste0("Imputing zeroes for data.table with ", ncol(.dt), " columns and ", nrow(.dt), " rows"), verbose)
-  allEntityIdColumns <- object@ancestorIdColumns
-  vocabs <- object@studySpecificVocabularies
-  collectionsDT <- object@collectionsDT
-
-  # it seems a lot of this validation could belong to some custom obj w both a megastudy and vm slot.. but what is that? a MegastudyPlot?
-  # plus going that route means using this class in plot.data means an api change for plot.data
-  # that api change might be worth making in any case, but not doing it now
-  ## TODO validate that any collections variables are present in collectionsDT
-  variableMetadataNeedingStudyVocabularies <- findStudyDependentVocabularyVariableMetadata(variables)
-  variableSpecsWithStudyVocabs <- findVariableSpecsFromStudyVocabulary(vocabs, variables, "Never")
-  variableCollectionSpecsWithStudyVocabs <- findVariableSpecsFromStudyVocabulary(vocabs, variables, "Always")
-  variableMetadataForStudyVocabVariables <- findVariableMetadataFromVariableSpec(variables, variableSpecsWithStudyVocabs)
-  if (length(variableMetadataForStudyVocabVariables) < length(variableMetadataNeedingStudyVocabularies)) {
-    stop("Some provided variables require study vocabularies but dont have one.")
-  }
-  if (length(weightingVariablesMetadata) > 1) {
-    stop("Megastudy class does not yet support imputing zeroes when there is more than one weighting variable present.")
-  }
-  weightingVarSpecsForStudyVocabVariables <- findWeightingVariableSpecs(variableMetadataForStudyVocabVariables)
-  if (length(vocabs) > 1) {    
-    weightingVarColName <- unique(unlist(lapply(weightingVarSpecsForStudyVocabVariables, veupathUtils::getColName)))
-    if (length(weightingVarColName) > 1) {
-      stop("All study vocabularies must belong to variables on the same entity using the same weighting variable.")
-    }
-  } else {
-    weightingVarColName <- veupathUtils::getColName(findVariableMetadataFromVariableSpec(variables, veupathUtils::getVariableSpec(vocabs[[1]]))[[1]]@weightingVariableSpec)
-  }
-
-  veupathUtils::logWithTime("Finding variables with study vocabularies...", verbose)
-  variableSpecsToImputeZeroesFor <- veupathUtils::getVariableSpec(variableMetadataForStudyVocabVariables)
-  studyIdColName <- getStudyIdColumnName(vocabs)
-  varSpecColNames <- unlist(lapply(variableSpecsToImputeZeroesFor, veupathUtils::getColName))
-  # this works bc we validate all vocabs must be on the same entity
-  varSpecEntityIdColName <- findEntityIdColumnNameForVariableSpec(veupathUtils::getVariableSpec(vocabs[[1]]), allEntityIdColumns)
-  variablesFromEntityOfInterest <- findVariableMetadataFromEntityId(variables, veupathUtils::getVariableSpec(vocabs[[1]])@entityId)
-  variableSpecsFromEntityOfInterest <- veupathUtils::getVariableSpec(variablesFromEntityOfInterest)
-  if (any(unlist(getHasStudyDependentVocabulary(variablesFromEntityOfInterest)) & 
-          unlist(lapply(variableSpecsFromEntityOfInterest, identical, weightingVarSpecsForStudyVocabVariables[[1]])))) {
-    stop("Not all variables on the entity associated with the present study vocabulary have study vocabularies.")
-  }
-  veupathUtils::logWithTime("Imputing zeroes request validated.", verbose)
-
-  # !!!! this assumes entity ids are passed in order, from a single branch
-  # alternative would i guess be to make this class aware of the entity diagram
-  upstreamEntityIdColNames <- allEntityIdColumns[1:(which(allEntityIdColumns %in% varSpecEntityIdColName)-1)]
-  if (!all(allEntityIdColumns[!allEntityIdColumns %in% varSpecEntityIdColName] %in% upstreamEntityIdColNames)) {
-    # if we have downstream entities, it doesnt make sense to do all this work. plot.data will just remove the imputed values.
-    # if/when the map supports missingness and NA values on downstream entities start to matter, we can revisit.
-    veupathUtils::logWithTime("Downstream entities present. No imputation will be done (for now... mwahahaha).", verbose)
-    return(.dt)
-  }
-  studyEntityIdColName <- upstreamEntityIdColNames[1] # still working off the assumption theyre ordered
-
-  # variables that are from the upstream entities need to be in collectionsDT
-  # otherwise we erroneously try to impute values for those variables too, rather than only the weighting variable
-  upstreamEntities <- veupathUtils::strSplit(upstreamEntityIdColNames, ".", 2, 1)
-  if (!!length(collectionsDT)) {
-    upstreamEntityVariableColNames <- findColNamesByPredicate(variables, function(x) { x@variableSpec@entityId %in% upstreamEntities })
-    if (!all(upstreamEntityVariableColNames %in% names(collectionsDT))) {
-      stop("All variables from the upstream entities must be in collectionsDT.")
-    }
-  }
-
-  # for upstream entities data
-  upstreamEntityVariables.dt <- .dt[, -c(weightingVarColName, varSpecColNames), with=FALSE]
-  upstreamEntityVariables.dt[[varSpecEntityIdColName]] <- NULL
-  upstreamEntityVariables.dt <- unique(upstreamEntityVariables.dt)
-  veupathUtils::logWithTime(paste("Found", nrow(upstreamEntityVariables.dt), "unique existing upstream variable value combinations."), verbose)
-  if (!!length(collectionsDT)) {
-    upstreamEntityVariables.dt <- collectionsDT
-  }
-  entityIds.dt <- unique(.dt[, c(upstreamEntityIdColNames, varSpecEntityIdColName), with=FALSE])
-
-  # make all possible variable value combinations table
-  vocabDTs <- lapply(vocabs, function(x) {x@studyVocab})
-  if (!!length(collectionsDT)) {
-    vocabDTs <- lapply(vocabDTs, function(x) { merge(x, collectionsDT[, upstreamEntityIdColNames], by=studyEntityIdColName, all=TRUE, allow.cartesian=TRUE) })
-  }
-  mergeBy <- studyEntityIdColName
-  if (!!length(collectionsDT)) mergeBy <- upstreamEntityIdColNames
-  allCombinations.dt <- purrr::reduce(vocabDTs, merge, by = mergeBy, allow.cartesian=TRUE, all=TRUE)
-
-  # find which ones we need to add
-  presentCombinations.dt <- unique(.dt[, c(upstreamEntityIdColNames, varSpecColNames), with=FALSE])
-  # need upstream entity ids for all combinations in order to properly find and merge missing values
-  allCombinations.dt <- merge(allCombinations.dt, upstreamEntityVariables.dt, by = mergeBy, all = TRUE, allow.cartesian=TRUE)
-  # NOTE: we're assuming if a value was explicitly filtered against that its not in the vocab
-  addCombinations.dt <- allCombinations.dt[!presentCombinations.dt, on=c(upstreamEntityIdColNames, varSpecColNames)]
-
-  if (nrow(addCombinations.dt) == 0) {
-    veupathUtils::logWithTime("No new combinations to add. Returning existing table.", verbose)
-    return(.dt)
-  } else {
-    veupathUtils::logWithTime(paste("Adding", nrow(addCombinations.dt), "new combinations."), verbose)
-  }
-
-  # go ahead and add them, first filling in values for all columns
-  addCombinations.dt[[weightingVarColName]] <- 0  
-  addCombinations.dt[[varSpecEntityIdColName]] <- stringi::stri_rand_strings(nrow(addCombinations.dt), 10)
-  # bind them to the existing rows
-  upstreamVariablesInCollectionsDT <- names(collectionsDT)[!names(collectionsDT) %in% upstreamEntityIdColNames]
-  if (!!length(collectionsDT) & !all(upstreamVariablesInCollectionsDT %in% names(.dt))) {
-    .dt <- merge(.dt, upstreamEntityVariables.dt)
-  }
-  .dt <- data.table::rbindlist(list(.dt, addCombinations.dt), use.names=TRUE)
-  veupathUtils::logWithTime("Added imputed values to existing table. Finished imputing zeroes.", verbose)
-
-  return(.dt)
-})
diff --git a/R/methods-SampleMetadata.R b/R/methods-SampleMetadata.R
index eb81fb5..f76ddf3 100644
--- a/R/methods-SampleMetadata.R
+++ b/R/methods-SampleMetadata.R
@@ -1,8 +1,8 @@
 #' @rdname getSampleMetadata
 #' @aliases getSampleMetadata,SampleMetadata-method 
 setMethod("getSampleMetadata", signature("SampleMetadata"), function(object, asCopy = c(TRUE, FALSE), includeIds = c(TRUE, FALSE)) {
-  asCopy <- veupathUtils::matchArg(asCopy)
-  includeIds <- veupathUtils::matchArg(includeIds)
+  asCopy <- mbioUtils::matchArg(asCopy)
+  includeIds <- mbioUtils::matchArg(includeIds)
   dt <- object@data
 
   # Check that incoming dt meets requirements
diff --git a/R/methods-Statistic.R b/R/methods-Statistic.R
deleted file mode 100644
index 0773843..0000000
--- a/R/methods-Statistic.R
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
-#' Convert StatisicList to data.table
-#' 
-#' This function returns a data.table representation of a StatisticList.
-#' The content of the slots are stored as JSON. Each entry in the list becomes a named column.
-#' @param object A StatisticList object
-#' @return data.table
-#' @export
-setGeneric("getDataTable", 
-  function(object) standardGeneric("getDataTable"),
-  signature = "object"
-)
-
-#' @export
-setMethod("getDataTable", signature("StatisticList"), function(object) {
-    colNames <- unlist(lapply(as.list(object), function(x) {x@name}))
-    dt <- data.table::as.data.table(as.list(object))
-    data.table::setnames(dt, colNames)
-
-    return(dt)
-})
-
-#' @export
-setMethod("getDataTable", signature("Statistic"), function(object) {
-    colName <- object@name
-    dt <- data.table::as.data.table(colName = object)
-
-    return(dt)
-})
\ No newline at end of file
diff --git a/R/methods-VariableMetadata.R b/R/methods-VariableMetadata.R
index e3da533..3d31335 100644
--- a/R/methods-VariableMetadata.R
+++ b/R/methods-VariableMetadata.R
@@ -14,18 +14,18 @@ setGeneric("merge",
 
 #'@export 
 setMethod("merge", signature("VariableMetadataList", "VariableMetadataList"), function(x,y) {
-  veupathUtils::VariableMetadataList(S4Vectors::SimpleList(c(as.list(x), as.list(y))))
+  mbioUtils::VariableMetadataList(S4Vectors::SimpleList(c(as.list(x), as.list(y))))
 })
 
 #'@export 
 setMethod("merge", signature("VariableSpecList", "VariableSpecList"), function(x,y) {
-  veupathUtils::VariableSpecList(S4Vectors::SimpleList(c(as.list(x), as.list(y))))
+  mbioUtils::VariableSpecList(S4Vectors::SimpleList(c(as.list(x), as.list(y))))
 })
 
 #' R object as JSON string
 #' 
 #' This function converts an R object to a JSON string.
-#' see `methods(veupathUtils::toJSON)` for a list of support classes.
+#' see `methods(mbioUtils::toJSON)` for a list of support classes.
 #' @param object object of a supported S4 class to convert to a JSON string representation
 #' @param named logical indicating whether the result should be a complete named JSON object or just the value of the object
 #' @return character vector of length 1 containing JSON string
@@ -40,7 +40,7 @@ setGeneric("toJSON",
 
 #' @export 
 setMethod("toJSON", signature("Bin"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     
     # possible well want to make these optional, rather than null
     start_json <- jsonlite::toJSON(jsonlite::unbox(object@binStart), na = 'null')
@@ -69,7 +69,7 @@ setMethod("toJSON", signature("Bin"), function(object, named = c(TRUE, FALSE)) {
 
 #' @export
 setMethod("toJSON", signature("BinList"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- S4SimpleListToJSON(object, named)
 
     if (named) tmp <- paste0('{"bins":', tmp, "}")
@@ -77,70 +77,13 @@ setMethod("toJSON", signature("BinList"), function(object, named = c(TRUE, FALSE
     return(tmp)
 })
 
-#' @export
-setMethod("toJSON", signature("Range"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
-
-    if (all(is.na(c(object@minimum, object@maximum)))) {
-      range_string <- NA
-    } else {
-      range_string <- paste0('(', object@minimum, ' - ', object@maximum, ')')
-    }
-    
-    range_json <- jsonlite::toJSON(jsonlite::unbox(range_string))
-
-    if (named) {
-      range_json <- paste0('{"range":', range_json, "}")  
-    }
-    
-    return(range_json)
-})
-
-#' @export
-setMethod("toJSON", signature("Statistic"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
-    
-    value_json <- jsonlite::toJSON(jsonlite::unbox(object@value), na = 'null')
-    tmp <- paste0('"value":', value_json)
-
-    ci_json <- veupathUtils::toJSON(object@confidenceInterval, FALSE)
-    tmp <- paste0(tmp, ',"confidenceInterval":', ci_json)
-
-    conf_level_json <- jsonlite::toJSON(jsonlite::unbox(object@confidenceLevel), na = 'null')
-    tmp <- paste0(tmp, ',"confidenceLevel":', conf_level_json)
-
-    pvalue_json <- jsonlite::toJSON(jsonlite::unbox(object@pvalue))
-    tmp <- paste0(tmp, ',"pvalue":', pvalue_json)
-
-    tmp <- paste0("{", tmp, "}")
-    if (named) {
-      tmp <- paste0('{"', object@name, '":', tmp, "}")  
-    }
-    
-    return(tmp)
-})
-
-#' @export
-setMethod("toJSON", signature("StatisticList"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
-    tmp <- S4SimpleListToJSON(object, TRUE)
-
-    if (named) tmp <- paste0('{"statistics":', tmp, "}")
-
-    return(tmp)
-})
-
-# these let jsonlite::toJSON work by using the veupathUtils::toJSON methods for our custom S4 classes
-asJSONGeneric <- getGeneric("asJSON", package = "jsonlite")
-setMethod(asJSONGeneric, "Statistic", function(x, ...) veupathUtils::toJSON(x, FALSE))
-setMethod(asJSONGeneric, "StatisticList", function(x, ...) veupathUtils::toJSON(x, FALSE))
 
 
 ##############################################################################################
 
 #' @export
 setMethod("toJSON", signature("VariableClass"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- jsonlite::toJSON(jsonlite::unbox(object@value))
 
     if (named) tmp <- paste0('{"variableClass":', tmp, '}')
@@ -150,7 +93,7 @@ setMethod("toJSON", signature("VariableClass"), function(object, named = c(TRUE,
 
 #' @export
 setMethod("toJSON", signature("VariableSpec"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- list("variableId" = jsonlite::unbox(object@variableId),
                 "entityId" = jsonlite::unbox(object@entityId))
 
@@ -161,7 +104,7 @@ setMethod("toJSON", signature("VariableSpec"), function(object, named = c(TRUE,
 
 #' @export
 setMethod("toJSON", signature("PlotReference"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- jsonlite::toJSON(jsonlite::unbox(object@value))
 
     if (named) tmp <- paste0('{"plotReference":', tmp, '}')
@@ -171,7 +114,7 @@ setMethod("toJSON", signature("PlotReference"), function(object, named = c(TRUE,
 
 #' @export
 setMethod("toJSON", signature("VariableSpecList"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- S4SimpleListToJSON(object, named)
 
     if (named) tmp <- paste0('{"variableSpecs":', tmp, "}")
@@ -181,7 +124,7 @@ setMethod("toJSON", signature("VariableSpecList"), function(object, named = c(TR
 
 #' @export
 setMethod("toJSON", signature("DataType"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- jsonlite::unbox(jsonlite::unbox(tolower(object@value)))
 
     if (named) tmp <- list("dataType" = tmp)
@@ -191,7 +134,7 @@ setMethod("toJSON", signature("DataType"), function(object, named = c(TRUE, FALS
 
 #' @export
 setMethod("toJSON", signature("DataShape"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- jsonlite::unbox(jsonlite::unbox(tolower(object@value)))
 
     if (named) tmp <- list("dataShape" = tmp)
@@ -201,17 +144,17 @@ setMethod("toJSON", signature("DataShape"), function(object, named = c(TRUE, FAL
 
 #' @export
 setMethod("toJSON", signature("VariableMetadata"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named)    
+    named <- mbioUtils::matchArg(named)    
     tmp <- character()
 
-    variable_class_json <- veupathUtils::toJSON(object@variableClass, named = FALSE)
-    variable_spec_json <- veupathUtils::toJSON(object@variableSpec, named = FALSE)
+    variable_class_json <- mbioUtils::toJSON(object@variableClass, named = FALSE)
+    variable_spec_json <- mbioUtils::toJSON(object@variableSpec, named = FALSE)
     
 
     tmp <- paste0('"variableClass":', variable_class_json, ',"variableSpec":', variable_spec_json)
 
     if (!is.na(object@plotReference@value)) {
-      plot_reference_json <- veupathUtils::toJSON(object@plotReference, named = FALSE)
+      plot_reference_json <- mbioUtils::toJSON(object@plotReference, named = FALSE)
       tmp <- paste0(tmp, ',"plotReference":', plot_reference_json)
     }
 
@@ -231,12 +174,12 @@ setMethod("toJSON", signature("VariableMetadata"), function(object, named = c(TR
     }
 
     if (!is.na(object@dataType@value)) {
-      data_type_json <- veupathUtils::toJSON(object@dataType, named = FALSE)
+      data_type_json <- mbioUtils::toJSON(object@dataType, named = FALSE)
       tmp <- paste0(tmp, ',"dataType":', data_type_json)
     }
     
     if (!is.na(object@dataType@value)) {
-      data_shape_json <- veupathUtils::toJSON(object@dataShape, named = FALSE)
+      data_shape_json <- mbioUtils::toJSON(object@dataShape, named = FALSE)
       tmp <- paste0(tmp, ',"dataShape":', data_shape_json)
     }
 
@@ -249,14 +192,14 @@ setMethod("toJSON", signature("VariableMetadata"), function(object, named = c(TR
     tmp <- paste0(tmp, ',"imputeZero":', jsonlite::toJSON(jsonlite::unbox(object@imputeZero)))
     
     if (!is.na(object@weightingVariableSpec@variableId)) {
-      weighting_variable_spec_json <- veupathUtils::toJSON(object@weightingVariableSpec, named = FALSE)
+      weighting_variable_spec_json <- mbioUtils::toJSON(object@weightingVariableSpec, named = FALSE)
       tmp <- paste0(tmp, ',"weightingVariableSpec":', weighting_variable_spec_json)
     }
 
     tmp <- paste0(tmp, ',"hasStudyDependentVocabulary":', jsonlite::toJSON(jsonlite::unbox(object@hasStudyDependentVocabulary)))
 
     if (!!length(object@members)) {
-      members_json <- veupathUtils::toJSON(object@members, named = FALSE)
+      members_json <- mbioUtils::toJSON(object@members, named = FALSE)
       tmp <- paste0(tmp, ',"members":', members_json)
     }
     
@@ -268,7 +211,7 @@ setMethod("toJSON", signature("VariableMetadata"), function(object, named = c(TR
 
 #' @export
 setMethod("toJSON", signature("VariableMetadataList"), function(object, named = c(TRUE, FALSE)) {
-    named <- veupathUtils::matchArg(named) 
+    named <- mbioUtils::matchArg(named) 
     tmp <- S4SimpleListToJSON(object, FALSE)
 
     if (named) tmp <- paste0('{"variables":', tmp, "}")
@@ -316,10 +259,10 @@ setGeneric("findWeightingVariablesMetadata",
 
 #' @export
 setMethod("findWeightingVariablesMetadata", signature("VariableMetadataList"), function(variables) {
-  weightingVarSpecs <- veupathUtils::findWeightingVariableSpecs(variables)
-  weightingVarSpecsColumnNames <- unlist(lapply(weightingVarSpecs, veupathUtils::getColName))
+  weightingVarSpecs <- mbioUtils::findWeightingVariableSpecs(variables)
+  weightingVarSpecsColumnNames <- unlist(lapply(weightingVarSpecs, mbioUtils::getColName))
 
-  weightingVarIndex <- which(purrr::map(as.list(variables), function(x) {veupathUtils::getColName(x@variableSpec)}) %in% weightingVarSpecsColumnNames)
+  weightingVarIndex <- which(purrr::map(as.list(variables), function(x) {mbioUtils::getColName(x@variableSpec)}) %in% weightingVarSpecsColumnNames)
   if (!length(weightingVarIndex)) return(NULL)
 
   return(variables[weightingVarIndex])
@@ -339,7 +282,7 @@ setMethod("findWeightingVariableSpecs", signature("VariableMetadata"), function(
 #TODO should this return a VariableSpecList?
 #' @export 
 setMethod("findWeightingVariableSpecs", signature("VariableMetadataList"), function(object) {
-  return(lapply(as.list(object), veupathUtils::findWeightingVariableSpecs))
+  return(lapply(as.list(object), mbioUtils::findWeightingVariableSpecs))
 })
 
 #' EDA Variable Metadata with a Study-dependent Vocabulary
@@ -378,7 +321,7 @@ setMethod("getHasStudyDependentVocabulary", signature("VariableMetadata"), funct
 
 #' @export
 setMethod("getHasStudyDependentVocabulary", signature("VariableMetadataList"), function(object) {
-  return(lapply(as.list(object), veupathUtils::getHasStudyDependentVocabulary))
+  return(lapply(as.list(object), mbioUtils::getHasStudyDependentVocabulary))
 })
 
 #' EDA Variable Metadata which needs weighting
@@ -419,7 +362,7 @@ setGeneric("findVariableMetadataFromPlotRef",
 
 #' @export
 setMethod("findVariableMetadataFromPlotRef", signature("VariableMetadataList"), function(variables, plotRef) {
-  index <- veupathUtils::findIndexFromPlotRef(variables, plotRef)
+  index <- mbioUtils::findIndexFromPlotRef(variables, plotRef)
   if (!length(index)) return(NULL)
 
   return(variables[[index]])
@@ -454,7 +397,7 @@ setGeneric("findVariableSpecFromPlotRef",
 
 #' @export
 setMethod("findVariableSpecFromPlotRef", signature("VariableMetadataList"), function(variables, plotRef) {
-  index <- veupathUtils::findIndexFromPlotRef(variables, plotRef)
+  index <- mbioUtils::findIndexFromPlotRef(variables, plotRef)
   if (!length(index)) return(NULL)
 
   return(variables[[index]]@variableSpec)
@@ -496,11 +439,11 @@ setGeneric("findColNamesFromPlotRef",
 
 #' @export
 setMethod("findColNamesFromPlotRef", signature("VariableMetadataList"), function(variables, plotRef) {
-  colNames <- veupathUtils::findColNamesByPredicate(variables, function(x) {if (!is.na(x@plotReference@value) && x@plotReference@value == plotRef && !x@isCollection) TRUE})
+  colNames <- mbioUtils::findColNamesByPredicate(variables, function(x) {if (!is.na(x@plotReference@value) && x@plotReference@value == plotRef && !x@isCollection) TRUE})
   if (!length(colNames)) {
-    collectionVM <- veupathUtils::findCollectionVariableMetadata(variables)
+    collectionVM <- mbioUtils::findCollectionVariableMetadata(variables)
     if (!length(collectionVM)) return(NULL)
-    if (collectionVM@plotReference@value == plotRef) colNames <- unlist(lapply(as.list(collectionVM@members), veupathUtils::getColName))
+    if (collectionVM@plotReference@value == plotRef) colNames <- unlist(lapply(as.list(collectionVM@members), mbioUtils::getColName))
   }
 
   return(colNames)
@@ -520,11 +463,11 @@ setGeneric("findAllColNames",
 
 #' @export
 setMethod("findAllColNames", signature("VariableMetadataList"), function(variables) {
-  colNames <- veupathUtils::findColNamesByPredicate(variables, function(x) {if (!x@isCollection) TRUE})
+  colNames <- mbioUtils::findColNamesByPredicate(variables, function(x) {if (!x@isCollection) TRUE})
   if (!length(colNames)) {
-    collectionVM <- veupathUtils::findCollectionVariableMetadata(variables)
+    collectionVM <- mbioUtils::findCollectionVariableMetadata(variables)
     if (!length(collectionVM)) return(NULL)
-    colNames <- unlist(lapply(as.list(collectionVM@members), veupathUtils::getColName))
+    colNames <- unlist(lapply(as.list(collectionVM@members), mbioUtils::getColName))
   }
 
   return(colNames)
@@ -549,7 +492,7 @@ setMethod("findDataTypesFromPlotRef", signature("VariableMetadataList"), functio
 
   dataTypes <- purrr::map(as.list(variables), function(x) { if(!is.na(x@plotReference@value) && x@plotReference@value == plotRef) { return(x@dataType@value) } })
 
-  return(veupathUtils::toStringOrNull(unlist(dataTypes)))
+  return(mbioUtils::toStringOrNull(unlist(dataTypes)))
 })
 
 #' EDA Variable Data Shapes matching a PlotReference
@@ -571,7 +514,7 @@ setMethod("findDataShapesFromPlotRef", signature("VariableMetadataList"), functi
 
   dataShapes <- purrr::map(as.list(variables), function(x) { if(!is.na(x@plotReference@value) && x@plotReference@value == plotRef) { return(x@dataShape@value) } })
 
-  return(veupathUtils::toStringOrNull(unlist(dataShapes)))
+  return(mbioUtils::toStringOrNull(unlist(dataShapes)))
 })
 
 #' EDA Variable Column Name of a VariableSpec
@@ -600,12 +543,12 @@ setMethod("getColName", signature("VariableSpec"), function(varSpec) {
   } 
   if (entityId == '' || is.na(entityId)) return(varSpec@variableId)
 
-  return(veupathUtils::toStringOrNull(paste0(entityId, ".", varId)))
+  return(mbioUtils::toStringOrNull(paste0(entityId, ".", varId)))
 })
 
 #' @export 
 setMethod("getColName", signature("VariableSpecList"), function(varSpec) {
-  lapply(as.list(varSpec), veupathUtils::getColName)
+  lapply(as.list(varSpec), mbioUtils::getColName)
 })
 
 #' @export
@@ -628,7 +571,7 @@ setGeneric("findColNamesByPredicate",
 #' @export
 setMethod("findColNamesByPredicate", signature("VariableMetadataList"), function(variables, predicateFunction) {
   # For each variable in the variable list, return the column name if the predicate is true for that variable
-  colNames <- purrr::map(as.list(variables), function(x) {if (identical(predicateFunction(x), TRUE)) {return(veupathUtils::getColName(x@variableSpec))}})
+  colNames <- purrr::map(as.list(variables), function(x) {if (identical(predicateFunction(x), TRUE)) {return(mbioUtils::getColName(x@variableSpec))}})
   colNames <- unlist(colNames)
 
   return (colNames)
@@ -642,10 +585,10 @@ setGeneric("findVariableMetadataFromVariableSpec",
 
 #' @export
 setMethod("findVariableMetadataFromVariableSpec", signature("VariableMetadataList", "VariableSpecList"), function(variables, object) {
-  variableSpecs <- unlist(lapply(as.list(variables), veupathUtils::getVariableSpec, "Never"))
-  colNamesToMatch <- unlist(lapply(as.list(object), veupathUtils::getColName))
+  variableSpecs <- unlist(lapply(as.list(variables), mbioUtils::getVariableSpec, "Never"))
+  colNamesToMatch <- unlist(lapply(as.list(object), mbioUtils::getColName))
  
-  index <- which(purrr::map(variableSpecs, function(x) {veupathUtils::getColName(x)}) %in% colNamesToMatch)
+  index <- which(purrr::map(variableSpecs, function(x) {mbioUtils::getColName(x)}) %in% colNamesToMatch)
   
   if (!length(index)) return(NULL)
   
@@ -654,9 +597,9 @@ setMethod("findVariableMetadataFromVariableSpec", signature("VariableMetadataLis
 
 #' @export
 setMethod("findVariableMetadataFromVariableSpec", signature("VariableMetadataList", "VariableSpec"), function(variables, object) {
-  variableSpecs <- unlist(lapply(as.list(variables), veupathUtils::getVariableSpec, "Never"))
+  variableSpecs <- unlist(lapply(as.list(variables), mbioUtils::getVariableSpec, "Never"))
  
-  index <- which(purrr::map(variableSpecs, function(x) {veupathUtils::getColName(x)}) == veupathUtils::getColName(object))
+  index <- which(purrr::map(variableSpecs, function(x) {mbioUtils::getColName(x)}) == mbioUtils::getColName(object))
   if (!length(index)) return(NULL)
 
   return(variables[index])
diff --git a/R/utils-classes.R b/R/utils-classes.R
index 24cc3ac..045cd9f 100644
--- a/R/utils-classes.R
+++ b/R/utils-classes.R
@@ -5,7 +5,7 @@ S4SimpleListToJSON <- function(S4SimpleList, named = c(TRUE, FALSE)) {
     if (!inherits(S4SimpleList, 'SimpleList')) stop("S4SimpleListToJSON only accepts an S4Vectors::SimpleList as input.", class(S4SimpleList), "was provided.")
 
     tmp <- as.list(S4SimpleList)
-    tmp <- lapply(tmp, veupathUtils::toJSON, named)
+    tmp <- lapply(tmp, mbioUtils::toJSON, named)
     tmp <- paste(tmp, collapse = ",")
     if (tmp != "") tmp <- paste0("[", tmp, "]")
 
diff --git a/R/utils-cut.R b/R/utils-cut.R
index 4a3bb4f..2573525 100644
--- a/R/utils-cut.R
+++ b/R/utils-cut.R
@@ -52,7 +52,7 @@ cut_width <- function(x, width, center = NULL, boundary = NULL, closed = c("righ
   x <- as.numeric(x)
   width <- as.numeric(width)
 
-  closed <- veupathUtils::matchArg(closed)
+  closed <- mbioUtils::matchArg(closed)
 
   x_range <- range(x, na.rm = TRUE, finite = TRUE)
   if (length(x_range) == 0) {
@@ -196,7 +196,7 @@ find_origin <- function(x_range, width, boundary) {
 }
 
 breaks <- function(x, method = c('equalInterval', 'quantile', 'sd'), nbins = NULL, binwidth = NULL) {
-  method <- veupathUtils::matchArg(method)
+  method <- mbioUtils::matchArg(method)
   if (method != 'sd') {
     if ((!is.null(nbins) && !is.null(binwidth)) || (is.null(nbins) && is.null(binwidth))) {
       stop("Specify exactly one of n and width for methods `equalInterval` and `quantile`.")
diff --git a/R/utils-numeric.R b/R/utils-numeric.R
index 9f798e0..c1887e5 100644
--- a/R/utils-numeric.R
+++ b/R/utils-numeric.R
@@ -1,305 +1,14 @@
-#' Find Bin Ranges for a Continuous Variable
-#' 
-#' This function will find bin start, end and labels for a 
-#' continuous variable. Optionally, it can return a value/ count
-#' per bin. By default returns 10 bins for equalRanges and quantile
-#' methods and 6 for sd (standard deviations).
-#' @param x Numeric (or Date) vector to find bins for
-#' @param method A string indicating which method to use to find bins ('equalRanges', 'quantile', 'sd')
-#' @param numBins A number indicating how many bins are desired
-#' @param getValue A boolean indicating whether to return the counts per bin
-#' @export 
-getDiscretizedBins <- function(x, method = c('equalInterval', 'quantile', 'sd'), numBins = NULL, getValue = c(TRUE, FALSE)) {
-  method <- veupathUtils::matchArg(method)
-  getValue <- veupathUtils::matchArg(getValue)
-  if (is.null(numBins)) numBins <- 10
-
-  isDate <- FALSE
-  if (class(x) == 'Date') {
-    x <- as.numeric(x)
-    isDate <- TRUE
-  }
-
-  binEdges <- unname(breaks(x, method, numBins))
-  if (anyDuplicated(binEdges)) {
-    warning("There is insufficient data to produce the requested number of bins. Returning as many bins as possible.")
-    binEdges <- unique(binEdges)
-  }
-
-  if (isDate) {
-    binEdges <- as.Date(binEdges, origin = "1970-01-01")  # Origin matches as.numeric default origin
-  }
-
-  binStarts <- binEdges[1:(length(binEdges)-1)]
-  binEnds <- binEdges[2:length(binEdges)]
-  if (length(binEdges) == 1) binEnds <- binEnds[[2]]
-
-  # only format human-friendly labels. binStarts and binEnds should provide exact values
-  # must also guarantee that the first binStart and last binEnd encompass the full data range even after formatting
-  if (isDate) {
-    # Dates are already human-friendly.
-    formattedBinStarts <- binStarts
-    formattedBinEnds <- binEnds
-  } else {
-    formattedBinStarts <- formatC(binStarts)
-    formattedBinEnds <- formatC(binEnds)
-  }
-  # think the alternative is to write a recursive fxn to call formatC w more digits until we get a result we like. 
-  # that seems costly, so ill wait to do that until we see how much an issue this really is
-  if (as.numeric(formattedBinStarts[[1]]) > as.numeric(binStarts[[1]])) formattedBinStarts[[1]] <- as.character(binStarts[[1]])
-  if (as.numeric(formattedBinEnds[[length(binEnds)]]) < as.numeric(binEnds[[length(binEnds)]])) formattedBinEnds[[length(binEnds)]] <- as.character(binEnds[[length(binEnds)]])
-
-  binLabels <- paste0("(",formattedBinStarts,", ", formattedBinEnds, "]")
-  binLabels[[1]] <- gsub("(","[",binLabels[[1]], fixed=T)
-
-  if (getValue) {
-    if (length(binEdges) == 1) {
-      values <- 1
-    } else {
-      values <- c(table(cut(x, binEdges, include.lowest=TRUE)))
-    }
-  } else {
-    values <- rep(NA_real_, length(binStarts))
-  }
-  
-  # For numeric vars, coerce bin starts and ends to character so as to not lose any precision.
-  # It's possible we wouldnt lose precision regardless, but that's something we can look into in the future.
-  # We don't want to do this for dates, because then we loose the date being a date
-  if (isDate) {
-    bins <- lapply(1:length(binStarts), FUN = function(x) { Bin(binStart = binStarts[[x]],
-                                                              binEnd = binEnds[[x]],
-                                                              binLabel = binLabels[[x]],
-                                                              value = values[[x]])})
-
-  } else {
-    bins <- lapply(1:length(binStarts), FUN = function(x) { Bin(binStart = as.character(binStarts[[x]]),
-                                                              binEnd = as.character(binEnds[[x]]),
-                                                              binLabel = binLabels[[x]],
-                                                              value = values[[x]])})
-  }
-
-  return(BinList(S4Vectors::SimpleList(bins)))
-}
-
-#' Non-Zero Rounding
-#' 
-#' This function will recursively attempt to round a value to
-#' greater and greater precision until it results in a non-zero
-#' value. One consequence of this is that the precision of the
-#' output value may not be exactly what was requested.
-#' @param x Numeric value to round
-#' @param digits Number indicating the desired precision
-#' @return number rounded as nearly to the requested precision as 
-#' possible without returning zero.
-#' @export
-nonZeroRound <- function(x, digits) {
-  if (x == 0) {
-    warning("Input is already zero and cannot be rounded to a non-zero number.")
-    return(x)
-  }
-  if (round(x,digits) == 0) { 
-    Recall(x,digits+1) 
-  } else { 
-    round(x,digits) 
-  } 
-}
-
-#' Replace numeric NAs with 0 - update by reference
-#'
-#' This function replaces NAs in numeric columns with 0.
-#' @param x data.table, data.frame, or list
-#' @param cols vector of column names for which the NA replacement should occur.
-#' Default is all numeric columns.
-#' @return x with desired NAs replaced with 0.
-#' @export
-setNaToZero <- function(df, cols = NULL) {
-
-  # if cols not set, use all numeric cols
-  if (is.null(cols)) cols <- findNumericCols(df)
-  cols <- validateNumericCols(df, cols=cols)
-
-  data.table::setnafill(df, fill = 0, cols = cols)
-}
-
-
-#' Replace numeric NAs with 0
-#'
-#' This function replaces numeric NAs with 0.
-#' @param x list, data.frame, array, or vector
-#' @param cols Optional. When appropriate, vector of column names 
-#' for which the NA replacement should occur.
-#' Default is all numeric columns.
-#' @return object the same type as x, where desired NAs are replaced with 0.
-#' @export
-naToZero <- function(x, ...) {
-  UseMethod("naToZero")
-}
-
-#' @export
-naToZero.data.table <- function(x, cols = NULL) {
-
-  # if cols not set, use all numeric cols
-  if (is.null(cols)) cols <- findNumericCols(x)
-  cols <- validateNumericCols(x, cols)
-
-  x[, cols] <- x[, ..cols][, lapply(.SD, function(y){y[is.na(y)] <- 0; y})]
-  return(x)
-}
-
-#' @export
-naToZero.data.frame <- function(x, cols = NULL) {
-
-  # if cols not set, use all numeric cols
-  if (is.null(cols)) cols <- findNumericCols(x)
-  cols <- validateNumericCols(x, cols)
-
-  x[cols][is.na(x[cols])] <- 0
-  return(x)
-}
-
-#' @export
-naToZero.list <- function(x, cols = NULL) {
-
-  # if cols not set, use all numeric cols
-  if (is.null(cols)) cols <- findNumericCols(x)
-  cols <- validateNumericCols(x, cols)
-
-  x[cols] <- lapply(x[cols], function(y) {y[is.na(y)] <- 0; return(y)})
-  return(x)
-}
-
-#' @export
-naToZero.default <- function(x) {
-  numericEntries <- purrr::map_lgl(x, is.numeric)
-  x[numericEntries][is.na(x[numericEntries])] <- 0
-  return(x)
-}
-
-
 #' Find numeric columns
 #'
-#' This function finds all numeric columns in a list
-#' @param x list, data.frame, array, or vector
+#' This function finds all numeric columns in a data structure
+#' @param x list, data.frame, data.table, or vector
 #' @return vector of numeric column names in x. If no numeric columns found, returns NULL
-#' @importFrom purrr map_lgl
 #' @export
 findNumericCols <- function(x) {
-  numericCols <- names(x)[purrr::map_lgl(x, is.numeric)]
+  numericCols <- names(x)[sapply(x, is.numeric)]
   
   # If no numeric cols, return NULL
   if (!length(numericCols)) return(NULL)
-
+  
   return(numericCols)
 }
-
-
-#' Validate numeric columns
-#' 
-#' Given a vector of column names or indices, this function ensures all
-#' referenced columns are numeric
-#' @param x list, data.frame, array, data.table
-#' @param cols vector of column names or column indices. NAs will be removed.
-#' @return given column names
-#' @importFrom purrr map_lgl
-#' @import data.table
-#' @export
-validateNumericCols <- function(x, cols, ...) {
-  UseMethod("validateNumericCols")
-}
-
-#' @importFrom purrr map_lgl
-#' @export
-validateNumericCols.data.table <- function(x, cols) {
-  if (any(is.na(cols))) {cols <- cols[!is.na(cols)]; warning("validateNumericCols warning: NAs in cols removed")}
-  if (!length(cols)) {warning("validateNumericCols warning: no numeric columns given"); return(cols)}
-  if (is.character(cols)) {
-    if (!all(cols %in% names(x))) stop('validateNumericCols failed: Column name not found in the input')
-  } else {
-    if (max(cols) > ncol(x) | min(cols) < 1) stop('validateNumericCols failed: column index does not represent a valid column')
-  }
-  if (!all(purrr::map_lgl(x[, ..cols], is.numeric))) stop('validateNumericCols failed: All columns must be numeric')
-  return(cols)
-}
-
-#' @importFrom purrr map_lgl
-#' @export
-validateNumericCols.list <- function(x, cols) {
-  if (any(is.na(cols))) {cols <- cols[!is.na(cols)]; warning("validateNumericCols warning: NAs in cols removed")}
-  if (!length(cols)) {warning("validateNumericCols warning: no numeric columns given"); return(cols)}
-  if (is.character(cols)) {
-    if (!all(cols %in% names(x))) stop('validateNumericCols failed: Column name not found in the input')
-  } else {
-    if (max(cols) > length(names(x)) | min(cols) < 1) stop('validateNumericCols failed: column index does not represent a valid column')
-  }
-  if (!all(purrr::map_lgl(x[cols], is.numeric))) stop('validateNumericCols failed: All columns must be numeric')
-  return(cols)
-}
-
-
-#' @importFrom purrr map_lgl
-#' @export
-validateNumericCols.default <- function(x, cols) {
-  if (any(is.na(cols))) {cols <- cols[!is.na(cols)]; warning("validateNumericCols warning: NAs in cols removed")}
-  if (!length(cols)) {warning("validateNumericCols warning: no numeric columns given"); return(cols)}
-  if (is.character(cols)) {
-    if (!all(cols %in% names(x))) stop('validateNumericCols failed: Column name not found in the input')
-  } else {
-    if (max(cols) > ncol(x) | min(cols) < 1) stop('validateNumericCols failed: column index does not represent a valid column')
-  }
-  if (!all(purrr::map_lgl(x[cols], is.numeric))) stop('validateNumericCols failed: All columns must be numeric')
-  return(cols)
-}
-
-#
-# For any number, return an absolute delta (numeric) at the last
-# significant digit in the number, using the number of digits specified
-#
-# e.g. assuming 3 significant digits
-# 
-# 1.23 -> 0.01
-# 11.0 -> 0.1
-# 12.3 -> 0.1
-# 101000 -> 1000
-# 1.20e-05 -> 0.01e-05 == 1.0e-07
-# 0.0123e-05 -> 0.0001e-05 == 1.0e-09
-# -2.34e-02 -> 0.01e-02 == 1.0e-04
-# 
-signifDigitEpsilon <- function(x, digits) {
-
-  # '#' flag ensures trailing zeroes
-  # take abs() here because we don't care about sign
-  rounded <- formatC(abs(x), digits = digits, width = 1L, flag = '#')
-
-  # split into vector of single characters
-  characters <- strsplit(rounded, '')
-
-  result <- c()
-  seenSignificant <- FALSE
-  significantCount <- 0
-  # walk through string, looking for first non-zero, non decimal point character
-  for (c in unlist(characters)) {
-    if (!(c %in% c('0', '.'))) {
-      seenSignificant <- TRUE
-    }
-    if (c == '.') {
-      result <- c(result, c)
-    } else if (seenSignificant) {
-      significantCount <- significantCount + 1
-      if (significantCount < digits) {
-        result <- c(result, '0')
-      } else if (significantCount == digits) {
-        result <- c(result, '1')
-      } else {
-        # we're out of the significant digits
-        # we must be in the exponent part (if present) or in trailing zeroes (e.g. in 101000 example)
-        # so just copy it over
-        result <- c(result, c)
-      }
-    } else {
-      result <- c(result, '0')
-    }
-  }
-
-  # return joined result as a number
-  as.numeric(paste(result, collapse=""))
-}
diff --git a/R/utils.R b/R/utils.R
index c241271..3ee60b1 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -115,7 +115,7 @@ stripEntityIdFromColumnHeader <- function(columnNames) {
   columnsToFix <- grepl(".", columnNames, fixed=T)
 
   if (sum(columnsToFix) > 0) {
-    columnNames[columnsToFix] <- veupathUtils::strSplit(columnNames[columnsToFix], ".", index=2)
+    columnNames[columnsToFix] <- mbioUtils::strSplit(columnNames[columnsToFix], ".", index=2)
   }
 
   return(columnNames)
diff --git a/README.md b/README.md
index 033c1ae..f37f900 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,30 @@
 <!-- badges: start -->
-  [![R-CMD-check](https://github.com/microbiomeDB/veupathUtils/workflows/R-CMD-check/badge.svg)](https://github.com/microbiomeDB/veupathUtils/actions)
+  [![R-CMD-check](https://github.com/microbiomeDB/mbioUtils/workflows/R-CMD-check/badge.svg)](https://github.com/microbiomeDB/mbioUtils/actions)
   <!-- badges: end -->
 
-# veupathUtils
+# mbioUtils
 
-veupathUtils is an R package which provides helper functions for solving common problems in the MicrobiomeDB project.
+mbioUtils is an R package which provides utility functions and data structures for microbiomeDB R packages. It was forked from veupathUtils 2.6.7 and stripped of VEuPathDB-specific functionality.
 
 ## Installation
 
-Use the R package [remotes](https://cran.r-project.org/web/packages/remotes/index.html) to install veupathUtils. From the R command prompt:
+Use the R package [remotes](https://cran.r-project.org/web/packages/remotes/index.html) to install mbioUtils. From the R command prompt:
 
 ```R
-remotes::install_github('microbiomeDB/veupathUtils')
+remotes::install_github('microbiomeDB/mbioUtils')
 ```
 
 ## Usage
 This package is primarily intended for use as a dependency in other R packages. In order to establish that depedency the developer of the 
 dependent package must follow these steps:
-1. add ```veupathUtils``` to the ```Imports``` section of the dependent package's ```DESCRIPTION``` file.
+1. add ```mbioUtils``` to the ```Imports``` section of the dependent package's ```DESCRIPTION``` file.
 2. add a ```Remotes``` section to the dependent package's ```DESCRIPTION``` file.
-3. add ```microbiomeDB/veupathUtils``` to the ```Remotes``` section of the dependent package's ```DESCRIPTION``` file.
-4. add ```#' @import veupathUtils``` to the dependent package's package-level documentation file (usually called ```{mypackage}-package.R```).
+3. add ```microbiomeDB/mbioUtils``` to the ```Remotes``` section of the dependent package's ```DESCRIPTION``` file.
+4. add ```#' @import mbioUtils``` to the dependent package's package-level documentation file (usually called ```{mypackage}-package.R```).
 5. run ```devtools::document()```.
 
 The developer of the dependent package can either install this package using ```remotes``` as descripted in the "Installation" section above,
-or if they mean to also develop veupathUtils simultaneously, can use ```devtools::load_all("{path-to-veupathUtils}")``` to load this package in 
+or if they mean to also develop mbioUtils simultaneously, can use ```devtools::load_all("{path-to-mbioUtils}")``` to load this package in 
 their R session.
 
 ## Contributing
diff --git a/data/megastudyDataReal.rda b/data/megastudyDataReal.rda
deleted file mode 100644
index 1cdde00..0000000
Binary files a/data/megastudyDataReal.rda and /dev/null differ
diff --git a/data/studyVocabsReal.rda b/data/studyVocabsReal.rda
deleted file mode 100644
index 7760af5..0000000
Binary files a/data/studyVocabsReal.rda and /dev/null differ
diff --git a/man/ComputeResult-class.Rd b/man/ComputeResult-class.Rd
index 1930df3..962cad9 100644
--- a/man/ComputeResult-class.Rd
+++ b/man/ComputeResult-class.Rd
@@ -20,7 +20,7 @@ This includes their representation in R, as JSON and how they are written to fil
 
 \item{\code{ancestorIdColumns}}{A character vector of column names representing parent entities of the recordIdColumn.}
 
-\item{\code{computedVariableMetadata}}{veupathUtils::VariableMetadataList detailing the computed variables.}
+\item{\code{computedVariableMetadata}}{mbioUtils::VariableMetadataList detailing the computed variables.}
 
 \item{\code{statistics}}{An optional slot of any values. List or data.frame are recommended. It is not required to have rows or cols map to samples.}
 
diff --git a/man/Megastudy-class.Rd b/man/Megastudy-class.Rd
deleted file mode 100644
index 1527fa1..0000000
--- a/man/Megastudy-class.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/class-Megastudy.R
-\docType{class}
-\name{Megastudy-class}
-\alias{Megastudy-class}
-\alias{Megastudy}
-\title{Megastudy}
-\description{
-A class to encapsulate everything we need for our special handling
-of 'megastudies' in EDA. Currently that is imputing zeroes on tall data
-given that each (sub-)study has different expected vocabularies for
-that data.
-}
-\section{Slots}{
-
-\describe{
-\item{\code{data}}{A data.table}
-
-\item{\code{ancestorIdColumns}}{A character vector of column names representing parent entities of the recordIdColumn.}
-
-\item{\code{studySpecificVocabularies}}{veupathUtils::StudySpecificVocabulariesByVariableList}
-
-\item{\code{collectionIds}}{A data.table including collection ids and any variables of interest for the collection entity.
-If none provided, the collection ids will be inferred from those present in \code{data}.}
-}}
-
diff --git a/man/Range-class.Rd b/man/Range-class.Rd
deleted file mode 100644
index c4f55e5..0000000
--- a/man/Range-class.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/class-Range.R
-\docType{class}
-\name{Range-class}
-\alias{Range-class}
-\alias{Range}
-\title{Data Range}
-\description{
-A class to a data range, represented as minimum and maximum values.
-The range could be numeric or Date/ Posix
-}
-\section{Slots}{
-
-\describe{
-\item{\code{minimum}}{A number, Date or Posix value}
-
-\item{\code{maximum}}{A number, Date or Posix value}
-}}
-
diff --git a/man/Statistic-class.Rd b/man/Statistic-class.Rd
deleted file mode 100644
index 9328a3b..0000000
--- a/man/Statistic-class.Rd
+++ /dev/null
@@ -1,25 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/class-Statistic.R
-\docType{class}
-\name{Statistic-class}
-\alias{Statistic-class}
-\alias{Statistic}
-\title{Statistic}
-\description{
-A class to specify a named statistic, its value, confidence interval and p-value.
-This is primarily to help maintain consistency in meeting the EDA API.
-}
-\section{Slots}{
-
-\describe{
-\item{\code{name}}{A string specifying what statistic was calculated}
-
-\item{\code{value}}{A number}
-
-\item{\code{confidenceInterval}}{An optional Range specifying minimum and maximum values for the confidence interval}
-
-\item{\code{confidenceLevel}}{An optional decimal number indicating the degree of confidence represented by the confidence interval}
-
-\item{\code{pvalue}}{An optional string representing the p-value associated with the statistic. The string should either be coercible to a number or be '<0.0001'.}
-}}
-
diff --git a/man/StudySpecificVocabulariesByVariable-class.Rd b/man/StudySpecificVocabulariesByVariable-class.Rd
deleted file mode 100644
index d66571f..0000000
--- a/man/StudySpecificVocabulariesByVariable-class.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/class-Megastudy.R
-\docType{class}
-\name{StudySpecificVocabulariesByVariable-class}
-\alias{StudySpecificVocabulariesByVariable-class}
-\alias{StudySpecificVocabulariesByVariable}
-\title{Study Specific Vocabularies By Variable}
-\description{
-A class to specify expected values per study for some variable
-of interest.
-}
-\section{Slots}{
-
-\describe{
-\item{\code{studyIdColumnName}}{A string specifying the name of the column in the vocab data table that contains the study id}
-
-\item{\code{variableSpecColumnName}}{A string specifying the name of the column in the vocab data table that contains the variable vocabulary values}
-
-\item{\code{studyVocab}}{A data.table with columns studyIdColumnName and variableSpecColumnName that specifies expected vocabularies for each study}
-}}
-
diff --git a/man/findNumericCols.Rd b/man/findNumericCols.Rd
index 42b8e06..ab2b3d2 100644
--- a/man/findNumericCols.Rd
+++ b/man/findNumericCols.Rd
@@ -7,11 +7,11 @@
 findNumericCols(x)
 }
 \arguments{
-\item{x}{list, data.frame, array, or vector}
+\item{x}{list, data.frame, data.table, or vector}
 }
 \value{
 vector of numeric column names in x. If no numeric columns found, returns NULL
 }
 \description{
-This function finds all numeric columns in a list
+This function finds all numeric columns in a data structure
 }
diff --git a/man/getDTWithImputedZeroes.Rd b/man/getDTWithImputedZeroes.Rd
deleted file mode 100644
index 4b2dae5..0000000
--- a/man/getDTWithImputedZeroes.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-Megastudy.R
-\name{getDTWithImputedZeroes}
-\alias{getDTWithImputedZeroes}
-\title{Impute Zeroes (on tall data)}
-\usage{
-getDTWithImputedZeroes(object, variables, verbose = c(TRUE, FALSE))
-}
-\arguments{
-\item{object}{veupathUtils::Megastudy}
-}
-\value{
-data.table
-}
-\description{
-This function returns a data.table which has explicit zero values
-for all expected categories of some variable of interest in a megastudy.
-}
diff --git a/man/getDataTable.Rd b/man/getDataTable.Rd
deleted file mode 100644
index 2d127f9..0000000
--- a/man/getDataTable.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-Statistic.R
-\name{getDataTable}
-\alias{getDataTable}
-\title{Convert StatisicList to data.table}
-\usage{
-getDataTable(object)
-}
-\arguments{
-\item{object}{A StatisticList object}
-}
-\value{
-data.table
-}
-\description{
-This function returns a data.table representation of a StatisticList.
-The content of the slots are stored as JSON. Each entry in the list becomes a named column.
-}
diff --git a/man/getDiscretizedBins.Rd b/man/getDiscretizedBins.Rd
deleted file mode 100644
index 5eccf14..0000000
--- a/man/getDiscretizedBins.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-numeric.R
-\name{getDiscretizedBins}
-\alias{getDiscretizedBins}
-\title{Find Bin Ranges for a Continuous Variable}
-\usage{
-getDiscretizedBins(
-  x,
-  method = c("equalInterval", "quantile", "sd"),
-  numBins = NULL,
-  getValue = c(TRUE, FALSE)
-)
-}
-\arguments{
-\item{x}{Numeric (or Date) vector to find bins for}
-
-\item{method}{A string indicating which method to use to find bins ('equalRanges', 'quantile', 'sd')}
-
-\item{numBins}{A number indicating how many bins are desired}
-
-\item{getValue}{A boolean indicating whether to return the counts per bin}
-}
-\description{
-This function will find bin start, end and labels for a
-continuous variable. Optionally, it can return a value/ count
-per bin. By default returns 10 bins for equalRanges and quantile
-methods and 6 for sd (standard deviations).
-}
diff --git a/man/getStudyIdColumnName.Rd b/man/getStudyIdColumnName.Rd
deleted file mode 100644
index c06d036..0000000
--- a/man/getStudyIdColumnName.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-Megastudy.R
-\name{getStudyIdColumnName}
-\alias{getStudyIdColumnName}
-\title{StuydIdColName as String}
-\usage{
-getStudyIdColumnName(object)
-}
-\arguments{
-\item{object}{veupathUtils::StudySpecificVocabulariesByVariable}
-}
-\value{
-character
-}
-\description{
-This function returns the studyIdColName from an StudySpecificVocabulariesByVariable
-}
diff --git a/man/getVariableSpec.Rd b/man/getVariableSpec.Rd
deleted file mode 100644
index a7978f6..0000000
--- a/man/getVariableSpec.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-Megastudy.R
-\name{getVariableSpec}
-\alias{getVariableSpec}
-\title{get a VariableSpec}
-\usage{
-getVariableSpec(object, ...)
-}
-\arguments{
-\item{object}{An object containing a veupathUtils::VariableSpec}
-}
-\value{
-character
-}
-\description{
-This function returns a string representation of a VariableSpec. By
-default it assumes the VariableSpec is available in a slot called
-\code{variableSpec}.
-}
diff --git a/man/getVariableSpecColumnName.Rd b/man/getVariableSpecColumnName.Rd
deleted file mode 100644
index 84bf745..0000000
--- a/man/getVariableSpecColumnName.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-Megastudy.R
-\name{getVariableSpecColumnName}
-\alias{getVariableSpecColumnName}
-\title{VarSpecColName as String}
-\usage{
-getVariableSpecColumnName(object)
-}
-\arguments{
-\item{object}{veupathUtils::StudySpecificVocabulariesByVariable}
-}
-\value{
-character
-}
-\description{
-This function returns the variableSpec from an StudySpecificVocabulariesByVariable
-}
diff --git a/man/mbioUtils-package.Rd b/man/mbioUtils-package.Rd
new file mode 100644
index 0000000..7467ed4
--- /dev/null
+++ b/man/mbioUtils-package.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mbioUtils-package.R
+\docType{package}
+\name{mbioUtils-package}
+\alias{mbioUtils}
+\alias{mbioUtils-package}
+\title{mbioUtils: Utility Functions for MicrobiomeDB R Packages}
+\description{
+mbioUtils contains utility functions and data structures for microbiomeDB R packages, including microbiomeComputations, MicrobiomeDB, and microbiomeData. Forked from veupathUtils 2.6.7.
+}
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/microbiomeDB/mbioUtils}
+  \item Report bugs at \url{https://github.com/microbiomeDB/mbioUtils/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Danielle Callan \email{dcallan@upenn.edu}
+
+Authors:
+\itemize{
+  \item Ann Blevins \email{annsize@upenn.edu} (Original veupathUtils author)
+}
+
+}
+\keyword{internal}
diff --git a/man/megastudyDataReal.Rd b/man/megastudyDataReal.Rd
deleted file mode 100644
index 3c936d2..0000000
--- a/man/megastudyDataReal.Rd
+++ /dev/null
@@ -1,75 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{megastudyDataReal}
-\alias{megastudyDataReal}
-\title{Some Real Life Popbio Megastudy Test Data}
-\format{
-\subsection{\code{megastudyDataReal}}{
-
-A data frame with 14 rows and 9 columns:
-\describe{
-\item{EUPATH_0000609.Sample_stable_id}{Stable ID for the \code{Sample} entity}
-\item{OBI_0000659.ParentOfSample_stable_id}{Stable ID for the \code{Collections} entity}
-\item{GAZ_00000448.GeographicLocation_stable_id}{Stable ID for the \verb{Collection sites} entity}
-\item{EUPATH_0000605.Study_stable_id}{Stable ID for the \code{Studies} entity}
-\item{EUPATH_0000609.PATO_0000047}{Sample -> Sex}
-\item{EUPATH_0000609.POPBIO_8000017}{Sample -> Unbiased Specimen Count}
-\item{EUPATH_0000609.OBI_0001909}{Sample -> Species}
-\item{EUPATH_0000609.EUPATH_0043227}{Sample -> Female Insect Feeding Status}
-\item{EUPATH_0000609.UBERON_0000105}{Sample -> Life Cycle Stage}
-}
-}
-}
-\source{
-\url{https://vectorbase.org/vectorbase/app/workspace/maps/A4nuqcm/import}
-}
-\usage{
-megastudyDataReal
-}
-\description{
-It is the data used in the subset associated w this saved analysis:
-https://vectorbase.org/vectorbase/app/workspace/maps/A4nuqcm/import
-as of 9 Jan 2024.
-As of the time of writing the filters in this analysis are like this:
-"filters": [
-{
-"entityId": "EUPATH_0000605",
-"variableId": "POPBIO_8000215",
-"stringSet": \link[=
-       "VBP0000844"
-     ]{ "VBP0000844" },
-"type": "stringSet"
-},
-{
-"entityId": "GAZ_00000448",
-"variableId": "EUPATH_0000542",
-"stringSet": \link[=
-       "Canyon Rim Church"
-     ]{ "Canyon Rim Church" },
-"type": "stringSet"
-},
-{
-"variableId": "EUPATH_0043256",
-"entityId": "OBI_0000659",
-"type": "dateRange",
-"min": "2022-05-01T00:00:00Z",
-"max": "2022-11-01T00:00:00Z"
-},
-{
-"type": "numberRange",
-"variableId": "OBI_0001620",
-"entityId": "GAZ_00000448",
-"min": 40.698307645373106,
-"max": 40.71193901146775
-},
-{
-"type": "longitudeRange",
-"variableId": "OBI_0001621",
-"entityId": "GAZ_00000448",
-"left": -111.8212938308716,
-"right": -111.79749727249147
-}
-]
-}
-\keyword{datasets}
diff --git a/man/naToZero.Rd b/man/naToZero.Rd
deleted file mode 100644
index 8ae2380..0000000
--- a/man/naToZero.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-numeric.R
-\name{naToZero}
-\alias{naToZero}
-\title{Replace numeric NAs with 0}
-\usage{
-naToZero(x, ...)
-}
-\arguments{
-\item{x}{list, data.frame, array, or vector}
-
-\item{cols}{Optional. When appropriate, vector of column names
-for which the NA replacement should occur.
-Default is all numeric columns.}
-}
-\value{
-object the same type as x, where desired NAs are replaced with 0.
-}
-\description{
-This function replaces numeric NAs with 0.
-}
diff --git a/man/nonZeroRound.Rd b/man/nonZeroRound.Rd
deleted file mode 100644
index 039fafb..0000000
--- a/man/nonZeroRound.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-numeric.R
-\name{nonZeroRound}
-\alias{nonZeroRound}
-\title{Non-Zero Rounding}
-\usage{
-nonZeroRound(x, digits)
-}
-\arguments{
-\item{x}{Numeric value to round}
-
-\item{digits}{Number indicating the desired precision}
-}
-\value{
-number rounded as nearly to the requested precision as
-possible without returning zero.
-}
-\description{
-This function will recursively attempt to round a value to
-greater and greater precision until it results in a non-zero
-value. One consequence of this is that the precision of the
-output value may not be exactly what was requested.
-}
diff --git a/man/setNaToZero.Rd b/man/setNaToZero.Rd
deleted file mode 100644
index ef87028..0000000
--- a/man/setNaToZero.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-numeric.R
-\name{setNaToZero}
-\alias{setNaToZero}
-\title{Replace numeric NAs with 0 - update by reference}
-\usage{
-setNaToZero(df, cols = NULL)
-}
-\arguments{
-\item{cols}{vector of column names for which the NA replacement should occur.
-Default is all numeric columns.}
-
-\item{x}{data.table, data.frame, or list}
-}
-\value{
-x with desired NAs replaced with 0.
-}
-\description{
-This function replaces NAs in numeric columns with 0.
-}
diff --git a/man/toJSON.Rd b/man/toJSON.Rd
index 9bbfb4d..53cb60b 100644
--- a/man/toJSON.Rd
+++ b/man/toJSON.Rd
@@ -16,5 +16,5 @@ character vector of length 1 containing JSON string
 }
 \description{
 This function converts an R object to a JSON string.
-see \code{methods(veupathUtils::toJSON)} for a list of support classes.
+see \code{methods(mbioUtils::toJSON)} for a list of support classes.
 }
diff --git a/man/validateNumericCols.Rd b/man/validateNumericCols.Rd
deleted file mode 100644
index cd79fb6..0000000
--- a/man/validateNumericCols.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils-numeric.R
-\name{validateNumericCols}
-\alias{validateNumericCols}
-\title{Validate numeric columns}
-\usage{
-validateNumericCols(x, cols, ...)
-}
-\arguments{
-\item{x}{list, data.frame, array, data.table}
-
-\item{cols}{vector of column names or column indices. NAs will be removed.}
-}
-\value{
-given column names
-}
-\description{
-Given a vector of column names or indices, this function ensures all
-referenced columns are numeric
-}
diff --git a/man/veupathUtils-package.Rd b/man/veupathUtils-package.Rd
deleted file mode 100644
index d60a5da..0000000
--- a/man/veupathUtils-package.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/veupathUtils-package.R
-\docType{package}
-\name{veupathUtils-package}
-\alias{veupathUtils}
-\alias{veupathUtils-package}
-\title{veupathUtils: General Helper Functions for MicrobiomeDB Projects}
-\description{
-veupathUtils contains various R helper functions intended to be useful across a variety of projects, including the plot.data and microbiomeComputations packages.
-}
-\seealso{
-Useful links:
-\itemize{
-  \item \url{https://github.com/microbiomeDB/veupathUtils}
-  \item Report bugs at \url{https://github.com/microbiomeDB/veupathUtils/issues}
-}
-
-}
-\author{
-\strong{Maintainer}: Ann Blevins \email{annsize@upenn.edu}
-
-Authors:
-\itemize{
-  \item Danielle Callan \email{dcallan@upenn.edu}
-}
-
-}
-\keyword{internal}
diff --git a/tests/testthat.R b/tests/testthat.R
index 6b3cd5a..f5b7dbf 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,4 +1,4 @@
 library(testthat)
-library(veupathUtils)
+library(mbioUtils)
 
-test_check("veupathUtils")
+test_check("mbioUtils")
diff --git a/tests/testthat/test-ComputeResult.R b/tests/testthat/test-ComputeResult.R
index 958c26c..3e60e60 100644
--- a/tests/testthat/test-ComputeResult.R
+++ b/tests/testthat/test-ComputeResult.R
@@ -8,25 +8,25 @@ test_that('ComputeResult validation works', {
   names(df) <- stripEntityIdFromColumnHeader(names(df))
   df$alphaDiversity <- .1
 
-  computedVariableMetadata <- veupathUtils::VariableMetadata(
-                 variableClass = veupathUtils::VariableClass(value = "computed"),
-                 variableSpec = veupathUtils::VariableSpec(variableId = 'alphaDiversity', entityId = 'entity'),
-                 plotReference = veupathUtils::PlotReference(value = "yAxis"),
+  computedVariableMetadata <- mbioUtils::VariableMetadata(
+                 variableClass = mbioUtils::VariableClass(value = "computed"),
+                 variableSpec = mbioUtils::VariableSpec(variableId = 'alphaDiversity', entityId = 'entity'),
+                 plotReference = mbioUtils::PlotReference(value = "yAxis"),
                  displayName = "Alpha Diversity",
                  displayRangeMin = 0,
                  displayRangeMax = max(max(df$alphaDiversity, na.rm = TRUE),1),
-                 dataType = veupathUtils::DataType(value = "NUMBER"),
-                 dataShape = veupathUtils::DataShape(value = "CONTINUOUS")
+                 dataType = mbioUtils::DataType(value = "NUMBER"),
+                 dataShape = mbioUtils::DataShape(value = "CONTINUOUS")
    )
 
   expect_error(new("ComputeResult",
                 recordIdColumn = 'entity.SampleID',
-                computedVariableMetadata = veupathUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
+                computedVariableMetadata = mbioUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
                 data = df))
   expect_error(new("ComputeResult",
                 name = c('alphaDiv', 'test'),
                 recordIdColumn = 'entity.SampleID',
-                computedVariableMetadata = veupathUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
+                computedVariableMetadata = mbioUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
                 data = df))
   expect_error(new("ComputeResult",
                 name = NULL,
@@ -57,25 +57,25 @@ test_that('ComputeResult validation works', {
   expect_error(new("ComputeResult",
                 name = c('alphaDiv'),
                 recordIdColumn = 'entity.SampleID',
-                computedVariableMetadata = veupathUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
+                computedVariableMetadata = mbioUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
                 data = df))
 
   df$alphaDiversity <- .1
-  computedVariableMetadata <- veupathUtils::VariableMetadata(
-                 variableClass = veupathUtils::VariableClass(value = "native"),
-                 variableSpec = veupathUtils::VariableSpec(variableId = 'alphaDiversity', entityId = 'entity'),
-                 plotReference = veupathUtils::PlotReference(value = "yAxis"),
+  computedVariableMetadata <- mbioUtils::VariableMetadata(
+                 variableClass = mbioUtils::VariableClass(value = "native"),
+                 variableSpec = mbioUtils::VariableSpec(variableId = 'alphaDiversity', entityId = 'entity'),
+                 plotReference = mbioUtils::PlotReference(value = "yAxis"),
                  displayName = "Alpha Diversity",
                  displayRangeMin = 0,
                  displayRangeMax = max(max(df$alphaDiversity, na.rm = TRUE),1),
-                 dataType = veupathUtils::DataType(value = "NUMBER"),
-                 dataShape = veupathUtils::DataShape(value = "CONTINUOUS")
+                 dataType = mbioUtils::DataType(value = "NUMBER"),
+                 dataShape = mbioUtils::DataShape(value = "CONTINUOUS")
    )
 
    expect_error(new("ComputeResult",
                 name = c('alphaDiv'),
                 recordIdColumn = 'entity.SampleID',
-                computedVariableMetadata = veupathUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
+                computedVariableMetadata = mbioUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
                 data = df))
 
   # TODO test that extra cols in data will err, dot notation of output cols or not, column order
@@ -90,24 +90,24 @@ test_that("ComputeResult writeMeta method returns well formatted json", {
   names(df) <- stripEntityIdFromColumnHeader(names(df))
   df$alphaDiversity <- .1
 
-  computedVariableMetadata <- veupathUtils::VariableMetadata(
-                 variableClass = veupathUtils::VariableClass(value = "computed"),
-                 variableSpec = veupathUtils::VariableSpec(variableId = 'alphaDiversity', entityId = 'entity'),
-                 plotReference = veupathUtils::PlotReference(value = "yAxis"),
+  computedVariableMetadata <- mbioUtils::VariableMetadata(
+                 variableClass = mbioUtils::VariableClass(value = "computed"),
+                 variableSpec = mbioUtils::VariableSpec(variableId = 'alphaDiversity', entityId = 'entity'),
+                 plotReference = mbioUtils::PlotReference(value = "yAxis"),
                  displayName = "Alpha Diversity",
                  displayRangeMin = 0,
                  displayRangeMax = max(max(df$alphaDiversity, na.rm = TRUE),1),
-                 dataType = veupathUtils::DataType(value = "NUMBER"),
-                 dataShape = veupathUtils::DataShape(value = "CONTINUOUS")
+                 dataType = mbioUtils::DataType(value = "NUMBER"),
+                 dataShape = mbioUtils::DataShape(value = "CONTINUOUS")
    )
 
   result <- new("ComputeResult",
                 name = "alphaDiv",
                 recordIdColumn = 'entity.SampleID',
-                computedVariableMetadata = veupathUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
+                computedVariableMetadata = mbioUtils::VariableMetadataList(S4Vectors::SimpleList(computedVariableMetadata)),
                 data = df)
 
-  jsonlist <- jsonlite::fromJSON(veupathUtils::toJSON(result@computedVariableMetadata))
+  jsonlist <- jsonlite::fromJSON(mbioUtils::toJSON(result@computedVariableMetadata))
 
   expect_equal(names(jsonlist), 'variables')
   expect_equal(names(jsonlist$variables), c("variableClass","variableSpec","plotReference","displayName","displayRangeMin","displayRangeMax","dataType","dataShape","isCollection","imputeZero","hasStudyDependentVocabulary"))
diff --git a/tests/testthat/test-class-CollectionWithMetadata.R b/tests/testthat/test-class-CollectionWithMetadata.R
index df3a823..a751aac 100644
--- a/tests/testthat/test-class-CollectionWithMetadata.R
+++ b/tests/testthat/test-class-CollectionWithMetadata.R
@@ -156,7 +156,7 @@ test_that("pruneFeatures works", {
     )
 
     # pruneFeatures touched SampleMetadata, which this CollectionWithMetadata object has none. it shouldnt fail for that though.
-    testing <- pruneFeatures(testing, veupathUtils::predicateFactory('proportionNonZero', 0.5))
+    testing <- pruneFeatures(testing, mbioUtils::predicateFactory('proportionNonZero', 0.5))
     expect_equal(nrow(testing@data), 200)
     expect_equal(ncol(testing@data), 3)
 })
diff --git a/tests/testthat/test-class-Megastudy.R b/tests/testthat/test-class-Megastudy.R
deleted file mode 100644
index ffffead..0000000
--- a/tests/testthat/test-class-Megastudy.R
+++ /dev/null
@@ -1,863 +0,0 @@
-megastudyDT <- data.table('study.id'=c('a','a','a','b','b','b'),
-                          'study.author'=c('Cool Guy', 'Cool Guy', 'Cool Guy', 'Uncool Guy', 'Uncool Guy', 'Uncool Guy'),
-                          'collection.id'=c(1,1,2,1,2,2), 
-                          'collection.attractant'=c('A','A','B','C','D','D'),
-                          'sample.id'=c(1,2,3,4,5,6),
-                          'sample.species'=c('species1','species2','species1','species1','species1','species2'),
-                          'sample.sex'=c('male','male','female','male','female','male'),
-                          'sample.specimen_count'=c(10,20,15,15,10,20),
-                          'assay.id'=c(11,12,13,14,15,16),
-                          'assay.pathogen_prevalence'=c(.1,.2,.3,.4,.5,.6),
-                          'assay.pathogen_presence'=c('Yes','Yes','No','No','Yes','No'),
-                          'assay.pathogen2_presence'=c('Yes','No','Yes','No','Yes','No'),
-                          'assay.pathogen3_presence'=c('No','Yes','No','Yes','No','Yes'),
-                          'assay.weighting_variable'=c(5,10,15,20,25,30))
-
-## the collectionsDT needs to include all collections, unless explicitly filtered against
-## it also needs to tell us values for any collection variables that are in the plot
-## were trying to impute samples, based on collections that dont have samples.
-## so the only way to get collection variables values for those samples, is to get them from the collectionsDT
-collectionsDT <- data.table(
-  'study.id' = c('a', 'a', 'a','b', 'b', 'b', 'b'),
-  'collection.id' = c(1, 2, 3, 1, 2, 3, 4),
-  'collection.attractant' = c('A', 'B', 'C', 'C', 'D', 'D', 'E'))
-
-sexVocabs.dt <- data.table::data.table(
-  'study.id' = c('a', 'a', 'a', 'a', 'a', 'b', 'b'),
-  'sample.sex' = c('female', 'male', 'non-binary', 'other', 'do not wish to specify', 'male', 'female'))
-
-sexVocabs <- veupathUtils::StudySpecificVocabulariesByVariable(
-  studyIdColumnName='study.id',
-  variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='sex'),
-  studyVocab=sexVocabs.dt
-)
-
-sexVocabsSMALL.dt <- data.table::data.table(
-  'study.id' = c('a', 'a', 'b', 'b'),
-  'sample.sex' = c('female', 'male', 'male', 'female'))
-
-sexVocabsSMALL <- veupathUtils::StudySpecificVocabulariesByVariable(
-  studyIdColumnName='study.id',
-  variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='sex'),
-  studyVocab=sexVocabsSMALL.dt
-)
-
-sexVocabsSingleStudy.dt <- data.table::data.table(
-  'study.id' = c('a', 'a', 'a', 'a', 'a'),
-  'sample.sex' = c('female', 'male', 'non-binary', 'other', 'do not wish to specify'))
-
-sexVocabsSingleStudy <- veupathUtils::StudySpecificVocabulariesByVariable(
-  studyIdColumnName='study.id',
-  variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='sex'),
-  studyVocab=sexVocabsSingleStudy.dt
-)
-
-speciesVocabs.dt <- data.table::data.table(
-  'study.id' = c('a', 'a', 'a', 'b', 'b', 'b'),
-  'sample.species' = c('species1', 'species2', 'species3', 'species1', 'species2', 'species5'))
-
-speciesVocabs <- veupathUtils::StudySpecificVocabulariesByVariable(
-  studyIdColumnName='study.id',
-  variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='species'),
-  studyVocab=speciesVocabs.dt
-)
-
-speciesVocabsSMALL.dt <- data.table::data.table(
-  'study.id' = c('a', 'a', 'b', 'b'),
-  'sample.species' = c('species1', 'species2', 'species1', 'species2'))
-
-speciesVocabsSMALL <- veupathUtils::StudySpecificVocabulariesByVariable(
-  studyIdColumnName='study.id',
-  variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='species'),
-  studyVocab=speciesVocabsSMALL.dt
-)
-
-pathogenVocabs.dt <- data.table::data.table(
-  'study.id' = c('a', 'a', 'b', 'b'),
-  'assay.pathogen_presence' = c('Yes', 'No', 'Yes', 'No'))
-
-pathogenVocabs <- veupathUtils::StudySpecificVocabulariesByVariable(
-  studyIdColumnName='study.id',
-  variableSpec=veupathUtils::VariableSpec(entityId='assay',variableId='pathogen_presence'),
-  studyVocab=pathogenVocabs.dt
-)
-
-test_that("Megastudy and associated validation works", {
-  # works at all
-  m <- Megastudy(data=megastudyDT,
-                 ancestorIdColumns=c('study.id', 'collection.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs)))
-
-  expect_equal(slotNames(m), c('data','ancestorIdColumns','studySpecificVocabularies','collectionsDT'))
-  expect_equal(length(m@studySpecificVocabularies), 1)
-  expect_equal(data.table::uniqueN(m@studySpecificVocabularies[[1]]@studyVocab[,1]), 2)
-  expect_equal(slotNames(m@studySpecificVocabularies[[1]]), c("studyIdColumnName","variableSpec","studyVocab"))    
-
-  # works w multiple vocab lists
-  m <- Megastudy(data=megastudyDT,
-                 ancestorIdColumns=c('study.id', 'collection.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
-
-  expect_equal(slotNames(m), c('data','ancestorIdColumns','studySpecificVocabularies','collectionsDT'))
-  expect_equal(length(m@studySpecificVocabularies), 2)
-  expect_equal(data.table::uniqueN(m@studySpecificVocabularies[[2]]@studyVocab[,1]), 2)
-  expect_equal(slotNames(m@studySpecificVocabularies[[2]]), c("studyIdColumnName","variableSpec","studyVocab"))              
-
-  # errs if no ancestors/ids?
-  expect_error(Megastudy(data=megastudyDT,
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
-
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=NA,
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
-
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=NULL,
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))   
-
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=c(),
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))    
-
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=character(),
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))   
-
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=NA_character_,
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))                                                                                  
-
-  # errs if extra ancestor ids provided but not in dt
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=c('study.id', 'collection.id', 'imNOTan.id'),
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
-
-  # errs if special vocabs on different entities
-  expect_error(Megastudy(data=megastudyDT,
-                         ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'),
-                         studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, pathogenVocabs))))
-
-  # errs if studyIdColumnName and varSpec col name not in dt
-  expect_error(StudySpecificVocabulariesByVariable(
-    studyIdColumnName='imNOTan.id', 
-    variableSpec=VariableSpec(entityId='sample',variableId='species'), 
-    vocabulary=speciesVocabs.dt)
-  )
-
-  # errs if no study id provided for study vocab
-  expect_error(StudySpecificVocabulariesByVariable(variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=NA, variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=NULL, variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=c(), variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=character(), variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=NA_character_, variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
-
-  # errs if no var spec provided for study vocab
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName='study.id', studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName='study.id', variableSpec=NA, studyVocab=speciesVocabs.dt))
-  expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName='study.id', variableSpec=NULL, studyVocab=speciesVocabs.dt))
-})
-
-# TODO this could go in its own file maybe
-test_that("imputeZeroes method is sane", {
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant', 'study.author'), with=FALSE],
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
-
-  # case where neither study nor collection vars in the plot
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  # result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
-  expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
-  # 5 sexes * 3 species in study A (15) + 2 sexes * 3 species in study B (6) * 2 collections per study = 42
-  expect_equal(nrow(imputedDT), 42)
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
-
-  # case where some collection ids are missing
-  # in real life, we have some collections where all samples are 0 and so not loaded
-  # in this case, the collection ids are missing from the data table R gets handed. 
-  # we want to impute zeroes for their samples anyhow.
-
-  # in this version, some collection level variables are in both data and collectionsDT
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant'), with=FALSE],
-                  ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                  studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)),
-                  collectionsDT=collectionsDT)
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  # result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
-  expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
-  # 5 sexes * 3 species * 3 collections in study A (45) + 2 sexes * 3 species * 4 collections in study B (30) = 69
-  expect_equal(nrow(imputedDT), 69) ## TODO check these numbers
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 63)
-
-  # in this version, collection level variables are only in collectionsDT
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species'), with=FALSE],
-                  ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                  studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)),
-                  collectionsDT=collectionsDT)
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  # result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
-  expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
-  # 5 sexes * 3 species * 3 collections in study A (45) + 2 sexes * 3 species * 4 collections in study B (30) = 69
-  expect_equal(nrow(imputedDT), 69) ## TODO check these numbers
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 63)
-
-  #################################################################################################################
-
-
-  # case where one study vocab is missing a study
-  mDTSexSingleStudy <- megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant', 'study.author'), with=FALSE]
-  mDTSexSingleStudy$sample.sex[mDTSexSingleStudy$study.id == 'b'] <- NA_character_
-  mDTSexSingleStudy <- unique(mDTSexSingleStudy)
-
-  mSexSingleStudy <- Megastudy(data=mDTSexSingleStudy,
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabsSingleStudy)))
-
-  imputedDT <- getDTWithImputedZeroes(mSexSingleStudy, variables, FALSE)
-  # result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
-  expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
-  # 5 sexes * 3 species in study A (15) + 3 species in study B * 2 collections per study = 42
-  expect_equal(nrow(imputedDT), 36)
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 30)
-
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant', 'study.author'), with=FALSE],
-                  ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                  studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
-
-  # collection entity var is present
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(c("sample.species","sample.specimen_count","collection.attractant") %in% names(imputedDT)), TRUE)
-  expect_equal(nrow(imputedDT), 42)
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
-
-  # both collection and study entity vars are present
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(c("sample.species","sample.specimen_count","collection.attractant","study.author") %in% names(imputedDT)), TRUE)
-  expect_equal(nrow(imputedDT), 42)
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
-
-  # all values in vocab already present
-  megastudyDTSMALL <- rbind(megastudyDT, 
-                            data.table::data.table(study.id=c('a','b'),
-                                                   study.author=c('Cool Guy','Uncool Guy'),
-                                                   collection.id=c(2,1),
-                                                   collection.attractant=c('B','C'),
-                                                   sample.id=c(7,8),
-                                                   sample.species=c('species2','species2'),
-                                                   sample.sex=c('female','female'),
-                                                   sample.specimen_count=c(5,5),
-                                                   assay.id=c(17,18),
-                                                   assay.pathogen_prevalence=c(.7,.8),
-                                                   assay.pathogen_presence=c('No','Yes'),
-                                                   assay.pathogen2_presence=c('Yes','No'),
-                                                   assay.pathogen3_presence=c('No','Yes'),
-                                                   assay.weighting_variable=c(35,40)))
-  
-  mCOMPLETE <- Megastudy(data=megastudyDTSMALL[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.species', 'sample.sex', 'collection.attractant', 'study.author'), with=FALSE],
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabsSMALL, sexVocabsSMALL)))
-  
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(mCOMPLETE, variables, FALSE)
-  expect_equal(all(names(imputedDT) %in% names(mCOMPLETE@data)), TRUE)
-  # 2 species * 2 sexes * 2 collections * 2 studies = 16
-  expect_equal(nrow(imputedDT), 16)
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 8)
-
-  # no weighting var in plot
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(mCOMPLETE, variables, FALSE)
-  expect_equal(all(names(imputedDT) %in% names(mCOMPLETE@data)), TRUE)
-  expect_equal(nrow(imputedDT), nrow(mCOMPLETE@data))
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
-
-  # an assay var is present 
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id','assay.id', 'assay.pathogen_prevalence', 'sample.species', 'sample.sex', 'collection.attractant', 'study.author')],
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
-  expect_equal(nrow(imputedDT), nrow(m@data))
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'pathogen_presence', entityId = 'assay'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      # empty plotReference means that it is not plotted
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
-  expect_equal(nrow(imputedDT), nrow(m@data))
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
-
-  # multiple special vocabs in same plot, w one shared weighting var
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.species', 'sample.sex', 'collection.attractant'), with=FALSE],
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs,sexVocabs)))
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary=TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'))
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(c("sample.species","sample.specimen_count","sample.sex","collection.attractant") %in% names(imputedDT)), TRUE)
-  expect_equal(nrow(imputedDT), 42)
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
-
-  # special vocab on sample, regular weighting var on assay
-  # phase this in
-#  m <- Megastudy(data=megastudyDT,
-#                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'),
-#                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs)))
-#
-#  variables <- new("VariableMetadataList", SimpleList(
-#    new("VariableMetadata",
-#      variableClass = new("VariableClass", value = 'native'),
-#      variableSpec = new("VariableSpec", variableId = 'weighting_variable', entityId = 'assay'),
-#      plotReference = new("PlotReference", value = 'xAxis'),
-#      dataType = new("DataType", value = 'NUMBER'),
-#      dataShape = new("DataShape", value = 'CONTINUOUS')),
-#    new("VariableMetadata",
-#      variableClass = new("VariableClass", value = 'native'),
-#      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-#      plotReference = new("PlotReference", value = 'yAxis'),
-#      dataType = new("DataType", value = 'NUMBER'),
-#      dataShape = new("DataShape", value = 'CONTINUOUS')),
-#    new("VariableMetadata",
-#      variableClass = new("VariableClass", value = 'native'),
-#      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-#      plotReference = new("PlotReference", value = 'overlay'),
-#      dataType = new("DataType", value = 'STRING'),
-#      dataShape = new("DataShape", value = 'CATEGORICAL'),
-#      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-#      hasStudyDependentVocabulary=TRUE),
-#    new("VariableMetadata",
-#      variableClass = new("VariableClass", value = 'native'),
-#      variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
-#      plotReference = new("PlotReference", value = 'facet1'),
-#      dataType = new("DataType", value = 'STRING'),
-#      dataShape = new("DataShape", value = 'CATEGORICAL'),
-#      weightingVariableSpec = VariableSpec(variableId='weighting_variable',entityId='assay'),
-#      hasStudyDependentVocabulary = FALSE)
-#  ))
-#
-#  imputedDT <- getDTWithImputedZeroes(m, variables)
-#  expect_equal(names(imputedDT), c("sample.species","sample.specimen_count","collection.attractant","study.author","study.id","collection.id","sample.id"))
-#  expect_equal(nrow(imputedDT), 12)
-#  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 6)
-#
-  # no special vocab or weight present
-  m <- Megastudy(data=megastudyDT,
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'))
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'))
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
-  expect_equal(nrow(imputedDT), nrow(m@data))
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
-
-  # only regular weight var present
-  m <- Megastudy(data=megastudyDT,
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'))
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS'),
-      weightingVariableSpec = VariableSpec(variableId='weighting_variable',entityId='assay')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'))
-  ))
-
-  imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
-  expect_equal(nrow(imputedDT), nrow(m@data))
-  expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
-
-  # sample var without a study vocab present when another sample var has one
-  m <- Megastudy(data=megastudyDT,
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs)))
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS')),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
-      plotReference = new("PlotReference", value = 'facet1'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'))
-  ))
-
-  expect_error(getDTWithImputedZeroes(m, variables, FALSE))
-
-  # variable collection exists in plot
-  pathogenVariableCollectionVocabs.dt <- data.table::data.table(
-    study.id = c('a','a','b','b'),
-    assay.pathogen_presence_variable_collection = c('Yes','No','Yes','No')
-  )
-  pathogenVariableCollectionVocabs <- StudySpecificVocabulariesByVariable(
-    studyIdColumnName='study.id',
-    variableSpec=VariableSpec(entityId='assay',variableId='pathogen_presence_variable_collection'),
-    studyVocab=pathogenVariableCollectionVocabs.dt
-  )
-
-  m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'assay.id', 'sample.specimen_count', 'collection.attractant', 'study.author', 'assay.pathogen_presence', 'assay.pathogen2_presence', 'assay.pathogen3_presence'), with=FALSE],
-                 ancestorIdColumns=c('study.id', 'collection.id', 'sample.id', 'assay.id'),
-                 studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(pathogenVariableCollectionVocabs)))
-
-  variables <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'pathogen_presence_variable_collection', entityId = 'assay'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      isCollection = TRUE,
-      weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
-      hasStudyDependentVocabulary = TRUE,
-      members = VariableSpecList(S4Vectors::SimpleList(VariableSpec(variableId='pathogen_presence', entityId='assay'),
-                                                       VariableSpec(variableId='pathogen2_presence', entityId='assay'),
-                                                       VariableSpec(variableId='pathogen3_presence', entityId='assay')))
-    ),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS'))
-  ))
-
-  ## Im inclined to commenting this for now. i dont think we have a case like this yet,
-  ## and im not even completely sure yet what the api would be for when we do...
-
-  #imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
-  # result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
-  #expect_equal(all(c("assay.pathogen_presence","assay.pathogen2_presence","assay.pathogen3_presence","sample.specimen_count") %in% names(imputedDT)), TRUE)
-  # 2 studies * 2 collections per study * 2 values for each of 3 pathogen variables = 32?
-  # im not sure this test makes any sense, bc were imputing 0 on a sample for a collection on assay
-  # im going to comment until we see a real use case
-  #expect_equal(nrow(imputedDT), 32)
-  #expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 26)
-})
-
-test_that("we have reasonable perf w a real-ish use case", {
-  megastudyVariablesReal <- new("VariableMetadataList", SimpleList(
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'POPBIO_8000017', entityId = 'EUPATH_0000609'),
-      plotReference = new("PlotReference", value = 'yAxis'),
-      dataType = new("DataType", value = 'NUMBER'),
-      dataShape = new("DataShape", value = 'CONTINUOUS'),
-      hasStudyDependentVocabulary = FALSE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'PATO_0000047', entityId = 'EUPATH_0000609'),
-      plotReference = new("PlotReference", value = 'xAxis'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'OBI_0001909', entityId = 'EUPATH_0000609'),
-      plotReference = new("PlotReference", value = 'overlay'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'UBERON_0000105', entityId = 'EUPATH_0000609'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE),
-    new("VariableMetadata",
-      variableClass = new("VariableClass", value = 'native'),
-      variableSpec = new("VariableSpec", variableId = 'EUPATH_0043227', entityId = 'EUPATH_0000609'),
-      dataType = new("DataType", value = 'STRING'),
-      dataShape = new("DataShape", value = 'CATEGORICAL'),
-      weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
-      hasStudyDependentVocabulary = TRUE)
-  ))
-
-  ## minimal use case, with real vocabularies. based on a heavily subsetted megastudy.
-  megastudyReal <- Megastudy(
-    data=megastudyDataReal,
-    ancestorIdColumns=c(
-      'EUPATH_0000605.Study_stable_id', 
-      'GAZ_00000448.GeographicLocation_stable_id',
-      'OBI_0000659.ParentOfSample_stable_id',
-      'EUPATH_0000609.Sample_stable_id'
-    ),
-    studySpecificVocabularies=studyVocabsReal
-  )
-
-  benchmark <- microbenchmark::microbenchmark(getDTWithImputedZeroes(megastudyReal, megastudyVariablesReal, verbose = FALSE))
-  print(benchmark)
-  expect_true(mean(benchmark$time)/1000000 < 55) ## this is in milliseconds
-  expect_true(median(benchmark$time)/1000000 < 55)
-})
\ No newline at end of file
diff --git a/tests/testthat/test-class-Statistic.R b/tests/testthat/test-class-Statistic.R
deleted file mode 100644
index b74e928..0000000
--- a/tests/testthat/test-class-Statistic.R
+++ /dev/null
@@ -1,93 +0,0 @@
-test_that("Statistic validation works", {
-    # only name and value slot is required, and takes a single value
-    expect_error(new("Statistic",
-                     "value" = NA_real_))
-    expect_error(new("Statistic",
-                     "name" = NA_character_,
-                     "value" = 123))
-    expect_error(new("Statistic",
-                     "name" = c("foo", "bar"),
-                     "value" = 123))
-    expect_error(new("Statistic",
-                     "name" = "foo",
-                     "value" = c(123, 456)))
-
-    stat <- new("Statistic", 
-                "name" = "PI", 
-                "value" = 3.141592)
-
-    expect_equal(stat@pvalue, NA_character_)
-    expect_equal(stat@confidenceInterval@minimum, NA_real_)
-    expect_equal(stat@confidenceLevel, NA_real_)
-
-    #pvalue must have a single value between 0 and 1
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "pvalue" = 1.2))
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "pvalue" = -0.05))
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "pvalue" = c(0.001, 0.0001)))
-
-    #if CI provided, value is within it
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "confidenceInterval" = Range("minimum" = 0.001, "maximum" = 0.0001)))
-    
-    #CI has CI level
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "confidenceInterval" = Range("minimum" = 0.001, "maximum" = 10)))
-    
-    #CI level is between 0 and 1 and is accompanied by a CI
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "confidenceInterval" = Range("minimum" = 0.001, "maximum" = 0.0001),
-                     "confidenceLevel" = 1.2))
-
-    expect_error(new("Statistic", 
-                     "name" = "PI", 
-                     "value" = 3.141592,
-                     "confidenceLevel" = .95))
-})
-
-test_that("toJSON result is properly formatted for Statistic", {
-    stat <- new("Statistic", 
-                "name" = "PI", 
-                "value" = 3.141592,
-                "confidenceInterval" = Range("minimum" = 0.001, "maximum" = 10),
-                "confidenceLevel" = .95,
-                "pvalue" = '<0.0001')
-
-    statjson <- veupathUtils::toJSON(stat)
-    statlist <- jsonlite::fromJSON(statjson)
-
-    expect_equal(names(statlist), 'PI')
-    expect_equal(names(statlist$PI), c('value', 'confidenceInterval', 'confidenceLevel', 'pvalue'))
-    expect_equal(statlist$PI$confidenceInterval, c('(0.001 - 10)'))
-
-    stat2 <- new("Statistic", 
-                "name" = "PI2", 
-                "value" = 3.141592,
-                "confidenceInterval" = Range("minimum" = 1, "maximum" = 5),
-                "confidenceLevel" = .95,
-                "pvalue" = ".47")
-    stats <- StatisticList(S4Vectors::SimpleList(stat, stat2))
-
-    statsjson <- veupathUtils::toJSON(stats)
-    statslist <- jsonlite::fromJSON(statsjson)
-
-    expect_equal(names(statslist), 'statistics')
-    expect_equal(names(statslist$statistics), c('PI', 'PI2'))
-    expect_equal(names(statslist$statistics$PI2), c('value', 'confidenceInterval', 'confidenceLevel', 'pvalue'))
-    expect_equal(statslist$statistics$PI2$confidenceInterval, c(NA, '(1 - 5)'))
-    expect_equal(class(statslist$statistics$PI2$pvalue[2]), 'character')
-})
diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R
index 07add20..7986c28 100644
--- a/tests/testthat/test-correlation.R
+++ b/tests/testthat/test-correlation.R
@@ -203,7 +203,7 @@ test_that("correlation returns an appropriately structured result for metadata v
   statsData <- result@statistics@statistics
   expect_s3_class(statsData, 'data.frame')
   expect_equal(names(statsData), c('data1','data2','correlationCoef','pValue'))
-  nNumericCols <- length(veupathUtils::findNumericCols(sampleMetadata@data[,2:ncol(sampleMetadata@data)]))
+  nNumericCols <- length(mbioUtils::findNumericCols(sampleMetadata@data[,2:ncol(sampleMetadata@data)]))
   expect_equal(nrow(statsData), ((nNumericCols * nNumericCols) - 3)/2) # Should be number of number of numeric vars * number of numeric vars
   expect_equal(as.character(unique(statsData$data1)), c('entity.contA', 'entity.contB'))
   expect_equal(as.character(unique(statsData$data2)), c('entity.contB', 'entity.contC'))
diff --git a/tests/testthat/test-methods-VariableMetadata.R b/tests/testthat/test-methods-VariableMetadata.R
index 2ea6a48..96ee002 100644
--- a/tests/testthat/test-methods-VariableMetadata.R
+++ b/tests/testthat/test-methods-VariableMetadata.R
@@ -189,7 +189,7 @@ test_that("toJSON result is properly formatted for VariableMetadata", {
                     new("VariableSpec", variableId = 'c', entityId = 'b')
                  ))
             )
-    vmjson <- veupathUtils::toJSON(vm)
+    vmjson <- mbioUtils::toJSON(vm)
     vmlist <- jsonlite::fromJSON(vmjson)
 
     expect_equal(names(vmlist), 'variableMetadata')
@@ -214,7 +214,7 @@ test_that("toJSON result is properly formatted for VariableMetadata", {
                  dataShape = new("DataShape", value = 'BINARY'),
                  vocabulary = c('a', 'b')
             )
-    vmjson <- veupathUtils::toJSON(vm)
+    vmjson <- mbioUtils::toJSON(vm)
     vmlist <- jsonlite::fromJSON(vmjson)
 
     expect_equal(names(vmlist), 'variableMetadata')
diff --git a/tests/testthat/test-utils-numeric.R b/tests/testthat/test-utils-numeric.R
deleted file mode 100644
index 5f3416a..0000000
--- a/tests/testthat/test-utils-numeric.R
+++ /dev/null
@@ -1,251 +0,0 @@
-## Tests for numeric utils functions
-
-test_that("getDiscretizedBins returns sane results", {
-  set.seed(1)
-  x <- rnorm(100)
-  
-  # NULL numBins
-  expect_equal(length(getDiscretizedBins(x, 'equalInterval')), 10)
-  expect_equal(length(getDiscretizedBins(x, 'quantile')), 10)
-  expect_equal(length(getDiscretizedBins(x, 'sd')), 6)
-
-  # with and without values
-  bins <- getDiscretizedBins(x, 'quantile', 10, FALSE)
-  expect_equal(all(is.na(unlist(lapply(bins, FUN = function(x) {x@value})))), TRUE)
-  bins <- getDiscretizedBins(x, 'quantile', 10, TRUE)
-  expect_equal(any(is.na(unlist(lapply(bins, FUN = function(x) {x@value})))), FALSE)
-
-  # non-numeric, NULL or NA for input
-  expect_error(getDiscretizedBins(c('a','b','c')))
-  expect_error(getDiscretizedBins(NA))
-  expect_error(getDiscretizedBins(NULL))
-
-  dates <- as.Date(c('1999-12-11','1999-06-14','1999-02-26','1999-05-24','1999-02-25','1999-09-06',
-                     '1999-07-24','1999-05-29','1999-01-03','1999-06-28','1999-02-13','1999-03-17'))
-
-  dateBins <- getDiscretizedBins(dates)
-  # Test that the bin labels match the bin starts and ends, as well as that the
-  # dates of bins make sense
-  expect_equal(length(dateBins), 10)
-  expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][1], paste0("[",dateBins[[1]]@binStart))
-  expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][2], paste0(dateBins[[1]]@binEnd, "]"))
-  expect_equal(dateBins[[1]]@binStart, min(dates))
-  expect_equal(dateBins[[10]]@binEnd, max(dates))
-
-  expect_equal(length(getDiscretizedBins(dates, 'quantile')), 10)
-  expect_equal(length(getDiscretizedBins(dates, 'sd')), 5)
-
-  ## different types of dates... 
-  dates <- as.Date(c("1969-06-05T00:00:00", "1969-06-06T00:00:00", "1969-06-07T00:00:00","1969-06-08T00:00:00", "1969-06-09T00:00:00", "1969-06-10T00:00:00",
-              "1969-06-11T00:00:00", "1969-06-12T00:00:00", "1969-06-13T00:00:00",
-              "1969-06-14T00:00:00", "1969-06-15T00:00:00", "1969-06-16T00:00:00",
-              "1969-06-17T00:00:00", "1969-06-18T00:00:00", "1969-06-19T00:00:00",
-              "1969-06-20T00:00:00", "1969-06-21T00:00:00", "1969-06-22T00:00:00",
-              "1969-06-23T00:00:00", "1969-06-24T00:00:00"))
-
-  dateBins <- getDiscretizedBins(dates)
-  # Test that the bin labels match the bin starts and ends, as well as that the
-  # dates of bins make sense
-  expect_equal(length(dateBins), 10)
-  expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][1], paste0("[",dateBins[[1]]@binStart))
-  expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][2], paste0(dateBins[[1]]@binEnd, "]"))
-  expect_equal(dateBins[[1]]@binStart, min(dates))
-  expect_equal(dateBins[[10]]@binEnd, max(dates))
-
-  expect_equal(length(getDiscretizedBins(dates, 'quantile')), 10)
-  expect_equal(length(getDiscretizedBins(dates, 'sd')), 4)
-
-  ## return as many bins as possible for these cases
-  # almost no data
-  expect_equal(length(getDiscretizedBins(1)), 1)
-  expect_equal(length(getDiscretizedBins(1, 'quantile')), 1)
-  expect_equal(length(getDiscretizedBins(1, 'sd')), 1)
-  x <- rnorm(2)
-  expect_equal(length(getDiscretizedBins(x)), 10)
-  expect_equal(sum(unlist(lapply(getDiscretizedBins(x), FUN = function(x){0 != x@value}))), 2)
-  expect_equal(length(getDiscretizedBins(x, 'quantile')), 10)
-  expect_equal(sum(unlist(lapply(getDiscretizedBins(x, 'quantile'), FUN = function(x){0 != x@value}))), 2)
-  expect_equal(length(getDiscretizedBins(x, 'sd')), 2)
-
-  # skewed data
-  x <- rnbinom(100, 10, 0.5)
-  expect_equal(length(getDiscretizedBins(x)), 10)
-  expect_equal(length(getDiscretizedBins(x, 'equalInterval', 50)), 50)
-  expect_equal(length(getDiscretizedBins(x, 'quantile')), 10)
-  expect_equal(length(getDiscretizedBins(x, 'quantile', 50)) <= 50, TRUE)
-  expect_equal(length(getDiscretizedBins(x, 'sd')) <= 6, TRUE)
-})
-
-test_that("nonZeroRound only returns 0 if it receives one", {
-  expect_equal(nonZeroRound(0),0)
-  expect_equal(nonZeroRound(123456789.987654321, 4) == 0, FALSE)
-  expect_equal(nonZeroRound(0.987654321, 4) == 0, FALSE)
-  expect_equal(nonZeroRound(0.00000019, 4) == 0, FALSE)
-  expect_equal(nonZeroRound(0.00000019, 4), 0.0000002)
-})
-
-test_that("setNaToZero replaces intended NAs", {
-  
-  df <- iris
-
-  # Add NAs to all columns
-  nMissing <- 10
-  df <- as.data.frame(lapply(df, function(x) {x[sample(1:length(x), size=nMissing)] <- NA; return(x)}))
-
-  # With specified columns
-  setNaToZero(df, cols=c("Sepal.Length", "Sepal.Width"))
-  expect_equal(class(df), 'data.frame')
-  expect_equal(sum(is.na(df)), 3*nMissing)
-  expect_equal(sum(df[, c('Sepal.Length', 'Sepal.Width')] == 0), 2*nMissing)
-
-  # Err if given a non-numeric column
-  expect_error(naToZero(df, cols = c("Sepal.Length", "Sepal.Width", "Species")))
-
-  # With defualt cols and data.table. Should change only numeric columns
-  dt <- data.table::as.data.table(df)
-  setNaToZero(dt)
-  expect_equal(class(dt), c('data.table','data.frame'))
-  expect_equal(sum(is.na(dt)), nMissing)
-  expect_equal(sum(dt[, c('Petal.Length', 'Petal.Width')] == 0), 2*nMissing)
-
-})
-
-test_that("naToZero replaces intended NAs", {
-
-  df <- iris
-
-  # Add NAs to all columns
-  nMissing <- 10
-  df <- as.data.frame(lapply(df, function(x) {x[sample(1:length(x), size=nMissing)] <- NA; return(x)}))
-
-  # With specified columns
-  df <- naToZero(df, cols=c("Sepal.Length", "Sepal.Width"))
-  expect_equal(class(df), 'data.frame')
-  expect_equal(sum(is.na(df)), 3*nMissing)
-  expect_equal(sum(df[, c('Sepal.Length', 'Sepal.Width')] == 0), 2*nMissing)
-
-  # With defualt cols and data.table input. Should change only numeric columns
-  dt <- data.table::as.data.table(df)
-  dt <- naToZero(dt)
-  expect_equal(class(dt), c('data.table','data.frame'))
-  expect_equal(sum(is.na(dt)), nMissing)
-  expect_equal(sum(dt[, c('Petal.Length', 'Petal.Width')] == 0), 2*nMissing)
-
-  # Testing additional object types
-  # Lists
-  lst <- lapply(iris, function(x) {x[sample(1:length(x), size=nMissing)] <- NA; return(x)})
-  lst <- naToZero(lst)
-  expect_equal(class(lst), 'list')
-  expect_equal(sum(unlist(lapply(lst, function(x) {sum(is.na(x))}))), nMissing)
-  expect_equal(sum(unlist(lapply(lst[c('Petal.Length', 'Petal.Width')], function(x) {sum(x==0)}))), 2*nMissing)
-
-  # Err if given a non-numeric column
-  expect_error(naToZero(lst, cols = c("Sepal.Length", "Sepal.Width", "Species")))
-
-
-  # Vector
-  vec <- c(1,2,3,NA)
-  vec <- naToZero(vec)
-  expect_equal(class(vec), 'numeric')
-  expect_true(all(!is.na(vec)))
-  expect_equal(vec, c(1,2,3,0))
-
-  # Matrix
-  mat <- matrix(rnorm(36), nrow=6)
-  mat[sample(1:36, size=nMissing, replace=F)] <- NA
-  mat <- naToZero(mat)
-  expect_equal(class(mat), c('matrix', 'array'))
-  expect_true(any(!is.na(mat)))
-  expect_equal(sum(mat == 0), nMissing)
-
-  # Do nothing to strings
-  vec <- c('1','2','3',NA)
-  vec <- naToZero(vec)
-  expect_equal(class(vec), 'character')
-  expect_equal(vec, c('1','2','3',NA))
-
-})
-
-
-test_that("finding and validating numeric columns works", {
-
-  df <- iris
-
-  # With specified columns
-  numericCols <- findNumericCols(df)
-  expect_equal(numericCols, c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'))
-
-  # In a data.table
-  dt <- data.table::as.data.table(df)
-  numericCols <- findNumericCols(dt)
-  expect_equal(numericCols, c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'))
-  
-  validatedCols <- validateNumericCols(dt, c('Sepal.Length', 'Sepal.Width'))
-  expect_equal(validatedCols, c('Sepal.Length', 'Sepal.Width'))
-
-  validatedCols <- validateNumericCols(dt, c(1, 2, 3))
-  expect_equal(validatedCols, c(1, 2, 3))
-
-  # In a list
-  lst <- as.list(iris)
-  numericCols <- findNumericCols(lst)
-  expect_equal(numericCols, c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'))
-
-  validatedCols <- validateNumericCols(lst, c('Sepal.Length', 'Sepal.Width'))
-  expect_equal(validatedCols, c('Sepal.Length', 'Sepal.Width'))
-
-  validatedCols <- validateNumericCols(lst, c(2, 3, 4))
-  expect_equal(validatedCols, c(2, 3, 4))
-
-  # If no numeric cols
-  dt_string <- dt[, lapply(.SD, as.character)]
-  numericCols <- findNumericCols(dt_string)
-  expect_equal(numericCols, NULL)
-
-  lst_string <- lapply(iris, function(x) {x <- as.character(x); return(x)})
-  numericCols <- findNumericCols(lst_string)
-  expect_equal(numericCols, NULL)
-
-  # validateNumericCols should err if given non-numeric column names
-  expect_error(validateNumericCols(dt, c('Sepal.Length', 'Species')))
-  expect_error(validateNumericCols(lst, c('Sepal.Length', 'Species')))
-
-  # err if column names do not exist
-  expect_error(validateNumericCols(dt, c('a', 'Species')))
-  expect_error(validateNumericCols(lst, c('a', 'Species')))
-
-  # err if indices too large
-  expect_error(validateNumericCols(dt, c(1, 2, 100)))
-  expect_error(validateNumericCols(lst, c(1, 2, 100)))
-
-  # err if indices too small
-  expect_error(validateNumericCols(dt, c(-1, 2, 100)))
-  expect_error(validateNumericCols(lst, c(-1, 2, 100)))
-
-  # return NULL for NULL input
-  validatedCols <- validateNumericCols(dt, NULL)
-  expect_equal(validatedCols, NULL)
-
-  validatedCols <- validateNumericCols(lst, NULL)
-  expect_equal(validatedCols, NULL)
-
-  # remove NAs in cols arg
-  validatedCols <- validateNumericCols(dt, c(1, NA, 4))
-  expect_equal(validatedCols, c(1, 4))
-
-  validatedCols <- validateNumericCols(lst, c(1, NA, 4))
-  expect_equal(validatedCols, c(1, 4))
-
-})
-
-test_that("signifDigitEpsilon returns appropriate results", {
-  expect_equal(signifDigitEpsilon(1.23, 3), 0.01)
-  expect_equal(signifDigitEpsilon(11.0, 3), 0.1)
-  expect_equal(signifDigitEpsilon(12.3, 3), 0.1)
-  expect_equal(signifDigitEpsilon(101000, 3), 1000)
-  expect_equal(signifDigitEpsilon(1.20e-05, 3), 1.0e-07)
-  expect_equal(signifDigitEpsilon(0.0123e-05, 3), 1.0e-09)
-  expect_equal(signifDigitEpsilon(-2.34e-02, 3), 1.0e-04)
-  expect_equal(signifDigitEpsilon(1234567, 7), 1)
-  expect_equal(signifDigitEpsilon(-1234567, 7), 1)
-})
\ No newline at end of file