diff --git a/DESCRIPTION b/DESCRIPTION index 492ae33..9aecf04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,6 +4,7 @@ Title: Data Cleaning - what else? Version: 0.1.0 Author: Quantargo Instructor Maintainer: Quantargo Instructor +Imports: stats Description: Some lengthy description which somebody should write Sometime. License: GPL-3 diff --git a/NAMESPACE b/NAMESPACE index d75f824..1015c5a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1 +1,2 @@ exportPattern("^[[:alpha:]]+") +importFrom("stats", "quantile") diff --git a/R/meanimpute.R b/R/meanimpute.R index cc7cf5e..5a58f8d 100644 --- a/R/meanimpute.R +++ b/R/meanimpute.R @@ -1,5 +1,14 @@ #' Meanimputation +#' +#' Replaces NA values with the mean of \code{x} +#' +#' @param x A numeric vector containing real numbers +#' @return Imputed vector \code{x} where NAs are replaced with the mean of \code{x} +#' @examples +#' meanimpute(x = c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) +#' meanimpute(c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) #' @export + meanimpute <- function(x) { x[is.na(x)] <- mean(x, na.rm = TRUE) x diff --git a/R/transform_log.R b/R/transform_log.R new file mode 100644 index 0000000..244ed48 --- /dev/null +++ b/R/transform_log.R @@ -0,0 +1,26 @@ +#' Transform_log +#' +#' Transforms values to natural logarithm. +#' +#' @param x A numeric vector containing positive real numbers +#' @return Natural logarithm of the values in \code{x}. +#' @examples +#' transform_log(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +#' transform_log(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +#' @export + +transform_log <- function(x){ + + if( anyNA(x) ){ + stop("Input x contains value NA") + } + else if( length(x) == 0 ){ + stop("Input x is NULL") + } + else if( any(x <= 0) ){ + stop("Input x contains non-positive values") + } + + log(x) + +} diff --git a/R/windsorize.R b/R/windsorize.R index b4e15e6..51e0bd3 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -1,10 +1,29 @@ #' Windsorize #' #' Do some windsorization. 
+#' +#' @param x A numeric vector containing real numbers +#' @param p A number between 0 and 1, representing the mass of probability. Values are winsorized outside \code{p}. Default value is 0.9. +#' @return Windsorized vector of \code{x}. +#' @examples +#' windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +#' windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), p = 0.8) +#' windsorize(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 0.8) #' @export + windsorize <- function(x, p = .90) { - q <- quantile(x, p) - x[x >= q] <- q + if (!is.numeric(p) || length(p) != 1 || is.na(p) || p < 0 || p > 1) stop("Input p must be a single number between 0 and 1") + if( anyNA(x) ){ + stop("Input x contains value NA") + } + else if( length(x) == 0 ){ + stop("Input x is NULL") + } + + q_lower <- quantile(x, (1-p)/2) + q_upper <- quantile(x, p + (1-p)/2) + x[x <= q_lower] <- q_lower + x[x >= q_upper] <- q_upper x } diff --git a/man/meanimpute.Rd b/man/meanimpute.Rd index 8139e8f..7c4032c 100644 --- a/man/meanimpute.Rd +++ b/man/meanimpute.Rd @@ -6,6 +6,16 @@ \usage{ meanimpute(x) } +\arguments{ +\item{x}{A numeric vector containing real numbers} +} +\value{ +Imputed vector \code{x} where NAs are replaced with the mean of \code{x} +} \description{ -Meanimputation +Replaces NA values with the mean of \code{x} +} +\examples{ +meanimpute(x = c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) +meanimpute(c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) } diff --git a/man/transform_log.Rd b/man/transform_log.Rd new file mode 100644 index 0000000..3a172e8 --- /dev/null +++ b/man/transform_log.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transform_log.R +\name{transform_log} +\alias{transform_log} +\title{Transform_log} +\usage{ +transform_log(x) +} +\arguments{ +\item{x}{A numeric vector containing positive real numbers} +} +\value{ +Natural logarithm of the values in \code{x}. +} +\description{ +Transforms values to natural logarithm. 
+} +\examples{ +transform_log(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +transform_log(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +} diff --git a/man/windsorize.Rd b/man/windsorize.Rd index 832c3cb..2d73fd4 100644 --- a/man/windsorize.Rd +++ b/man/windsorize.Rd @@ -6,6 +6,19 @@ \usage{ windsorize(x, p = 0.9) } +\arguments{ +\item{x}{A numeric vector containing real numbers} + +\item{p}{A number between 0 and 1, representing the mass of probability. Values are winsorized outside \code{p}. Default value is 0.9.} +} +\value{ +Windsorized vector of \code{x}. +} \description{ Do some windsorization. } +\examples{ +windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), p = 0.8) +windsorize(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 0.8) +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..2a036fa --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(datacleaner) + +test_check("datacleaner") diff --git a/tests/testthat/testthat.R b/tests/testthat/testthat.R new file mode 100644 index 0000000..1f69b30 --- /dev/null +++ b/tests/testthat/testthat.R @@ -0,0 +1,19 @@ +library(datacleaner) + +test_that("windsorize the input vector", { + expect_error(windsorize(c(NULL)), "Input x is NULL") + expect_error(windsorize(c(1, 2, NA)), "Input x contains value NA") + expect_equal(windsorize(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 0.8), c(1.9, 2, 3, 4, 5, 6, 7, 8, 9, 9.1)) +}) + +test_that("Takes natural logarithm", { + expect_error(transform_log(c(NULL)), "Input x is NULL") + expect_error(transform_log(c(1, 2, NA)), "Input x contains value NA") + expect_error(transform_log(c(-1, 2, 3)), "Input x contains non-positive values") + expect_equal(transform_log(c(1, exp(1))), c(0, 1)) +}) + +test_that("Replaces NA values with the mean", { + expect_equal(meanimpute(c(1, NA, 3)), c(1, 2, 3)) + expect_equal(meanimpute(c(1, 2, 3)), c(1, 2, 3)) +})