From 9431e331774e54388a7a2373b7c34f3af473699f Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 20:20:10 +0000 Subject: [PATCH 1/8] Fix issue #1: Function windsorize() replaces correctly values below and above the specified percentile --- R/windsorize.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/windsorize.R b/R/windsorize.R index b4e15e6..bea606e 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -3,8 +3,10 @@ #' Do some windsorization. #' @export windsorize <- function(x, p = .90) { - q <- quantile(x, p) - x[x >= q] <- q + q_lower <- quantile(x, (1-p)/2) + q_upper <- quantile(x, p + (1-p)/2) + x[x <= q_lower] <- q_lower + x[x >= q_upper] <- q_upper x } From 3fae72a0ab8f72c69f93798fe200656bc8300063 Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 20:35:23 +0000 Subject: [PATCH 2/8] Fix issue #2: Function windsorize throws error if the input contains NA or is NULL --- R/windsorize.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/windsorize.R b/R/windsorize.R index bea606e..24900f0 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -3,6 +3,14 @@ #' Do some windsorization. #' @export windsorize <- function(x, p = .90) { + + if( sum(is.na(x))){ + stop("Input x contains value NA") + } + else if( is.null(x) ){ + stop("Input x is NULL") + } + q_lower <- quantile(x, (1-p)/2) q_upper <- quantile(x, p + (1-p)/2) x[x <= q_lower] <- q_lower From 5e0d2a1a77aa7b270f377358a0a2c936f2bfcbfa Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 20:41:30 +0000 Subject: [PATCH 3/8] Fix issue #3: function transform_log is added --- R/transform_log.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 R/transform_log.R diff --git a/R/transform_log.R b/R/transform_log.R new file mode 100644 index 0000000..d6c3e0c --- /dev/null +++ b/R/transform_log.R @@ -0,0 +1,15 @@ +transform_log <- function(x){ + + if( sum(is.na(x)) ){ + stop("Input x contains value NA") + } + else if( is.null(x) ){ + stop("Input x is NULL") + } + else if( sum(x <= 0) ){ + stop("Input x contains non-positive values") + } + + log(x) + +} From e45802851b769618148f841acbec701c231d2536 Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 20:45:40 +0000 Subject: [PATCH 4/8] Fix issue #4: Stopping criteria 'NULL' is corrected --- R/transform_log.R | 2 +- R/windsorize.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/transform_log.R b/R/transform_log.R index d6c3e0c..543ad03 100644 --- a/R/transform_log.R +++ b/R/transform_log.R @@ -3,7 +3,7 @@ transform_log <- function(x){ if( sum(is.na(x)) ){ stop("Input x contains value NA") } - else if( is.null(x) ){ + else if( length(x) | ){ stop("Input x is NULL") } else if( sum(x <= 0) ){ diff --git a/R/windsorize.R b/R/windsorize.R index 24900f0..248b6f2 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -7,7 +7,7 @@ windsorize <- function(x, p = .90) { if( sum(is.na(x))){ stop("Input x contains value NA") } - else if( is.null(x) ){ + else if( length(x) ){ stop("Input x is NULL") } From 84bf9e5e38fe4ab511b637034fd276dac63ade6b Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 20:49:08 +0000 Subject: [PATCH 5/8] Fix issue #5: Typo is corrected in the code --- R/transform_log.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/transform_log.R b/R/transform_log.R index 543ad03..5233a9a 100644 --- a/R/transform_log.R +++ b/R/transform_log.R @@ -3,7 +3,7 @@ transform_log <- function(x){ if( sum(is.na(x)) ){ stop("Input x contains value NA") } - else if( length(x) | ){ + else if( length(x) ){ stop("Input x is NULL") } else if( sum(x <= 0) ){ From 1a15e0fab3afb6cf59e687510446ccd24fe82b21 Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 21:16:58 +0000 Subject: [PATCH 6/8] Fix issue #6: Function documentations are added. Fix issue #7: Handling NULL values corrected --- R/meanimpute.R | 9 +++++++++ R/transform_log.R | 13 ++++++++++++- R/windsorize.R | 11 ++++++++++- man/meanimpute.Rd | 12 +++++++++++- man/transform_log.Rd | 21 +++++++++++++++++++++ man/windsorize.Rd | 13 +++++++++++++ 6 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 man/transform_log.Rd diff --git a/R/meanimpute.R b/R/meanimpute.R index cc7cf5e..5a58f8d 100644 --- a/R/meanimpute.R +++ b/R/meanimpute.R @@ -1,5 +1,14 @@ #' Meanimputation +#' +#' Replaces NA values with the mean of \code{x} +#' +#' @param x A numeric vector containing real numbers +#' @return Imputed vector \code{x} where NAs are replaced with the mean of \code{x} +#' @examples +#' meanimpute(x = c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) +#' meanimpute(c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) #' @export + meanimpute <- function(x) { x[is.na(x)] <- mean(x, na.rm = TRUE) x diff --git a/R/transform_log.R b/R/transform_log.R index 5233a9a..244ed48 100644 --- a/R/transform_log.R +++ b/R/transform_log.R @@ -1,9 +1,20 @@ +#' Transform_log +#' +#' Transforms values to natural logarithm. +#' +#' @param x A numeric vector containing positive real numbers +#' @return Natural logarithm of the values in \code{x}. +#' @examples +#' transform_log(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +#' transform_log(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +#' @export + transform_log <- function(x){ if( sum(is.na(x)) ){ stop("Input x contains value NA") } - else if( length(x) ){ + else if( length(x) == 0 ){ stop("Input x is NULL") } else if( sum(x <= 0) ){ diff --git a/R/windsorize.R b/R/windsorize.R index 248b6f2..51e0bd3 100644 --- a/R/windsorize.R +++ b/R/windsorize.R @@ -1,13 +1,22 @@ #' Windsorize #' #' Do some windsorization. +#' +#' @param x A numeric vector containing real numbers +#' @param p A number between 0 and 1, representing the mass of probability. Values are winsorized outside \code{p}. Default value is 0.9. +#' @return Windsorized vector of \code{x}. +#' @examples +#' windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +#' windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), p = 0.8) +#' windsorize(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 0.8) #' @export + windsorize <- function(x, p = .90) { if( sum(is.na(x))){ stop("Input x contains value NA") } - else if( length(x) ){ + else if( length(x) == 0 ){ stop("Input x is NULL") } diff --git a/man/meanimpute.Rd b/man/meanimpute.Rd index 8139e8f..7c4032c 100644 --- a/man/meanimpute.Rd +++ b/man/meanimpute.Rd @@ -6,6 +6,16 @@ \usage{ meanimpute(x) } +\arguments{ +\item{x}{A numeric vector containing real numbers} +} +\value{ +Imputed vector \code{x} where NAs are replaced with the mean of \code{x} +} \description{ -Meanimputation +Replaces NA values with the mean of \code{x} +} +\examples{ +meanimpute(x = c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) +meanimpute(c(1, NA, 3, 4, 5, 6, 7, 8, NA, 10)) } diff --git a/man/transform_log.Rd b/man/transform_log.Rd new file mode 100644 index 0000000..3a172e8 --- /dev/null +++ b/man/transform_log.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transform_log.R +\name{transform_log} +\alias{transform_log} +\title{Transform_log} +\usage{ +transform_log(x) +} +\arguments{ +\item{x}{A numeric vector containing positive real numbers} +} +\value{ +Natural logarithm of the values in \code{x}. +} +\description{ +Transforms values to natural logarithm. +} +\examples{ +transform_log(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +transform_log(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +} diff --git a/man/windsorize.Rd b/man/windsorize.Rd index 832c3cb..2d73fd4 100644 --- a/man/windsorize.Rd +++ b/man/windsorize.Rd @@ -6,6 +6,19 @@ \usage{ windsorize(x, p = 0.9) } +\arguments{ +\item{x}{A numeric vector containing real numbers} + +\item{p}{A number between 0 and 1, representing the mass of probability. Values are winsorized outside \code{p}. Default value is 0.9.} +} +\value{ +Windsorized vector of \code{x}. +} \description{ Do some windsorization. } +\examples{ +windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) +windsorize(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), p = 0.8) +windsorize(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 0.8) +} From b7b0d20cec62333df3eeba0ffd95876e29cd89a2 Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 21:25:28 +0000 Subject: [PATCH 7/8] Fix issue #8: DESCRIPTION and NAMESPACE files are updated --- DESCRIPTION | 1 + NAMESPACE | 1 + 2 files changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 492ae33..9aecf04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,6 +4,7 @@ Title: Data Cleaning - what else? Version: 0.1.0 Author: Quantargo Instructor Maintainer: Quantargo Instructor +Imports: stats Description: Some lengthy description which somebody should write Sometime. License: GPL-3 diff --git a/NAMESPACE b/NAMESPACE index d75f824..1015c5a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1 +1,2 @@ exportPattern("^[[:alpha:]]+") +importFrom("stats", "quantile") \ No newline at end of file From 0034c46a9a1f08321249308abb7809fd5415df44 Mon Sep 17 00:00:00 2001 From: Tibor Szabo Date: Mon, 29 Apr 2019 22:01:48 +0000 Subject: [PATCH 8/8] Fix issue #9: Tests are added --- tests/testthat.R | 4 ++++ tests/testthat/testthat.R | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/testthat.R diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..2a036fa --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(datacleaner) + +test_check("datacleaner") \ No newline at end of file diff --git a/tests/testthat/testthat.R b/tests/testthat/testthat.R new file mode 100644 index 0000000..1f69b30 --- /dev/null +++ b/tests/testthat/testthat.R @@ -0,0 +1,19 @@ +library(datacleaner) + +test_that("windsorize the input vector", { + expect_error(windsorize(c(NULL)), "Input x is NULL") + expect_error(windsorize(c(1, 2, NA)), "Input x contains value NA") + expect_equal(windsorize(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 0.8), c(1.9, 2, 3, 4, 5, 6, 7, 8, 9, 9.1)) +}) + +test_that("Takes natural logarithm", { + expect_error(transform_log(c(NULL)), "Input x is NULL") + expect_error(transform_log(c(1, 2, NA)), "Input x contains value NA") + expect_error(transform_log(c(-1, 2, 3)), "Input x contains non-positive values") + expect_equal(transform_log(c(1, exp(1))), c(0, 1)) +}) + +test_that("Takes natural logarithm", { + expect_equal(meanimpute(c(1, NA, 3)), c(1, 2, 3)) + expect_equal(meanimpute(c(1, 2 , 3)), c(1, 2, 3)) +}) \ No newline at end of file