-
Notifications
You must be signed in to change notification settings - Fork 14
Improvement of datacleaner package #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
61e0532
c3d79c6
e25d36c
359da3e
8f6ccde
6b8da66
2da974d
1e649e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1,6 @@ | ||
| exportPattern("^[[:alpha:]]+") | ||
| # Generated by roxygen2: do not edit by hand | ||
|
|
||
| export(meanimpute) | ||
| export(transform_log) | ||
| export(windsorize) | ||
| import(stats) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,18 @@ | ||
#' Meanimpute
#'
#' Replace NA's with the mean of the non-missing values.
#'
#' @param x a numeric vector containing at least one non-NA value
#' @return new vector, where NA's are replaced by the mean of \code{x}
#' @examples
#' example_vector <- c(1, 5, NA, NA)
#' meanimpute(example_vector)
#' @export
meanimpute <- function(x) {
  if (is.null(x)) {
    stop("Input vector cannot be NULL.")
  }
  # A vector consisting only of NA's has no mean to impute with.
  if (all(is.na(x))) {
    stop("Input vector should contain at least one numeric element.")
  }
  # is.numeric() describes the whole vector, so a single negation suffices.
  if (!is.numeric(x)) {
    stop("Input vector should contain at least one numeric element.")
  }

  x[is.na(x)] <- mean(x, na.rm = TRUE)
  x
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
#' transform_log
#'
#' Natural-log transformation of a numeric vector.
#'
#' @param x a numeric vector of strictly positive values without NA's;
#'   zeros are rejected as well as negative values
#' @return log-transformed vector \code{x}
#' @examples
#' example_vector <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
#' transform_log(example_vector)
#' @export
transform_log <- function(x) {
  if (is.null(x)) {
    stop("Input vector is not allowed to be NULL.")
  }
  if (any(is.na(x))) {
    stop("There is at least one NA value in input vector.")
  }
  # Check the type before comparing against 0: on a character vector
  # `x <= 0` would be a string comparison and could raise the wrong error.
  if (!is.numeric(x)) {
    stop("There is at least one non-numeric value.")
  }
  # log() is only finite for strictly positive input, so 0 is rejected too.
  if (any(x <= 0)) {
    stop("There is at least one negative value.")
  }

  log(x)
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,36 @@ | ||
#' Windsorize
#'
#' Its purpose is to eliminate outliers in the following way. The
#' (0.5 - p/2)th and (0.5 + p/2)th quantiles of \code{x} are calculated, and
#' all values below the lower (above the upper) quantile are replaced by
#' that quantile.
#'
#' @param x a numeric vector without NA's
#' @param p a single number between 0 and 1: the central share of the
#'   distribution enclosed by the two quantiles
#' @return Windsorized vector \code{x}
#' @examples
#' example_vector <- c(-1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1000)
#' windsorize(example_vector, 0.9)
#'
#' example_vector <- rnorm(100)
#' windsorize(example_vector, 0.9)
#' @export
#' @import stats
windsorize <- function(x, p = .90) {
  if (is.null(x)) {
    stop("Input vector cannot be NULL.")
  }
  if (any(is.na(x))) {
    stop("There should be no NA's in input vector.")
  }
  if (!is.numeric(x)) {
    stop("There should only numeric values in the input vector.")
  }

  # One combined check so that NULL, non-numeric, NA, vector-valued and
  # out-of-range `p` all yield the same documented error instead of an
  # unrelated failure inside `if`.
  p_is_valid <- is.numeric(p) && length(p) == 1 && !is.na(p) &&
    p >= 0 && p <= 1
  if (!p_is_valid) {
    stop("Input quantile should be a number between 0 and 1 ")
  }

  # Both bounds are taken from the untouched data; clamping one tail first
  # would shift the quantile computed for the other tail.
  bounds <- stats::quantile(
    x,
    probs = c(0.5 - p / 2, 0.5 + p / 2),
    names = FALSE
  )
  x[x <= bounds[1]] <- bounds[1]
  x[x >= bounds[2]] <- bounds[2]
  x
}
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| library(testthat) | ||
| library(datacleaner) | ||
|
|
||
| test_check("datacleaner") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
test_that("Incorrect input of meanimpute", {
  # tests related to input vector
  expect_error(meanimpute(NULL), "Input vector cannot be NULL.")
  expect_error(
    meanimpute(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)),
    "Input vector should contain at least one numeric element."
  )
  # A single string turns the whole vector into character, i.e. non-numeric.
  # The expected message must be an argument of expect_error(), otherwise
  # the test only passes because of an "unused argument" error.
  expect_error(
    meanimpute(c(1, 2, 3, 4, "Dracula")),
    "Input vector should contain at least one numeric element."
  )
})
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
test_that("Input of transform_log() is correct.", {
  # Each kind of invalid input vector must be rejected with its own message.
  with_na <- c(NA, NA, NA, 6, NA, 5, NA, NA, 7, NA, NA)
  with_string <- c(1, 2, 3, 4, 5, 6, 7, 8, "string", 1000)
  with_negative <- c(1, 2, 3, 4, 5, 6, 7, 8, -5)

  expect_error(
    transform_log(NULL),
    regexp = "Input vector is not allowed to be NULL."
  )
  expect_error(
    transform_log(with_na),
    regexp = "There is at least one NA value in input vector."
  )
  expect_error(
    transform_log(with_string),
    regexp = "There is at least one non-numeric value."
  )
  expect_error(
    transform_log(with_negative),
    regexp = "There is at least one negative value."
  )
})
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
test_that("Incorrect input", {
  # tests related to input vector
  valid_x <- c(-1000, 1, 2, 3, 4, 6, 7, 8, 9, 1000)

  expect_error(windsorize(NULL, .9), "Input vector cannot be NULL.")
  expect_error(
    windsorize(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), .9),
    "There should be no NA's in input vector."
  )
  expect_error(
    windsorize(c(-1000, 1, 2, 3, 4, 5, 6, 7, 8, "string", 1000), .9),
    "There should only numeric values in the input vector."
  )

  # tests related to the quantile argument; the pattern carries no trailing
  # period because the message emitted by windsorize() has none.
  expect_error(
    windsorize(valid_x, "string"),
    "Input quantile should be a number between 0 and 1"
  )
  expect_error(
    windsorize(valid_x, 2),
    "Input quantile should be a number between 0 and 1"
  )
  expect_error(
    windsorize(valid_x, -2),
    "Input quantile should be a number between 0 and 1"
  )
})
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
test_that("NA's in vector are correctly replace by mean", {
  # The mean of 2, 4 and 6 is 4, so the single NA must become 4.
  imputed <- meanimpute(c(2, 4, 6, NA))
  expect_equal(imputed, c(2, 4, 6, 4))
})
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
test_that("Vector is correctly log-transformed", {
  # log(1) is 0, so a vector of ones maps to a vector of zeros.
  transformed <- transform_log(c(1, 1, 1))
  expect_equal(transformed, c(0, 0, 0))
})
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
test_that("vector is correctly windsorized", {
  # Only the two extreme values lie outside the 5% / 95% quantiles.
  raw <- c(-1000, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1000)
  expected <- c(-499.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 504.5)
  expect_equal(windsorize(raw, 0.9), expected)
})
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Being a bit picky here: "Input vector should contain at least one numeric element which is not NA".