From 9aa3982d61f1ad1508621743076e6dae72667a92 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Wed, 26 Feb 2025 17:32:08 +0100 Subject: [PATCH 01/27] Moved mreport to manydata --- NAMESPACE | 5 --- R/data_battles.R | 4 -- R/report.R | 73 ------------------------------------ cran-comments.md | 2 + man/battles.Rd | 12 ------ man/mreport.Rd | 27 ------------- pkgdown/_pkgdown.yml | 1 - tests/testthat/test-report.R | 23 ------------ 8 files changed, 2 insertions(+), 145 deletions(-) delete mode 100644 R/report.R delete mode 100644 man/mreport.Rd delete mode 100644 tests/testthat/test-report.R diff --git a/NAMESPACE b/NAMESPACE index e3d1bdf2..26b647ea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,10 +39,8 @@ S3method(messyduration,character) S3method(messyduration,mdate) S3method(min,mdate) S3method(modal,mdate) -S3method(mreport,default) S3method(print,mdate) S3method(print,mdates_duration) -S3method(print,mreport) S3method(random,mdate) S3method(rep,mdate) S3method(seq,mdate) @@ -77,7 +75,6 @@ export(mdate) export(messyduration) export(modal) export(month) -export(mreport) export(new_messydate) export(new_messyduration) export(on_or_after) @@ -87,7 +84,6 @@ export(random) export(validate_messydate) export(validate_messyduration) export(year) -importFrom(dplyr,"%>%") importFrom(dplyr,first) importFrom(dplyr,last) importFrom(dplyr,lead) @@ -99,7 +95,6 @@ importFrom(lubridate,ymd) importFrom(purrr,map) importFrom(purrr,pmap_chr) importFrom(stats,median) -importFrom(stats,na.omit) importFrom(stringi,stri_detect_regex) importFrom(stringi,stri_extract_all_regex) importFrom(stringi,stri_replace_all_fixed) diff --git a/R/data_battles.R b/R/data_battles.R index faf0f7ce..48bf4417 100644 --- a/R/data_battles.R +++ b/R/data_battles.R @@ -11,8 +11,4 @@ #' \item{US_party}{is the US a party to the battle, numeric} #' \item{N_actors}{number of actors to conflict, numeric} #' } -#' @details -#' ``` {r, echo = FALSE, warning = FALSE} -#' mreport(battles) -#' ``` "battles" diff --git a/R/report.R b/R/report.R deleted file mode 100644 index bd83a8ef..00000000 --- a/R/report.R +++ /dev/null @@ -1,73 +0,0 @@ -#' Data report for datasets with 'mdate' variables -#' -#' Create a properly formatted data report for datasets which contain 'mdate' -#' class objects, alongside other object classes. -#' @param data A `{tibble}` or a `{data.frame}`. -#' @return A data report of class 'mreport'. -#' @importFrom dplyr %>% -#' @importFrom stats na.omit -#' @details 'mreport' displays the variable's name, -#' the variable type, the number of observations per variable, -#' the number of missing observations for variable, -#' and the percentage of missing observations in variable. -#' @examples -#' mreport(battles) -#' @export -mreport <- function(data) UseMethod("mreport") - -#' @export -mreport.default <- function(data) { - if (!is.data.frame(data)) { - stop("Data must be a `data.frame` or `tibble`.") - } - rows <- nrow(data) - cols <- ncol(data) - varnames <- names(data) - datatype <- unlist(lapply(data, class)) - counts <- unlist(lapply(data, length)) - mvalues <- unlist(lapply(data, function(z) sum(is.na(z)))) - mvaluesper <- round((mvalues / counts) * 100, 2) - result <- list(Rows = rows, - Columns = cols, - Variables = varnames, - Types = datatype, - Count = counts, - Missing = mvalues, - MissingPer = mvaluesper) - class(result) <- "mreport" - return(result) -} - -#' @export -print.mreport <- function(x, ...) { - columns <- c(" Column Name ", " Data Type ", " Observations ", - " Missing ", " Missing (%) ") - len_col <- as.vector(unlist(lapply(columns, nchar))) - x$Types <- lapply(x$Types, paste, collapse = ", ") - lengths <- list(x$Variables, x$Types, x$Count, x$Missing, x$MissingPer) - n <- length(columns) - nlist <- list() - for (i in seq_len(n)) { - nlist[[i]] <- max(len_col[i], max(unlist(lapply(lengths[[i]], nchar)))) - } - clengths <- unlist(nlist) - dash <- sum(clengths) + 6 - cat(rep("-", dash), sep = "") - cat("\n|") - for (i in seq_len(n)) { - cat(format(columns[i], width = clengths[i], justify = "centre"), - "|", sep = "") - } - cat("\n", rep("-", dash), sep = "") - cat("\n") - for (i in seq_len(x$Columns)) { - cat("|", format(x$Variables[i], width = clengths[1], justify = "centre"), "|", - format(x$Types[i], width = clengths[2], justify = "centre"), "|", - format(x$Count[i], width = clengths[3], justify = "centre"), "|", - format(as.character(x$Missing[i]), width = clengths[4], justify = "centre"), "|", - format(as.character(x$MissingPer[i]), width = clengths[5], justify = "centre"), - "|\n", sep = "") - } - cat(rep("-", dash), sep = "") - cat("\n\n") -} diff --git a/cran-comments.md b/cran-comments.md index 295e9141..71ff5754 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -8,3 +8,5 @@ ## R CMD check results 0 errors | 0 warnings | 0 notes + +- Fixed redirected URL issue from previous submission diff --git a/man/battles.Rd b/man/battles.Rd index e733f534..b8c39a11 100644 --- a/man/battles.Rd +++ b/man/battles.Rd @@ -21,16 +21,4 @@ battles A dataset containing the names and dates of battles in 2001, according to Wikipedia (https://en.wikipedia.org/wiki/List_of_battles_in_the_21st_century). } -\details{ -\if{html}{\out{
}}\preformatted{#> ---------------------------------------------------------------------------- -#> | Column Name | Data Type | Observations | Missing | Missing (\%) | -#> ---------------------------------------------------------------------------- -#> | Battle | character | 20| 0 | 0 | -#> | Date | mdate | 20| 0 | 0 | -#> | Parties | character | 20| 0 | 0 | -#> | US_party | numeric | 20| 0 | 0 | -#> | N_actors | numeric | 20| 0 | 0 | -#> ---------------------------------------------------------------------------- -}\if{html}{\out{
}} -} \keyword{datasets} diff --git a/man/mreport.Rd b/man/mreport.Rd deleted file mode 100644 index 20ded52a..00000000 --- a/man/mreport.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/report.R -\name{mreport} -\alias{mreport} -\title{Data report for datasets with 'mdate' variables} -\usage{ -mreport(data) -} -\arguments{ -\item{data}{A \code{{tibble}} or a \code{{data.frame}}.} -} -\value{ -A data report of class 'mreport'. -} -\description{ -Create a properly formatted data report for datasets which contain 'mdate' -class objects, alongside other object classes. -} -\details{ -'mreport' displays the variable's name, -the variable type, the number of observations per variable, -the number of missing observations for variable, -and the percentage of missing observations in variable. -} -\examples{ -mreport(battles) -} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index aa269171..386c7007 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -65,5 +65,4 @@ reference: - title: "Data" desc: "Working with 'messy' data:" contents: - - mreport - battles diff --git a/tests/testthat/test-report.R b/tests/testthat/test-report.R deleted file mode 100644 index f9771a71..00000000 --- a/tests/testthat/test-report.R +++ /dev/null @@ -1,23 +0,0 @@ -test_that("report function work properly", { - expect_error(mreport("a"), "Data must be a `data.frame` or `tibble`.") - report <- mreport(battles) - column <- c("Battle", "Date", "Parties", "US_party", "N_actors") - type <- c("character", "mdate", "character", "numeric", "numeric") - obs <- c(20, 20, 20, 20, 20) - missing <- c(0, 0, 0, 0, 0) - missingp <- c(0, 0, 0, 0, 0) - names(type) <- c("Battle", "Date", "Parties", "US_party", "N_actors") - names(obs) <- c("Battle", "Date", "Parties", "US_party", "N_actors") - names(missing) <- c("Battle", "Date", "Parties", "US_party", "N_actors") - names(missingp) <- c("Battle", "Date", "Parties", "US_party", "N_actors") - expect_true(is.list(report)) - expect_s3_class(mreport(battles), "mreport") - expect_equal(report$Rows, 20) - expect_equal(report$Columns, 5) - expect_equal(report$Variables, column) - expect_equal(report$Types, type) - expect_equal(report$Count, obs) - expect_equal(report$Missing, missing) - expect_equal(report$MissingPer, missingp) - expect_length(mreport(battles), 7) -}) From b6fae8cc44720089011a12e785d43f43eea1d24a Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 27 Feb 2025 14:55:33 +0100 Subject: [PATCH 02/27] Fixed some issues with how coercion from messydates was working with resolution functions --- R/coerce_from_messydate.R | 16 ++++++++-------- man/from_messydate.Rd | 10 +++++----- tests/testthat/test-coerce-from.R | 10 +++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index 3839751e..c44fa138 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -41,13 +41,13 @@ NULL #' as.Date(as_messydate("1000 BC"), median) #' as.Date(as_messydate(c("-1000", "2020")), min) #' @export -as.Date.mdate <- function(x, ..., FUN) { +as.Date.mdate <- function(x, FUN = min, ...) { # # fix argument ordering issues # if (missing(FUN)){ # if(length(list(...)) > 0) FUN <- list(...)[[1]] else # FUN <- messydates::min.mdate # } - if(missing(FUN)) FUN <- min + # if(missing(FUN)) FUN <- min x <- FUN(x) x <- suppressWarnings(ifelse(stringi::stri_detect_regex(x, "^-"), lubridate::as_date(negative_dates(x)), @@ -57,24 +57,24 @@ as.Date.mdate <- function(x, ..., FUN) { #' @rdname from_messydate #' @export -as.POSIXct.mdate <- function(x, ..., FUN) { - if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] +as.POSIXct.mdate <- function(x, FUN = min, ...) { + # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] x <- FUN(x) if (stringi::stri_detect_regex(x, "^-")) { stop("For conversion of negative dates from mdate class use as.Date()") } - as.POSIXct(x) + as.POSIXct(as.character(x)) } #' @rdname from_messydate #' @export -as.POSIXlt.mdate <- function(x, ..., FUN) { - if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] +as.POSIXlt.mdate <- function(x, FUN = min, ...) { + # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] x <- FUN(x) if (stringi::stri_detect_regex(x, "^-")) { stop("For conversion of negative dates from mdate class use as.Date()") } - as.POSIXlt(x) + as.POSIXlt(as.character(x)) } # Helper function for returning negative dates in date formats diff --git a/man/from_messydate.Rd b/man/from_messydate.Rd index 10f71424..55ed3a42 100644 --- a/man/from_messydate.Rd +++ b/man/from_messydate.Rd @@ -7,21 +7,21 @@ \alias{as.POSIXlt.mdate} \title{Coercion from messy dates} \usage{ -\method{as.Date}{mdate}(x, ..., FUN) +\method{as.Date}{mdate}(x, FUN = min, ...) -\method{as.POSIXct}{mdate}(x, ..., FUN) +\method{as.POSIXct}{mdate}(x, FUN = min, ...) -\method{as.POSIXlt}{mdate}(x, ..., FUN) +\method{as.POSIXlt}{mdate}(x, FUN = min, ...) } \arguments{ \item{x}{A \code{mdate} object} -\item{...}{Arguments passed on to the S3 generics.} - \item{FUN}{A function that can be used to resolve expanded messy dates into a single date. For example, \code{min()}, \code{max()}, \code{mean()}, \code{median()}, \code{modal()}, and \code{random()}.} + +\item{...}{Arguments passed on to the S3 generics.} } \value{ A date object of \code{Date}, \code{POSIXct}, or \code{POSIXlt} class diff --git a/tests/testthat/test-coerce-from.R b/tests/testthat/test-coerce-from.R index a19ebc9e..d061fca2 100644 --- a/tests/testthat/test-coerce-from.R +++ b/tests/testthat/test-coerce-from.R @@ -5,15 +5,15 @@ messy <- as_messydate("2010-10-10..2010-10-20") test_that("Coercion from other date classes into messydt works", { # expect_equal(as.character(as.Date(as_messydate("1000 BC"), max)), "-1000-12-31") - expect_equal(as.Date(messy, min), ddate) - expect_equal(as.Date(mdatey, median), ddate) - expect_equal(as.Date(mdatey, random), ddate) + expect_equal(as.Date(messy, FUN = min), ddate) + expect_equal(as.Date(mdatey, FUN = median), ddate) + expect_equal(as.Date(mdatey, FUN = random), ddate) # expect_equal(as.character(as.Date(as_messydate("1000 BC"), min)), min(negative)) }) test_that("Coercion to POSIX works", { - expect_equal(as.POSIXct(messy, max), as.POSIXct("2010-10-20 CEST")) - expect_equal(as.POSIXlt(messy, mean), as.POSIXlt("2010-10-15 CEST")) + expect_equal(as.POSIXct(messy, FUN = max), as.POSIXct("2010-10-20 CEST")) + expect_equal(as.POSIXlt(messy, FUN = mean), as.POSIXlt("2010-10-15 CEST")) }) # neg_dates <- as_messydate(c("-27", "-14")) From 01df1789e32018a580f97939c2b061348df90f86 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 27 Feb 2025 14:56:10 +0100 Subject: [PATCH 03/27] Fixed bug in how min.mdate() and max.mdate() handled recursivity --- R/coerce_resolve.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/coerce_resolve.R b/R/coerce_resolve.R index bc17a6ae..6d65a3f0 100644 --- a/R/coerce_resolve.R +++ b/R/coerce_resolve.R @@ -34,6 +34,7 @@ min.mdate <- function(..., na.rm = TRUE, recursive = FALSE){ dates <- stringi::stri_replace_all_regex(d, "~|\\?", "") dates <- .remove_post(dates) dates <- .replace_earliest(dates) + dates <- mdate(dates) # if(any(stringi::stri_detect_regex(dates, "^~"))) # dates <- expand_approximate_years(dates, approx_range = approx_range) # if(any(stringi::stri_detect_regex(dates, "[:digit:]~"))){ @@ -41,8 +42,10 @@ min.mdate <- function(..., na.rm = TRUE, recursive = FALSE){ # dates <- expand_approximate_days(dates, approx_range = approx_range) # } if(recursive){ - if(any(is_bce(dates))) max(dates[is_bce(dates)]) else - min(dates) + if(any(is_bce(dates))) + dates[is_bce(dates)][order(as.character(dates[is_bce(dates)]), + decreasing = TRUE)][1] else + dates[order(as.character(dates))==1] } else dates } @@ -76,10 +79,13 @@ max.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { dates <- unspecified_months(dates) dates <- .remove_pre(dates) dates <- .replace_latest(dates) + dates <- mdate(dates) if(recursive){ - if(any(is_bce(dates))) max(dates[!is_bce(dates)]) else - max(dates) + if(all(is_bce(dates), na.rm = TRUE)) + dates[order(dates, decreasing = TRUE)][1] else + dates[!is_bce(dates)][order(as.character(dates[!is_bce(dates)]), + decreasing = TRUE)][1] } else dates } From dbd3a704405860f2f2f38a3ef0a5d10ed997a465 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 28 Feb 2025 10:09:55 +0100 Subject: [PATCH 04/27] Consolidated scripts relating to the construction of mdate and mduration classes --- R/class.R | 213 ------------------ R/{duration.R => class_duration.R} | 18 +- R/class_mdate.R | 117 ++++++++++ R/class_methods.R | 54 +++++ man/{class.Rd => class_create.Rd} | 13 +- man/{duration_class.Rd => class_duration.Rd} | 6 +- man/class_make.Rd | 27 +++ .../{test-class.R => test-class_create.R} | 0 ...{test-duration.R => test-class_duration.R} | 0 ...est-messydate-make.R => test-class_make.R} | 0 10 files changed, 213 insertions(+), 235 deletions(-) delete mode 100644 R/class.R rename R/{duration.R => class_duration.R} (89%) create mode 100644 R/class_mdate.R create mode 100644 R/class_methods.R rename man/{class.Rd => class_create.Rd} (95%) rename man/{duration_class.Rd => class_duration.Rd} (93%) create mode 100644 man/class_make.Rd rename tests/testthat/{test-class.R => test-class_create.R} (100%) rename tests/testthat/{test-duration.R => test-class_duration.R} (100%) rename tests/testthat/{test-messydate-make.R => test-class_make.R} (100%) diff --git a/R/class.R b/R/class.R deleted file mode 100644 index e3b16c6c..00000000 --- a/R/class.R +++ /dev/null @@ -1,213 +0,0 @@ -#' A flexible date class for messy dates -#' -#' @description -#' Recent extensions to standardised date notation in -#' [ISO 8601-2_2019(E)](https://www.iso.org/standard/70908.html) -#' create space for unspecified, uncertain, and approximate dates, -#' as well as succinct representation of ranges of dates. -#' These functions create and validate a new date class for R -#' that can contain and parse these annotations, -#' and are not typically user-facing. -#' Please see `as_messydate()` for the user-facing coercion function. -#' -#' @details -#' ## Date annotations -#' -#' _Unspecified date components_, such as when the day is unknown, -#' can be represented by one or more `X`s in place of the digits. -#' The modifier `*` is recommended to indicate that the entire -#' time scale component value is unspecified, e.g. `X*-03-03`, -#' however this is not implemented here. -#' Please be explicit about the digits that are unspecified, -#' e.g. `XXXX-03-03` expresses 3rd March in some unspecified year, -#' whereas `2003-XX-03` expresses the 3rd of some month in 2003. -#' If time components are not given, they are expanded to this. -#' -#' _Approximate date components_, modified by `~`, -#' represent an estimate whose value is asserted -#' to be possibly correct. -#' For example, `2003~-03-03` -#' The degree of confidence in approximation -#' depends on the application. -#' -#' _Uncertain date components_, modified by `?`, -#' represent a date component whose source is considered -#' to be dubious and therefore not to be relied upon. -#' An additional modifier, `%`, is used to indicate -#' a value that is both uncertain and approximate. -#' -#' ## Date sets -#' -#' These functions also introduce standard notation -#' for ranges of dates. -#' Rather than the typical R notation for ranges, -#' `:`, ISO 8601-2_2019(E) recommends `..`. -#' This then can be applied between two time scale -#' components to create a standard range between -#' these dates (inclusive), e.g. `2009-01-01..2019-01-01`. -#' But it can also be used as an affix, -#' indicating "on or before" if used as a prefix, -#' e.g. `..2019-01-01`, -#' or indicating "on or after" if used as a suffix, -#' e.g. `2009-01-01..`. -#' -#' And lastly, notation for sets of dates is also included. -#' Here braces, `{}`, are used to mean "all members of the set", -#' while brackets, `[]`, are used to mean "one member of the set". -#' @param x A character scalar or vector in the expected `"yyyy-mm-dd"` format -#' annotated, as necessary, according to ISO 8601-2_2019(E). -#' @return Object of class `mdate` -#' @name class -#' @seealso messydate -NULL - -#' @rdname class -#' @export -new_messydate <- function(x = character()) { - stopifnot(is.character(x)) - structure(x, class = "mdate") -} - -#' @rdname class -#' @export -validate_messydate <- function(x) { - values <- unclass(x) - if (any(grepl("[A-WYZa-z]", values) & !grepl("^NA$", values))) { - stop("The only alpha character allowed in messy dates is 'X' for - unspecified time components", call. = FALSE) - } - if (!any(grepl("[0-9]", values))) { - stop("mdate object requires at least one specified date component.", - call. = FALSE) - } - if (any(grepl("!|\\(|\\)|\\+|\\=|\\/|,|;|>|<|_|\\^|'|&|\\$|#", values))) { - stop("mdate object can only consist of numbers and - some special symbols: []{}..X%?~", call. = FALSE) - } - x -} - -#' @importFrom utils str -#' @export -print.mdate <- function(x, ...) { - str(x) -} - -#' @export -`[.mdate` <- function(x, ..., drop = TRUE) { - as_messydate(NextMethod("[", unclass(x))) -} - -#' @export -`[<-.mdate` <- function(x, i, ..., value) { - value <- as_messydate(value) - validate_messydate(value) - as_messydate(NextMethod("[<-", unclass(x))) -} - -#' @export -`[[.mdate` <- function(x, ...) { - as_messydate(NextMethod("[[", unclass(x))) -} - -#' @export -`[[<-.mdate` <- function(x, i, ..., value) { - value <- as_messydate(value) - validate_messydate(value) - as_messydate(NextMethod("[[<-", unclass(x))) -} - -#' @export -c.mdate <- function(...) { - if(length(list(...)) == 1){ - unclass(list(...)[[1]]) - } else { - vecs <- lapply(list(...), function(e) unclass(as_messydate(e))) - x <- as_messydate(unlist(vecs)) - validate_messydate(x) - } -} - -#' @export -as.data.frame.mdate <- function(x, ...) { - as.data.frame.vector(x, ...) -} - -#' @export -rep.mdate <- function(x, ...) { - as_messydate(NextMethod("rep", unclass(x))) -} - -#' @export -as.list.mdate <- function(x, ...) { - lapply(unclass(x), as_messydate) -} - -#' @export -as.numeric.mdate <- function(x, ...) { - as.numeric(as.Date(x)) -} - -#' Sequence method for messydates -#' @description -#' This function provides a sequence (`seq()`) method for messydates. -#' This can be used with ranges or unspecified dates, -#' and is particularly useful for defining a sequence of dates -#' before the common era or between eras. -#' @name messy-sequence -#' @param from A messydate or range. -#' If 'from' is a range and 'to' is not specified, -#' 'from' will be the minimum of the range and 'to' will be maximum. -#' @param to A messydate. -#' @param by Increment of the sequence. By default "days". -#' @param ... Arguments passed to or from methods. -#' @examples -#' seq(mdate("-0001-12-20"), mdate("0001-01-10")) -#' @export -seq.mdate <- function(from, to, by = "days", ...) { - - if(missing(to) & !is_precise(from)){ - to <- max(from) - from <- min(from) - } - - # straight forward sequence - if(!any(is_bce(c(from, to)))){ - seq(as.Date(from), as.Date(to), by = by) - } else { - - fromp <- as.Date(stringi::stri_replace_first_regex(from, "^-", "")) - # sequence before common era - if(is_bce(to)){ - top <- as.Date(stringi::stri_replace_first_regex(to, "^-", "")) - .neg_seqs(fromp, top, by = by) - } else { - # sequence between eras - zero_padding(c(.neg_seqs(fromp, as.Date("0001-12-31"), by = by), - as.character(seq(as.Date("0001-01-01"), as.Date(to), by = by)))) - # zero_padding(c(rev(paste0("-", seq(as.Date("0001-01-01"), fromp, by = by))), - # as.character(seq(as.Date("0001-01-01"), as.Date(to), by = by)))) - } - } -} - -.neg_seqs <- function(fromp, top, by = "days"){ - if(year(fromp) == year(top)){ - zero_padding(paste0("-", seq(min(c(fromp, top)), - max(c(fromp,top)), by = by))) - } else { - strt <- max(c(fromp, top)) - ends <- min(c(fromp, top)) - strt_yr <- year(strt) - strt_sq <- seq(as.Date(strt), as.Date(paste0(strt_yr,"-12-31")), by = by) - ends_yr <- year(ends) - ends_sq <- seq(as.Date(paste0(ends_yr, "-01-01")), as.Date(ends), by = by) - if(strt_yr - ends_yr > 1){ - mids_sq <- seq(as.Date(paste0(ends_yr+1, "-01-01")), - as.Date(paste0(strt_yr-1,"-12-31")), by = by) - if(length(unique(year(mids_sq)))>1) - mids_sq <- mids_sq[order(year(mids_sq), decreasing = TRUE)] - zero_padding(paste0("-", c(strt_sq, mids_sq, ends_sq))) - } else zero_padding(paste0("-", c(strt_sq, ends_sq))) - } -} diff --git a/R/duration.R b/R/class_duration.R similarity index 89% rename from R/duration.R rename to R/class_duration.R index 06d4f561..37edc334 100644 --- a/R/duration.R +++ b/R/class_duration.R @@ -12,29 +12,23 @@ #' @param approx_range Range to expand approximate dates, in days. #' If 3, for example, adds 3 days; if -3, removes 3 days from both sides. #' @return Object of class `description` -#' @name duration_class +#' @name class_duration #' @examples #' messyduration(as_messydate(c("2010-01-01..2010-12-31", "2010-01..2010-12"))) NULL -#' @rdname duration_class +#' @rdname class_duration #' @export new_messyduration <- function(x = character()) { stopifnot(is.character(x)) structure(x, class = "mdates_duration") } -#' @importFrom utils str -#' @export -print.mdates_duration <- function(x, ...) { - str(x) -} - -#' @rdname duration_class +#' @rdname class_duration #' @export messyduration <- function(x, approx_range = 0) UseMethod("messyduration") -#' @rdname duration_class +#' @rdname class_duration #' @export validate_messyduration <- function(x, approx_range = 0) { if (any(!grepl("\\.\\.", x))) { @@ -43,7 +37,7 @@ validate_messyduration <- function(x, approx_range = 0) { } } -#' @rdname duration_class +#' @rdname class_duration #' @export messyduration.character <- function(x, approx_range = 0) { message("Converting to mdate class.") @@ -53,7 +47,7 @@ messyduration.character <- function(x, approx_range = 0) { new_messyduration(x) } -#' @rdname duration_class +#' @rdname class_duration #' @export messyduration.mdate <- function(x, approx_range = 0) { validate_messyduration(x) diff --git a/R/class_mdate.R b/R/class_mdate.R new file mode 100644 index 00000000..b0100f3f --- /dev/null +++ b/R/class_mdate.R @@ -0,0 +1,117 @@ +#' A flexible date class for messy dates +#' @description +#' Recent extensions to standardised date notation in +#' [ISO 8601-2_2019(E)](https://www.iso.org/standard/70908.html) +#' create space for unspecified, uncertain, and approximate dates, +#' as well as succinct representation of ranges of dates. +#' These functions create and validate a new date class for R +#' that can contain and parse these annotations, +#' and are not typically user-facing. +#' Please see `as_messydate()` for the user-facing coercion function. +#' @section Date annotations: +#' _Unspecified date components_, such as when the day is unknown, +#' can be represented by one or more `X`s in place of the digits. +#' The modifier `*` is recommended to indicate that the entire +#' time scale component value is unspecified, e.g. `X*-03-03`, +#' however this is not implemented here. +#' Please be explicit about the digits that are unspecified, +#' e.g. `XXXX-03-03` expresses 3rd March in some unspecified year, +#' whereas `2003-XX-03` expresses the 3rd of some month in 2003. +#' If time components are not given, they are expanded to this. +#' +#' _Approximate date components_, modified by `~`, +#' represent an estimate whose value is asserted +#' to be possibly correct. +#' For example, `2003~-03-03` +#' The degree of confidence in approximation +#' depends on the application. +#' +#' _Uncertain date components_, modified by `?`, +#' represent a date component whose source is considered +#' to be dubious and therefore not to be relied upon. +#' An additional modifier, `%`, is used to indicate +#' a value that is both uncertain and approximate. +#' +#' @section Date sets: +#' These functions also introduce standard notation +#' for ranges of dates. +#' Rather than the typical R notation for ranges, +#' `:`, ISO 8601-2_2019(E) recommends `..`. +#' This then can be applied between two time scale +#' components to create a standard range between +#' these dates (inclusive), e.g. `2009-01-01..2019-01-01`. +#' But it can also be used as an affix, +#' indicating "on or before" if used as a prefix, +#' e.g. `..2019-01-01`, +#' or indicating "on or after" if used as a suffix, +#' e.g. `2009-01-01..`. +#' +#' And lastly, notation for sets of dates is also included. +#' Here braces, `{}`, are used to mean "all members of the set", +#' while brackets, `[]`, are used to mean "one member of the set". +#' @param x A character scalar or vector in the expected `"yyyy-mm-dd"` format +#' annotated, as necessary, according to ISO 8601-2_2019(E). +#' @return Object of class `mdate` +#' @name class_create +#' @seealso messydate +NULL + +#' @rdname class_create +#' @export +new_messydate <- function(x = character()) { + stopifnot(is.character(x)) + structure(x, class = "mdate") +} + +#' @rdname class_create +#' @export +validate_messydate <- function(x) { + values <- unclass(x) + if (any(grepl("[A-WYZa-z]", values) & !grepl("^NA$", values))) { + stop("The only alpha character allowed in messy dates is 'X' for + unspecified time components", call. = FALSE) + } + if (!any(grepl("[0-9]", values))) { + stop("mdate object requires at least one specified date component.", + call. = FALSE) + } + if (any(grepl("!|\\(|\\)|\\+|\\=|\\/|,|;|>|<|_|\\^|'|&|\\$|#", values))) { + stop("mdate object can only consist of numbers and + some special symbols: []{}..X%?~", call. = FALSE) + } + x +} + +# Make #### + +#' Composes `mdate` from multiple variables +#' @param ... One (yyyy-mm-dd), two (yyyy-mm-dd, yyyy-mm-dd), +#' or three (yyyy, mm, dd) variables. +#' @details If three date variables are passed to `make_messydate()`, +#' function will create a single date (yyyy-mm-dd) from it. +#' If two date variables are passed to `make_messydate()`, +#' function will create a range of dates from it (yyyy-mm-dd..yyyy-mm-dd). +#' If one date variable is passed to `make_messydate()`, +#' function defaults to `as_messydate()`. +#' @importFrom purrr map pmap_chr +#' @name class_make +#' @examples +#' make_messydate("2010", "10", "10") +#' @export +make_messydate <- function(..., resequence = FALSE) { + dots <- list(...) + if (length(dots) == 1) { + dots <- do.call(as.character, dots) + dates <- unlist(dots) + } else if (length(dots) == 2) { + dots <- purrr::map(dots, as.character) + dates <- unlist(purrr::pmap_chr(dots, paste, sep = "..")) + dates <- gsub("NA..NA", "NA", dates) + } else if (length(dots) == 3) { + dots <- purrr::map(dots, as.character) + dates <- unlist(purrr::pmap_chr(dots, paste, sep = "-")) + dates <- gsub("NA-NA-NA", "NA", dates) + } else stop("make_messydate() takes one variable (yyyy-mm-dd), + two variables (yyyy-mm-dd, yyyy-mm-dd), or three variables (yyyy, mm, dd).") + as_messydate(dates, resequence) +} diff --git a/R/class_methods.R b/R/class_methods.R new file mode 100644 index 00000000..35a2debe --- /dev/null +++ b/R/class_methods.R @@ -0,0 +1,54 @@ +#' @export +`[.mdate` <- function(x, ..., drop = TRUE) { + as_messydate(NextMethod("[", unclass(x))) +} + +#' @export +`[<-.mdate` <- function(x, i, ..., value) { + value <- as_messydate(value) + validate_messydate(value) + as_messydate(NextMethod("[<-", unclass(x))) +} + +#' @export +`[[.mdate` <- function(x, ...) { + as_messydate(NextMethod("[[", unclass(x))) +} + +#' @export +`[[<-.mdate` <- function(x, i, ..., value) { + value <- as_messydate(value) + validate_messydate(value) + as_messydate(NextMethod("[[<-", unclass(x))) +} + +#' @export +c.mdate <- function(...) { + if(length(list(...)) == 1){ + unclass(list(...)[[1]]) + } else { + vecs <- lapply(list(...), function(e) unclass(as_messydate(e))) + x <- as_messydate(unlist(vecs)) + validate_messydate(x) + } +} + +#' @export +rep.mdate <- function(x, ...) { + as_messydate(NextMethod("rep", unclass(x))) +} + +# Printing #### + +#' @importFrom utils str +#' @export +print.mdate <- function(x, ...) { + str(x) +} +#' @importFrom utils str +#' @export +print.mdates_duration <- function(x, ...) { + str(x) +} + + diff --git a/man/class.Rd b/man/class_create.Rd similarity index 95% rename from man/class.Rd rename to man/class_create.Rd index 91c15bc0..ea526ceb 100644 --- a/man/class.Rd +++ b/man/class_create.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class.R -\name{class} -\alias{class} +% Please edit documentation in R/class_mdate.R +\name{class_create} +\alias{class_create} \alias{new_messydate} \alias{validate_messydate} \title{A flexible date class for messy dates} @@ -27,8 +27,7 @@ that can contain and parse these annotations, and are not typically user-facing. Please see \code{as_messydate()} for the user-facing coercion function. } -\details{ -\subsection{Date annotations}{ +\section{Date annotations}{ \emph{Unspecified date components}, such as when the day is unknown, can be represented by one or more \code{X}s in place of the digits. @@ -54,7 +53,7 @@ An additional modifier, \verb{\%}, is used to indicate a value that is both uncertain and approximate. } -\subsection{Date sets}{ +\section{Date sets}{ These functions also introduce standard notation for ranges of dates. @@ -73,7 +72,7 @@ And lastly, notation for sets of dates is also included. Here braces, \code{{}}, are used to mean "all members of the set", while brackets, \verb{[]}, are used to mean "one member of the set". } -} + \seealso{ messydate } diff --git a/man/duration_class.Rd b/man/class_duration.Rd similarity index 93% rename from man/duration_class.Rd rename to man/class_duration.Rd index b0dc4e83..0ebc57be 100644 --- a/man/duration_class.Rd +++ b/man/class_duration.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/duration.R -\name{duration_class} -\alias{duration_class} +% Please edit documentation in R/class_duration.R +\name{class_duration} +\alias{class_duration} \alias{new_messyduration} \alias{messyduration} \alias{validate_messyduration} diff --git a/man/class_make.Rd b/man/class_make.Rd new file mode 100644 index 00000000..a921525f --- /dev/null +++ b/man/class_make.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/class_mdate.R +\name{class_make} +\alias{class_make} +\alias{make_messydate} +\title{Composes \code{mdate} from multiple variables} +\usage{ +make_messydate(..., resequence = FALSE) +} +\arguments{ +\item{...}{One (yyyy-mm-dd), two (yyyy-mm-dd, yyyy-mm-dd), +or three (yyyy, mm, dd) variables.} +} +\description{ +Composes \code{mdate} from multiple variables +} +\details{ +If three date variables are passed to \code{make_messydate()}, +function will create a single date (yyyy-mm-dd) from it. +If two date variables are passed to \code{make_messydate()}, +function will create a range of dates from it (yyyy-mm-dd..yyyy-mm-dd). +If one date variable is passed to \code{make_messydate()}, +function defaults to \code{as_messydate()}. +} +\examples{ +make_messydate("2010", "10", "10") +} diff --git a/tests/testthat/test-class.R b/tests/testthat/test-class_create.R similarity index 100% rename from tests/testthat/test-class.R rename to tests/testthat/test-class_create.R diff --git a/tests/testthat/test-duration.R b/tests/testthat/test-class_duration.R similarity index 100% rename from tests/testthat/test-duration.R rename to tests/testthat/test-class_duration.R diff --git a/tests/testthat/test-messydate-make.R b/tests/testthat/test-class_make.R similarity index 100% rename from tests/testthat/test-messydate-make.R rename to tests/testthat/test-class_make.R From f87e0841e076d2cd6a26bfdff287559df5cff2bb Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 28 Feb 2025 10:13:20 +0100 Subject: [PATCH 05/27] Consolidated and rationalised structure of coercion scripts --- R/coerce_from_messydate.R | 24 +++++++-- R/coerce_to_messydate.R | 51 ++++--------------- man/{from_messydate.Rd => coerce_from.Rd} | 4 +- man/{messydate.Rd => coerce_to.Rd} | 21 +------- pkgdown/_pkgdown.yml | 5 +- ...{test-coerce-from.R => test-coerce_from.R} | 0 .../{test-coerce-to.R => test-coerce_to.R} | 0 7 files changed, 35 insertions(+), 70 deletions(-) rename man/{from_messydate.Rd => coerce_from.Rd} (97%) rename man/{messydate.Rd => coerce_to.Rd} (85%) rename tests/testthat/{test-coerce-from.R => test-coerce_from.R} (100%) rename tests/testthat/{test-coerce-to.R => test-coerce_to.R} (100%) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index c44fa138..d9c1d325 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -24,11 +24,11 @@ #' For example, `min()`, `max()`, `mean()`, `median()`, #' `modal()`, and `random()`. #' @return A date object of `Date`, `POSIXct`, or `POSIXlt` class -#' @name from_messydate +#' @name coerce_from NULL #> NULL -#' @rdname from_messydate +#' @rdname coerce_from #' @examples #' as.Date(as_messydate("2012-01"), min) #' as.Date(as_messydate("2012-01-01"), mean) @@ -55,7 +55,7 @@ as.Date.mdate <- function(x, FUN = min, ...) { as.Date(x, origin = "1970-01-01") } -#' @rdname from_messydate +#' @rdname coerce_from #' @export as.POSIXct.mdate <- function(x, FUN = min, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] @@ -66,7 +66,7 @@ as.POSIXct.mdate <- function(x, FUN = min, ...) { as.POSIXct(as.character(x)) } -#' @rdname from_messydate +#' @rdname coerce_from #' @export as.POSIXlt.mdate <- function(x, FUN = min, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] @@ -88,3 +88,19 @@ negative_dates <- function(x) { x <- lubridate::as_date(x) x } + +#' @export +as.data.frame.mdate <- function(x, ...) { + as.data.frame.vector(x, ...) +} + +#' @export +as.list.mdate <- function(x, ...) { + lapply(unclass(x), as_messydate) +} + +#' @export +as.numeric.mdate <- function(x, ...) { + as.numeric(as.Date(x)) +} + diff --git a/R/coerce_to_messydate.R b/R/coerce_to_messydate.R index 62bcd228..1fc2a6a9 100644 --- a/R/coerce_to_messydate.R +++ b/R/coerce_to_messydate.R @@ -28,10 +28,10 @@ #' based on which the date is reordered into YYYY-MM-DD format #' and further completed to YYYY-MM-DD format if they choose to do so. #' @return A `mdate` class object -#' @name messydate +#' @name coerce_to NULL -#' @describeIn messydate Core `mdate` class coercion function +#' @describeIn coerce_to Core `mdate` class coercion function #' @examples #' as_messydate("2021") #' as_messydate("2021-02") @@ -53,21 +53,21 @@ NULL as_messydate <- function(x, resequence = FALSE) UseMethod("as_messydate") -#' @describeIn messydate Coerce from `Date` to `mdate` class +#' @describeIn coerce_to Coerce from `Date` to `mdate` class #' @export as_messydate.Date <- function(x, resequence = FALSE) { x <- as.character(x) new_messydate(x) } -#' @describeIn messydate Coerce from `POSIXct` to `mdate` class +#' @describeIn coerce_to Coerce from `POSIXct` to `mdate` class #' @export as_messydate.POSIXct <- function(x, resequence = FALSE) { x <- as.character(x) new_messydate(x) } -#' @describeIn messydate Coerce from `POSIXlt` to `mdate` class +#' @describeIn coerce_to Coerce from `POSIXlt` to `mdate` class #' @export as_messydate.POSIXlt <- function(x, resequence = FALSE) { x <- as.character(x) @@ -80,7 +80,7 @@ as_messydate.mdate <- function(x, resequence = FALSE) { new_messydate(x) } -#' @describeIn messydate Coerce character date objects to `mdate` class +#' @describeIn coerce_to Coerce character date objects to `mdate` class #' @export as_messydate.character <- function(x, resequence = NULL) { d <- standardise_text(x) @@ -107,14 +107,14 @@ as_messydate.character <- function(x, resequence = NULL) { new_messydate(d) } -#' @describeIn messydate Coerce numeric objects to `mdate` class +#' @describeIn coerce_to Coerce numeric objects to `mdate` class #' @export as_messydate.numeric <- function(x, resequence = NULL) { d <- as.character(x) new_messydate(d) } -#' @describeIn messydate Coerce list date objects to the most concise +#' @describeIn coerce_to Coerce list date objects to the most concise #' representation of `mdate` class #' @examples #' as_messydate(list(c("2012-06-01", "2012-06-02", "2012-06-03"))) @@ -128,7 +128,7 @@ as_messydate.list <- function(x, resequence = FALSE) { }) } -#' @rdname messydate +#' @rdname coerce_to #' @export mdate <- as_messydate @@ -578,39 +578,6 @@ complete_ambiguous_19 <- function(d) { out } -# Make #### - -#' @describeIn messydate Composes `mdate` from multiple variables -#' @param ... One (yyyy-mm-dd), two (yyyy-mm-dd, yyyy-mm-dd), -#' or three (yyyy, mm, dd) variables. -#' @details If three date variables are passed to `make_messydate()`, -#' function will create a single date (yyyy-mm-dd) from it. -#' If two date variables are passed to `make_messydate()`, -#' function will create a range of dates from it (yyyy-mm-dd..yyyy-mm-dd). -#' If one date variable is passed to `make_messydate()`, -#' function defaults to `as_messydate()`. -#' @importFrom purrr map pmap_chr -#' @examples -#' make_messydate("2010", "10", "10") -#' @export -make_messydate <- function(..., resequence = FALSE) { - dots <- list(...) - if (length(dots) == 1) { - dots <- do.call(as.character, dots) - dates <- unlist(dots) - } else if (length(dots) == 2) { - dots <- purrr::map(dots, as.character) - dates <- unlist(purrr::pmap_chr(dots, paste, sep = "..")) - dates <- gsub("NA..NA", "NA", dates) - } else if (length(dots) == 3) { - dots <- purrr::map(dots, as.character) - dates <- unlist(purrr::pmap_chr(dots, paste, sep = "-")) - dates <- gsub("NA-NA-NA", "NA", dates) - } else stop("make_messydate() takes one variable (yyyy-mm-dd), - two variables (yyyy-mm-dd, yyyy-mm-dd), or three variables (yyyy, mm, dd).") - as_messydate(dates, resequence) -} - stri_squish <- function(charvec){ stringi::stri_trim_both(stringi::stri_replace_all_regex(charvec, "\\s+", " ")) } diff --git a/man/from_messydate.Rd b/man/coerce_from.Rd similarity index 97% rename from man/from_messydate.Rd rename to man/coerce_from.Rd index 55ed3a42..44f2db69 100644 --- a/man/from_messydate.Rd +++ b/man/coerce_from.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/coerce_from_messydate.R -\name{from_messydate} -\alias{from_messydate} +\name{coerce_from} +\alias{coerce_from} \alias{as.Date.mdate} \alias{as.POSIXct.mdate} \alias{as.POSIXlt.mdate} diff --git a/man/messydate.Rd b/man/coerce_to.Rd similarity index 85% rename from man/messydate.Rd rename to man/coerce_to.Rd index 3b62dbcd..43b78265 100644 --- a/man/messydate.Rd +++ b/man/coerce_to.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/coerce_to_messydate.R -\name{messydate} -\alias{messydate} +\name{coerce_to} +\alias{coerce_to} \alias{as_messydate} \alias{as_messydate.Date} \alias{as_messydate.POSIXct} @@ -10,7 +10,6 @@ \alias{as_messydate.numeric} \alias{as_messydate.list} \alias{mdate} -\alias{make_messydate} \title{Coercion from regular date classes to mdate} \usage{ as_messydate(x, resequence = FALSE) @@ -28,8 +27,6 @@ as_messydate(x, resequence = FALSE) \method{as_messydate}{list}(x, resequence = FALSE) mdate(x, resequence = FALSE) - -make_messydate(..., resequence = FALSE) } \arguments{ \item{x}{A scalar or vector of a class that can be coerced into \code{mdate}, @@ -54,9 +51,6 @@ If 'interactive', it prompts users to select the existing component order of ambiguous dates, based on which the date is reordered into YYYY-MM-DD format and further completed to YYYY-MM-DD format if they choose to do so.} - -\item{...}{One (yyyy-mm-dd), two (yyyy-mm-dd, yyyy-mm-dd), -or three (yyyy, mm, dd) variables.} } \value{ A \code{mdate} class object @@ -69,14 +63,6 @@ there is also a direct method for converting text or character strings to \code{ The function can also extract dates from text, though this is a work-in-progress and currently only works in English. } -\details{ -If three date variables are passed to \code{make_messydate()}, -function will create a single date (yyyy-mm-dd) from it. -If two date variables are passed to \code{make_messydate()}, -function will create a range of dates from it (yyyy-mm-dd..yyyy-mm-dd). -If one date variable is passed to \code{make_messydate()}, -function defaults to \code{as_messydate()}. -} \section{Functions}{ \itemize{ \item \code{as_messydate()}: Core \code{mdate} class coercion function @@ -94,8 +80,6 @@ function defaults to \code{as_messydate()}. \item \code{as_messydate(list)}: Coerce list date objects to the most concise representation of \code{mdate} class -\item \code{make_messydate()}: Composes \code{mdate} from multiple variables - }} \examples{ as_messydate("2021") @@ -117,5 +101,4 @@ as_messydate(c("010221", "01022021"), resequence = "dmy") as_messydate(list(c("2012-06-01", "2012-06-02", "2012-06-03"))) as_messydate(list(c("2012-06-01", "2012-06-02", "2012-06-03", "{2012-06-01, 2012-06-02, 2012-06-03}", "2012-06-01", "2012-06-03"))) -make_messydate("2010", "10", "10") } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 386c7007..4f2e0ba4 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -38,10 +38,9 @@ reference: - title: "Coerce to" desc: "These functions construct and/or coerce dates to the `mdate` class:" contents: - - class - - messydate + - starts_with("class_") - component_annotate - - duration_class + - coerce_to - title: "Coerce from" desc: "These functions coerce dates from the `mdate` class into a single `Date`:" contents: diff --git a/tests/testthat/test-coerce-from.R b/tests/testthat/test-coerce_from.R similarity index 100% rename from tests/testthat/test-coerce-from.R rename to tests/testthat/test-coerce_from.R diff --git a/tests/testthat/test-coerce-to.R b/tests/testthat/test-coerce_to.R similarity index 100% rename from tests/testthat/test-coerce-to.R rename to tests/testthat/test-coerce_to.R From 736a6dfaba8f517ce02da9b6217bdac103315134 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 28 Feb 2025 10:15:19 +0100 Subject: [PATCH 06/27] Sequence method now gets own script --- R/convert_sequence.R | 63 +++++++++++++++++++ ...{messy-sequence.Rd => convert_sequence.Rd} | 6 +- 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 R/convert_sequence.R rename man/{messy-sequence.Rd => convert_sequence.Rd} (88%) diff --git a/R/convert_sequence.R b/R/convert_sequence.R new file mode 100644 index 00000000..54cc6501 --- /dev/null +++ b/R/convert_sequence.R @@ -0,0 +1,63 @@ +#' Sequence method for messydates +#' @description +#' This function provides a sequence (`seq()`) method for messydates. +#' This can be used with ranges or unspecified dates, +#' and is particularly useful for defining a sequence of dates +#' before the common era or between eras. +#' @name convert_sequence +#' @param from A messydate or range. +#' If 'from' is a range and 'to' is not specified, +#' 'from' will be the minimum of the range and 'to' will be maximum. +#' @param to A messydate. +#' @param by Increment of the sequence. By default "days". +#' @param ... Arguments passed to or from methods. +#' @examples +#' seq(mdate("-0001-12-20"), mdate("0001-01-10")) +#' @export +seq.mdate <- function(from, to, by = "days", ...) { + + if(missing(to) & !is_precise(from)){ + to <- max(from) + from <- min(from) + } + + # straight forward sequence + if(!any(is_bce(c(from, to)))){ + seq(as.Date(from), as.Date(to), by = by) + } else { + + fromp <- as.Date(stringi::stri_replace_first_regex(from, "^-", "")) + # sequence before common era + if(is_bce(to)){ + top <- as.Date(stringi::stri_replace_first_regex(to, "^-", "")) + .neg_seqs(fromp, top, by = by) + } else { + # sequence between eras + zero_padding(c(.neg_seqs(fromp, as.Date("0001-12-31"), by = by), + as.character(seq(as.Date("0001-01-01"), as.Date(to), by = by)))) + # zero_padding(c(rev(paste0("-", seq(as.Date("0001-01-01"), fromp, by = by))), + # as.character(seq(as.Date("0001-01-01"), as.Date(to), by = by)))) + } + } +} + +.neg_seqs <- function(fromp, top, by = "days"){ + if(year(fromp) == year(top)){ + zero_padding(paste0("-", seq(min(c(fromp, top)), + max(c(fromp,top)), by = by))) + } else { + strt <- max(c(fromp, top)) + ends <- min(c(fromp, top)) + strt_yr <- year(strt) + strt_sq <- seq(as.Date(strt), as.Date(paste0(strt_yr,"-12-31")), by = by) + ends_yr <- year(ends) + ends_sq <- seq(as.Date(paste0(ends_yr, "-01-01")), as.Date(ends), by = by) + if(strt_yr - ends_yr > 1){ + mids_sq <- seq(as.Date(paste0(ends_yr+1, "-01-01")), + as.Date(paste0(strt_yr-1,"-12-31")), by = by) + if(length(unique(year(mids_sq)))>1) + mids_sq <- mids_sq[order(year(mids_sq), decreasing = TRUE)] + zero_padding(paste0("-", c(strt_sq, mids_sq, ends_sq))) + } else zero_padding(paste0("-", c(strt_sq, ends_sq))) + } +} diff --git a/man/messy-sequence.Rd b/man/convert_sequence.Rd similarity index 88% rename from man/messy-sequence.Rd rename to man/convert_sequence.Rd index 72c2fcf6..eb68a8a8 100644 --- a/man/messy-sequence.Rd +++ b/man/convert_sequence.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class.R -\name{messy-sequence} -\alias{messy-sequence} +% Please edit documentation in R/convert_sequence.R +\name{convert_sequence} +\alias{convert_sequence} \alias{seq.mdate} \title{Sequence method for messydates} \usage{ From 9cf137c839351100ba5f058d97ef40e2a13b29db Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 28 Feb 2025 10:15:41 +0100 Subject: [PATCH 07/27] Consolidated operate_ scripts and tests --- R/{set.R => operate_set.R} | 22 ++++----- R/{operate_logical.R => operate_statements.R} | 40 +++++++++------- man/operate_operators.Rd | 47 +++++++++++++++++++ man/{set.Rd => operate_set.Rd} | 6 +-- ...logical_tests.Rd => operate_statements.Rd} | 43 ++--------------- pkgdown/_pkgdown.yml | 7 +-- ...l-operators.R => test-operate_operators.R} | 0 .../{test-set.R => test-operate_set.R} | 0 ...statements.R => test-operate_statements.R} | 0 9 files changed, 91 insertions(+), 74 deletions(-) rename R/{set.R => operate_set.R} (68%) rename R/{operate_logical.R => operate_statements.R} (82%) create mode 100644 man/operate_operators.Rd rename man/{set.Rd => operate_set.Rd} (92%) rename man/{logical_tests.Rd => operate_statements.Rd} (64%) rename tests/testthat/{test-operate_logical-operators.R => test-operate_operators.R} (100%) rename tests/testthat/{test-set.R => test-operate_set.R} (100%) rename tests/testthat/{test-operate_logical-statements.R => test-operate_statements.R} (100%) diff --git a/R/set.R b/R/operate_set.R similarity index 68% rename from R/set.R rename to R/operate_set.R index c3de04e6..0e87f27d 100644 --- a/R/set.R +++ b/R/operate_set.R @@ -1,20 +1,20 @@ #' Set operations for messy dates -#' -#' Performs intersection (`md_intersect()`) and union (`md_union()`) on, -#' inter alia, messy date class objects. -#' For a more typical 'join' that retains all elements, even if duplicated, -#' please use `md_multiset`. -#' @name set +#' @description +#' Performs intersection (`md_intersect()`) and union (`md_union()`) on, +#' inter alia, messy date class objects. +#' For a more typical 'join' that retains all elements, even if duplicated, +#' please use `md_multiset`. +#' @name operate_set #' @param e1,e2 Messy date or other class objects #' @return A vector of the same mode for `intersect`, -#' or a common mode for union. +#' or a common mode for union. NULL -#' @rdname set +#' @rdname operate_set #' @export `%intersect%` <- function(e1, e2) UseMethod("%intersect%") -#' @describeIn set Find intersection of sets of messy dates +#' @describeIn operate_set Find intersection of sets of messy dates #' @examples #' as_messydate("2012-01-01..2012-01-20") %intersect% as_messydate("2012-01") #' @export @@ -29,11 +29,11 @@ evalqOnLoad({ registerS3method("%intersect%", "POSIXt", `%intersect%.mdate`) }) -#' @rdname set +#' @rdname operate_set #' @export `%union%` <- function(e1, e2) UseMethod("%union%") -#' @describeIn set Find intersection of sets of messy dates +#' @describeIn operate_set Find intersection of sets of messy dates #' @examples #' as_messydate("2012-01-01..2012-01-20") %union% as_messydate("2012-01") #' @export diff --git a/R/operate_logical.R b/R/operate_statements.R similarity index 82% rename from R/operate_logical.R rename to R/operate_statements.R index ae10ab03..ebb84ac0 100644 --- a/R/operate_logical.R +++ b/R/operate_statements.R @@ -1,12 +1,14 @@ -#' Logical tests on messy dates -#' -#' These functions provide various logical tests for messy date objects. -#' @name logical_tests +# Statements #### + +#' Logical statements on messy dates +#' @description +#' These functions provide various logical statements about messy date objects. +#' @name operate_statements #' @param x,y,e1,e2 `mdate` or other class objects #' @return A logical vector the same length as the `mdate` passed. NULL -#' @describeIn logical_tests tests whether the object inherits the `mdate` class. +#' @describeIn operate_statements tests whether the object inherits the `mdate` class. #' If more rigorous validation is required, see `validate_messydate()`. #' @examples #' is_messydate(as_messydate("2012-01-01")) @@ -16,7 +18,7 @@ is_messydate <- function(x) { inherits(x, "mdate") } -#' @describeIn logical_tests tests whether there is any intersection between +#' @describeIn operate_statements tests whether there is any intersection between #' two messy dates, leveraging `intersect()`. #' @examples #' is_intersecting(as_messydate("2012-01"), @@ -28,7 +30,7 @@ is_intersecting <- function(x, y) { length(intersect(unlist(expand(x)), unlist(expand(y)))) > 0 } -#' @describeIn logical_tests tests whether one or more messy date can be found +#' @describeIn operate_statements tests whether one or more messy date can be found #' within a messy date range or set. #' @examples #' is_subset(as_messydate("2012-01-01"), as_messydate("2012-01")) @@ -41,7 +43,7 @@ is_subset <- function(x, y) { any(is.element(x, y)) } -#' @describeIn logical_tests tests whether two dates contain similar components. +#' @describeIn operate_statements tests whether two dates contain similar components. #' This can be useful for identifying dates that may be typos of one another. #' @examples #' is_similar(as_messydate("2012-06-02"), as_messydate("2012-02-06")) @@ -51,7 +53,7 @@ is_similar <- function(x, y) { year(x) == year(y) & month(x) == day(y) & day(x) == month(y) } -#' @describeIn logical_tests tests whether a date is precise (i.e. an 8 digit date). +#' @describeIn operate_statements tests whether a date is precise (i.e. an 8 digit date). #' Non-precise dates contain markers that they are approximate (i.e. ~), #' unreliable (i.e. ?), are incomplete dates (i.e. year only), #' or date ranges and sets. @@ -63,7 +65,7 @@ is_precise <- function(x) { |^-[:digit:]{4}-[:digit:]{2}-[:digit:]{2}$") } -#' @describeIn logical_tests tests whether a date is uncertain (i.e. contains ?). +#' @describeIn operate_statements tests whether a date is uncertain (i.e. contains ?). #' @examples #' is_uncertain(as_messydate(c("2012-06-02", "2012-06-02?"))) #' @export @@ -71,7 +73,7 @@ is_uncertain <- function(x) { stringi::stri_detect_regex(x, "\\?|\\%") } -#' @describeIn logical_tests tests whether a date is approximate (i.e. contains ~). +#' @describeIn operate_statements tests whether a date is approximate (i.e. contains ~). #' @examples #' is_approximate(as_messydate(c("2012-06-02~", "2012-06-02"))) #' @export @@ -79,7 +81,7 @@ is_approximate <- function(x) { stringi::stri_detect_regex(x, "\\~|\\%") } -#' @describeIn logical_tests tests whether one or more messy dates are found +#' @describeIn operate_statements tests whether one or more messy dates are found #' before the common era. #' @examples #' is_bce(as_messydate(c("2012-06-02", "-2012-06-02"))) @@ -88,7 +90,13 @@ is_bce <- function(x) { stringi::stri_detect_regex(x, "^-") } -#' @describeIn logical_tests tests whether the dates in the first vector precede +# Operators #### + +#' Logical operations on messy dates +#' @name operate_operators +NULL + +#' @describeIn operate_operators tests whether the dates in the first vector precede #' the dates in the second vector. #' Returns `NA` when the date order can't be determined. #' @examples @@ -150,7 +158,7 @@ numeric_time_ranges <- function(e1, e2) { ) } -#' @describeIn logical_tests tests whether the dates in the first vector +#' @describeIn operate_operators tests whether the dates in the first vector #' succeed the dates in the second vector. #' Returns `NA` when the date order can't be determined. #' @export @@ -171,7 +179,7 @@ evalqOnLoad({ registerS3method(">", "POSIXt", `>.mdate`) }) -#' @describeIn logical_tests tests whether the dates in the first vector are +#' @describeIn operate_operators tests whether the dates in the first vector are #' equal to or precede the dates in the second vector. #' Returns `NA` when the date order can't be determined. #' @export @@ -190,7 +198,7 @@ evalqOnLoad({ registerS3method("<=", "POSIXt", `<=.mdate`) }) -#' @describeIn logical_tests tests whether the dates in the first vector are equal to +#' @describeIn operate_operators tests whether the dates in the first vector are equal to #' or succeed the dates in the second vector. #' Returns `NA` when the date order can't be determined. #' @export diff --git a/man/operate_operators.Rd b/man/operate_operators.Rd new file mode 100644 index 00000000..b8c1b04a --- /dev/null +++ b/man/operate_operators.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/operate_statements.R +\name{operate_operators} +\alias{operate_operators} +\alias{<.mdate} +\alias{>.mdate} +\alias{<=.mdate} +\alias{>=.mdate} +\title{Logical operations on messy dates} +\usage{ +\method{<}{mdate}(e1, e2) + +\method{>}{mdate}(e1, e2) + +\method{<=}{mdate}(e1, e2) + +\method{>=}{mdate}(e1, e2) +} +\description{ +Logical operations on messy dates +} +\section{Functions}{ +\itemize{ +\item \code{ < }: tests whether the dates in the first vector precede +the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +\item \code{ > }: tests whether the dates in the first vector +succeed the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +\item \code{ <= }: tests whether the dates in the first vector are +equal to or precede the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +\item \code{ >= }: tests whether the dates in the first vector are equal to +or succeed the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +}} +\examples{ +as_messydate("2012-06-02") > as.Date("2012-06-01") # TRUE +# 2012-06-XX could mean 2012-06-03, so unknown if it comes before 2012-06-02 +as_messydate("2012-06-XX") < as.Date("2012-06-02") # NA +# But 2012-06-XX cannot be before 2012-06-01 +as_messydate("2012-06-XX") >= as.Date("2012-06-01") # TRUE +} diff --git a/man/set.Rd b/man/operate_set.Rd similarity index 92% rename from man/set.Rd rename to man/operate_set.Rd index cb4678cc..8ce6dbab 100644 --- a/man/set.Rd +++ b/man/operate_set.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/set.R -\name{set} -\alias{set} +% Please edit documentation in R/operate_set.R +\name{operate_set} +\alias{operate_set} \alias{\%intersect\%} \alias{\%intersect\%.mdate} \alias{\%union\%} diff --git a/man/logical_tests.Rd b/man/operate_statements.Rd similarity index 64% rename from man/logical_tests.Rd rename to man/operate_statements.Rd index 08bc7ade..4af7564e 100644 --- a/man/logical_tests.Rd +++ b/man/operate_statements.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/operate_logical.R -\name{logical_tests} -\alias{logical_tests} +% Please edit documentation in R/operate_statements.R +\name{operate_statements} +\alias{operate_statements} \alias{is_messydate} \alias{is_intersecting} \alias{is_subset} @@ -10,11 +10,7 @@ \alias{is_uncertain} \alias{is_approximate} \alias{is_bce} -\alias{<.mdate} -\alias{>.mdate} -\alias{<=.mdate} -\alias{>=.mdate} -\title{Logical tests on messy dates} +\title{Logical statements on messy dates} \usage{ is_messydate(x) @@ -31,14 +27,6 @@ is_uncertain(x) is_approximate(x) is_bce(x) - -\method{<}{mdate}(e1, e2) - -\method{>}{mdate}(e1, e2) - -\method{<=}{mdate}(e1, e2) - -\method{>=}{mdate}(e1, e2) } \arguments{ \item{x, y, e1, e2}{\code{mdate} or other class objects} @@ -47,7 +35,7 @@ is_bce(x) A logical vector the same length as the \code{mdate} passed. } \description{ -These functions provide various logical tests for messy date objects. +These functions provide various logical statements about messy date objects. } \section{Functions}{ \itemize{ @@ -75,22 +63,6 @@ or date ranges and sets. \item \code{is_bce()}: tests whether one or more messy dates are found before the common era. -\item \code{ < }: tests whether the dates in the first vector precede -the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -\item \code{ > }: tests whether the dates in the first vector -succeed the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -\item \code{ <= }: tests whether the dates in the first vector are -equal to or precede the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -\item \code{ >= }: tests whether the dates in the first vector are equal to -or succeed the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - }} \examples{ is_messydate(as_messydate("2012-01-01")) @@ -108,9 +80,4 @@ is_precise(as_messydate(c("2012-06-02", "2012-06"))) is_uncertain(as_messydate(c("2012-06-02", "2012-06-02?"))) is_approximate(as_messydate(c("2012-06-02~", "2012-06-02"))) is_bce(as_messydate(c("2012-06-02", "-2012-06-02"))) -as_messydate("2012-06-02") > as.Date("2012-06-01") # TRUE -# 2012-06-XX could mean 2012-06-03, so unknown if it comes before 2012-06-02 -as_messydate("2012-06-XX") < as.Date("2012-06-02") # NA -# But 2012-06-XX cannot be before 2012-06-01 -as_messydate("2012-06-XX") >= as.Date("2012-06-01") # TRUE } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 4f2e0ba4..1788d562 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -54,13 +54,8 @@ reference: - title: "Operations" desc: "These methods help operate on objects of the `mdate` class:" contents: - - operate - - set - - logical_tests - - is_messydate + - starts_with("operate_") - component_extract - - proportional - - messy-sequence - title: "Data" desc: "Working with 'messy' data:" contents: diff --git a/tests/testthat/test-operate_logical-operators.R b/tests/testthat/test-operate_operators.R similarity index 100% rename from tests/testthat/test-operate_logical-operators.R rename to tests/testthat/test-operate_operators.R diff --git a/tests/testthat/test-set.R b/tests/testthat/test-operate_set.R similarity index 100% rename from tests/testthat/test-set.R rename to tests/testthat/test-operate_set.R diff --git a/tests/testthat/test-operate_logical-statements.R b/tests/testthat/test-operate_statements.R similarity index 100% rename from tests/testthat/test-operate_logical-statements.R rename to tests/testthat/test-operate_statements.R From 0a177340a760353b051eba1876fd6d2b89f95301 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Sat, 1 Mar 2025 08:03:09 -0600 Subject: [PATCH 08/27] Added vmin.mdate() and vmax.mdate() instead of recursive argument on min.mdate() and max.mdate() (pity pmin() etc are not S3 methods) --- R/coerce_resolve.R | 95 +++++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 34 deletions(-) diff --git a/R/coerce_resolve.R b/R/coerce_resolve.R index 6d65a3f0..50baf20e 100644 --- a/R/coerce_resolve.R +++ b/R/coerce_resolve.R @@ -1,13 +1,13 @@ #' Resolves messy dates into a single value -#' -#' This collection of S3 methods 'resolve' messy dates into a single date -#' according to some explicit bias, -#' such as returning the minimum or maximum date, -#' the mean, median, or modal date, -#' or a random date from among the possible resolutions for each messy date. -#' If the date is not 'messy' (i.e. has no annotations) -#' then just that precise date is returned. -#' This can be useful for various descriptive or inferential projects. +#' @description +#' This collection of S3 methods 'resolve' messy dates into a single date +#' according to some explicit bias, +#' such as returning the minimum or maximum date, +#' the mean, median, or modal date, +#' or a random date from among the possible resolutions for each messy date. +#' If the date is not 'messy' (i.e. has no annotations) +#' then just that precise date is returned. +#' This can be useful for various descriptive or inferential projects. #' @param ... a mdate object #' @param na.rm Should NAs be removed? True by default. #' @importFrom stringi stri_detect_regex stri_replace_all_regex @@ -19,34 +19,41 @@ #' d #' @name coerce_resolve NULL -#> NULL #' @rdname coerce_resolve -#' @param recursive If recursive = TRUE, then the dates will be resolved -#' to a single date. If recursive = FALSE, then the dates will be resolved -#' to a vector the length of the original vector. -#' By default FALSE. +#' @export +vmin <- function(..., na.rm = FALSE) UseMethod("vmin") + +#' @rdname coerce_resolve +#' @examples +#' vmin(d) +#' @export +vmin.mdate <- function(..., na.rm = TRUE){ + d <- list(...)[[1]] + dates <- d + if(na.rm) dates <- na.omit(d) + dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") + dates <- .remove_post(dates) + dates <- .replace_earliest(dates) + mdate(dates) +} + +#' @rdname coerce_resolve #' @examples #' min(d) #' @export -min.mdate <- function(..., na.rm = TRUE, recursive = FALSE){ +min.mdate <- function(..., na.rm = TRUE){ d <- list(...)[[1]] - dates <- stringi::stri_replace_all_regex(d, "~|\\?", "") + dates <- d + if(na.rm) dates <- na.omit(d) + dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") dates <- .remove_post(dates) dates <- .replace_earliest(dates) dates <- mdate(dates) - # if(any(stringi::stri_detect_regex(dates, "^~"))) - # dates <- expand_approximate_years(dates, approx_range = approx_range) - # if(any(stringi::stri_detect_regex(dates, "[:digit:]~"))){ - # dates <- expand_approximate_months(dates, approx_range = approx_range) - # dates <- expand_approximate_days(dates, approx_range = approx_range) - # } - if(recursive){ - if(any(is_bce(dates))) - dates[is_bce(dates)][order(as.character(dates[is_bce(dates)]), - decreasing = TRUE)][1] else - dates[order(as.character(dates))==1] - } else dates + if(any(is_bce(dates))) + dates[is_bce(dates)][order(as.character(dates[is_bce(dates)]), + decreasing = TRUE)][1] else + dates[order(as.character(dates))==1] } .remove_post <- function(dates){ @@ -68,11 +75,34 @@ min.mdate <- function(..., na.rm = TRUE, recursive = FALSE){ dates } +#' @rdname coerce_resolve +#' @export +vmax <- function(..., na.rm = FALSE) UseMethod("vmax") + #' @rdname coerce_resolve #' @examples +#' vmax(d) +#' @export +vmax.mdate <- function(..., na.rm = TRUE){ + d <- list(...)[[1]] + dates <- d + if(na.rm) dates <- na.omit(d) + dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") + dates <- unspecified_months(dates) + dates <- .remove_pre(dates) + dates <- .replace_latest(dates) + mdate(dates) +} + +#' @rdname coerce_resolve +#' @param recursive If recursive = TRUE, then the dates will be resolved +#' to a single date. If recursive = FALSE, then the dates will be resolved +#' to a vector the length of the original vector. +#' By default FALSE. +#' @examples #' max(d) #' @export -max.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { +max.mdate <- function(..., na.rm = TRUE) { d <- list(...)[[1]] dates <- stringi::stri_replace_all_regex(d, "~|\\?", "") @@ -80,13 +110,10 @@ max.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { dates <- .remove_pre(dates) dates <- .replace_latest(dates) dates <- mdate(dates) - - if(recursive){ - if(all(is_bce(dates), na.rm = TRUE)) - dates[order(dates, decreasing = TRUE)][1] else + if(all(is_bce(dates), na.rm = TRUE)) + dates[order(dates, decreasing = TRUE)][1] else dates[!is_bce(dates)][order(as.character(dates[!is_bce(dates)]), decreasing = TRUE)][1] - } else dates } From ceb214b40967778f9f2435f8dfe7b87714c096e0 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Sat, 1 Mar 2025 08:03:49 -0600 Subject: [PATCH 09/27] Fixed precision to return a vector of precisions, users can sum if they wish --- R/component_extract.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/component_extract.R b/R/component_extract.R index 5c71e578..aa06a327 100644 --- a/R/component_extract.R +++ b/R/component_extract.R @@ -50,7 +50,6 @@ day <- function(x) { #' precision(as_messydate(c("2012-02-03","2012","2012-02"))) #' @export precision <- function(x) { - x <- expand(x) - out <- sum(lengths(x)) - out + out <- expand(x) + lengths(out) } From 40bc3dd894d87270bf06c44411a62113269844e5 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Sat, 1 Mar 2025 08:04:37 -0600 Subject: [PATCH 10/27] Dropped median.mdate(), modal.mdate(), and mean.mdate() for now until faster solutions can be found --- R/coerce_resolve.R | 176 +++++++++++++-------------- man/coerce_resolve.Rd | 39 +++--- tests/testthat/test-coerce_resolve.R | 34 +++--- 3 files changed, 122 insertions(+), 127 deletions(-) diff --git a/R/coerce_resolve.R b/R/coerce_resolve.R index 50baf20e..7f9df80a 100644 --- a/R/coerce_resolve.R +++ b/R/coerce_resolve.R @@ -132,94 +132,94 @@ max.mdate <- function(..., na.rm = TRUE) { dates } -#' @rdname coerce_resolve -#' @importFrom stats median -#' @examples -#' median(d) -#' @export -median.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { - - x <- as.list(...) - y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), - recursive = recursive) - if(recursive){ - if (length(y) %% 2 == 0) { - as.character(median(unlist(y[-1]), na.rm = na.rm)) - } - else{ - as.character(median(y, na.rm = na.rm)) - } - } else { - unlist(lapply(y, function(z) { - if (length(z) %% 2 == 0) { - z <- unlist(z[-1]) - z <- as.character(median(z, na.rm = na.rm)) - z - } - else{ - z <- as.character(median(z, na.rm = na.rm)) - z - } - }), recursive = FALSE) - } -} - -#' @rdname coerce_resolve -#' @param trim the fraction (0 to 0.5) of observations to be trimmed -#' from each end of x before the mean is computed. -#' Values of trim outside that range are taken as the nearest endpoint. -#' @importFrom lubridate as_date -#' @examples -#' mean(d) -#' @export -mean.mdate <- function(..., trim = 0, na.rm = TRUE, recursive = FALSE) { - x <- as.list(...) - y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), - recursive = recursive) - if(recursive){ - if (length(y) > 1 & stringi::stri_detect_regex(y[1], "^-", negate = TRUE)) { - y <- as.character(mean(as.Date(y), trim = 0, na.rm = TRUE)) - } - if (length(y) > 1 & stringi::stri_detect_regex(y[1], "^-")) { - y <- paste0("-", as.character(mean(lubridate::as_date(y), - trim = 0, na.rm = TRUE))) - y <- zero_padding(y) - } - y - } else { - unlist(lapply(y, function(x) { - if (length(x) > 1 & stringi::stri_detect_regex(x[1], "^-", negate = TRUE)) { - x <- as.character(mean(as.Date(x), trim = 0, na.rm = TRUE)) - } - if (length(x) > 1 & stringi::stri_detect_regex(x[1], "^-")) { - x <- paste0("-", as.character(mean(lubridate::as_date(x), - trim = 0, na.rm = TRUE))) - x <- zero_padding(x) - } - x - }), recursive = FALSE) - } -} - -#' @rdname coerce_resolve -#' @export -modal <- function(..., na.rm = FALSE, recursive = FALSE) UseMethod("modal") - -#' @rdname coerce_resolve -#' @examples -#' modal(d) -#' @export -modal.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { - - d <- list(...)[[1]] - getmode <- function(v) { - uniqv <- unique(v) - uniqv[which.max(tabulate(match(v, uniqv)))] - } - d <- purrr::map_chr(expand(d), function(y) getmode(y)) - if(recursive) d <- as.character(getmode(d)) - d -} +# #' @rdname coerce_resolve +# #' @importFrom stats median +# #' @examples +# #' median(d) +# #' @export +# median.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { +# +# x <- as.list(...) +# y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), +# recursive = recursive) +# if(recursive){ +# if (length(y) %% 2 == 0) { +# as.character(median(unlist(y[-1]), na.rm = na.rm)) +# } +# else{ +# as.character(median(y, na.rm = na.rm)) +# } +# } else { +# unlist(lapply(y, function(z) { +# if (length(z) %% 2 == 0) { +# z <- unlist(z[-1]) +# z <- as.character(median(z, na.rm = na.rm)) +# z +# } +# else{ +# z <- as.character(median(z, na.rm = na.rm)) +# z +# } +# }), recursive = FALSE) +# } +# } + +# #' @rdname coerce_resolve +# #' @param trim the fraction (0 to 0.5) of observations to be trimmed +# #' from each end of x before the mean is computed. +# #' Values of trim outside that range are taken as the nearest endpoint. +# #' @importFrom lubridate as_date +# #' @examples +# #' mean(d) +# #' @export +# mean.mdate <- function(..., trim = 0, na.rm = TRUE, recursive = FALSE) { +# x <- as.list(...) +# y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), +# recursive = recursive) +# if(recursive){ +# if (length(y) > 1 & stringi::stri_detect_regex(y[1], "^-", negate = TRUE)) { +# y <- as.character(mean(as.Date(y), trim = 0, na.rm = TRUE)) +# } +# if (length(y) > 1 & stringi::stri_detect_regex(y[1], "^-")) { +# y <- paste0("-", as.character(mean(lubridate::as_date(y), +# trim = 0, na.rm = TRUE))) +# y <- zero_padding(y) +# } +# y +# } else { +# unlist(lapply(y, function(x) { +# if (length(x) > 1 & stringi::stri_detect_regex(x[1], "^-", negate = TRUE)) { +# x <- as.character(mean(as.Date(x), trim = 0, na.rm = TRUE)) +# } +# if (length(x) > 1 & stringi::stri_detect_regex(x[1], "^-")) { +# x <- paste0("-", as.character(mean(lubridate::as_date(x), +# trim = 0, na.rm = TRUE))) +# x <- zero_padding(x) +# } +# x +# }), recursive = FALSE) +# } +# } + +# #' @rdname coerce_resolve +# #' @export +# modal <- function(..., na.rm = FALSE, recursive = FALSE) UseMethod("modal") + +# #' @rdname coerce_resolve +# #' @examples +# #' modal(d) +# #' @export +# modal.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { +# +# d <- list(...)[[1]] +# getmode <- function(v) { +# uniqv <- unique(v) +# uniqv[which.max(tabulate(match(v, uniqv)))] +# } +# d <- purrr::map_chr(expand(d), function(y) getmode(y)) +# if(recursive) d <- as.character(getmode(d)) +# d +# } #' @rdname coerce_resolve #' @param size a non-negative integer giving the number of items to choose. diff --git a/man/coerce_resolve.Rd b/man/coerce_resolve.Rd index 0144fddd..abfb6902 100644 --- a/man/coerce_resolve.Rd +++ b/man/coerce_resolve.Rd @@ -2,27 +2,27 @@ % Please edit documentation in R/coerce_resolve.R \name{coerce_resolve} \alias{coerce_resolve} +\alias{vmin} +\alias{vmin.mdate} \alias{min.mdate} +\alias{vmax} +\alias{vmax.mdate} \alias{max.mdate} -\alias{median.mdate} -\alias{mean.mdate} -\alias{modal} -\alias{modal.mdate} \alias{random} \alias{random.mdate} \title{Resolves messy dates into a single value} \usage{ -\method{min}{mdate}(..., na.rm = TRUE, recursive = FALSE) +vmin(..., na.rm = FALSE) -\method{max}{mdate}(..., na.rm = TRUE, recursive = FALSE) +\method{vmin}{mdate}(..., na.rm = TRUE) -\method{median}{mdate}(..., na.rm = TRUE, recursive = FALSE) +\method{min}{mdate}(..., na.rm = TRUE) -\method{mean}{mdate}(..., trim = 0, na.rm = TRUE, recursive = FALSE) +vmax(..., na.rm = FALSE) -modal(..., na.rm = FALSE, recursive = FALSE) +\method{vmax}{mdate}(..., na.rm = TRUE) -\method{modal}{mdate}(..., na.rm = TRUE, recursive = FALSE) +\method{max}{mdate}(..., na.rm = TRUE) random(..., size, replace = FALSE, prob = NULL, recursive = FALSE) @@ -33,21 +33,17 @@ random(..., size, replace = FALSE, prob = NULL, recursive = FALSE) \item{na.rm}{Should NAs be removed? True by default.} -\item{recursive}{If recursive = TRUE, then the dates will be resolved -to a single date. If recursive = FALSE, then the dates will be resolved -to a vector the length of the original vector. -By default FALSE.} - -\item{trim}{the fraction (0 to 0.5) of observations to be trimmed -from each end of x before the mean is computed. -Values of trim outside that range are taken as the nearest endpoint.} - \item{size}{a non-negative integer giving the number of items to choose.} \item{replace}{should sampling be with replacement?} \item{prob}{a vector of probability weights for obtaining the elements of the vector being sampled.} + +\item{recursive}{If recursive = TRUE, then the dates will be resolved +to a single date. If recursive = FALSE, then the dates will be resolved +to a vector the length of the original vector. +By default FALSE.} } \value{ A single scalar or vector of dates @@ -67,10 +63,9 @@ d <- as_messydate(c("2008-03-25", "?2012-02-27", "2001-01?", "2001~", "2001-01-01..2001-02-02", "{2001-01-01,2001-02-02}", "{2001-01,2001-02-02}", "2008-XX-31", "-0050-01-01")) d +vmin(d) min(d) +vmax(d) max(d) -median(d) -mean(d) -modal(d) random(d) } diff --git a/tests/testthat/test-coerce_resolve.R b/tests/testthat/test-coerce_resolve.R index 7ca2a042..a221ee16 100644 --- a/tests/testthat/test-coerce_resolve.R +++ b/tests/testthat/test-coerce_resolve.R @@ -4,29 +4,29 @@ test_dates <- c(range = as_messydate("2014-01-01..2014-01-05"), # test_dates <- lapply(test_dates, as_messydate) test_that("Min resolving works properly", { - expect_equal(as.character(min(test_dates)), + expect_equal(as.character(vmin(test_dates)), c("2014-01-01","1999-01-01","-1004-02-01")) }) test_that("Max resolving works properly", { - expect_equal(as.character(max(test_dates)), + expect_equal(as.character(vmax(test_dates)), c("2014-01-05","1999-12-31","-1004-02-29")) }) -test_that("Median resolving works properly", { - expect_equal(as.character(median(test_dates)), - c("2014-01-03","1999-07-02","-1004-02-15")) -}) - -test_that("Mean resolving works properly", { - expect_equal(as.character(mean(test_dates)), - c("2014-01-03","1999-07-02","-1004-02-15")) -}) +# test_that("Median resolving works properly", { +# expect_equal(as.character(median(test_dates)), +# c("2014-01-03","1999-07-02","-1004-02-15")) +# }) +# +# test_that("Mean resolving works properly", { +# expect_equal(as.character(mean(test_dates)), +# c("2014-01-03","1999-07-02","-1004-02-15")) +# }) -test_that("Modal resolving works properly", { - expect_equal(as.character(modal(test_dates)), - c("2014-01-01","1999-01-01","-1004-02-01")) -}) +# test_that("Modal resolving works properly", { +# expect_equal(as.character(modal(test_dates)), +# c("2014-01-01","1999-01-01","-1004-02-01")) +# }) test_that("Random resolving works properly", { expect_length(random(test_dates), 3) @@ -68,11 +68,11 @@ test_that("Random resolving works properly", { # # expect_length(random(negative), 1) # }) -test_that("resolve adds zero padding when appropriate", { +test_that("as_mdate adds zero padding when appropriate", { # expect_equal(as_messydate(min(as_messydate("209-12-31"))), # as_messydate("0209-12-31")) # expect_equal(as_messydate(max(as_messydate("-29-12-31"))), # as_messydate("-0029-12-31")) - expect_equal(as_messydate(mean(as_messydate(c("-29-12-31", "193-02-02", "2010-10-10")))), + expect_equal(as_messydate(c("-29-12-31", "193-02-02", "2010-10-10")), as_messydate(c("-0029-12-31", "0193-02-02", "2010-10-10"))) }) From 5733d5a46d1cf46e6b9f85e6d01e5f0269489547 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Sat, 1 Mar 2025 08:05:07 -0600 Subject: [PATCH 11/27] Using vmin() and vmax() throughout --- NAMESPACE | 9 ++++----- R/coerce_from_messydate.R | 6 +++--- R/operate_statements.R | 8 ++++---- man/coerce_from.Rd | 6 +++--- tests/testthat/test-coerce_from.R | 6 +++--- .../{test-convert-contract.R => test-convert_contract.R} | 0 .../{test-convert-expand.R => test-convert_expand.R} | 0 7 files changed, 17 insertions(+), 18 deletions(-) rename tests/testthat/{test-convert-contract.R => test-convert_contract.R} (100%) rename tests/testthat/{test-convert-expand.R => test-convert_expand.R} (100%) diff --git a/NAMESPACE b/NAMESPACE index 26b647ea..025f80ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,17 +33,16 @@ S3method(as_messydate,mdate) S3method(as_messydate,numeric) S3method(c,mdate) S3method(max,mdate) -S3method(mean,mdate) -S3method(median,mdate) S3method(messyduration,character) S3method(messyduration,mdate) S3method(min,mdate) -S3method(modal,mdate) S3method(print,mdate) S3method(print,mdates_duration) S3method(random,mdate) S3method(rep,mdate) S3method(seq,mdate) +S3method(vmax,mdate) +S3method(vmin,mdate) export("%><%") export("%>=<%") export("%g%") @@ -73,7 +72,6 @@ export(md_multiset) export(md_union) export(mdate) export(messyduration) -export(modal) export(month) export(new_messydate) export(new_messyduration) @@ -83,6 +81,8 @@ export(precision) export(random) export(validate_messydate) export(validate_messyduration) +export(vmax) +export(vmin) export(year) importFrom(dplyr,first) importFrom(dplyr,last) @@ -94,7 +94,6 @@ importFrom(lubridate,years) importFrom(lubridate,ymd) importFrom(purrr,map) importFrom(purrr,pmap_chr) -importFrom(stats,median) importFrom(stringi,stri_detect_regex) importFrom(stringi,stri_extract_all_regex) importFrom(stringi,stri_replace_all_fixed) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index d9c1d325..52f1b5ec 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -41,7 +41,7 @@ NULL #' as.Date(as_messydate("1000 BC"), median) #' as.Date(as_messydate(c("-1000", "2020")), min) #' @export -as.Date.mdate <- function(x, FUN = min, ...) { +as.Date.mdate <- function(x, FUN = vmin, ...) { # # fix argument ordering issues # if (missing(FUN)){ # if(length(list(...)) > 0) FUN <- list(...)[[1]] else @@ -57,7 +57,7 @@ as.Date.mdate <- function(x, FUN = min, ...) { #' @rdname coerce_from #' @export -as.POSIXct.mdate <- function(x, FUN = min, ...) { +as.POSIXct.mdate <- function(x, FUN = vmin, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] x <- FUN(x) if (stringi::stri_detect_regex(x, "^-")) { @@ -68,7 +68,7 @@ as.POSIXct.mdate <- function(x, FUN = min, ...) { #' @rdname coerce_from #' @export -as.POSIXlt.mdate <- function(x, FUN = min, ...) { +as.POSIXlt.mdate <- function(x, FUN = vmin, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] x <- FUN(x) if (stringi::stri_detect_regex(x, "^-")) { diff --git a/R/operate_statements.R b/R/operate_statements.R index ebb84ac0..552a4995 100644 --- a/R/operate_statements.R +++ b/R/operate_statements.R @@ -127,8 +127,8 @@ evalqOnLoad({ numeric_time_ranges <- function(e1, e2) { if (is_messydate(e1)) { - min1 <- as.Date(e1, FUN = min) - max1 <- as.Date(e1, FUN = max) + min1 <- as.Date(e1, FUN = vmin) + max1 <- as.Date(e1, FUN = vmax) if (lubridate::is.POSIXt(e2)) { ptz <- lubridate::tz(e2) min1 <- lubridate::force_tz(min1, ptz) @@ -140,8 +140,8 @@ numeric_time_ranges <- function(e1, e2) { min1 <- max1 <- e1 } if (is_messydate(e2)) { - min2 <- as.Date(e2, FUN = min) - max2 <- as.Date(e2, FUN = max) + min2 <- as.Date(e2, FUN = vmin) + max2 <- as.Date(e2, FUN = vmax) if (lubridate::is.POSIXt(e1)) { ptz <- lubridate::tz(e1) min2 <- lubridate::force_tz(min2, ptz) diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index 44f2db69..f77e36e7 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -7,11 +7,11 @@ \alias{as.POSIXlt.mdate} \title{Coercion from messy dates} \usage{ -\method{as.Date}{mdate}(x, FUN = min, ...) +\method{as.Date}{mdate}(x, FUN = vmin, ...) -\method{as.POSIXct}{mdate}(x, FUN = min, ...) +\method{as.POSIXct}{mdate}(x, FUN = vmin, ...) -\method{as.POSIXlt}{mdate}(x, FUN = min, ...) +\method{as.POSIXlt}{mdate}(x, FUN = vmin, ...) } \arguments{ \item{x}{A \code{mdate} object} diff --git a/tests/testthat/test-coerce_from.R b/tests/testthat/test-coerce_from.R index d061fca2..dfde33f1 100644 --- a/tests/testthat/test-coerce_from.R +++ b/tests/testthat/test-coerce_from.R @@ -1,19 +1,19 @@ +messy <- as_messydate("2010-10-10..2010-10-20") ddate <- as.Date("2010-10-10") mdatey <- as_messydate("2010-10-10") -messy <- as_messydate("2010-10-10..2010-10-20") # negative <- min(as_messydate("1000 BC")) test_that("Coercion from other date classes into messydt works", { # expect_equal(as.character(as.Date(as_messydate("1000 BC"), max)), "-1000-12-31") expect_equal(as.Date(messy, FUN = min), ddate) - expect_equal(as.Date(mdatey, FUN = median), ddate) + # expect_equal(as.Date(mdatey, FUN = median), ddate) expect_equal(as.Date(mdatey, FUN = random), ddate) # expect_equal(as.character(as.Date(as_messydate("1000 BC"), min)), min(negative)) }) test_that("Coercion to POSIX works", { expect_equal(as.POSIXct(messy, FUN = max), as.POSIXct("2010-10-20 CEST")) - expect_equal(as.POSIXlt(messy, FUN = mean), as.POSIXlt("2010-10-15 CEST")) + # expect_equal(as.POSIXlt(messy, FUN = mean), as.POSIXlt("2010-10-15 CEST")) }) # neg_dates <- as_messydate(c("-27", "-14")) diff --git a/tests/testthat/test-convert-contract.R b/tests/testthat/test-convert_contract.R similarity index 100% rename from tests/testthat/test-convert-contract.R rename to tests/testthat/test-convert_contract.R diff --git a/tests/testthat/test-convert-expand.R b/tests/testthat/test-convert_expand.R similarity index 100% rename from tests/testthat/test-convert-expand.R rename to tests/testthat/test-convert_expand.R From c26416f7b4056c05f4778edc12874fdbb99ac9c2 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Mar 2025 17:20:22 +0100 Subject: [PATCH 12/27] Rename help some operate help files --- R/operate_arithmetic.R | 6 ++--- R/operate_proportional.R | 14 +++++----- man/{operate.Rd => operate_arithmetic.Rd} | 4 +-- ...roportional.Rd => operate_proportional.Rd} | 27 ++----------------- 4 files changed, 14 insertions(+), 37 deletions(-) rename man/{operate.Rd => operate_arithmetic.Rd} (95%) rename man/{proportional.Rd => operate_proportional.Rd} (80%) diff --git a/R/operate_arithmetic.R b/R/operate_arithmetic.R index cd36d12a..80de0853 100644 --- a/R/operate_arithmetic.R +++ b/R/operate_arithmetic.R @@ -18,17 +18,17 @@ #' as_messydate("2001-01-01..2001-01-04") - as_messydate("2001-01-02") #' #as_messydate("2001-01-01") - as_messydate("2001-01-03") #' } -#' @name operate +#' @name operate_arithmetic NULL -#' @rdname operate +#' @rdname operate_arithmetic #' @export `+.mdate` <- function(e1, e2) { e2 <- parse_date_strings(e2) add(e1, e2) } -#' @rdname operate +#' @rdname operate_arithmetic #' @export `-.mdate` <- function(e1, e2) { e2 <- parse_date_strings(e2) diff --git a/R/operate_proportional.R b/R/operate_proportional.R index 083b5eda..c95b9afa 100644 --- a/R/operate_proportional.R +++ b/R/operate_proportional.R @@ -1,13 +1,13 @@ #' Proportion of messy dates meeting logical test #' #' These functions provide various proportional tests for messy date objects. -#' @name proportional +#' @name operate_proportional #' @param e1,e2 `mdate` or other class objects #' @return The proportion that the comparison is true. #' @return A logical vector the same length as the `mdate` passed. NULL -#' @rdname proportional +#' @rdname operate_proportional #' @export `%l%` <- function(e1, e2) UseMethod("%l%") @@ -34,7 +34,7 @@ evalqOnLoad({ #' @export `%g%` <- function(e1, e2) UseMethod("%g%") -#' @describeIn proportional Tests proportion of dates in the first vector +#' @describeIn operate_proportional Tests proportion of dates in the first vector #' that follow the maximum in the second vector. #' @export #' @examples @@ -57,7 +57,7 @@ evalqOnLoad({ #' @export `%ge%` <- function(e1, e2) UseMethod("%ge%") -#' @describeIn proportional Tests proportion of dates in the first vector +#' @describeIn operate_proportional Tests proportion of dates in the first vector #' that follow or are equal to the maximum in the second vector. #' @export #' @examples @@ -80,7 +80,7 @@ evalqOnLoad({ #' @export `%le%` <- function(e1, e2) UseMethod("%le%") -#' @describeIn proportional Tests proportion of dates in the first vector +#' @describeIn operate_proportional Tests proportion of dates in the first vector #' that precede or are equal to the minimum in the second vector. #' @export #' @examples @@ -103,7 +103,7 @@ evalqOnLoad({ #' @export `%><%` <- function(e1, e2) UseMethod("%><%") -#' @describeIn proportional Tests proportion of dates in the first vector +#' @describeIn operate_proportional Tests proportion of dates in the first vector #' that are between the minimum and maximum dates in the second vector. #' @export #' @examples @@ -127,7 +127,7 @@ evalqOnLoad({ #' @export `%>=<%` <- function(e1, e2) UseMethod("%>=<%") -#' @describeIn proportional Tests proportion of dates in the first vector that +#' @describeIn operate_proportional Tests proportion of dates in the first vector that #' are between the minimum and maximum dates in the second vector, inclusive. #' @export #' @examples diff --git a/man/operate.Rd b/man/operate_arithmetic.Rd similarity index 95% rename from man/operate.Rd rename to man/operate_arithmetic.Rd index b7646874..30688a3b 100644 --- a/man/operate.Rd +++ b/man/operate_arithmetic.Rd @@ -1,7 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/operate_arithmetic.R -\name{operate} -\alias{operate} +\name{operate_arithmetic} +\alias{operate_arithmetic} \alias{+.mdate} \alias{-.mdate} \title{Arithmetic operations for messydates} diff --git a/man/proportional.Rd b/man/operate_proportional.Rd similarity index 80% rename from man/proportional.Rd rename to man/operate_proportional.Rd index e84a11f7..a2dbf4f4 100644 --- a/man/proportional.Rd +++ b/man/operate_proportional.Rd @@ -1,43 +1,25 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/operate_proportional.R -\name{proportional} -\alias{proportional} +\name{operate_proportional} +\alias{operate_proportional} \alias{\%l\%} -\alias{\%l\%.mdate} -\alias{\%g\%} \alias{\%g\%.mdate} -\alias{\%ge\%} \alias{\%ge\%.mdate} -\alias{\%le\%} \alias{\%le\%.mdate} -\alias{\%><\%} \alias{\%><\%.mdate} -\alias{\%>=<\%} \alias{\%>=<\%.mdate} \title{Proportion of messy dates meeting logical test} \usage{ e1 \%l\% e2 -\method{\%l\%}{mdate}(e1, e2) - -e1 \%g\% e2 - \method{\%g\%}{mdate}(e1, e2) -e1 \%ge\% e2 - \method{\%ge\%}{mdate}(e1, e2) -e1 \%le\% e2 - \method{\%le\%}{mdate}(e1, e2) -e1 \%><\% e2 - \method{\%><\%}{mdate}(e1, e2) -e1 \%>=<\% e2 - \method{\%>=<\%}{mdate}(e1, e2) } \arguments{ @@ -53,9 +35,6 @@ These functions provide various proportional tests for messy date objects. } \section{Functions}{ \itemize{ -\item \code{ \%l\% }: Tests proportion of dates in the first vector -that precede the minimum in the second vector. - \item \code{ \%g\% }: Tests proportion of dates in the first vector that follow the maximum in the second vector. @@ -73,8 +52,6 @@ are between the minimum and maximum dates in the second vector, inclusive. }} \examples{ - as_messydate("2012-06") < as.Date("2012-06-02") - as_messydate("2012-06") \%l\% as_messydate("2012-06-02") as_messydate("2012-06") > as.Date("2012-06-02") as_messydate("2012-06") \%g\% as_messydate("2012-06-02") as_messydate("2012-06") >= as.Date("2012-06-02") From 17910475aba0f5d6174077532668aae7c3151a6b Mon Sep 17 00:00:00 2001 From: James Hollway Date: Thu, 6 Mar 2025 17:23:38 +0100 Subject: [PATCH 13/27] Fixed pkgdown#2855 by dropping operators (inequalities) because they were causing issues with pkgdown --- .Rbuildignore | 1 + .gitignore | 1 + NAMESPACE | 4 -- R/operate_statements.R | 126 --------------------------------------- man/operate_operators.Rd | 47 --------------- pkgdown/_pkgdown.yml | 5 +- 6 files changed, 4 insertions(+), 180 deletions(-) delete mode 100644 man/operate_operators.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 2a59cd98..7c2ab61d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ ^CRAN-SUBMISSION$ ^article$ ^.DS_Store$ +^next$ diff --git a/.gitignore b/.gitignore index 31b7a9dc..30e81a4f 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ article/jsslogo.jpg article/references.bib article/article_files vignettes-old +next diff --git a/NAMESPACE b/NAMESPACE index 025f80ad..66d91305 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,10 +10,6 @@ S3method("%le%",mdate) S3method("%union%",mdate) S3method("+",mdate) S3method("-",mdate) -S3method("<",mdate) -S3method("<=",mdate) -S3method(">",mdate) -S3method(">=",mdate) S3method("[",mdate) S3method("[<-",mdate) S3method("[[",mdate) diff --git a/R/operate_statements.R b/R/operate_statements.R index 552a4995..920e0a72 100644 --- a/R/operate_statements.R +++ b/R/operate_statements.R @@ -90,129 +90,3 @@ is_bce <- function(x) { stringi::stri_detect_regex(x, "^-") } -# Operators #### - -#' Logical operations on messy dates -#' @name operate_operators -NULL - -#' @describeIn operate_operators tests whether the dates in the first vector precede -#' the dates in the second vector. -#' Returns `NA` when the date order can't be determined. -#' @examples -#' as_messydate("2012-06-02") > as.Date("2012-06-01") # TRUE -#' # 2012-06-XX could mean 2012-06-03, so unknown if it comes before 2012-06-02 -#' as_messydate("2012-06-XX") < as.Date("2012-06-02") # NA -#' # But 2012-06-XX cannot be before 2012-06-01 -#' as_messydate("2012-06-XX") >= as.Date("2012-06-01") # TRUE -#' @export -`<.mdate` <- function(e1, e2) { - if (!is_messydate(e1)) e1 <- as_messydate(e1) - if (!is_messydate(e2)) e2 <- as_messydate(e2) - ranges <- numeric_time_ranges(e1, e2) - x <- rep(NA, max(length(e1), length(e2))) - x[ranges[["max1"]] < ranges[["min2"]]] <- TRUE - x[ranges[["min1"]] > ranges[["max2"]]] <- FALSE - x[ranges[["max1"]] == ranges[["min2"]]] <- FALSE - x[ranges[["min1"]] == ranges[["max2"]]] <- FALSE - x -} - -# Quoth the {lubridate} team: -# Nothing else seems to work, only this sneaky trick. -evalqOnLoad({ - registerS3method("<", "Date", `<.mdate`) - registerS3method("<", "POSIXt", `<.mdate`) -}) - -numeric_time_ranges <- function(e1, e2) { - if (is_messydate(e1)) { - min1 <- as.Date(e1, FUN = vmin) - max1 <- as.Date(e1, FUN = vmax) - if (lubridate::is.POSIXt(e2)) { - ptz <- lubridate::tz(e2) - min1 <- lubridate::force_tz(min1, ptz) - min1 <- as.POSIXct(min1) - max1 <- lubridate::force_tz(max1, ptz) - max1 <- as.POSIXct(max1) - } - } else { - min1 <- max1 <- e1 - } - if (is_messydate(e2)) { - min2 <- as.Date(e2, FUN = vmin) - max2 <- as.Date(e2, FUN = vmax) - if (lubridate::is.POSIXt(e1)) { - ptz <- lubridate::tz(e1) - min2 <- lubridate::force_tz(min2, ptz) - min2 <- as.POSIXct(min2) - max2 <- lubridate::force_tz(max2, ptz) - max2 <- as.POSIXct(max2) - } - } else { - min2 <- max2 <- e2 - } - list( - min1 = as.numeric(min1), max1 = as.numeric(max1), - min2 = as.numeric(min2), max2 = as.numeric(max2) - ) -} - -#' @describeIn operate_operators tests whether the dates in the first vector -#' succeed the dates in the second vector. -#' Returns `NA` when the date order can't be determined. -#' @export -`>.mdate` <- function(e1, e2) { - if (!is_messydate(e1)) e1 <- as_messydate(e1) - if (!is_messydate(e2)) e2 <- as_messydate(e2) - ranges <- numeric_time_ranges(e1, e2) - x <- rep(NA, max(length(e1), length(e2))) - x[ranges[["min1"]] > ranges[["max2"]]] <- TRUE - x[ranges[["max1"]] < ranges[["min2"]]] <- FALSE - x[ranges[["min1"]] == ranges[["max2"]]] <- FALSE - x[ranges[["max1"]] == ranges[["min2"]]] <- FALSE - x -} - -evalqOnLoad({ - registerS3method(">", "Date", `>.mdate`) - registerS3method(">", "POSIXt", `>.mdate`) -}) - -#' @describeIn operate_operators tests whether the dates in the first vector are -#' equal to or precede the dates in the second vector. -#' Returns `NA` when the date order can't be determined. -#' @export -`<=.mdate` <- function(e1, e2) { - if (!is_messydate(e1)) e1 <- as_messydate(e1) - if (!is_messydate(e2)) e2 <- as_messydate(e2) - ranges <- numeric_time_ranges(e1, e2) - x <- rep(NA, max(length(e1), length(e2))) - x[ranges[["max1"]] <= ranges[["min2"]]] <- TRUE - x[ranges[["min1"]] > ranges[["max2"]]] <- FALSE - x -} - -evalqOnLoad({ - registerS3method("<=", "Date", `<=.mdate`) - registerS3method("<=", "POSIXt", `<=.mdate`) -}) - -#' @describeIn operate_operators tests whether the dates in the first vector are equal to -#' or succeed the dates in the second vector. -#' Returns `NA` when the date order can't be determined. -#' @export -`>=.mdate` <- function(e1, e2) { - if (!is_messydate(e1)) e1 <- as_messydate(e1) - if (!is_messydate(e2)) e2 <- as_messydate(e2) - ranges <- numeric_time_ranges(e1, e2) - x <- rep(NA, max(length(e1), length(e2))) - x[ranges[["min1"]] >= ranges[["max2"]]] <- TRUE - x[ranges[["max1"]] < ranges[["min2"]]] <- FALSE - x -} - -evalqOnLoad({ - registerS3method(">=", "Date", `>=.mdate`) - registerS3method(">=", "POSIXt", `>=.mdate`) -}) diff --git a/man/operate_operators.Rd b/man/operate_operators.Rd deleted file mode 100644 index b8c1b04a..00000000 --- a/man/operate_operators.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/operate_statements.R -\name{operate_operators} -\alias{operate_operators} -\alias{<.mdate} -\alias{>.mdate} -\alias{<=.mdate} -\alias{>=.mdate} -\title{Logical operations on messy dates} -\usage{ -\method{<}{mdate}(e1, e2) - -\method{>}{mdate}(e1, e2) - -\method{<=}{mdate}(e1, e2) - -\method{>=}{mdate}(e1, e2) -} -\description{ -Logical operations on messy dates -} -\section{Functions}{ -\itemize{ -\item \code{ < }: tests whether the dates in the first vector precede -the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -\item \code{ > }: tests whether the dates in the first vector -succeed the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -\item \code{ <= }: tests whether the dates in the first vector are -equal to or precede the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -\item \code{ >= }: tests whether the dates in the first vector are equal to -or succeed the dates in the second vector. -Returns \code{NA} when the date order can't be determined. - -}} -\examples{ -as_messydate("2012-06-02") > as.Date("2012-06-01") # TRUE -# 2012-06-XX could mean 2012-06-03, so unknown if it comes before 2012-06-02 -as_messydate("2012-06-XX") < as.Date("2012-06-02") # NA -# But 2012-06-XX cannot be before 2012-06-01 -as_messydate("2012-06-XX") >= as.Date("2012-06-01") # TRUE -} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 1788d562..a25d93d0 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -4,7 +4,7 @@ development: template: bootstrap: 5 params: - bootswatch: quartz + bootswatch: journal authors: James Hollway: href: https://jameshollway.com @@ -49,8 +49,7 @@ reference: - title: "Manipulation" desc: "These functions expand or contract objects of `mdate` class from/into a list:" contents: - - convert_expand - - convert_contract + - starts_with("convert_") - title: "Operations" desc: "These methods help operate on objects of the `mdate` class:" contents: From a7bed2f5f37575354dc00b60de6f23ca1b5dff07 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 09:09:02 +0100 Subject: [PATCH 14/27] Fixed how as_messydate methods interpret infinite quantities --- R/coerce_to_messydate.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/coerce_to_messydate.R b/R/coerce_to_messydate.R index 1fc2a6a9..3003b80d 100644 --- a/R/coerce_to_messydate.R +++ b/R/coerce_to_messydate.R @@ -63,14 +63,16 @@ as_messydate.Date <- function(x, resequence = FALSE) { #' @describeIn coerce_to Coerce from `POSIXct` to `mdate` class #' @export as_messydate.POSIXct <- function(x, resequence = FALSE) { - x <- as.character(x) + if(any(is.infinite(x))) x[is.infinite(x)] <- "9999-12-31" + x <- as.character(as.Date(x)) new_messydate(x) } #' @describeIn coerce_to Coerce from `POSIXlt` to `mdate` class #' @export as_messydate.POSIXlt <- function(x, resequence = FALSE) { - x <- as.character(x) + if(any(is.infinite(x))) x[is.infinite(x)] <- "9999-12-31" + x <- as.character(as.Date(x)) new_messydate(x) } @@ -83,6 +85,7 @@ as_messydate.mdate <- function(x, resequence = FALSE) { #' @describeIn coerce_to Coerce character date objects to `mdate` class #' @export as_messydate.character <- function(x, resequence = NULL) { + if(any(is.infinite(x))) x[is.infinite(x)] <- "9999-12-31" d <- standardise_text(x) d <- standardise_date_separators(d) if (!is.null(resequence)) { @@ -110,6 +113,7 @@ as_messydate.character <- function(x, resequence = NULL) { #' @describeIn coerce_to Coerce numeric objects to `mdate` class #' @export as_messydate.numeric <- function(x, resequence = NULL) { + if(any(is.infinite(x))) x[is.infinite(x)] <- "9999-12-31" d <- as.character(x) new_messydate(d) } From 8cf98b96bd5d4f51adb4abba489b7b7607b3aac3 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 09:09:19 +0100 Subject: [PATCH 15/27] Fixed operate_proportional documentation --- R/operate_proportional.R | 16 ++++++++-------- man/operate_proportional.Rd | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/R/operate_proportional.R b/R/operate_proportional.R index c95b9afa..2f3eaaef 100644 --- a/R/operate_proportional.R +++ b/R/operate_proportional.R @@ -1,6 +1,6 @@ #' Proportion of messy dates meeting logical test -#' -#' These functions provide various proportional tests for messy date objects. +#' @description +#' These functions provide various proportional tests for messy date objects. #' @name operate_proportional #' @param e1,e2 `mdate` or other class objects #' @return The proportion that the comparison is true. @@ -11,7 +11,7 @@ NULL #' @export `%l%` <- function(e1, e2) UseMethod("%l%") -#' @describeIn proportional Tests proportion of dates in the first vector +#' @describeIn operate_proportional Tests proportion of dates in the first vector #' that precede the minimum in the second vector. #' @examples #' as_messydate("2012-06") < as.Date("2012-06-02") @@ -30,7 +30,7 @@ evalqOnLoad({ registerS3method("%l%", "POSIXt", `%l%.mdate`) }) -#' @rdname proportional +#' @rdname operate_proportional #' @export `%g%` <- function(e1, e2) UseMethod("%g%") @@ -53,7 +53,7 @@ evalqOnLoad({ registerS3method("%g%", "POSIXt", `%g%.mdate`) }) -#' @rdname proportional +#' @rdname operate_proportional #' @export `%ge%` <- function(e1, e2) UseMethod("%ge%") @@ -76,7 +76,7 @@ evalqOnLoad({ registerS3method("%ge%", "POSIXt", `%ge%.mdate`) }) -#' @rdname proportional +#' @rdname operate_proportional #' @export `%le%` <- function(e1, e2) UseMethod("%le%") @@ -99,7 +99,7 @@ evalqOnLoad({ registerS3method("%le%", "POSIXt", `%le%.mdate`) }) -#' @rdname proportional +#' @rdname operate_proportional #' @export `%><%` <- function(e1, e2) UseMethod("%><%") @@ -123,7 +123,7 @@ evalqOnLoad({ registerS3method("%><%", "POSIXt", `%><%.mdate`) }) -#' @rdname proportional +#' @rdname operate_proportional #' @export `%>=<%` <- function(e1, e2) UseMethod("%>=<%") diff --git a/man/operate_proportional.Rd b/man/operate_proportional.Rd index a2dbf4f4..615092e4 100644 --- a/man/operate_proportional.Rd +++ b/man/operate_proportional.Rd @@ -3,23 +3,41 @@ \name{operate_proportional} \alias{operate_proportional} \alias{\%l\%} +\alias{\%l\%.mdate} +\alias{\%g\%} \alias{\%g\%.mdate} +\alias{\%ge\%} \alias{\%ge\%.mdate} +\alias{\%le\%} \alias{\%le\%.mdate} +\alias{\%><\%} \alias{\%><\%.mdate} +\alias{\%>=<\%} \alias{\%>=<\%.mdate} \title{Proportion of messy dates meeting logical test} \usage{ e1 \%l\% e2 +\method{\%l\%}{mdate}(e1, e2) + +e1 \%g\% e2 + \method{\%g\%}{mdate}(e1, e2) +e1 \%ge\% e2 + \method{\%ge\%}{mdate}(e1, e2) +e1 \%le\% e2 + \method{\%le\%}{mdate}(e1, e2) +e1 \%><\% e2 + \method{\%><\%}{mdate}(e1, e2) +e1 \%>=<\% e2 + \method{\%>=<\%}{mdate}(e1, e2) } \arguments{ @@ -35,6 +53,9 @@ These functions provide various proportional tests for messy date objects. } \section{Functions}{ \itemize{ +\item \code{ \%l\% }: Tests proportion of dates in the first vector +that precede the minimum in the second vector. + \item \code{ \%g\% }: Tests proportion of dates in the first vector that follow the maximum in the second vector. @@ -52,6 +73,8 @@ are between the minimum and maximum dates in the second vector, inclusive. }} \examples{ + as_messydate("2012-06") < as.Date("2012-06-02") + as_messydate("2012-06") \%l\% as_messydate("2012-06-02") as_messydate("2012-06") > as.Date("2012-06-02") as_messydate("2012-06") \%g\% as_messydate("2012-06-02") as_messydate("2012-06") >= as.Date("2012-06-02") From 478e3ee48f1b8de00c84c79dd26dd4bd26f81a06 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 09:09:34 +0100 Subject: [PATCH 16/27] Reinstated inequalities --- NAMESPACE | 4 ++ R/operate_inequalities.R | 126 ++++++++++++++++++++++++++++++++++++ man/operate_inequalities.Rd | 47 ++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 R/operate_inequalities.R create mode 100644 man/operate_inequalities.Rd diff --git a/NAMESPACE b/NAMESPACE index 66d91305..025f80ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,10 @@ S3method("%le%",mdate) S3method("%union%",mdate) S3method("+",mdate) S3method("-",mdate) +S3method("<",mdate) +S3method("<=",mdate) +S3method(">",mdate) +S3method(">=",mdate) S3method("[",mdate) S3method("[<-",mdate) S3method("[[",mdate) diff --git a/R/operate_inequalities.R b/R/operate_inequalities.R new file mode 100644 index 00000000..449fb518 --- /dev/null +++ b/R/operate_inequalities.R @@ -0,0 +1,126 @@ +# Inequalities #### + +#' Logical operations on messy dates +#' @name operate_inequalities +NULL + +#' @describeIn operate_inequalities tests whether the dates in the first vector precede +#' the dates in the second vector. +#' Returns `NA` when the date order can't be determined. +#' @examples +#' as_messydate("2012-06-02") > as.Date("2012-06-01") # TRUE +#' # 2012-06-XX could mean 2012-06-03, so unknown if it comes before 2012-06-02 +#' as_messydate("2012-06-XX") < as.Date("2012-06-02") # NA +#' # But 2012-06-XX cannot be before 2012-06-01 +#' as_messydate("2012-06-XX") >= as.Date("2012-06-01") # TRUE +#' @export +`<.mdate` <- function(e1, e2) { + if (!is_messydate(e1)) e1 <- as_messydate(e1) + if (!is_messydate(e2)) e2 <- as_messydate(e2) + ranges <- numeric_time_ranges(e1, e2) + x <- rep(NA, max(length(e1), length(e2))) + x[ranges[["max1"]] < ranges[["min2"]]] <- TRUE + x[ranges[["min1"]] > ranges[["max2"]]] <- FALSE + x[ranges[["max1"]] == ranges[["min2"]]] <- FALSE + x[ranges[["min1"]] == ranges[["max2"]]] <- FALSE + x +} + +# Quoth the {lubridate} team: +# Nothing else seems to work, only this sneaky trick. +evalqOnLoad({ + registerS3method("<", "Date", `<.mdate`) + registerS3method("<", "POSIXt", `<.mdate`) +}) + +numeric_time_ranges <- function(e1, e2) { + if (is_messydate(e1)) { + min1 <- as.Date(e1, FUN = vmin) + max1 <- as.Date(e1, FUN = vmax) + if (lubridate::is.POSIXt(e2)) { + ptz <- lubridate::tz(e2) + min1 <- lubridate::force_tz(min1, ptz) + min1 <- as.POSIXct(min1) + max1 <- lubridate::force_tz(max1, ptz) + max1 <- as.POSIXct(max1) + } + } else { + min1 <- max1 <- e1 + } + if (is_messydate(e2)) { + min2 <- as.Date(e2, FUN = vmin) + max2 <- as.Date(e2, FUN = vmax) + if (lubridate::is.POSIXt(e1)) { + ptz <- lubridate::tz(e1) + min2 <- lubridate::force_tz(min2, ptz) + min2 <- as.POSIXct(min2) + max2 <- lubridate::force_tz(max2, ptz) + max2 <- as.POSIXct(max2) + } + } else { + min2 <- max2 <- e2 + } + list( + min1 = as.numeric(min1), max1 = as.numeric(max1), + min2 = as.numeric(min2), max2 = as.numeric(max2) + ) +} + +#' @describeIn operate_inequalities tests whether the dates in the first vector +#' succeed the dates in the second vector. +#' Returns `NA` when the date order can't be determined. +#' @export +`>.mdate` <- function(e1, e2) { + if (!is_messydate(e1)) e1 <- as_messydate(e1) + if (!is_messydate(e2)) e2 <- as_messydate(e2) + ranges <- numeric_time_ranges(e1, e2) + x <- rep(NA, max(length(e1), length(e2))) + x[ranges[["min1"]] > ranges[["max2"]]] <- TRUE + x[ranges[["max1"]] < ranges[["min2"]]] <- FALSE + x[ranges[["min1"]] == ranges[["max2"]]] <- FALSE + x[ranges[["max1"]] == ranges[["min2"]]] <- FALSE + x +} + +evalqOnLoad({ + registerS3method(">", "Date", `>.mdate`) + registerS3method(">", "POSIXt", `>.mdate`) +}) + +#' @describeIn operate_inequalities tests whether the dates in the first vector are +#' equal to or precede the dates in the second vector. +#' Returns `NA` when the date order can't be determined. +#' @export +`<=.mdate` <- function(e1, e2) { + if (!is_messydate(e1)) e1 <- as_messydate(e1) + if (!is_messydate(e2)) e2 <- as_messydate(e2) + ranges <- numeric_time_ranges(e1, e2) + x <- rep(NA, max(length(e1), length(e2))) + x[ranges[["max1"]] <= ranges[["min2"]]] <- TRUE + x[ranges[["min1"]] > ranges[["max2"]]] <- FALSE + x +} + +evalqOnLoad({ + registerS3method("<=", "Date", `<=.mdate`) + registerS3method("<=", "POSIXt", `<=.mdate`) +}) + +#' @describeIn operate_inequalities tests whether the dates in the first vector are equal to +#' or succeed the dates in the second vector. +#' Returns `NA` when the date order can't be determined. +#' @export +`>=.mdate` <- function(e1, e2) { + if (!is_messydate(e1)) e1 <- as_messydate(e1) + if (!is_messydate(e2)) e2 <- as_messydate(e2) + ranges <- numeric_time_ranges(e1, e2) + x <- rep(NA, max(length(e1), length(e2))) + x[ranges[["min1"]] >= ranges[["max2"]]] <- TRUE + x[ranges[["max1"]] < ranges[["min2"]]] <- FALSE + x +} + +evalqOnLoad({ + registerS3method(">=", "Date", `>=.mdate`) + registerS3method(">=", "POSIXt", `>=.mdate`) +}) diff --git a/man/operate_inequalities.Rd b/man/operate_inequalities.Rd new file mode 100644 index 00000000..7b292a57 --- /dev/null +++ b/man/operate_inequalities.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/operate_inequalities.R +\name{operate_inequalities} +\alias{operate_inequalities} +\alias{<.mdate} +\alias{>.mdate} +\alias{<=.mdate} +\alias{>=.mdate} +\title{Logical operations on messy dates} +\usage{ +\method{<}{mdate}(e1, e2) + +\method{>}{mdate}(e1, e2) + +\method{<=}{mdate}(e1, e2) + +\method{>=}{mdate}(e1, e2) +} +\description{ +Logical operations on messy dates +} +\section{Functions}{ +\itemize{ +\item \code{ < }: tests whether the dates in the first vector precede +the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +\item \code{ > }: tests whether the dates in the first vector +succeed the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +\item \code{ <= }: tests whether the dates in the first vector are +equal to or precede the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +\item \code{ >= }: tests whether the dates in the first vector are equal to +or succeed the dates in the second vector. +Returns \code{NA} when the date order can't be determined. + +}} +\examples{ +as_messydate("2012-06-02") > as.Date("2012-06-01") # TRUE +# 2012-06-XX could mean 2012-06-03, so unknown if it comes before 2012-06-02 +as_messydate("2012-06-XX") < as.Date("2012-06-02") # NA +# But 2012-06-XX cannot be before 2012-06-01 +as_messydate("2012-06-XX") >= as.Date("2012-06-01") # TRUE +} From dcf99c169884225a4ed4ad02a098524cdf911a43 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 09:52:18 +0100 Subject: [PATCH 17/27] Fixed time zone defaults in as.POSIXct() and as.POSIXlt() --- R/coerce_from_messydate.R | 8 ++++---- man/coerce_from.Rd | 4 ++-- tests/testthat/test-coerce_from.R | 4 ++-- tests/testthat/test-coerce_to.R | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index 52f1b5ec..51ca5a82 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -57,24 +57,24 @@ as.Date.mdate <- function(x, FUN = vmin, ...) { #' @rdname coerce_from #' @export -as.POSIXct.mdate <- function(x, FUN = vmin, ...) { +as.POSIXct.mdate <- function(x, tz = "UTC", FUN = vmin, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] x <- FUN(x) if (stringi::stri_detect_regex(x, "^-")) { stop("For conversion of negative dates from mdate class use as.Date()") } - as.POSIXct(as.character(x)) + as.POSIXct(as.character(x), tz = tz) } #' @rdname coerce_from #' @export -as.POSIXlt.mdate <- function(x, FUN = vmin, ...) { +as.POSIXlt.mdate <- function(x, tz = "UTC", FUN = vmin, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] x <- FUN(x) if (stringi::stri_detect_regex(x, "^-")) { stop("For conversion of negative dates from mdate class use as.Date()") } - as.POSIXlt(as.character(x)) + as.POSIXlt(as.character(x), tz = tz) } # Helper function for returning negative dates in date formats diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index f77e36e7..afd8a595 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -9,9 +9,9 @@ \usage{ \method{as.Date}{mdate}(x, FUN = vmin, ...) -\method{as.POSIXct}{mdate}(x, FUN = vmin, ...) +\method{as.POSIXct}{mdate}(x, tz = "UTC", FUN = vmin, ...) -\method{as.POSIXlt}{mdate}(x, FUN = vmin, ...) +\method{as.POSIXlt}{mdate}(x, tz = "UTC", FUN = vmin, ...) } \arguments{ \item{x}{A \code{mdate} object} diff --git a/tests/testthat/test-coerce_from.R b/tests/testthat/test-coerce_from.R index dfde33f1..7e8cb5b5 100644 --- a/tests/testthat/test-coerce_from.R +++ b/tests/testthat/test-coerce_from.R @@ -5,14 +5,14 @@ mdatey <- as_messydate("2010-10-10") test_that("Coercion from other date classes into messydt works", { # expect_equal(as.character(as.Date(as_messydate("1000 BC"), max)), "-1000-12-31") - expect_equal(as.Date(messy, FUN = min), ddate) + expect_equal(as.Date(messy, FUN = vmin), ddate) # expect_equal(as.Date(mdatey, FUN = median), ddate) expect_equal(as.Date(mdatey, FUN = random), ddate) # expect_equal(as.character(as.Date(as_messydate("1000 BC"), min)), min(negative)) }) test_that("Coercion to POSIX works", { - expect_equal(as.POSIXct(messy, FUN = max), as.POSIXct("2010-10-20 CEST")) + expect_equal(as.POSIXct(messy, FUN = vmax), as.POSIXct("2010-10-20", tz = "UTC")) # expect_equal(as.POSIXlt(messy, FUN = mean), as.POSIXlt("2010-10-15 CEST")) }) diff --git a/tests/testthat/test-coerce_to.R b/tests/testthat/test-coerce_to.R index e0d82b98..0d37b39a 100644 --- a/tests/testthat/test-coerce_to.R +++ b/tests/testthat/test-coerce_to.R @@ -1,7 +1,7 @@ test_that("Coercion from other date classes into messydt works", { date <- as.Date("2010-10-10") - POSIXct <- as.POSIXct("2010-10-10") - POSIXlt <- as.POSIXlt("2010-10-10") + POSIXct <- as.POSIXct("2010-10-10", tz = "UTC") + POSIXlt <- as.POSIXlt("2010-10-10", tz = "UTC") character <- "2010-10-10" character2 <- "AD2010-10-10" character3 <- "{BC2010-10-10,BC2010-10-11,BC2010-10-12}" From c7a6fc2250615015c60545bab1f0a304f2716d10 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:21:06 +0100 Subject: [PATCH 18/27] Fixed set bug in validate_messydate() --- R/class_mdate.R | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/R/class_mdate.R b/R/class_mdate.R index b0100f3f..bb98b073 100644 --- a/R/class_mdate.R +++ b/R/class_mdate.R @@ -75,7 +75,7 @@ validate_messydate <- function(x) { stop("mdate object requires at least one specified date component.", call. = FALSE) } - if (any(grepl("!|\\(|\\)|\\+|\\=|\\/|,|;|>|<|_|\\^|'|&|\\$|#", values))) { + if (any(grepl("!|\\(|\\)|\\+|\\=|\\/|;|>|<|_|\\^|'|&|\\$|#", values))) { stop("mdate object can only consist of numbers and some special symbols: []{}..X%?~", call. = FALSE) } @@ -86,13 +86,14 @@ validate_messydate <- function(x) { #' Composes `mdate` from multiple variables #' @param ... One (yyyy-mm-dd), two (yyyy-mm-dd, yyyy-mm-dd), -#' or three (yyyy, mm, dd) variables. +#' or three (yyyy, mm, dd) variables. +#' @inheritParams coerce_to #' @details If three date variables are passed to `make_messydate()`, -#' function will create a single date (yyyy-mm-dd) from it. -#' If two date variables are passed to `make_messydate()`, -#' function will create a range of dates from it (yyyy-mm-dd..yyyy-mm-dd). -#' If one date variable is passed to `make_messydate()`, -#' function defaults to `as_messydate()`. +#' function will create a single date (yyyy-mm-dd) from it. +#' If two date variables are passed to `make_messydate()`, +#' function will create a range of dates from it (yyyy-mm-dd..yyyy-mm-dd). +#' If one date variable is passed to `make_messydate()`, +#' function defaults to `as_messydate()`. #' @importFrom purrr map pmap_chr #' @name class_make #' @examples From a600fa9db4ed8f874a62c16adc190daf04d2b00b Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:23:29 +0100 Subject: [PATCH 19/27] Updated documentation on inequalitites and statements --- R/operate_inequalities.R | 1 + R/operate_statements.R | 2 +- man/class_make.Rd | 20 ++++++++++++++++++++ man/operate_inequalities.Rd | 3 +++ man/operate_statements.Rd | 2 +- 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/R/operate_inequalities.R b/R/operate_inequalities.R index 449fb518..cd680e06 100644 --- a/R/operate_inequalities.R +++ b/R/operate_inequalities.R @@ -1,6 +1,7 @@ # Inequalities #### #' Logical operations on messy dates +#' @param e1,e2 `mdate` or other class objects #' @name operate_inequalities NULL diff --git a/R/operate_statements.R b/R/operate_statements.R index 920e0a72..9b29e58c 100644 --- a/R/operate_statements.R +++ b/R/operate_statements.R @@ -4,7 +4,7 @@ #' @description #' These functions provide various logical statements about messy date objects. #' @name operate_statements -#' @param x,y,e1,e2 `mdate` or other class objects +#' @param x,y `mdate` or other class objects #' @return A logical vector the same length as the `mdate` passed. NULL diff --git a/man/class_make.Rd b/man/class_make.Rd index a921525f..87ae5d61 100644 --- a/man/class_make.Rd +++ b/man/class_make.Rd @@ -10,6 +10,26 @@ make_messydate(..., resequence = FALSE) \arguments{ \item{...}{One (yyyy-mm-dd), two (yyyy-mm-dd, yyyy-mm-dd), or three (yyyy, mm, dd) variables.} + +\item{resequence}{Users have the option to choose the order for +ambiguous dates with or without separators (e.g. "11-01-12" or "20112112"). +\code{NULL} by default. +Other options include: 'dmy', 'ymd', 'mdy', 'ym', 'my' and 'interactive' +If 'dmy', dates are converted from DDMMYY format for 6 digit dates, +or DDMMYYYY format for 8 digit dates. +If 'ymd', dates are converted from YYMMDD format for 6 digit dates, +or YYYYMMDD format for 8 digit dates. +If 'mdy', dates are converted from MMDDYY format for 6 digit dates +or MMDDYYYY format for 8 digit dates. +For these three options, ambiguous dates are converted to YY-MM-DD format +for 6 digit dates, or YYYY-MM-DD format for 8 digit dates. +If 'my', ambiguous 6 digit dates are converted from MM-YYYY format +to YYYY-MM. +If 'ym', ambiguous 6 digit dates are converted to YYYY-MM format. +If 'interactive', it prompts users to select the existing +component order of ambiguous dates, +based on which the date is reordered into YYYY-MM-DD format +and further completed to YYYY-MM-DD format if they choose to do so.} } \description{ Composes \code{mdate} from multiple variables diff --git a/man/operate_inequalities.Rd b/man/operate_inequalities.Rd index 7b292a57..d65546ab 100644 --- a/man/operate_inequalities.Rd +++ b/man/operate_inequalities.Rd @@ -16,6 +16,9 @@ \method{>=}{mdate}(e1, e2) } +\arguments{ +\item{e1, e2}{\code{mdate} or other class objects} +} \description{ Logical operations on messy dates } diff --git a/man/operate_statements.Rd b/man/operate_statements.Rd index 4af7564e..69c6fa4b 100644 --- a/man/operate_statements.Rd +++ b/man/operate_statements.Rd @@ -29,7 +29,7 @@ is_approximate(x) is_bce(x) } \arguments{ -\item{x, y, e1, e2}{\code{mdate} or other class objects} +\item{x, y}{\code{mdate} or other class objects} } \value{ A logical vector the same length as the \code{mdate} passed. From 1811596084b242725c2acbef9e0a16fe03703425 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:24:09 +0100 Subject: [PATCH 20/27] as.numeric() changed to as.double() so that the S3 method is found correctly --- NAMESPACE | 2 +- R/coerce_from_messydate.R | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 025f80ad..d06b3fcf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,8 +22,8 @@ S3method(as.Date,mdate) S3method(as.POSIXct,mdate) S3method(as.POSIXlt,mdate) S3method(as.data.frame,mdate) +S3method(as.double,mdate) S3method(as.list,mdate) -S3method(as.numeric,mdate) S3method(as_messydate,Date) S3method(as_messydate,POSIXct) S3method(as_messydate,POSIXlt) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index 51ca5a82..4671f593 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -100,7 +100,8 @@ as.list.mdate <- function(x, ...) { } #' @export -as.numeric.mdate <- function(x, ...) { - as.numeric(as.Date(x)) +as.double.mdate <- function(x, ...) { + if(any(is_bce(x))) x[is_bce(x)] <- negative_dates(x)[is_bce(x)] + as.double(lubridate::as_date(x)) } From b4198088e81013a650cf4c6ba82da91ff7a912fa Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:26:21 +0100 Subject: [PATCH 21/27] Moved extrema to own script --- R/coerce_extrema.R | 130 ++++++++++ R/coerce_resolve.R | 253 ------------------- man/{coerce_resolve.Rd => coerce_extrema.Rd} | 27 +- pkgdown/_pkgdown.yml | 2 +- 4 files changed, 135 insertions(+), 277 deletions(-) create mode 100644 R/coerce_extrema.R delete mode 100644 R/coerce_resolve.R rename man/{coerce_resolve.Rd => coerce_extrema.Rd} (59%) diff --git a/R/coerce_extrema.R b/R/coerce_extrema.R new file mode 100644 index 00000000..b34fe3d1 --- /dev/null +++ b/R/coerce_extrema.R @@ -0,0 +1,130 @@ +#' Resolves messy dates into an extrema +#' @description +#' This collection of S3 methods 'resolve' messy dates into a single date +#' according to some explicit bias, +#' such as returning the minimum or maximum date, +#' the mean, median, or modal date, +#' or a random date from among the possible resolutions for each messy date. +#' If the date is not 'messy' (i.e. has no annotations) +#' then just that precise date is returned. +#' This can be useful for various descriptive or inferential projects. +#' @param ... a mdate object +#' @param na.rm Should NAs be removed? True by default. +#' @importFrom stringi stri_detect_regex stri_replace_all_regex +#' @return A single scalar or vector of dates +#' @examples +#' d <- as_messydate(c("2008-03-25", "?2012-02-27", "2001-01?", "2001~", +#' "2001-01-01..2001-02-02", "{2001-01-01,2001-02-02}", +#' "{2001-01,2001-02-02}", "2008-XX-31", "-0050-01-01")) +#' d +#' @name coerce_extrema +NULL + +#' @rdname coerce_extrema +#' @export +vmin <- function(..., na.rm = FALSE) UseMethod("vmin") + +#' @rdname coerce_extrema +#' @examples +#' vmin(d) +#' @export +vmin.mdate <- function(..., na.rm = TRUE){ + d <- list(...)[[1]] + dates <- d + if(na.rm) dates <- stats::na.omit(d) + dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") + dates <- .remove_post(dates) + dates <- .replace_earliest(dates) + mdate(dates) +} + +#' @rdname coerce_extrema +#' @examples +#' min(d) +#' @export +min.mdate <- function(..., na.rm = TRUE){ + d <- list(...)[[1]] + dates <- d + if(na.rm) dates <- stats::na.omit(d) + dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") + dates <- .remove_post(dates) + dates <- .replace_earliest(dates) + dates <- mdate(dates) + if(any(is_bce(dates))) + dates[is_bce(dates)][order(as.character(dates[is_bce(dates)]), + decreasing = TRUE)][1] else + dates[order(as.character(dates))==1] +} + +.remove_post <- function(dates){ + dates <- stringi::stri_replace_all_regex(dates, "\\.\\.$|,.*$|\\{", "") + dates <- stringi::stri_replace_all_regex(dates, "^(.+)\\.\\..*$", "$1") + dates <- stringi::stri_replace_all_regex(dates, "\\.\\.", "") + dates +} + +.replace_earliest <- function(dates){ + dates <- stringi::stri_replace_last_regex(dates, + "XX", "01") + dates <- stringi::stri_replace_last_regex(dates, + "^(.*[:digit:]{4})$", "$1-01-01") + dates <- stringi::stri_replace_last_regex(dates, + "^(.*[:digit:]{4})-([:digit:]{2})$", "$1-$2-01") + # dates <- stringi::stri_replace_last_regex(dates, + # "^-([:digit:]{4})-([:digit:]{2})$", "-$1-$2-01") + dates +} + +#' @rdname coerce_extrema +#' @export +vmax <- function(..., na.rm = FALSE) UseMethod("vmax") + +#' @rdname coerce_extrema +#' @examples +#' vmax(d) +#' @export +vmax.mdate <- function(..., na.rm = TRUE){ + d <- list(...)[[1]] + dates <- d + if(na.rm) dates <- stats::na.omit(d) + dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") + dates <- unspecified_months(dates) + dates <- .remove_pre(dates) + dates <- .replace_latest(dates) + mdate(dates) +} + +#' @rdname coerce_extrema +#' @examples +#' max(d) +#' @export +max.mdate <- function(..., na.rm = TRUE) { + + d <- list(...)[[1]] + dates <- stringi::stri_replace_all_regex(d, "~|\\?", "") + dates <- unspecified_months(dates) + dates <- .remove_pre(dates) + dates <- .replace_latest(dates) + dates <- mdate(dates) + if(all(is_bce(dates), na.rm = TRUE)) + dates[order(dates, decreasing = TRUE)][1] else + dates[!is_bce(dates)][order(as.character(dates[!is_bce(dates)]), + decreasing = TRUE)][1] + +} + +.remove_pre <- function(dates){ + dates <- stringi::stri_replace_all_regex(dates, "^\\.\\.|^.*,|\\}", "") + dates <- stringi::stri_replace_all_regex(dates, "^.*\\.\\.(.+)$", "$1") + dates <- stringi::stri_replace_all_regex(dates, "\\.\\.", "") + dates +} + +.replace_latest <- function(dates){ + dates <- stringi::stri_replace_last_regex(dates, + "^(.*[:digit:]{4})$", "$1-12-31") + dates <- stringi::stri_replace_last_regex(dates, + "-XX-", "-12-") + dates +} + diff --git a/R/coerce_resolve.R b/R/coerce_resolve.R deleted file mode 100644 index 7f9df80a..00000000 --- a/R/coerce_resolve.R +++ /dev/null @@ -1,253 +0,0 @@ -#' Resolves messy dates into a single value -#' @description -#' This collection of S3 methods 'resolve' messy dates into a single date -#' according to some explicit bias, -#' such as returning the minimum or maximum date, -#' the mean, median, or modal date, -#' or a random date from among the possible resolutions for each messy date. -#' If the date is not 'messy' (i.e. has no annotations) -#' then just that precise date is returned. -#' This can be useful for various descriptive or inferential projects. -#' @param ... a mdate object -#' @param na.rm Should NAs be removed? True by default. -#' @importFrom stringi stri_detect_regex stri_replace_all_regex -#' @return A single scalar or vector of dates -#' @examples -#' d <- as_messydate(c("2008-03-25", "?2012-02-27", "2001-01?", "2001~", -#' "2001-01-01..2001-02-02", "{2001-01-01,2001-02-02}", -#' "{2001-01,2001-02-02}", "2008-XX-31", "-0050-01-01")) -#' d -#' @name coerce_resolve -NULL - -#' @rdname coerce_resolve -#' @export -vmin <- function(..., na.rm = FALSE) UseMethod("vmin") - -#' @rdname coerce_resolve -#' @examples -#' vmin(d) -#' @export -vmin.mdate <- function(..., na.rm = TRUE){ - d <- list(...)[[1]] - dates <- d - if(na.rm) dates <- na.omit(d) - dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") - dates <- .remove_post(dates) - dates <- .replace_earliest(dates) - mdate(dates) -} - -#' @rdname coerce_resolve -#' @examples -#' min(d) -#' @export -min.mdate <- function(..., na.rm = TRUE){ - d <- list(...)[[1]] - dates <- d - if(na.rm) dates <- na.omit(d) - dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") - dates <- .remove_post(dates) - dates <- .replace_earliest(dates) - dates <- mdate(dates) - if(any(is_bce(dates))) - dates[is_bce(dates)][order(as.character(dates[is_bce(dates)]), - decreasing = TRUE)][1] else - dates[order(as.character(dates))==1] -} - -.remove_post <- function(dates){ - dates <- stringi::stri_replace_all_regex(dates, "\\.\\.$|,.*$|\\{", "") - dates <- stringi::stri_replace_all_regex(dates, "^(.+)\\.\\..*$", "$1") - dates <- stringi::stri_replace_all_regex(dates, "\\.\\.", "") - dates -} - -.replace_earliest <- function(dates){ - dates <- stringi::stri_replace_last_regex(dates, - "XX", "01") - dates <- stringi::stri_replace_last_regex(dates, - "^(.*[:digit:]{4})$", "$1-01-01") - dates <- stringi::stri_replace_last_regex(dates, - "^(.*[:digit:]{4})-([:digit:]{2})$", "$1-$2-01") - # dates <- stringi::stri_replace_last_regex(dates, - # "^-([:digit:]{4})-([:digit:]{2})$", "-$1-$2-01") - dates -} - -#' @rdname coerce_resolve -#' @export -vmax <- function(..., na.rm = FALSE) UseMethod("vmax") - -#' @rdname coerce_resolve -#' @examples -#' vmax(d) -#' @export -vmax.mdate <- function(..., na.rm = TRUE){ - d <- list(...)[[1]] - dates <- d - if(na.rm) dates <- na.omit(d) - dates <- stringi::stri_replace_all_regex(dates, "~|\\?", "") - dates <- unspecified_months(dates) - dates <- .remove_pre(dates) - dates <- .replace_latest(dates) - mdate(dates) -} - -#' @rdname coerce_resolve -#' @param recursive If recursive = TRUE, then the dates will be resolved -#' to a single date. If recursive = FALSE, then the dates will be resolved -#' to a vector the length of the original vector. -#' By default FALSE. -#' @examples -#' max(d) -#' @export -max.mdate <- function(..., na.rm = TRUE) { - - d <- list(...)[[1]] - dates <- stringi::stri_replace_all_regex(d, "~|\\?", "") - dates <- unspecified_months(dates) - dates <- .remove_pre(dates) - dates <- .replace_latest(dates) - dates <- mdate(dates) - if(all(is_bce(dates), na.rm = TRUE)) - dates[order(dates, decreasing = TRUE)][1] else - dates[!is_bce(dates)][order(as.character(dates[!is_bce(dates)]), - decreasing = TRUE)][1] - -} - -.remove_pre <- function(dates){ - dates <- stringi::stri_replace_all_regex(dates, "^\\.\\.|^.*,|\\}", "") - dates <- stringi::stri_replace_all_regex(dates, "^.*\\.\\.(.+)$", "$1") - dates <- stringi::stri_replace_all_regex(dates, "\\.\\.", "") - dates -} - -.replace_latest <- function(dates){ - dates <- stringi::stri_replace_last_regex(dates, - "^(.*[:digit:]{4})$", "$1-12-31") - dates <- stringi::stri_replace_last_regex(dates, - "-XX-", "-12-") - dates -} - -# #' @rdname coerce_resolve -# #' @importFrom stats median -# #' @examples -# #' median(d) -# #' @export -# median.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { -# -# x <- as.list(...) -# y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), -# recursive = recursive) -# if(recursive){ -# if (length(y) %% 2 == 0) { -# as.character(median(unlist(y[-1]), na.rm = na.rm)) -# } -# else{ -# as.character(median(y, na.rm = na.rm)) -# } -# } else { -# unlist(lapply(y, function(z) { -# if (length(z) %% 2 == 0) { -# z <- unlist(z[-1]) -# z <- as.character(median(z, na.rm = na.rm)) -# z -# } -# else{ -# z <- as.character(median(z, na.rm = na.rm)) -# z -# } -# }), recursive = FALSE) -# } -# } - -# #' @rdname coerce_resolve -# #' @param trim the fraction (0 to 0.5) of observations to be trimmed -# #' from each end of x before the mean is computed. -# #' Values of trim outside that range are taken as the nearest endpoint. -# #' @importFrom lubridate as_date -# #' @examples -# #' mean(d) -# #' @export -# mean.mdate <- function(..., trim = 0, na.rm = TRUE, recursive = FALSE) { -# x <- as.list(...) -# y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), -# recursive = recursive) -# if(recursive){ -# if (length(y) > 1 & stringi::stri_detect_regex(y[1], "^-", negate = TRUE)) { -# y <- as.character(mean(as.Date(y), trim = 0, na.rm = TRUE)) -# } -# if (length(y) > 1 & stringi::stri_detect_regex(y[1], "^-")) { -# y <- paste0("-", as.character(mean(lubridate::as_date(y), -# trim = 0, na.rm = TRUE))) -# y <- zero_padding(y) -# } -# y -# } else { -# unlist(lapply(y, function(x) { -# if (length(x) > 1 & stringi::stri_detect_regex(x[1], "^-", negate = TRUE)) { -# x <- as.character(mean(as.Date(x), trim = 0, na.rm = TRUE)) -# } -# if (length(x) > 1 & stringi::stri_detect_regex(x[1], "^-")) { -# x <- paste0("-", as.character(mean(lubridate::as_date(x), -# trim = 0, na.rm = TRUE))) -# x <- zero_padding(x) -# } -# x -# }), recursive = FALSE) -# } -# } - -# #' @rdname coerce_resolve -# #' @export -# modal <- function(..., na.rm = FALSE, recursive = FALSE) UseMethod("modal") - -# #' @rdname coerce_resolve -# #' @examples -# #' modal(d) -# #' @export -# modal.mdate <- function(..., na.rm = TRUE, recursive = FALSE) { -# -# d <- list(...)[[1]] -# getmode <- function(v) { -# uniqv <- unique(v) -# uniqv[which.max(tabulate(match(v, uniqv)))] -# } -# d <- purrr::map_chr(expand(d), function(y) getmode(y)) -# if(recursive) d <- as.character(getmode(d)) -# d -# } - -#' @rdname coerce_resolve -#' @param size a non-negative integer giving the number of items to choose. -#' @param replace should sampling be with replacement? -#' @param prob a vector of probability weights -#' for obtaining the elements of the vector being sampled. -#' @export -random <- function(..., size, - replace = FALSE, - prob = NULL, recursive = FALSE) UseMethod("random") - -#' @rdname coerce_resolve -#' @examples -#' random(d) -#' @export -random.mdate <- function(..., - size, - replace = FALSE, - prob = NULL, recursive = FALSE) { - x <- as.list(...) - y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)), - recursive = recursive) - if(recursive){ - as.character(sample(y, size = 1)) - } else { - unlist(lapply(y, function(x) { - if (length(x) > 1) x <- as.character(sample(x, size = 1)) - x - }), recursive = FALSE) - } -} diff --git a/man/coerce_resolve.Rd b/man/coerce_extrema.Rd similarity index 59% rename from man/coerce_resolve.Rd rename to man/coerce_extrema.Rd index abfb6902..5183e5df 100644 --- a/man/coerce_resolve.Rd +++ b/man/coerce_extrema.Rd @@ -1,16 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/coerce_resolve.R -\name{coerce_resolve} -\alias{coerce_resolve} +% Please edit documentation in R/coerce_extrema.R +\name{coerce_extrema} +\alias{coerce_extrema} \alias{vmin} \alias{vmin.mdate} \alias{min.mdate} \alias{vmax} \alias{vmax.mdate} \alias{max.mdate} -\alias{random} -\alias{random.mdate} -\title{Resolves messy dates into a single value} +\title{Resolves messy dates into an extrema} \usage{ vmin(..., na.rm = FALSE) @@ -23,27 +21,11 @@ vmax(..., na.rm = FALSE) \method{vmax}{mdate}(..., na.rm = TRUE) \method{max}{mdate}(..., na.rm = TRUE) - -random(..., size, replace = FALSE, prob = NULL, recursive = FALSE) - -\method{random}{mdate}(..., size, replace = FALSE, prob = NULL, recursive = FALSE) } \arguments{ \item{...}{a mdate object} \item{na.rm}{Should NAs be removed? True by default.} - -\item{size}{a non-negative integer giving the number of items to choose.} - -\item{replace}{should sampling be with replacement?} - -\item{prob}{a vector of probability weights -for obtaining the elements of the vector being sampled.} - -\item{recursive}{If recursive = TRUE, then the dates will be resolved -to a single date. If recursive = FALSE, then the dates will be resolved -to a vector the length of the original vector. -By default FALSE.} } \value{ A single scalar or vector of dates @@ -67,5 +49,4 @@ vmin(d) min(d) vmax(d) max(d) -random(d) } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index a25d93d0..9c83da1d 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -45,7 +45,7 @@ reference: desc: "These functions coerce dates from the `mdate` class into a single `Date`:" contents: - starts_with("as\\.") - - coerce_resolve + - coerce_extrema - title: "Manipulation" desc: "These functions expand or contract objects of `mdate` class from/into a list:" contents: From 22f512aabad721f8682a7b4d97404773c5c51b15 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:28:07 +0100 Subject: [PATCH 22/27] Added median() and vmedian() back in, works faster --- R/coerce_from_messydate.R | 52 +++++++++++++++------------------ R/coerce_tendency.R | 60 +++++++++++++++++++++++++++++++++++++++ man/coerce_from.Rd | 16 ++++------- pkgdown/_pkgdown.yml | 1 + 4 files changed, 90 insertions(+), 39 deletions(-) create mode 100644 R/coerce_tendency.R diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index 4671f593..0d2ec0a4 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -1,45 +1,39 @@ #' Coercion from messy dates -#' #' @description -#' These functions coerce objects of `mdate` class to -#' common date classes such as `Date`, `POSIXct`, and `POSIXlt`. -#' Since `mdate` objects can hold multiple individual dates, -#' however, an additional function must be passed as an argument -#' so that these functions know how to coerce resolve multiple dates -#' into a single date. +#' These functions coerce objects of `mdate` class to +#' common date classes such as `Date`, `POSIXct`, and `POSIXlt`. +#' Since `mdate` objects can hold multiple individual dates, +#' however, an additional function must be passed as an argument +#' so that these functions know how to coerce resolve multiple dates +#' into a single date. #' -#' For example, one might wish to use the earliest possible date -#' in any ranges of dates (`min`), the latest possible date (`max`), -#' some notion of a central tendency (`mean`, `median`, or `modal`), -#' or even a `random` selection from among the candidate dates. +#' For example, one might wish to use the earliest possible date +#' in any ranges of dates (`min`), the latest possible date (`max`), +#' some notion of a central tendency (`mean`, `median`, or `modal`), +#' or even a `random` selection from among the candidate dates. #' -#' These functions then, building on `expand()` and the resolve functions, -#' are particularly useful in converting back out of the `mdate` class -#' for use with existing methods and models, -#' especially for checking the robustness of results. +#' These functions then, building on `expand()` and the resolve functions, +#' are particularly useful in converting back out of the `mdate` class +#' for use with existing methods and models, +#' especially for checking the robustness of results. #' @param x A `mdate` object #' @param ... Arguments passed on to the S3 generics. #' @param FUN A function that can be used to resolve expanded messy dates -#' into a single date. -#' For example, `min()`, `max()`, `mean()`, `median()`, -#' `modal()`, and `random()`. +#' into a single date. +#' For example, `min()`, `max()`, `mean()`, `median()`, +#' `modal()`, and `random()`. #' @return A date object of `Date`, `POSIXct`, or `POSIXlt` class #' @name coerce_from NULL -#> NULL #' @rdname coerce_from #' @examples -#' as.Date(as_messydate("2012-01"), min) -#' as.Date(as_messydate("2012-01-01"), mean) -#' as.Date(as_messydate("2012-01"), max) -#' as.Date(as_messydate("2012-01"), median) -#' as.Date(as_messydate("2012-01"), modal) -#' as.Date(as_messydate("2012-01"), random) -#' as.Date(as_messydate("1000 BC"), max) -#' as.Date(as_messydate("1000 BC"), mean) -#' as.Date(as_messydate("1000 BC"), median) -#' as.Date(as_messydate(c("-1000", "2020")), min) +#' as.Date(as_messydate("2012-01"), FUN = vmin) +#' as.Date(as_messydate("2012-01"), FUN = vmax) +#' as.Date(as_messydate("2012-01"), FUN = vmedian) +#' as.Date(as_messydate("1000 BC"), FUN = vmax) +#' as.Date(as_messydate("1000 BC"), FUN = vmedian) +#' as.Date(as_messydate(c("-1000", "2020")), FUN = vmin) #' @export as.Date.mdate <- function(x, FUN = vmin, ...) { # # fix argument ordering issues diff --git a/R/coerce_tendency.R b/R/coerce_tendency.R new file mode 100644 index 00000000..c2539138 --- /dev/null +++ b/R/coerce_tendency.R @@ -0,0 +1,60 @@ +#' Resolves messy dates into a central tendency +#' @description +#' These functions resolve messydates by their central tendency. +#' While the functions `mean()`, `median()`, and `modal()` summarise the +#' vector to a single value, `v*()` versions return a vector of the same length. +#' @name coerce_tendency +#' @inheritParams coerce_extrema +#' @examples +#' d <- as_messydate(c("2008-03-25", "?2012-02-27", "2001-01?", "2001~", +#' "2001-01-01..2001-02-02", "{2001-01-01,2001-02-02}", +#' "{2001-01,2001-02-02}", "2008-XX-31", "-0050-01-01")) +#' d +NULL + +#' @rdname coerce_tendency +#' @importFrom stats median +#' @examples +#' median(d) +#' @export +median.mdate <- function(..., na.rm = TRUE) { + + x <- list(...)[[1]] + y <- unlist(expand(x)) + y <- .order_messy(y) + median(y, na.rm = na.rm) +} + +.order_messy <- function(y){ + if(any(is_bce(y))){ + bcey <- y[is_bce(y)] + cey <- y[!is_bce(y)] + c(bcey[order(bcey, decreasing = TRUE)], + cey[order(cey)]) + } else { + y[order(y)] + } +} + +#' @rdname coerce_tendency +#' @export +vmedian <- function(..., na.rm = TRUE) UseMethod("vmedian") + +#' @rdname coerce_tendency +#' @importFrom stats median +#' @examples +#' vmedian(d) +#' @export +vmedian.mdate <- function(..., na.rm = TRUE) { + + x <- as.list(...) + vapply(x, function(y){ + z <- suppressWarnings(median(y, na.rm = na.rm)) + if(is.na(z)){ + if(length(expand(y)[[1]]) %% 2 == 0) + z <- median(.order_messy(expand(y)[[1]])[-1]) + } + z + }, FUN.VALUE = character(1)) +} + diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index afd8a595..46c02783 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -45,14 +45,10 @@ for use with existing methods and models, especially for checking the robustness of results. } \examples{ -as.Date(as_messydate("2012-01"), min) -as.Date(as_messydate("2012-01-01"), mean) -as.Date(as_messydate("2012-01"), max) -as.Date(as_messydate("2012-01"), median) -as.Date(as_messydate("2012-01"), modal) -as.Date(as_messydate("2012-01"), random) -as.Date(as_messydate("1000 BC"), max) -as.Date(as_messydate("1000 BC"), mean) -as.Date(as_messydate("1000 BC"), median) -as.Date(as_messydate(c("-1000", "2020")), min) +as.Date(as_messydate("2012-01"), FUN = vmin) +as.Date(as_messydate("2012-01"), FUN = vmax) +as.Date(as_messydate("2012-01"), FUN = vmedian) +as.Date(as_messydate("1000 BC"), FUN = vmax) +as.Date(as_messydate("1000 BC"), FUN = vmedian) +as.Date(as_messydate(c("-1000", "2020")), FUN = vmin) } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9c83da1d..946906ce 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -46,6 +46,7 @@ reference: contents: - starts_with("as\\.") - coerce_extrema + - coerce_tendency - title: "Manipulation" desc: "These functions expand or contract objects of `mdate` class from/into a list:" contents: From d20dc92b99d6579291b20083904e60ed5b854483 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:30:08 +0100 Subject: [PATCH 23/27] Added mean.mdate() and vmean.mdate() back in, works better for BCE dates --- NAMESPACE | 7 ++++++ R/coerce_from_messydate.R | 1 + R/coerce_tendency.R | 31 +++++++++++++++++++++++++ man/coerce_from.Rd | 1 + man/coerce_tendency.Rd | 49 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 89 insertions(+) create mode 100644 man/coerce_tendency.Rd diff --git a/NAMESPACE b/NAMESPACE index d06b3fcf..e6fd2574 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,8 @@ S3method(as_messydate,mdate) S3method(as_messydate,numeric) S3method(c,mdate) S3method(max,mdate) +S3method(mean,mdate) +S3method(median,mdate) S3method(messyduration,character) S3method(messyduration,mdate) S3method(min,mdate) @@ -42,6 +44,8 @@ S3method(random,mdate) S3method(rep,mdate) S3method(seq,mdate) S3method(vmax,mdate) +S3method(vmean,mdate) +S3method(vmedian,mdate) S3method(vmin,mdate) export("%><%") export("%>=<%") @@ -82,6 +86,8 @@ export(random) export(validate_messydate) export(validate_messyduration) export(vmax) +export(vmean) +export(vmedian) export(vmin) export(year) importFrom(dplyr,first) @@ -94,6 +100,7 @@ importFrom(lubridate,years) importFrom(lubridate,ymd) importFrom(purrr,map) importFrom(purrr,pmap_chr) +importFrom(stats,median) importFrom(stringi,stri_detect_regex) importFrom(stringi,stri_extract_all_regex) importFrom(stringi,stri_replace_all_fixed) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index 0d2ec0a4..acc93fb0 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -29,6 +29,7 @@ NULL #' @rdname coerce_from #' @examples #' as.Date(as_messydate("2012-01"), FUN = vmin) +#' as.Date(as_messydate("2012-01-01"), FUN = vmean) #' as.Date(as_messydate("2012-01"), FUN = vmax) #' as.Date(as_messydate("2012-01"), FUN = vmedian) #' as.Date(as_messydate("1000 BC"), FUN = vmax) diff --git a/R/coerce_tendency.R b/R/coerce_tendency.R index c2539138..84a75241 100644 --- a/R/coerce_tendency.R +++ b/R/coerce_tendency.R @@ -58,3 +58,34 @@ vmedian.mdate <- function(..., na.rm = TRUE) { }, FUN.VALUE = character(1)) } +#' @rdname coerce_tendency +#' @param trim the fraction (0 to 0.5) of observations to be trimmed +#' from each end of x before the mean is computed. +#' Values of trim outside that range are taken as the nearest endpoint. +#' @importFrom lubridate as_date +#' @examples +#' mean(d) +#' @export +mean.mdate <- function(..., trim = 0, na.rm = TRUE) { + + x <- list(...)[[1]] + y <- unlist(expand(x)) + as.character(lubridate::as_date(mean(as.double(lubridate::as_date(y))))) +} + + +#' @rdname coerce_tendency +#' @export +vmean <- function(..., na.rm = TRUE) UseMethod("vmean") + +#' @rdname coerce_tendency +#' @examples +#' vmean(d) +#' @export +vmean.mdate <- function(..., trim = 0, na.rm = TRUE) { + x <- list(...)[[1]] + vapply(expand(x), function(y) + as.character(lubridate::as_date(mean(as.double(lubridate::as_date(y))))), + FUN.VALUE = character(1)) +} + diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index 46c02783..290fbb15 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -46,6 +46,7 @@ especially for checking the robustness of results. } \examples{ as.Date(as_messydate("2012-01"), FUN = vmin) +as.Date(as_messydate("2012-01-01"), FUN = vmean) as.Date(as_messydate("2012-01"), FUN = vmax) as.Date(as_messydate("2012-01"), FUN = vmedian) as.Date(as_messydate("1000 BC"), FUN = vmax) diff --git a/man/coerce_tendency.Rd b/man/coerce_tendency.Rd new file mode 100644 index 00000000..8edb6995 --- /dev/null +++ b/man/coerce_tendency.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coerce_tendency.R +\name{coerce_tendency} +\alias{coerce_tendency} +\alias{median.mdate} +\alias{vmedian} +\alias{vmedian.mdate} +\alias{mean.mdate} +\alias{vmean} +\alias{vmean.mdate} +\title{Resolves messy dates into a central tendency} +\usage{ +\method{median}{mdate}(..., na.rm = TRUE) + +vmedian(..., na.rm = TRUE) + +\method{vmedian}{mdate}(..., na.rm = TRUE) + +\method{mean}{mdate}(..., trim = 0, na.rm = TRUE) + +vmean(..., na.rm = TRUE) + +\method{vmean}{mdate}(..., trim = 0, na.rm = TRUE) + +} +\arguments{ +\item{...}{a mdate object} + +\item{na.rm}{Should NAs be removed? True by default.} + +\item{trim}{the fraction (0 to 0.5) of observations to be trimmed +from each end of x before the mean is computed. +Values of trim outside that range are taken as the nearest endpoint.} +} +\description{ +These functions resolve messydates by their central tendency. +While the functions \code{mean()}, \code{median()}, and \code{modal()} summarise the +vector to a single value, \verb{v*()} versions return a vector of the same length. +} +\examples{ +d <- as_messydate(c("2008-03-25", "?2012-02-27", "2001-01?", "2001~", + "2001-01-01..2001-02-02", "{2001-01-01,2001-02-02}", + "{2001-01,2001-02-02}", "2008-XX-31", "-0050-01-01")) +d +median(d) +vmedian(d) +mean(d) +vmean(d) +} From 4a852d88886a5963523346bf2845cbcedc27e689 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:31:14 +0100 Subject: [PATCH 24/27] Added modal.mdate() and vmodal.mdate() back in --- NAMESPACE | 4 ++++ R/coerce_from_messydate.R | 1 + R/coerce_tendency.R | 33 +++++++++++++++++++++++++++++++++ man/coerce_from.Rd | 1 + man/coerce_tendency.Rd | 14 ++++++++++++++ 5 files changed, 53 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index e6fd2574..9dac0dc0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,7 @@ S3method(median,mdate) S3method(messyduration,character) S3method(messyduration,mdate) S3method(min,mdate) +S3method(modal,mdate) S3method(print,mdate) S3method(print,mdates_duration) S3method(random,mdate) @@ -47,6 +48,7 @@ S3method(vmax,mdate) S3method(vmean,mdate) S3method(vmedian,mdate) S3method(vmin,mdate) +S3method(vmodal,mdate) export("%><%") export("%>=<%") export("%g%") @@ -76,6 +78,7 @@ export(md_multiset) export(md_union) export(mdate) export(messyduration) +export(modal) export(month) export(new_messydate) export(new_messyduration) @@ -89,6 +92,7 @@ export(vmax) export(vmean) export(vmedian) export(vmin) +export(vmodal) export(year) importFrom(dplyr,first) importFrom(dplyr,last) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index acc93fb0..1be1a46d 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -32,6 +32,7 @@ NULL #' as.Date(as_messydate("2012-01-01"), FUN = vmean) #' as.Date(as_messydate("2012-01"), FUN = vmax) #' as.Date(as_messydate("2012-01"), FUN = vmedian) +#' as.Date(as_messydate("2012-01"), FUN = vmodal) #' as.Date(as_messydate("1000 BC"), FUN = vmax) #' as.Date(as_messydate("1000 BC"), FUN = vmedian) #' as.Date(as_messydate(c("-1000", "2020")), FUN = vmin) diff --git a/R/coerce_tendency.R b/R/coerce_tendency.R index 84a75241..ca26f594 100644 --- a/R/coerce_tendency.R +++ b/R/coerce_tendency.R @@ -89,3 +89,36 @@ vmean.mdate <- function(..., trim = 0, na.rm = TRUE) { FUN.VALUE = character(1)) } +#' @rdname coerce_tendency +#' @export +modal <- function(..., na.rm = TRUE) UseMethod("modal") + +#' @rdname coerce_tendency +#' @examples +#' modal(d) +#' @export +modal.mdate <- function(..., na.rm = TRUE) { + d <- list(...)[[1]] + .getmode(unlist(expand(d))) +} + +.getmode <- function(v) { + uniqv <- unique(v) + uniqv[which.max(tabulate(match(v, uniqv)))] +} + +#' @rdname coerce_tendency +#' @export +vmodal <- function(..., na.rm = TRUE) UseMethod("vmodal") + +#' @rdname coerce_tendency +#' @examples +#' vmodal(d) +#' @export +vmodal.mdate <- function(..., na.rm = TRUE) { + + d <- list(...)[[1]] + d <- purrr::map_chr(expand(d), function(y) .getmode(y)) + d +} + diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index 290fbb15..5983532f 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -49,6 +49,7 @@ as.Date(as_messydate("2012-01"), FUN = vmin) as.Date(as_messydate("2012-01-01"), FUN = vmean) as.Date(as_messydate("2012-01"), FUN = vmax) as.Date(as_messydate("2012-01"), FUN = vmedian) +as.Date(as_messydate("2012-01"), FUN = vmodal) as.Date(as_messydate("1000 BC"), FUN = vmax) as.Date(as_messydate("1000 BC"), FUN = vmedian) as.Date(as_messydate(c("-1000", "2020")), FUN = vmin) diff --git a/man/coerce_tendency.Rd b/man/coerce_tendency.Rd index 8edb6995..e7b5a573 100644 --- a/man/coerce_tendency.Rd +++ b/man/coerce_tendency.Rd @@ -8,6 +8,10 @@ \alias{mean.mdate} \alias{vmean} \alias{vmean.mdate} +\alias{modal} +\alias{modal.mdate} +\alias{vmodal} +\alias{vmodal.mdate} \title{Resolves messy dates into a central tendency} \usage{ \method{median}{mdate}(..., na.rm = TRUE) @@ -22,6 +26,14 @@ vmean(..., na.rm = TRUE) \method{vmean}{mdate}(..., trim = 0, na.rm = TRUE) +modal(..., na.rm = TRUE) + +\method{modal}{mdate}(..., na.rm = TRUE) + +vmodal(..., na.rm = TRUE) + +\method{vmodal}{mdate}(..., na.rm = TRUE) + } \arguments{ \item{...}{a mdate object} @@ -46,4 +58,6 @@ median(d) vmedian(d) mean(d) vmean(d) +modal(d) +vmodal(d) } From ee4a9dae02ba8d0532fd1f6ba8b3cde828f5062b Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:32:11 +0100 Subject: [PATCH 25/27] Added random.mdate() and vrandom.mdate() back in --- NAMESPACE | 3 +++ R/coerce_from_messydate.R | 1 + R/coerce_tendency.R | 39 ++++++++++++++++++++++++++++ man/coerce_from.Rd | 1 + man/coerce_tendency.Rd | 13 ++++++++++ tests/testthat/test-coerce_resolve.R | 2 +- 6 files changed, 58 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 9dac0dc0..0a168c08 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -41,6 +41,7 @@ S3method(min,mdate) S3method(modal,mdate) S3method(print,mdate) S3method(print,mdates_duration) +S3method(random,character) S3method(random,mdate) S3method(rep,mdate) S3method(seq,mdate) @@ -49,6 +50,7 @@ S3method(vmean,mdate) S3method(vmedian,mdate) S3method(vmin,mdate) S3method(vmodal,mdate) +S3method(vrandom,mdate) export("%><%") export("%>=<%") export("%g%") @@ -93,6 +95,7 @@ export(vmean) export(vmedian) export(vmin) export(vmodal) +export(vrandom) export(year) importFrom(dplyr,first) importFrom(dplyr,last) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index 1be1a46d..c3cd010a 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -33,6 +33,7 @@ NULL #' as.Date(as_messydate("2012-01"), FUN = vmax) #' as.Date(as_messydate("2012-01"), FUN = vmedian) #' as.Date(as_messydate("2012-01"), FUN = vmodal) +#' as.Date(as_messydate("2012-01"), FUN = vrandom) #' as.Date(as_messydate("1000 BC"), FUN = vmax) #' as.Date(as_messydate("1000 BC"), FUN = vmedian) #' as.Date(as_messydate(c("-1000", "2020")), FUN = vmin) diff --git a/R/coerce_tendency.R b/R/coerce_tendency.R index ca26f594..26ebb66b 100644 --- a/R/coerce_tendency.R +++ b/R/coerce_tendency.R @@ -122,3 +122,42 @@ vmodal.mdate <- function(..., na.rm = TRUE) { d } +#' @rdname coerce_tendency +#' @export +random <- function(..., na.rm = TRUE) UseMethod("random") + +#' @rdname coerce_tendency +#' @examples +#' random(d) +#' @export +random.mdate <- function(..., na.rm = TRUE) { + x <- list(...)[[1]] + y <- unlist(expand(x)) + if(na.rm) y <- y[!is.na(y)] + sample(y, 1) +} + +#' @export +random.character <- function(..., na.rm = TRUE) { + y <- list(...)[[1]] + # y <- suppressMessages(unlist(expand(x))) + if(na.rm) y <- y[!is.na(y)] + sample(y, 1) +} + +#' @rdname coerce_tendency +#' @export +vrandom <- function(..., na.rm = TRUE) UseMethod("vrandom") + +#' @rdname coerce_tendency +#' @examples +#' vrandom(d) +#' @export +vrandom.mdate <- function(..., na.rm = TRUE) { + + x <- as.list(...) + vapply(x, function(y){ + random(expand(y)[[1]], na.rm = na.rm) + }, FUN.VALUE = character(1)) + +} diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index 5983532f..98e7ab34 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -50,6 +50,7 @@ as.Date(as_messydate("2012-01-01"), FUN = vmean) as.Date(as_messydate("2012-01"), FUN = vmax) as.Date(as_messydate("2012-01"), FUN = vmedian) as.Date(as_messydate("2012-01"), FUN = vmodal) +as.Date(as_messydate("2012-01"), FUN = vrandom) as.Date(as_messydate("1000 BC"), FUN = vmax) as.Date(as_messydate("1000 BC"), FUN = vmedian) as.Date(as_messydate(c("-1000", "2020")), FUN = vmin) diff --git a/man/coerce_tendency.Rd b/man/coerce_tendency.Rd index e7b5a573..2c01ac89 100644 --- a/man/coerce_tendency.Rd +++ b/man/coerce_tendency.Rd @@ -12,6 +12,10 @@ \alias{modal.mdate} \alias{vmodal} \alias{vmodal.mdate} +\alias{random} +\alias{random.mdate} +\alias{vrandom} +\alias{vrandom.mdate} \title{Resolves messy dates into a central tendency} \usage{ \method{median}{mdate}(..., na.rm = TRUE) @@ -34,6 +38,13 @@ vmodal(..., na.rm = TRUE) \method{vmodal}{mdate}(..., na.rm = TRUE) +random(..., na.rm = TRUE) + +\method{random}{mdate}(..., na.rm = TRUE) + +vrandom(..., na.rm = TRUE) + +\method{vrandom}{mdate}(..., na.rm = TRUE) } \arguments{ \item{...}{a mdate object} @@ -60,4 +71,6 @@ mean(d) vmean(d) modal(d) vmodal(d) +random(d) +vrandom(d) } diff --git a/tests/testthat/test-coerce_resolve.R b/tests/testthat/test-coerce_resolve.R index a221ee16..3b6f67ac 100644 --- a/tests/testthat/test-coerce_resolve.R +++ b/tests/testthat/test-coerce_resolve.R @@ -29,7 +29,7 @@ test_that("Max resolving works properly", { # }) test_that("Random resolving works properly", { - expect_length(random(test_dates), 3) + expect_length(vrandom(test_dates), 3) }) # test_that("Resolve dates works properly for date ranges", { From 381e4b7911fd2509ef384cbf18d2c44508ebcb97 Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 16:32:47 +0100 Subject: [PATCH 26/27] Fixed timezone bug in as.POSIXct.mdate(), now uses "UTC" by default --- R/coerce_from_messydate.R | 4 ++++ man/coerce_from.Rd | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/R/coerce_from_messydate.R b/R/coerce_from_messydate.R index c3cd010a..e4511c0b 100644 --- a/R/coerce_from_messydate.R +++ b/R/coerce_from_messydate.R @@ -53,6 +53,10 @@ as.Date.mdate <- function(x, FUN = vmin, ...) { } #' @rdname coerce_from +#' @param tz Character string specifying the time zone for the conversion, +#' if required. +#' By default "UTC" (Universal Time Coordinated), equivalent to GMT. +#' If "" then the current time zone is used. #' @export as.POSIXct.mdate <- function(x, tz = "UTC", FUN = vmin, ...) { # if (missing(FUN) & length(list(...)) > 0) FUN <- list(...)[[1]] diff --git a/man/coerce_from.Rd b/man/coerce_from.Rd index 98e7ab34..f00fcae7 100644 --- a/man/coerce_from.Rd +++ b/man/coerce_from.Rd @@ -22,6 +22,11 @@ For example, \code{min()}, \code{max()}, \code{mean()}, \code{median()}, \code{modal()}, and \code{random()}.} \item{...}{Arguments passed on to the S3 generics.} + +\item{tz}{Character string specifying the time zone for the conversion, +if required. +By default "UTC" (Universal Time Coordinated), equivalent to GMT. +If "" then the current time zone is used.} } \value{ A date object of \code{Date}, \code{POSIXct}, or \code{POSIXlt} class From 7f28078cf93c86c9517e7514e0f3c94f862a292e Mon Sep 17 00:00:00 2001 From: James Hollway Date: Fri, 7 Mar 2025 17:36:04 +0100 Subject: [PATCH 27/27] version updates --- DESCRIPTION | 4 +-- NEWS.md | 27 +++++++++++++++++ README.Rmd | 4 +-- README.md | 78 ++++++++++++++++++++++++------------------------ cran-comments.md | 1 + 5 files changed, 71 insertions(+), 43 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 82a65a96..7450204b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,8 +9,8 @@ Description: Contains a set of tools for constructing and coercing date ranges, and sets of dates. This is useful for describing and analysing temporal information, whether historical or recent, where date precision may vary. -Version: 0.5.1 -Date: 2025-02-25 +Version: 0.5.2 +Date: 2025-03-07 Authors@R: c(person(given = "James", family = "Hollway", diff --git a/NEWS.md b/NEWS.md index f44fb831..5c53f559 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,30 @@ +# messydates 0.5.2 + +## Package + +- Moved `mreport()` to `{manydata}` +- Consolidated and renamed scripts internally + +## Coerce to + +- Fixed pkgdown#2855 by fixing how as_messydate methods interpret infinite dates +- Fixed time zone defaults in `as.POSIXct.mdate()` and `as.POSIXlt.mdate()` +- Fixed set bug in `validate_messydate()` + +## Coerce from + +- Renamed `as.numeric()` to `as.double()` to fix S3 dispatching +- Separated extrema functions into `min.mdate()` and `max.mdate()` for summaries +and `vmin.mdate()` and `vmax.mdate()` for vector coercion +- Separated tendency functions into `mean.mdate()`, `median.mdate()`, and `modal.mdate()` for summaries +and `vmean.mdate()`, `vmedian.mdate()`, and `vmodal.mdate()` for vector coercion +- Vector coercion previously in `random.mdate()` now in `vrandom.mdate()` +- Improved how coercion/resolution functions handle BCE dates + +## Manipulation + +- Fixed how `precision()` calculates precision + # messydates 0.5.1 ## Package diff --git a/README.Rmd b/README.Rmd index 96b15820..6553642b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -3,7 +3,7 @@ output: github_document always_allow_html: true --- -# messydates +# messydates messydates package logo ```{r, include = FALSE} knitr::opts_chunk$set( @@ -179,7 +179,7 @@ allowing researchers to use messy dates in an analytic strategy that uses any ot Please see the cheat sheet and [the messydates website](https://globalgov.github.io/messydates/) for more information about how to use `{messydates}`. - +messydates cheatsheet ## Installation diff --git a/README.md b/README.md index b95c1500..55729e4c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# messydates +# messydates messydates package logo @@ -413,13 +413,13 @@ max 2012-01-01 -2012-01-01 +-033-01-01 -2012-01-01 +2012-10-31 -2012-01-01 +2599-12-31 @@ -427,10 +427,10 @@ max 2599-12-31 -2599-12-31 +-033-01-01 -2599-12-31 +2012-10-31 2599-12-31 @@ -441,13 +441,13 @@ max 0476 -0476-01-01 +-033-01-01 -0476-01-01 +2012-10-31 -0476-01-01 +2599-12-31 @@ -458,10 +458,10 @@ max -033-01-01 --033-01-01 +2012-10-31 --033-01-01 +2599-12-31 @@ -469,13 +469,13 @@ max 2012-02-01 -2012-02-01 +-033-01-01 -2012-02-01 +2012-10-31 -2012-02-01 +2599-12-31 @@ -483,13 +483,13 @@ max 2012-10-31 -2012-10-31 +-033-01-01 2012-10-31 -2012-10-31 +2599-12-31 @@ -497,13 +497,13 @@ max 2012-10-31 -2012-10-31 +-033-01-01 2012-10-31 -2012-10-31 +2599-12-31 @@ -511,13 +511,13 @@ max 2012-10-31 -2012-10-31 +-033-01-01 2012-10-31 -2012-10-31 +2599-12-31 @@ -525,13 +525,13 @@ max 2012-01-12~ -2012-01-12 +-033-01-01 -2012-01-12 +2012-10-31 -2012-01-12 +2599-12-31 @@ -539,13 +539,13 @@ max 2012-01-01? -2012-01-01 +-033-01-01 -2012-01-01 +2012-10-31 -2012-01-01 +2599-12-31 @@ -553,13 +553,13 @@ max 2012-01 -2012-01-01 +-033-01-01 -2012-01-01 +2012-10-31 -2012-01-01 +2599-12-31 @@ -567,13 +567,13 @@ max ..2012-01-12 -2012-01-12 +-033-01-01 -2012-01-12 +2012-10-31 -2012-01-12 +2599-12-31 @@ -581,13 +581,13 @@ max 2012-11-01..2012-12-01 -2012-11-01 +-033-01-01 -2012-11-01 +2012-10-31 -2012-11-01 +2599-12-31 @@ -595,13 +595,13 @@ max {2012-05-26,2012-11-19,2012-12-04} -2012-05-26 +-033-01-01 -2012-05-26 +2012-10-31 -2012-05-26 +2599-12-31 @@ -630,7 +630,7 @@ Please see the cheat sheet and [the messydates website](https://globalgov.github.io/messydates/) for more information about how to use `{messydates}`. - +messydates cheatsheet ## Installation diff --git a/cran-comments.md b/cran-comments.md index 71ff5754..8e77fc07 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -10,3 +10,4 @@ 0 errors | 0 warnings | 0 notes - Fixed redirected URL issue from previous submission +- Fixed most reverse dependency issues, though some changes in manydata will follow