From 6415172d8000b8a06072b57f221e57f310c56b9b Mon Sep 17 00:00:00 2001 From: Sean Davis Date: Mon, 2 Jun 2025 16:44:56 -0400 Subject: [PATCH 1/5] add cfde-opportunity-numbers fetcher This should close Add function to return CFDE funding opportunity numbers #16. --- NAMESPACE | 4 ++ R/cfde_opportunity_numbers.R | 39 +++++++++++++++++++ man/cfde_opportunity_numbers.Rd | 36 +++++++++++++++++ .../testthat/test_cfde_opportunity_numbers.R | 12 ++++++ 4 files changed, 91 insertions(+) create mode 100644 R/cfde_opportunity_numbers.R create mode 100644 man/cfde_opportunity_numbers.Rd create mode 100644 tests/testthat/test_cfde_opportunity_numbers.R diff --git a/NAMESPACE b/NAMESPACE index 2fd75bd..4573216 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(cfde_opportunity_numbers) export(epmc_search) export(ga_dataframe) export(ga_meta_simple) @@ -35,6 +36,9 @@ importFrom(rlang,.data) importFrom(rlang,abort) importFrom(rlang,format_error_bullets) importFrom(rlang,inform) +importFrom(rvest,html_attr) +importFrom(rvest,html_nodes) +importFrom(rvest,read_html) importFrom(stringr,regex) importFrom(stringr,str_detect) importFrom(tibble,tibble) diff --git a/R/cfde_opportunity_numbers.R b/R/cfde_opportunity_numbers.R new file mode 100644 index 0000000..f8b893e --- /dev/null +++ b/R/cfde_opportunity_numbers.R @@ -0,0 +1,39 @@ +#' All CFDE Funding Opportunity Numbers +#' +#' This function retrieves all CFDE funding +#' opportunity numbers from the CFDE funding +#' website, \url{https://commonfund.nih.gov/dataecosystem/FundingOpportunities}. +#' +#' Note that this function is specific to the CFDE +#' program and is not a general-purpose web scraping +#' function. +#' +#' @importFrom rvest read_html html_nodes html_attr +#' +#' +#' @param url The URL of the CFDE API endpoint. +#' Default is set to the CFDE funding opportunities page. +#' @return a character vector of funding opportunity numbers. 
+#' +#' @examples +#' +#' \dontrun{ +#' browseURL("https://commonfund.nih.gov/dataecosystem/FundingOpportunities") +#' } +#' +#' cfde_opportunity_numbers() +#' +#' @export +cfde_opportunity_numbers <- function( + url = "https://commonfund.nih.gov/dataecosystem/FundingOpportunities" +) { + + hrefs <- rvest::read_html(url) |> + rvest::html_nodes("a") |> + rvest::html_attr("href") + hrefs_filtered <- grep('NOT|RFA|OTA', hrefs, value = TRUE) + + pattern <- "(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}" + + regmatches(hrefs_filtered, regexpr(pattern, hrefs_filtered, perl=TRUE)) +} \ No newline at end of file diff --git a/man/cfde_opportunity_numbers.Rd b/man/cfde_opportunity_numbers.Rd new file mode 100644 index 0000000..9910a0b --- /dev/null +++ b/man/cfde_opportunity_numbers.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cfde_opportunity_numbers.R +\name{cfde_opportunity_numbers} +\alias{cfde_opportunity_numbers} +\title{All CFDE Funding Opportunity Numbers} +\usage{ +cfde_opportunity_numbers( + url = "https://commonfund.nih.gov/dataecosystem/FundingOpportunities" +) +} +\arguments{ +\item{url}{The URL of the CFDE API endpoint. +Default is set to the CFDE funding opportunities page.} +} +\value{ +a character vector of funding opportunity numbers. +} +\description{ +This function retrieves all CFDE funding +opportunity numbers from the CFDE funding +website, \url{https://commonfund.nih.gov/dataecosystem/FundingOpportunities}. +} +\details{ +Note that this function is specific to the CFDE +program and is not a general-purpose web scraping +function. 
+} +\examples{ + +\dontrun{ +browseURL("https://commonfund.nih.gov/dataecosystem/FundingOpportunities") +} + +cfde_opportunity_numbers() + +} diff --git a/tests/testthat/test_cfde_opportunity_numbers.R b/tests/testthat/test_cfde_opportunity_numbers.R new file mode 100644 index 0000000..db250c1 --- /dev/null +++ b/tests/testthat/test_cfde_opportunity_numbers.R @@ -0,0 +1,12 @@ +test_that("cfde_opportunity_numbers returns a character vector", { + # Check that the scraped result is a character vector + result <- cfde_opportunity_numbers() + expect_type(result, "character") +}) + +test_that("cfde_opportunity_numbers returns expected pattern", { + # Test that the returned opportunity numbers match the expected pattern + result <- cfde_opportunity_numbers() + pattern <- "(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}" + expect_true(all(grepl(pattern, result))) +}) \ No newline at end of file From 043692a2a30d973607c98afa632db5dd269c7f0a Mon Sep 17 00:00:00 2001 From: Sean Davis Date: Mon, 2 Jun 2025 16:51:10 -0400 Subject: [PATCH 2/5] Update R/cfde_opportunity_numbers.R Avoid returning NAs since those are not valuable....
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- R/cfde_opportunity_numbers.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/cfde_opportunity_numbers.R b/R/cfde_opportunity_numbers.R index f8b893e..3fd0a4c 100644 --- a/R/cfde_opportunity_numbers.R +++ b/R/cfde_opportunity_numbers.R @@ -35,5 +35,7 @@ cfde_opportunity_numbers <- function( pattern <- "(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}" - regmatches(hrefs_filtered, regexpr(pattern, hrefs_filtered, perl=TRUE)) + matches <- regmatches(hrefs_filtered, regexpr(pattern, hrefs_filtered, perl=TRUE)) + matches <- na.omit(matches) + matches[nzchar(matches)] } \ No newline at end of file From e73e25583a4c1720cc183f5431a39c123f9abf46 Mon Sep 17 00:00:00 2001 From: Sean Davis Date: Mon, 2 Jun 2025 16:54:28 -0400 Subject: [PATCH 3/5] Clarify docs that the url here is not an API but a webpage. --- R/cfde_opportunity_numbers.R | 3 +-- man/cfde_opportunity_numbers.Rd | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/cfde_opportunity_numbers.R b/R/cfde_opportunity_numbers.R index 3fd0a4c..f07140a 100644 --- a/R/cfde_opportunity_numbers.R +++ b/R/cfde_opportunity_numbers.R @@ -11,8 +11,7 @@ #' @importFrom rvest read_html html_nodes html_attr #' #' -#' @param url The URL of the CFDE API endpoint. -#' Default is set to the CFDE funding opportunities page. +#' @param url The URL of the CFDE funding webpage #' @return a character vector of funding opportunity numbers. #' #' @examples diff --git a/man/cfde_opportunity_numbers.Rd b/man/cfde_opportunity_numbers.Rd index 9910a0b..9fc7057 100644 --- a/man/cfde_opportunity_numbers.Rd +++ b/man/cfde_opportunity_numbers.Rd @@ -9,8 +9,7 @@ cfde_opportunity_numbers( ) } \arguments{ -\item{url}{The URL of the CFDE API endpoint. -Default is set to the CFDE funding opportunities page.} +\item{url}{The URL of the CFDE funding webpage} } \value{ a character vector of funding opportunity numbers. 
From 76ccca9c2eab54c51b156ef0d9e6254ba47c4d36 Mon Sep 17 00:00:00 2001 From: David Mayer Date: Wed, 3 Dec 2025 09:53:33 -0500 Subject: [PATCH 4/5] bump roxygen, add missing dep to description --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 61d7bc0..180f721 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Description: License: MIT + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Depends: R (>= 4.1.0) Imports: @@ -27,7 +27,8 @@ Imports: stringr, tibble, glue, - ghql + ghql, + rvest Suggests: gargle, gitcreds, From f16872b986618e130728b803724955952680dfab Mon Sep 17 00:00:00 2001 From: David Mayer Date: Wed, 3 Dec 2025 09:54:05 -0500 Subject: [PATCH 5/5] update namespace with additional import --- NAMESPACE | 1 + R/cfde_opportunity_numbers.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 75d6933..2dbb8f9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,7 @@ importFrom(rlang,inform) importFrom(rvest,html_attr) importFrom(rvest,html_nodes) importFrom(rvest,read_html) +importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(stringr,regex) importFrom(stringr,str_detect) diff --git a/R/cfde_opportunity_numbers.R b/R/cfde_opportunity_numbers.R index f07140a..59a2ac8 100644 --- a/R/cfde_opportunity_numbers.R +++ b/R/cfde_opportunity_numbers.R @@ -9,6 +9,7 @@ #' function. #' #' @importFrom rvest read_html html_nodes html_attr +#' @importFrom stats na.omit #' #' #' @param url The URL of the CFDE funding webpage