diff --git a/DESCRIPTION b/DESCRIPTION
index 61d7bc0..180f721 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,7 @@ Description:
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Depends:
     R (>= 4.1.0)
 Imports:
@@ -27,7 +27,8 @@ Imports:
     stringr,
     tibble,
     glue,
-    ghql
+    ghql,
+    rvest
 Suggests:
     gargle,
     gitcreds,
diff --git a/NAMESPACE b/NAMESPACE
index 1b1fb9e..2dbb8f9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(cfde_opportunity_numbers)
 export(epmc_search)
 export(ga_dataframe)
 export(ga_meta_simple)
@@ -46,6 +47,9 @@ importFrom(rlang,.data)
 importFrom(rlang,abort)
 importFrom(rlang,format_error_bullets)
 importFrom(rlang,inform)
+importFrom(rvest,html_attr)
+importFrom(rvest,html_nodes)
+importFrom(rvest,read_html)
 importFrom(stats,setNames)
 importFrom(stringr,regex)
 importFrom(stringr,str_detect)
diff --git a/R/cfde_opportunity_numbers.R b/R/cfde_opportunity_numbers.R
new file mode 100644
index 0000000..59a2ac8
--- /dev/null
+++ b/R/cfde_opportunity_numbers.R
@@ -0,0 +1,45 @@
+#' All CFDE Funding Opportunity Numbers
+#'
+#' This function retrieves all CFDE funding
+#' opportunity numbers from the CFDE funding
+#' website, \url{https://commonfund.nih.gov/dataecosystem/FundingOpportunities}.
+#'
+#' Note that this function is specific to the CFDE
+#' program and is not a general-purpose web scraping
+#' function.
+#'
+#' Opportunity numbers are taken from the link targets on the page:
+#' the first `RFA-`, `OTA-` or `NOT-` identifier in each link is
+#' returned, and links without one are ignored.
+#'
+#' @importFrom rvest read_html html_nodes html_attr
+#'
+#' @param url The URL of the CFDE funding webpage
+#' @return A character vector of funding opportunity numbers, one per
+#'   matching link. An opportunity that is linked more than once is
+#'   returned more than once.
+#'
+#' @examples
+#'
+#' \dontrun{
+#' # Requires internet access.
+#' browseURL("https://commonfund.nih.gov/dataecosystem/FundingOpportunities")
+#' cfde_opportunity_numbers()
+#' }
+#'
+#' @export
+cfde_opportunity_numbers <- function(
+    url = "https://commonfund.nih.gov/dataecosystem/FundingOpportunities"
+) {
+  hrefs <- rvest::read_html(url) |>
+    rvest::html_nodes("a") |>
+    rvest::html_attr("href")
+  # Anchors without an `href` attribute come back as NA; drop them.
+  hrefs <- hrefs[!is.na(hrefs)]
+
+  pattern <- "(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}"
+
+  # regexpr() locates the first match in each link; regmatches() returns
+  # only the matched text, so links without a match are dropped.
+  regmatches(hrefs, regexpr(pattern, hrefs, perl = TRUE))
+}
diff --git a/man/cfde_opportunity_numbers.Rd b/man/cfde_opportunity_numbers.Rd
new file mode 100644
index 0000000..9fc7057
--- /dev/null
+++ b/man/cfde_opportunity_numbers.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cfde_opportunity_numbers.R
+\name{cfde_opportunity_numbers}
+\alias{cfde_opportunity_numbers}
+\title{All CFDE Funding Opportunity Numbers}
+\usage{
+cfde_opportunity_numbers(
+  url = "https://commonfund.nih.gov/dataecosystem/FundingOpportunities"
+)
+}
+\arguments{
+\item{url}{The URL of the CFDE funding webpage}
+}
+\value{
+A character vector of funding opportunity numbers, one per
+matching link. An opportunity that is linked more than once is
+returned more than once.
+}
+\description{
+This function retrieves all CFDE funding
+opportunity numbers from the CFDE funding
+website, \url{https://commonfund.nih.gov/dataecosystem/FundingOpportunities}.
+}
+\details{
+Note that this function is specific to the CFDE
+program and is not a general-purpose web scraping
+function.
+
+Opportunity numbers are taken from the link targets on the page:
+the first \code{RFA-}, \code{OTA-} or \code{NOT-} identifier in each link is
+returned, and links without one are ignored.
+}
+\examples{
+
+\dontrun{
+# Requires internet access.
+browseURL("https://commonfund.nih.gov/dataecosystem/FundingOpportunities")
+cfde_opportunity_numbers()
+}
+
+}
diff --git a/tests/testthat/test_cfde_opportunity_numbers.R b/tests/testthat/test_cfde_opportunity_numbers.R
new file mode 100644
index 0000000..db250c1
--- /dev/null
+++ b/tests/testthat/test_cfde_opportunity_numbers.R
@@ -0,0 +1,19 @@
+test_that("cfde_opportunity_numbers returns a character vector", {
+  # Scrapes the live CFDE page, so skip on CRAN and when offline.
+  skip_on_cran()
+  skip_if_offline("commonfund.nih.gov")
+
+  result <- cfde_opportunity_numbers()
+  expect_type(result, "character")
+})
+
+test_that("cfde_opportunity_numbers returns expected pattern", {
+  # Scrapes the live CFDE page, so skip on CRAN and when offline.
+  skip_on_cran()
+  skip_if_offline("commonfund.nih.gov")
+
+  # Every returned value must be exactly one opportunity number.
+  result <- cfde_opportunity_numbers()
+  pattern <- "^(RFA|OTA|NOT)-[A-Z]{2}-\\d{2}-\\d{3}$"
+  expect_true(all(grepl(pattern, result)))
+})