diff --git a/.Rbuildignore b/.Rbuildignore index 8bee5ea..1d32942 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,4 +11,5 @@ terraform/ DEVELOPER.md ^README\.Rmd$ ^CODE_OF_CONDUCT\.md$ +.xlsx$ diff --git a/.gitignore b/.gitignore index 6f4bb29..9a4aaa0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ docs terraform/ /.quarto/ **/*.quarto_ipynb +inst/secret/cfde-access-keyfile.json +*.xlsx diff --git a/DESCRIPTION b/DESCRIPTION index 180f721..b436e46 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,11 @@ Package: programets Title: Collect and analyze academic impact metrics from various sources -Version: 0.3.0 +Version: 0.3.1 Authors@R: - c(person("Sean", "Davis", , "seandavi@gmail.com", role = c("aut", "cre"), comment=c(ORCID = "0000-0002-8991-6458")), - person("David", "Mayer", , "david.mayer@cuanschutz.edu", role = c("aut"))) + c( + person("Sean", "Davis", , "seandavi@gmail.com", role = c("aut", "cre"), comment=c(ORCID = "0000-0002-8991-6458")), + person("David", "Mayer", , "david.mayer@cuanschutz.edu", role = c("aut"), comment=c(ORCID = "0000-0002-6056-9771")) + ) Description: Collect and analyze academic impact metrics from various sources. The package provides functions to collect and analyze data from NIH reporter, @@ -28,7 +30,10 @@ Imports: tibble, glue, ghql, - rvest + openxlsx, + tidyr, + rvest, + readr Suggests: gargle, gitcreds, diff --git a/DEVELOPER.md b/DEVELOPER.md index e57b807..d26f7d5 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -38,9 +38,9 @@ Step 3: Encrypt the google service account json file ```{r} gargle::secret_encrypt_json( - path = "path/to/ga4-acess-keyfile.json", + path = "path/to/ga4-access-keyfile.json", key = "GARGLE_ENCRYPTION_KEY", - output = "inst/secret/ga4-acess-keyfile.json" + output = "inst/secret/ga4-access-keyfile.json" ) ``` This will create an encrypted version of the json file in the `inst/secret` directory. 
@@ -51,7 +51,7 @@ Step 4: Use the encrypted file in your code library(gargle) googleAnalyticsR::ga_auth( json_file = gargle::secret_decrypt_json( - path = system.file("secret/ga4-acess-keyfile.json", package = "programets"), + path = system.file("secret/ga4-access-keyfile.json", package = "programets"), key = "GARGLE_ENCRYPTION_KEY" ) ) diff --git a/NAMESPACE b/NAMESPACE index 2dbb8f9..e44315d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,10 +2,13 @@ export(cfde_opportunity_numbers) export(epmc_search) +export(export_tabular) export(ga_dataframe) export(ga_meta_simple) export(ga_query_explorer) export(get_core_project_info) +export(get_ga_basic) +export(get_ga_meta_by_id) export(get_github_by_topic) export(get_github_by_topic_graphql) export(icite) @@ -23,6 +26,8 @@ importFrom(dplyr,tibble) importFrom(ghql,GraphqlClient) importFrom(glue,glue) importFrom(glue,glue_collapse) +importFrom(googleAnalyticsR,ga_account_list) +importFrom(googleAnalyticsR,ga_auth) importFrom(httr2,req_auth_bearer_token) importFrom(httr2,req_body_json) importFrom(httr2,req_error) @@ -36,12 +41,18 @@ importFrom(httr2,request) importFrom(httr2,resp_body_json) importFrom(httr2,resp_status) importFrom(jsonlite,fromJSON) +importFrom(openxlsx,addWorksheet) +importFrom(openxlsx,createWorkbook) +importFrom(openxlsx,saveWorkbook) +importFrom(openxlsx,writeData) importFrom(purrr,discard) +importFrom(purrr,map) importFrom(purrr,map2_dbl) importFrom(purrr,map_chr) importFrom(purrr,map_dbl) importFrom(purrr,map_dfr) importFrom(purrr,pmap) +importFrom(readr,write_csv) importFrom(rlang,"%||%") importFrom(rlang,.data) importFrom(rlang,abort) @@ -54,4 +65,6 @@ importFrom(stats,na.omit) importFrom(stats,setNames) importFrom(stringr,regex) importFrom(stringr,str_detect) +importFrom(stringr,str_remove) importFrom(tibble,tibble) +importFrom(tidyr,separate) diff --git a/R/export.R b/R/export.R new file mode 100644 index 0000000..84fbb11 --- /dev/null +++ b/R/export.R @@ -0,0 +1,61 @@ +#' Export to Tabular +#' +#' 
@param core_project_numbers A character vector of NIH Core Project Numbers +#' @param token The token required for authentication with the GitHub API +#' @param service_account_json A character string containing the path to a JSON file containing a Google service account +#' @param dir A character string containing the path to directory where the Excel file will be written +#' @param csv A logical indicating whether to write a CSV file +#' +#' @importFrom openxlsx createWorkbook addWorksheet writeData saveWorkbook +#' @importFrom readr write_csv +#' @importFrom rlang .data +#' @export +#' +#' @examples +#' \dontrun{ +#' test_projects <-c("OT2OD030545") +#' } +#' +export_tabular <- function(core_project_numbers, token = gitcreds::gitcreds_get()$password, service_account_json = 'cfde-access-keyfile.json', dir, csv = FALSE) { + + ## Create Excel Workbook + wb <- createWorkbook() + + ## Add NIH Project Info + addWorksheet(wb, "project_info") + proj_info <- get_core_project_info(core_project_numbers) + writeData(wb = wb, sheet = "project_info", x = proj_info, na.string = "") + if (csv) { + write_csv(proj_info, file.path(dir, paste0("programets_proj_info_", Sys.Date(), ".csv"))) + } + + ## Add Associated Publications + addWorksheet(wb, "pub_info") + pmids <- proj_info |> + filter(.data$found_publication) |> + pull('pmid') + pub_info <- icite(pmids) + writeData(wb = wb, sheet = "pub_info", x = pub_info, na.string = "") + if (csv) { + write_csv(pub_info, file.path(dir, paste0("programets_pub_info_", Sys.Date(), ".csv"))) + } + + ## Add GitHub + addWorksheet(wb, "github_info") + github_info <- get_github_by_topic_graphql(core_project_numbers, token = token) + writeData(wb = wb, sheet = "github_info", x = github_info, na.string = "") + if (csv) { + write_csv(github_info, file.path(dir, paste0("programets_github_info_", Sys.Date(), ".csv"))) + } + + ## Add Google Analytics + addWorksheet(wb, "ga_info") + ga_info <- 
get_ga_basic(core_project_numbers = core_project_numbers, service_account_json = service_account_json) + writeData(wb = wb, sheet = "ga_info", x = ga_info, na.string = "") + if (csv) { + write_csv(ga_info, file.path(dir, paste0("programets_ga_info_", Sys.Date(), ".csv"))) + } + + ## Save Workbook + saveWorkbook(wb, file.path(dir, paste0("programets_", Sys.Date(), ".xlsx"))) +} \ No newline at end of file diff --git a/R/ga_meta_simple.R b/R/ga_meta_simple.R index e3882c7..3ac83b4 100644 --- a/R/ga_meta_simple.R +++ b/R/ga_meta_simple.R @@ -26,4 +26,37 @@ #' @export ga_meta_simple <- function() { tibble::as_tibble(googleAnalyticsR::ga_meta(version = "data")) +} + +#' Google Analytics metadata dataframe by property ID +#' +#' This function retrieves Google Analytics metadata by property ID +#' and returns it as a dataframe. The metadata includes +#' information about metrics, dimensions, and other +#' attributes available in Google Analytics. +#' +#' This function is a wrapper around the +#' `googleAnalyticsR::ga_meta()` function. It retrieves +#' metadata for the Google Analytics API version 4. +#' +#' @param property_id The property ID for which to retrieve +#' metadata. +#' +#' @note This function requires first authenticating to +#' Google Analytics using the `ga_auth()` function. +#' +#' @family Google Analytics +#' +#' @examples +#' \dontrun{ +#' res = get_ga_meta_by_id("123456789") +#' head(res) +#' dplyr::glimpse(res) +#' } +#' +#' @return A tibble containing Google Analytics metadata. 
+#' +#' @export +get_ga_meta_by_id <- function(property_id) { + tibble::as_tibble(googleAnalyticsR::ga_meta(version = "data", propertyId = property_id)) } \ No newline at end of file diff --git a/R/get_ga_basic.R b/R/get_ga_basic.R new file mode 100644 index 0000000..219d14a --- /dev/null +++ b/R/get_ga_basic.R @@ -0,0 +1,74 @@ +#' Get Basic Google Analytics Info +#' +#' This function takes a character vector of NIH Core Project Numbers and +#' returns a data frame containing any Google Analytics properties associated +#' with the Core Project Numbers. +#' +#' @param core_project_numbers A character vector of NIH Core Project Numbers +#' @param service_account_json A character string containing the path to a JSON file containing the +#' Google service account credentials. If no file is provided, interactive authentication is used. +#' Defaults to "cfde-access-keyfile.json" +#' +#' @importFrom googleAnalyticsR ga_account_list ga_auth +#' @importFrom purrr map map_chr +#' @importFrom stringr str_remove +#' @importFrom tidyr separate +#' @importFrom rlang .data +#' +#' @return A data frame containing the associated Google Analytics data +#' @export +get_ga_basic <- function(core_project_numbers, service_account_json = 'cfde-access-keyfile.json') { + ## This function requires authentication, check for existing creds + ## Package Credentials + if(file.exists(system.file("secret", service_account_json, package = "programets")) && + nzchar(Sys.getenv("CFDE_ENCRYPTION_KEY"))){ + programets_service_account <- gargle::secret_decrypt_json( + path = system.file( + "secret", + service_account_json, + package = "programets" + ), + key = "CFDE_ENCRYPTION_KEY" + ) + googleAnalyticsR::ga_auth( + json_file = programets_service_account + ) + ## User SA Credentials + } else if (file.exists(service_account_json)) { + ga_auth(json_file = service_account_json) + ## Interactive Auth + } else { + ga_auth() + } + + ## Get All Analytics Properties + core_project_regex <- 
paste0(unique(tolower(core_project_numbers)), collapse = "|") + account_list <- ga_account_list("ga4") |> + mutate( + property_meta = suppressMessages(map(.data$propertyId, get_ga_meta_by_id)), + core_project_num = map_chr( + .data$property_meta, + ~{ + res <- .x |> + filter(str_detect(.data$apiName, regex(core_project_regex, ignore_case = TRUE))) |> + tidyr::separate('apiName', into = c("api", "value"), sep = ":", remove = FALSE) |> + pull('value') + if (length(res) == 0) { + NA_character_ + } else { + res |> + str_remove("^cfde_") |> + unique() |> + paste(collapse = ",") + } + } + ) + ) |> + ## Filter to those with the requested Core Project Numbers + filter(!is.na(.data$core_project_num)) |> + select(-'property_meta') + if(nrow(account_list) == 0) { + rlang::inform(rlang::format_error_bullets(c(i = "No Google Analytics properties found for the requested Core Project Numbers"))) + } + return(account_list) + } diff --git a/R/nih_reporter.R b/R/nih_reporter.R index 48b0312..5616d8e 100644 --- a/R/nih_reporter.R +++ b/R/nih_reporter.R @@ -215,8 +215,8 @@ get_core_project_info <- function(core_project_numbers) { proj_results_tbl |> full_join(all_results, by = c('core_project_num' = 'core_project_number', 'appl_id' = 'applid')) |> filter(.data$core_project_num != "") |> - relocate(.data$core_project_num, .before = .data$appl_id) |> - relocate(.data$found_publication, .after = .data$core_project_num) + relocate('core_project_num', .before = 'appl_id') |> + relocate('found_publication', .after = 'core_project_num') return(all_results_combined) } diff --git a/inst/secret/ga4-acess-keyfile.json b/inst/secret/ga4-access-keyfile.json similarity index 100% rename from inst/secret/ga4-acess-keyfile.json rename to inst/secret/ga4-access-keyfile.json diff --git a/man/export_tabular.Rd b/man/export_tabular.Rd new file mode 100644 index 0000000..095f012 --- /dev/null +++ b/man/export_tabular.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/export.R +\name{export_tabular} +\alias{export_tabular} +\title{Export to Tabular} +\usage{ +export_tabular( + core_project_numbers, + token = gitcreds::gitcreds_get()$password, + service_account_json = "cfde-access-keyfile.json", + dir, + csv = FALSE +) +} +\arguments{ +\item{core_project_numbers}{A character vector of NIH Core Project Numbers} + +\item{token}{The token required for authentication with the GitHub API} + +\item{service_account_json}{A character string containing the path to a JSON file containing a Google service account} + +\item{dir}{A character string containing the path to directory where the Excel file will be written} + +\item{csv}{A logical indicating whether to write a CSV file} +} +\description{ +Export to Tabular +} +\examples{ +\dontrun{ +test_projects <-c("OT2OD030545") +} + +} diff --git a/man/ga_meta_simple.Rd b/man/ga_meta_simple.Rd index 6d57073..72a94fc 100644 --- a/man/ga_meta_simple.Rd +++ b/man/ga_meta_simple.Rd @@ -34,6 +34,7 @@ dplyr::glimpse(res) } \seealso{ Other Google Analytics: -\code{\link{ga_query_explorer}()} +\code{\link{ga_query_explorer}()}, +\code{\link{get_ga_meta_by_id}()} } \concept{Google Analytics} diff --git a/man/ga_query_explorer.Rd b/man/ga_query_explorer.Rd index d74816e..fc49f77 100644 --- a/man/ga_query_explorer.Rd +++ b/man/ga_query_explorer.Rd @@ -28,6 +28,7 @@ ga_query_explorer() } \seealso{ Other Google Analytics: -\code{\link{ga_meta_simple}()} +\code{\link{ga_meta_simple}()}, +\code{\link{get_ga_meta_by_id}()} } \concept{Google Analytics} diff --git a/man/get_ga_basic.Rd b/man/get_ga_basic.Rd new file mode 100644 index 0000000..13fb9e5 --- /dev/null +++ b/man/get_ga_basic.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ga_basic.R +\name{get_ga_basic} +\alias{get_ga_basic} +\title{Get Basic Google Analytics Info} +\usage{ +get_ga_basic( + core_project_numbers, + service_account_json = "cfde-access-keyfile.json" +) +} +\arguments{ 
+\item{core_project_numbers}{A character vector of NIH Core Project Numbers} + +\item{service_account_json}{A character string containing the path to a JSON file containing the +Google service account credentials. If no file is provided, interactive authentication is used. +Defaults to "cfde-access-keyfile.json"} +} +\value{ +A data frame containing the associated Google Analytics data +} +\description{ +This function takes a character vector of NIH Core Project Numbers and +returns a data frame containing any Google Analytics properties associated +with the Core Project Numbers. +} diff --git a/man/get_ga_meta_by_id.Rd b/man/get_ga_meta_by_id.Rd new file mode 100644 index 0000000..9885205 --- /dev/null +++ b/man/get_ga_meta_by_id.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ga_meta_simple.R +\name{get_ga_meta_by_id} +\alias{get_ga_meta_by_id} +\title{Google Analytics metadata dataframe by property ID} +\usage{ +get_ga_meta_by_id(property_id) +} +\arguments{ +\item{property_id}{The property ID for which to retrieve +metadata.} +} +\value{ +A tibble containing Google Analytics metadata. +} +\description{ +This function retrieves Google Analytics metadata by property ID +and returns it as a dataframe. The metadata includes +information about metrics, dimensions, and other +attributes available in Google Analytics. +} +\details{ +This function is a wrapper around the +\code{googleAnalyticsR::ga_meta()} function. It retrieves +metadata for the Google Analytics API version 4. +} +\note{ +This function requires first authenticating to +Google Analytics using the \code{ga_auth()} function. 
+} +\examples{ +\dontrun{ +res = get_ga_meta_by_id("123456789") +head(res) +dplyr::glimpse(res) +} + +} +\seealso{ +Other Google Analytics: +\code{\link{ga_meta_simple}()}, +\code{\link{ga_query_explorer}()} +} +\concept{Google Analytics} diff --git a/tests/testthat/test-export_tabular.R b/tests/testthat/test-export_tabular.R new file mode 100644 index 0000000..e9a8c8b --- /dev/null +++ b/tests/testthat/test-export_tabular.R @@ -0,0 +1,20 @@ +library(gitcreds) + +test_that("export_tabular writes an Excel file", { + temp_file <- file.path(tempdir(), paste0("programets_", Sys.Date(), ".xlsx")) + on.exit(unlink(temp_file)) + core_project_numbers <- c("OT2OD030545") + token <- gitcreds_get()$password + export_tabular(core_project_numbers = core_project_numbers, token = token, dir = tempdir()) + expect_true(file.exists(temp_file)) +}) + +test_that("export_tabular throws an error if the file already exists", { + temp_file <- file.path(tempdir(), paste0("programets_", Sys.Date(), ".xlsx")) + on.exit(unlink(temp_file)) + core_project_numbers <- c("OT2OD030545") + token <- gitcreds_get()$password + file.create(temp_file) + expect_error(export_tabular(core_project_numbers = core_project_numbers, token = token, dir = tempdir()), "File already exists") +}) + diff --git a/tests/testthat/test-ga_dataframe.R b/tests/testthat/test-ga_dataframe.R index a294431..8f199ac 100644 --- a/tests/testthat/test-ga_dataframe.R +++ b/tests/testthat/test-ga_dataframe.R @@ -6,7 +6,7 @@ test_that("ga_dataframe returns expected tibble", { json_file <- gargle::secret_decrypt_json( path = system.file( "secret", - "ga4-acess-keyfile.json", + "ga4-access-keyfile.json", package = "programets" ), key = "GARGLE_ENCRYPTION_KEY" diff --git a/tests/testthat/test-get_ga_basic.R b/tests/testthat/test-get_ga_basic.R new file mode 100644 index 0000000..1f1ebe8 --- /dev/null +++ b/tests/testthat/test-get_ga_basic.R @@ -0,0 +1,11 @@ +test_that("get_ga_basic returns expected values", { + 
core_project_numbers <- c("u54od036472", "99999999") + result <- get_ga_basic(core_project_numbers = core_project_numbers) + expect_s3_class(result, "tbl_df") + expect_true("account_name" %in% names(result)) + expect_true("accountId" %in% names(result)) + expect_true("propertyId" %in% names(result)) + expect_true("property_name" %in% names(result)) + expect_true("core_project_num" %in% names(result)) + } +) diff --git a/vignettes/googleanalytics.qmd b/vignettes/googleanalytics.qmd index 3b14c9f..a483f1f 100644 --- a/vignettes/googleanalytics.qmd +++ b/vignettes/googleanalytics.qmd @@ -39,7 +39,7 @@ ga_auth(email = "seandavi@gmail.com") json_file <- gargle::secret_decrypt_json( path = system.file( "secret", - "ga4-acess-keyfile.json", + "ga4-access-keyfile.json", package = "programets" ), key = "GARGLE_ENCRYPTION_KEY"