Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ terraform/
DEVELOPER.md
^README\.Rmd$
^CODE_OF_CONDUCT\.md$
.xlsx$

2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ docs
terraform/
/.quarto/
**/*.quarto_ipynb
inst/secret/cfde-access-keyfile.json
*.xlsx
13 changes: 9 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
Package: programets
Title: Collect and analyze academic impact metrics from various sources
Version: 0.3.0
Version: 0.3.1
Authors@R:
c(person("Sean", "Davis", , "seandavi@gmail.com", role = c("aut", "cre"), comment=c(ORCID = "0000-0002-8991-6458")),
person("David", "Mayer", , "david.mayer@cuanschutz.edu", role = c("aut")))
c(
person("Sean", "Davis", , "seandavi@gmail.com", role = c("aut", "cre"), comment=c(ORCID = "0000-0002-8991-6458")),
person("David", "Mayer", , "david.mayer@cuanschutz.edu", role = c("aut"), comment=c(ORCID = "0000-0002-6056-9771"))
)
Description:
Collect and analyze academic impact metrics from various sources.
The package provides functions to collect and analyze data from NIH reporter,
Expand All @@ -28,7 +30,10 @@ Imports:
tibble,
glue,
ghql,
rvest
openxlsx,
tidyr,
rvest,
readr
Suggests:
gargle,
gitcreds,
Expand Down
6 changes: 3 additions & 3 deletions DEVELOPER.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ Step 3: Encrypt the google service account json file

```{r}
gargle::secret_encrypt_json(
path = "path/to/ga4-acess-keyfile.json",
path = "path/to/ga4-access-keyfile.json",
key = "GARGLE_ENCRYPTION_KEY",
output = "inst/secret/ga4-acess-keyfile.json"
output = "inst/secret/ga4-access-keyfile.json"
)
```
This will create an encrypted version of the json file in the `inst/secret` directory.
Expand All @@ -51,7 +51,7 @@ Step 4: Use the encrypted file in your code
library(gargle)
googleAnalyticsR::ga_auth(
json_file = gargle::secret_decrypt_json(
path = system.file("secret/ga4-acess-keyfile.json", package = "programets"),
path = system.file("secret/ga4-access-keyfile.json", package = "programets"),
key = "GARGLE_ENCRYPTION_KEY"
)
)
Expand Down
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@

export(cfde_opportunity_numbers)
export(epmc_search)
export(export_tabular)
export(ga_dataframe)
export(ga_meta_simple)
export(ga_query_explorer)
export(get_core_project_info)
export(get_ga_basic)
export(get_ga_meta_by_id)
export(get_github_by_topic)
export(get_github_by_topic_graphql)
export(icite)
Expand All @@ -23,6 +26,8 @@ importFrom(dplyr,tibble)
importFrom(ghql,GraphqlClient)
importFrom(glue,glue)
importFrom(glue,glue_collapse)
importFrom(googleAnalyticsR,ga_account_list)
importFrom(googleAnalyticsR,ga_auth)
importFrom(httr2,req_auth_bearer_token)
importFrom(httr2,req_body_json)
importFrom(httr2,req_error)
Expand All @@ -36,12 +41,18 @@ importFrom(httr2,request)
importFrom(httr2,resp_body_json)
importFrom(httr2,resp_status)
importFrom(jsonlite,fromJSON)
importFrom(openxlsx,addWorksheet)
importFrom(openxlsx,createWorkbook)
importFrom(openxlsx,saveWorkbook)
importFrom(openxlsx,writeData)
importFrom(purrr,discard)
importFrom(purrr,map)
importFrom(purrr,map2_dbl)
importFrom(purrr,map_chr)
importFrom(purrr,map_dbl)
importFrom(purrr,map_dfr)
importFrom(purrr,pmap)
importFrom(readr,write_csv)
importFrom(rlang,"%||%")
importFrom(rlang,.data)
importFrom(rlang,abort)
Expand All @@ -54,4 +65,6 @@ importFrom(stats,na.omit)
importFrom(stats,setNames)
importFrom(stringr,regex)
importFrom(stringr,str_detect)
importFrom(stringr,str_remove)
importFrom(tibble,tibble)
importFrom(tidyr,separate)
61 changes: 61 additions & 0 deletions R/export.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#' Export to Tabular
#'
#' Gathers NIH project information, the publications linked to those projects,
#' GitHub repositories, and Google Analytics properties for the requested NIH
#' Core Project Numbers, and writes each table to its own worksheet
#' (`project_info`, `pub_info`, `github_info`, `ga_info`) of a single Excel
#' workbook named `programets_<date>.xlsx` inside `dir`. When `csv = TRUE`,
#' each table is additionally written to `dir` as
#' `programets_<table>_<date>.csv`.
#'
#' @param core_project_numbers A character vector of NIH Core Project Numbers
#' @param token The token required for authentication with the GitHub API
#' @param service_account_json A character string containing the path to a JSON file containing a Google service account
#' @param dir A character string containing the path to an existing directory
#'   where the Excel file (and, optionally, the CSV files) will be written
#' @param csv A logical indicating whether to also write one CSV file per table
#'
#' @return The value returned by [openxlsx::saveWorkbook()]. The function is
#'   called for its side effect of writing files to `dir`.
#'
#' @importFrom openxlsx createWorkbook addWorksheet writeData saveWorkbook
#' @importFrom readr write_csv
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' \dontrun{
#' test_projects <- c("OT2OD030545")
#' export_tabular(test_projects, dir = tempdir(), csv = TRUE)
#' }
#'
export_tabular <- function(core_project_numbers, token = gitcreds::gitcreds_get()$password, service_account_json = 'cfde-access-keyfile.json', dir, csv = FALSE) {

  ## Fail fast: `dir` is only used after several slow network calls, so check
  ## it before doing any work
  if (missing(dir) || !is.character(dir) || length(dir) != 1L || is.na(dir) || !dir.exists(dir)) {
    stop("`dir` must be the path to an existing directory.", call. = FALSE)
  }

  ## Evaluate the date once so every file written by this call shares a stamp
  run_date <- Sys.Date()

  ## Create Excel Workbook
  wb <- createWorkbook()

  ## Write one table to its own worksheet and, if requested, to a CSV file.
  ## `wb` is modified in place by the openxlsx calls.
  add_table <- function(sheet, data, csv_stem) {
    addWorksheet(wb, sheet)
    writeData(wb = wb, sheet = sheet, x = data, na.string = "")
    if (csv) {
      write_csv(data, file.path(dir, paste0("programets_", csv_stem, "_", run_date, ".csv")))
    }
    invisible(data)
  }

  ## Add NIH Project Info
  proj_info <- get_core_project_info(core_project_numbers)
  add_table("project_info", proj_info, "proj_info")

  ## Add Associated Publications (only projects flagged as having one)
  pmids <- proj_info |>
    filter(.data$found_publication) |>
    pull('pmid')
  pub_info <- icite(pmids)
  add_table("pub_info", pub_info, "pub_info")

  ## Add GitHub
  github_info <- get_github_by_topic_graphql(core_project_numbers, token = token)
  add_table("github_info", github_info, "github_info")

  ## Add Google Analytics
  ga_info <- get_ga_basic(core_project_numbers = core_project_numbers, service_account_json = service_account_json)
  add_table("ga_info", ga_info, "ga_info")

  ## Save Workbook
  saveWorkbook(wb, file.path(dir, paste0("programets_", run_date, ".xlsx")))
}
33 changes: 33 additions & 0 deletions R/ga_meta_simple.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,37 @@
#' @export
ga_meta_simple <- function() {
  # Fetch the metadata via googleAnalyticsR, then hand it back as a tibble
  ga_metadata <- googleAnalyticsR::ga_meta(version = "data")
  tibble::as_tibble(ga_metadata)
}

#' Google Analytics metadata for a single property
#'
#' Looks up the Google Analytics metadata for one property ID and returns
#' it as a tibble.
#'
#' This is a thin wrapper around `googleAnalyticsR::ga_meta()`, called with
#' `version = "data"` and the supplied property ID; the result is converted
#' with `tibble::as_tibble()`.
#'
#' @param property_id The property ID for which to retrieve
#'   metadata.
#'
#' @note Authenticate to Google Analytics with `ga_auth()` before calling
#'   this function.
#'
#' @family Google Analytics
#'
#' @examples
#' \dontrun{
#' res <- get_ga_meta_by_id("123456789")
#' head(res)
#' dplyr::glimpse(res)
#' }
#'
#' @return A tibble containing Google Analytics metadata.
#'
#' @export
get_ga_meta_by_id <- function(property_id) {
  googleAnalyticsR::ga_meta(version = "data", propertyId = property_id) |>
    tibble::as_tibble()
}
74 changes: 74 additions & 0 deletions R/get_ga_basic.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' Get Basic Google Analytics Info
#'
#' This function takes a character vector of NIH Core Project Numbers and
#' returns a data frame containing any Google Analytics properties associated
#' with the Core Project Numbers.
#'
#' Authentication is attempted in this order:
#' 1. the encrypted key file `service_account_json` shipped in the package's
#'    `secret` directory, when it exists and the `CFDE_ENCRYPTION_KEY`
#'    environment variable is set to a non-empty value;
#' 2. a service account JSON file at the path `service_account_json`;
#' 3. interactive authentication.
#'
#' A property is matched to a project when the `apiName` of one of its
#' metadata rows contains a requested Core Project Number (case-insensitive).
#' The part of `apiName` after the first `":"` is kept, a leading `cfde_` is
#' removed, and the unique values are joined with `","`.
#'
#' @param core_project_numbers A character vector of NIH Core Project Numbers.
#'   `NA` and empty entries are ignored; at least one usable value is required.
#' @param service_account_json A character string containing the path to a JSON file containing the
#'   Google service account credentials. If no file is provided, interactive authentication is used.
#'   Defaults to "cfde-access-keyfile.json"
#'
#' @importFrom googleAnalyticsR ga_account_list ga_auth
#' @importFrom purrr map map_chr
#' @importFrom stringr str_remove
#' @importFrom tidyr separate
#' @importFrom rlang .data
#'
#' @return A data frame containing the associated Google Analytics data: the
#'   rows of `ga_account_list("ga4")` that matched, with an added
#'   `core_project_num` column. A message is emitted when nothing matches.
#' @export
get_ga_basic <- function(core_project_numbers, service_account_json = 'cfde-access-keyfile.json') {
  ## Drop NA / empty entries: an empty alternative in the regex below would
  ## match every property, and NA would be pasted in as the literal "NA"
  core_project_numbers <- as.character(core_project_numbers)
  core_project_numbers <- core_project_numbers[!is.na(core_project_numbers) & nzchar(core_project_numbers)]
  if (length(core_project_numbers) == 0) {
    stop("`core_project_numbers` must contain at least one non-empty value.", call. = FALSE)
  }

  ## This function requires authentication, check for existing creds
  has_sa_name <- is.character(service_account_json) &&
    length(service_account_json) == 1L &&
    !is.na(service_account_json) &&
    nzchar(service_account_json)
  ## system.file() returns "" when the file is not shipped with the package
  packaged_keyfile <- if (has_sa_name) {
    system.file("secret", service_account_json, package = "programets")
  } else {
    ""
  }
  ## Sys.getenv() returns "" (never NULL) for an unset variable, so test with
  ## nzchar() rather than is.null()
  has_encryption_key <- nzchar(Sys.getenv("CFDE_ENCRYPTION_KEY"))

  if (nzchar(packaged_keyfile) && file.exists(packaged_keyfile) && has_encryption_key) {
    ## Package Credentials
    programets_service_account <- gargle::secret_decrypt_json(
      path = packaged_keyfile,
      key = "CFDE_ENCRYPTION_KEY"
    )
    ga_auth(json_file = programets_service_account)
  } else if (has_sa_name && file.exists(service_account_json)) {
    ## User SA Credentials
    ga_auth(json_file = service_account_json)
  } else {
    ## Interactive Auth
    ga_auth()
  }

  core_project_regex <- paste0(unique(tolower(core_project_numbers)), collapse = "|")

  ## Reduce one property's metadata to a comma-separated string of the
  ## matching project identifiers, or NA when nothing matches
  match_core_projects <- function(property_meta) {
    matches <- property_meta |>
      filter(str_detect(.data$apiName, regex(core_project_regex, ignore_case = TRUE))) |>
      tidyr::separate("apiName", into = c("api", "value"), sep = ":", remove = FALSE) |>
      pull("value")
    if (length(matches) == 0) {
      return(NA_character_)
    }
    matches |>
      str_remove("^cfde_") |>
      unique() |>
      paste(collapse = ",")
  }

  ## Get All Analytics Properties
  account_list <- ga_account_list("ga4") |>
    mutate(
      property_meta = suppressMessages(map(.data$propertyId, get_ga_meta_by_id)),
      core_project_num = map_chr(.data$property_meta, match_core_projects)
    ) |>
    ## Filter to those with the requested Core Project Numbers
    filter(!is.na(.data$core_project_num)) |>
    select(-'property_meta')

  if (nrow(account_list) == 0) {
    rlang::inform(rlang::format_error_bullets(c(i = "No Google Analytics properties found for the requested Core Project Numbers")))
  }
  account_list
}
4 changes: 2 additions & 2 deletions R/nih_reporter.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ get_core_project_info <- function(core_project_numbers) {
proj_results_tbl |>
full_join(all_results, by = c('core_project_num' = 'core_project_number', 'appl_id' = 'applid')) |>
filter(.data$core_project_num != "") |>
relocate(.data$core_project_num, .before = .data$appl_id) |>
relocate(.data$found_publication, .after = .data$core_project_num)
relocate('core_project_num', .before = 'appl_id') |>
relocate('found_publication', .after = 'core_project_num')

return(all_results_combined)
}
File renamed without changes.
34 changes: 34 additions & 0 deletions man/export_tabular.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/ga_meta_simple.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/ga_query_explorer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions man/get_ga_basic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading