From 150c713ed812cf13f986fa3af464067b2486d63f Mon Sep 17 00:00:00 2001 From: njtierney Date: Wed, 5 Apr 2023 18:02:16 +1000 Subject: [PATCH 1/2] first pass at working on #151 --- R/fit_single_contact_model.R | 1 - vignettes/other-data-sources.Rmd | 169 ++++++++++++++++++++++++++++++- 2 files changed, 165 insertions(+), 5 deletions(-) diff --git a/R/fit_single_contact_model.R b/R/fit_single_contact_model.R index b9c436df..19f2e6ed 100644 --- a/R/fit_single_contact_model.R +++ b/R/fit_single_contact_model.R @@ -130,7 +130,6 @@ fit_single_contact_model <- function(contact_data, # programatically add the offset term to the formula, so the model defines # information about the setting, without us having to pass it through to the # prediction data - if (symmetrical) { formula_no_offset <- contacts ~ # Prem method did a post-hoc smoothing diff --git a/vignettes/other-data-sources.Rmd b/vignettes/other-data-sources.Rmd index 51b92503..9a82282a 100644 --- a/vignettes/other-data-sources.Rmd +++ b/vignettes/other-data-sources.Rmd @@ -20,9 +20,9 @@ library(conmat) The primary goal of the conmat package is to be able to get a contact matrix for a given age population. It was initially written for work done in Australia, and so the initial focus was on cleaning and extracting data from the Australian Bureau of Statistics. -This vignette focusses on using other data sources with conmat. +This vignette focusses on using other data sources with conmat. We can use some other functions from `socialmixr` to extract similar estimates for different populations in different countries. -We can use some other functions from `socialmixr` to extract similar estimates for different populations in different countries. +## Predicting contact matrices to other countries We could extract some data from Italy using the [`socialmixr`](https://epiforecasts.io/socialmixr/) R package @@ -54,7 +54,7 @@ italy_contact <- extrapolate_polymod( italy_contact ``` -# Creating a next generation matrix (NGM) +### Creating a next generation matrix (NGM) To create a next generation matrix, you can use either a conmat population object, or setting prediction matrices, like so: @@ -82,7 +82,7 @@ italy_2005_ngm_polymod identical(italy_2005_ngm, italy_2005_ngm_polymod) ``` -# Applying vaccination to an NGM +### Applying vaccination to an NGM We can then take a next generation matrix and apply vaccination data, such as the provided `vaccination_effect_example_data` dataset. @@ -99,3 +99,164 @@ ngm_italy_vacc <- apply_vaccination( ngm_italy_vacc ``` + + +## Predicting using data that isn't POLYMOD + +If we want using survey data that isn't POLYMOD, here is how to do that. + +```{r} +new_polymod <- get_polymod_contact_data() +``` + +We can get another contact survey from the `socialmixr` package. + +We can see what is available here: +```{r} +available_surverys <- socialmixr::list_surveys() +available_surverys +``` + +Let's get the data for Switzerland + +```{r} +swiss_data <- available_surverys %>% + filter(agrepl("switzerland", title)) + +swiss_url <- swiss_data$url[1] + +swiss_contact_data <- socialmixr::get_survey(swiss_url) + +swiss_contact_data +``` + +```{r} +survey_data <- swiss_contact_data +contact_age_imputation <- "sample" +# setting = c("all", "home", "work", "school", "other") +setting <- "all" +ages <- 0:100 +contact_data <- survey_data$participants %>% + # dplyr::filter( + # country %in% countries + # ) %>% + dplyr::left_join( + survey_data$contacts, + by = "part_id", + multiple = "all" + ) + +# impute contact ages according to the required method +contact_data_imputed <- contact_data %>% + dplyr::mutate( + cnt_age_sampled = floor( + # suppress warnings about NAs in runif + suppressWarnings( + stats::runif( + n = dplyr::n(), + min = cnt_age_est_min, + max = cnt_age_est_max + 1 + ) + ) + ), + cnt_age_mean = floor( + cnt_age_est_min + (cnt_age_est_max + 1 - cnt_age_est_min) / 2 + ), + cnt_age = dplyr::case_when( + !is.na(cnt_age_exact) ~ as.numeric(cnt_age_exact), + contact_age_imputation == "sample" ~ cnt_age_sampled, + contact_age_imputation == "mean" ~ cnt_age_mean, + TRUE ~ NA_real_ + ), + ) + +# filter out any participants with missing contact ages or settings (can't +# just remove the contacts as that will bias the count) +contact_data_filtered <- contact_data_imputed %>% + dplyr::group_by(part_id) %>% + dplyr::mutate( + missing_any_contact_age = any(is.na(cnt_age)), + missing_any_contact_setting = any( + is.na(cnt_home) | + is.na(cnt_work) | + is.na(cnt_school) | + is.na(cnt_transport) | + is.na(cnt_leisure) | + is.na(cnt_otherplace) + ) + ) %>% + dplyr::ungroup() %>% + dplyr::filter( + !is.na(part_age), + !missing_any_contact_age, + !missing_any_contact_setting + ) + +# get contacts by setting (keeping 0s, so we can record 0 contacts for some individuals) +contact_data_setting <- contact_data_filtered %>% + dplyr::mutate( + contacted = dplyr::case_when( + setting == "all" ~ 1L, + setting == "home" ~ cnt_home, + setting == "school" ~ cnt_school, + setting == "work" ~ cnt_work, + setting == "other" ~ pmax(cnt_transport, cnt_leisure, cnt_otherplace), + ) + ) + +# collapse down number of contacts per participant and contact age +swiss_contact_data <- contact_data_setting %>% + dplyr::select( + part_id, + age_from = part_age, + age_to = cnt_age, + contacted + ) %>% + tidyr::complete( + tidyr::nesting(age_from, part_id), + age_to = ages, + fill = list(contacted = 0) + ) %>% + dplyr::group_by( + age_from, + age_to + ) %>% + dplyr::summarise( + contacts = sum(contacted), + participants = dplyr::n_distinct(part_id), + .groups = "drop" + ) %>% + # add the setting information, so models can act differently for each + # setting + dplyr::mutate( + setting = setting, + .before = dplyr::everything() + ) %>% + mutate( + age_from = floor(age_from) + ) + +swiss_contact_data +``` + +```{r} +swiss_2015 <- wpp_age("Switzerland", "2015") + +swiss_pop <- as_conmat_population( + data = swiss_2015, + age = lower.age.limit, + population = population +) + +swiss_pop +``` + +```{r} +age_breaks_0_80_plus <- c(seq(0, 80, by = 5), Inf) +contact_model <- fit_single_contact_model( + contact_data = swiss_contact_data, + population = swiss_pop +) + +italy_contact +``` From ce1c5c14aa4d3adce78892dff9dc78d1c2937e7e Mon Sep 17 00:00:00 2001 From: njtierney Date: Thu, 6 Apr 2023 12:19:45 +1000 Subject: [PATCH 2/2] update data name --- vignettes/other-data-sources.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/other-data-sources.Rmd b/vignettes/other-data-sources.Rmd index 9a82282a..d4756989 100644 --- a/vignettes/other-data-sources.Rmd +++ b/vignettes/other-data-sources.Rmd @@ -258,5 +258,5 @@ contact_model <- fit_single_contact_model( population = swiss_pop ) -italy_contact +contact_model ```