-
Notifications
You must be signed in to change notification settings - Fork 8
Tutorial
Let's start by loading the package and data from 94 bone marrow transplant patients published in CID(2012). You will also need to have dplyr, ggplot2, and phyloseq installed
library(dplyr)
library(ggplot2)
library(yingtools2)
data(cid94)
You'll notice several dataframes:
bsi contains blood stream infection data
cdiff contains clostridium difficile infection data
hosp contains information about the patient's hospitalizations
meds contains medication data
pt.cid94 contains patient demographic and transplant data
There's also a phyloseq object called cid94 that contains microbiota data including stool sample information, otu count data, a taxonomy table, phylogenetic tree, and the reference sequences for each otu
Let's dive right in. Suppose we want to look at the microbiota of patients who had positive blood cultures for the pathogen "Klebsiella pneumoniae":
-
Create a subset of patients that had positive blood cultures with Klebsiella pneumoniae
kleb <- pt.cid94 %>% left_join(bsi) %>% filter(organism=="Klebsiella pneumoniae") -
Subset our phyloseq object to containing only the patients subsetted above using phyloseq
sub <- subset_samples(cid94,Patient_ID %in% kleb$Patient_ID) -
Melt our phyloseq object into a taxonomy/abundance table and get CID color palette
t <- get.otu.melt(sub) %>% rename(Species=taxon) pal <- get.yt.palette(t) -
Plot patient abundances using ggplot2
t %>% arrange(Kingdom,Phylum,Class,Order,Family,Genus,Species) %>% ggplot() + geom_bar(aes(x=sample,y=pctseqs,fill=Species),stat="identity",position="fill") + facet_grid(. ~ Patient_ID,scales="free_x",space="free")+ scale_fill_manual(values=pal)

-
Did any of these patients have detectable Klebsiella in their microbiota?
t <- get.otu.melt(sub,filter.zero=F) %>% rename(Species=taxon) t %>% filter(Genus=="Klebsiella") %>% ggplot() + geom_bar(aes(x=sample,y=pctseqs,fill=Genus),stat="identity") + facet_grid(. ~ Patient_ID,scales="free_x",space="free") + scale_fill_manual(values="red") + ylab("% Klebsiella Relative Abundance")

-
Look at single patient time series, when did they get blood stream infection? Using gg.stack, we can make stacks of multiple ggplot objects
g1 <- t %>% filter(Patient_ID==unique(kleb$Patient_ID)[2]) %>% arrange(Kingdom,Phylum,Class,Order,Family,Genus,Species) %>% ggplot() + geom_bar(aes(x=day,y=pctseqs,fill=Species),stat="identity",position="fill") + scale_fill_manual(values=pal) + xlim(c(-15,30)) + ylab("16S") g2 <- bsi %>% filter(Patient_ID==unique(kleb$Patient_ID)[2]) %>% ggplot() + geom_point(aes(x=startday,y=0),color="red",size=3) + geom_segment(aes(x=startday,xend=endday,y=0,yend=0),color="red",size=7) + geom_text(aes(x=((startday+endday)/2),y=0,label=organism),angle=0) + xlim(c(-15,30)) + ylab("Blood Culture Result") + theme(axis.text.y=element_blank(), axis.ticks.y=element_blank()) library(gtable) library(grid) libary(gridExtra) gg.stack(g2,g1,heights=c(1,2))

[Fig1 Pubmed](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3657523/figure/CIS580F1/)
1. Estimate Alpha Diversity using Phyloseq, clean up Sample_IDs
alpha <- estimate_richness(cid94) %>%
mutate(Sample_ID=row.names(.),
Sample_ID=gsub("X","",Sample_ID))
-
Get sample data
samp <- get.samp(cid94) -
Join and plot
alpha %>% left_join(samp) %>% ggplot(aes(x=day,y=Shannon)) + geom_point() + geom_smooth() + xlab("Transplant Day") + geom_vline(xintercept=0,linetype="dashed")
