R

普氏分析

2019-06-15  本文已影响44人  Dayueban

目的

为了展示相同个体不同组学数据,或者不同表型数据之间的相似性关系以及之间的差异性,所以采用了普氏分析。这里展示的是如何将两组数据进行普氏分析并进行可视化

一、导入数据

1 | 微生物种水平丰度表

图一 微生物种水平丰度表

2 | 食物数据的非加权unifrac矩阵

图二 食物数据
# 食物数据

# load the food distance matrix, unweighted unifrac
food_un <- read.delim("data/diet/processed_food/dhydrt_smry_no_soy_beta/unweighted_unifrac_dhydrt.smry.no.soy.txt", row = 1) # weighted in not significant
food_dist <- as.dist(food_un)

##############taxonomy############
# load taxonomy collapsed for each person
tax <- read.delim("data/microbiome/processed_average/UN_tax_CLR_mean_norm_s.txt", row = 1) # updates from reviewer comments
# drop soylents
tax <- tax[,colnames(tax) %in% colnames(food_un)]

# make taxonomy distance matrix
tax_dist <- dist(t(tax))

###############nutrients############
# load nutrition data
nutr <- read.delim("data/diet/processed_nutr/nutr_65_smry_no_soy.txt", row = 1)

# normalize nutrition data across features (rows)
nutr_n <- sweep(nutr, 1, rowSums(nutr), "/")

# make nutrition distance matrix (euclidean)
nutr_dist <- dist(t(nutr_n))

二、进行procrustes分析并作图

1 | 首先以食物和微生物为例

# make pcoas 
pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)

# procrustes
pro <- procrustes(pcoa_f, pcoa_t)
pro_test <- protest(pcoa_f, pcoa_t, perm = 999)  #普氏分析组间数据的检验

eigen <- sqrt(pro$svd$d)
percent_var <- signif(eigen/sum(eigen), 4)*100

beta_pro <- data.frame(pro$X)
trans_pro <- data.frame(pro$Yrot)
beta_pro$UserName <- rownames(beta_pro)
beta_pro$type <- "Food (Unweighted Unifrac)"
trans_pro$UserName <- rownames(trans_pro)
trans_pro$type <- "Microbiome (Aitchison's)"

colnames(trans_pro) <- colnames(beta_pro)

pval <- signif(pro_test$signif, 1)

plot <- rbind(beta_pro, trans_pro)

food_micro <- ggplot(plot) +
  geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
  scale_color_manual(values = c("#5a2071", "#5f86b7")) +
    theme_classic() +
    geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(size=9),
        legend.position = 'bottom',
        axis.text = element_text(size=4),
        axis.title = element_text(size=9),
         aspect.ratio = 1) +
  guides(color = guide_legend(ncol = 1)) +
  annotate("text", x = 0.3, y = -0.27, label = paste0("p-value=",pval), size = 2) +
  xlab(paste0("PC 1 [",percent_var[1],"%]")) +
  ylab(paste0("PC 2 [",percent_var[2],"%]")) 
  
  food_micro_leg <- get_legend(food_micro) #得到ggplot图的图例信息


food_micro + theme(legend.position = "none")

2 | 营养与微生物之间的普氏分析

# make pcoas 
pcoa_n <- as.data.frame(pcoa(nutr_dist)$vectors)
pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)

# procrustes
pro <- procrustes(pcoa_n, pcoa_t)
pro_test <- protest(pcoa_n, pcoa_t, perm = 999)

eigen <- sqrt(pro$svd$d)
percent_var <- signif(eigen/sum(eigen), 4)*100

beta_pro <- data.frame(pro$X)
trans_pro <- data.frame(pro$Yrot)
beta_pro$UserName <- rownames(beta_pro)
beta_pro$type <- "Nutrient (Euclidean)"
trans_pro$UserName <- rownames(trans_pro)
trans_pro$type <- "Microbiome (Aitchison's)"

colnames(trans_pro) <- colnames(beta_pro)

pval <- signif(pro_test$signif, 1)

plot <- rbind(beta_pro, trans_pro)

nutr_micro <- ggplot(plot) +
  geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
  scale_color_manual(values = c("#5f86b7", "#2bbaa7")) +
    theme_classic() +
    geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(size=9),
        legend.position = 'bottom',
        axis.text = element_text(size=4),
        axis.title = element_text(size=9),
         aspect.ratio = 1) +
  guides(color = guide_legend(ncol = 1)) +
  annotate("text", x = 0.01, y = -0.19, label = paste0("p-value=",pval), size = 2) +
  xlab(paste0("PC 1 [",percent_var[1],"%]")) +
  ylab(paste0("PC 2 [",percent_var[2],"%]")) 

nutr_micro + theme(legend.position = "none")

nutr_micro_leg <- get_legend(nutr_micro)

3 | 谷物类数据和微生物数据(欧几里得距离)

# Import the beta diversity table from grains_beta (weighted or unweighted)
food_beta <- read.table(file="data/diet/fiber/grains_data/grains_beta/unweighted_unifrac_grains_fiber.txt")

food_dist <- as.dist(food_beta)
# refer this code to make a pcoa you will have to fix naming and play with the data structure of the dataframe called plot to get this to work.

# Import the beta diversity table for taxonomy
taxa_beta <- read.table(file="data/microbiome/processed_average/UN_tax_beta/euclidean_UN_taxonomy_clr_s.txt")

tax_dist <- as.dist(taxa_beta)

# make pcoas 
pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)

# procrustes
pro <- procrustes(pcoa_f, pcoa_t)
pro_test <- protest(pcoa_f, pcoa_t, perm = 999)

eigen <- sqrt(pro$svd$d)
percent_var <- signif(eigen/sum(eigen), 4)*100

beta_pro <- data.frame(pro$X)
trans_pro <- data.frame(pro$Yrot)
beta_pro$UserName <- rownames(beta_pro)
beta_pro$type <- "Grain Fiber (Unweighted Unifrac)"
trans_pro$UserName <- rownames(trans_pro)
trans_pro$type <- "Microbiome (Aitchison's)"

colnames(trans_pro) <- colnames(beta_pro)

pval <- signif(pro_test$signif)

plot <- rbind(beta_pro, trans_pro)

grain_micro <- ggplot(plot) +
  geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
  scale_color_manual(values = c("#fe9700", "#5f86b7")) +
    theme_classic() +
    geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(size=9),
        legend.position = 'bottom',
        axis.text = element_text(size=4),
        axis.title = element_text(size=9),
         aspect.ratio = 1) +
  guides(color = guide_legend(ncol = 1)) +
  annotate("text", x = 0.05, y = -0.27, label = paste0("p-value=",pval), size = 2) +
  xlab(paste0("PC 1 [",percent_var[1],"%]")) +
  ylab(paste0("PC 2 [",percent_var[2],"%]")) 

grain_micro + theme(legend.position = "none")

grain_leg <- get_legend(grain_micro)

4 | 水果数据和微生物数据

# Import the beta diversity table from fruit_beta (weighted or unweighted)
food_beta <- read.table(file="data/diet/fiber/fruit_data/fruit_beta/unweighted_unifrac_fruit_fiber.txt")

food_dist <- as.dist(food_beta)

# make pcoas 
pcoa_f <- as.data.frame(pcoa(food_dist)$vectors)
pcoa_t <- as.data.frame(pcoa(tax_dist)$vectors)

# procrustes
pro <- procrustes(pcoa_f, pcoa_t)
pro_test <- protest(pcoa_f, pcoa_t, perm = 999)

eigen <- sqrt(pro$svd$d)
percent_var <- signif(eigen/sum(eigen), 4)*100

beta_pro <- data.frame(pro$X)
trans_pro <- data.frame(pro$Yrot)
beta_pro$UserName <- rownames(beta_pro)
beta_pro$type <- "Fruit Fiber (Unweighted Unifrac)"
trans_pro$UserName <- rownames(trans_pro)
trans_pro$type <- "Microbiome (CLR-Euclidean)"

colnames(trans_pro) <- colnames(beta_pro)

pval <- signif(pro_test$signif, 3)

plot <- rbind(beta_pro, trans_pro)

fruit_micro <- ggplot(plot) +
    geom_point(size = 2, alpha=0.75, aes(x = Axis.1, y = Axis.2, color = type)) +
  scale_color_manual(values = c("#CBD13E", "#5f86b7")) +
    theme_classic() +
    geom_line(aes(x= Axis.1, y=Axis.2, group=UserName), col = "darkgrey", alpha = 0.6) +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(size=9),
        legend.position = 'bottom',
        axis.text = element_text(size=4),
        axis.title = element_text(size=9),
         aspect.ratio = 1) +
  guides(color = guide_legend(ncol = 1)) +
  annotate("text", x = 0.33, y = -0.33, label = paste0("p-value=",pval), size = 2) +
  xlab(paste0("PC 1 [",percent_var[1],"%]")) +
  ylab(paste0("PC 2 [",percent_var[2],"%]")) 

fruit_micro + theme(legend.position = "none")

fruit_leg <- get_legend(fruit_micro)

# 合并四个图

plotC_H <- plot_grid(  food_micro + theme(legend.position = "none"), 
          nutr_micro + theme(legend.position = "none"), 
          grain_micro + theme(legend.position = "none"), 
          fruit_micro + theme(legend.position = "none"), 
          ncol = 2, 
          align = "h", 
          labels = c("E", "F", "G", "H"))

save_plot("Figure2E_H.pdf", plotC_H, base_width = 3.5, base_height = 3.5)

5 | 出图,大功告成


图三 普氏分析图

参考文章

[1] https://www.cell.com/cell-host-microbe/fulltext/S1931-3128(19)30250-1

上一篇 下一篇

猜你喜欢

热点阅读