R 下载和整理KEGG ORTHOLOGY文件

2021-12-21  本文已影响0人  leoxiaobei

登录KO (KEGG ORTHOLOGY) Database,进入下图页面:

image.png
点击进入KEGG Orthology (KO),进入下图界面:
image.png

右键复制JSON下载链接,然后在R中读入并解析该JSON文件。

library(rjson)
library(jsonlite)
library(tidyverse)
# Download the KEGG Orthology (ko00001) hierarchy as JSON and parse it.
# fromJSON is namespace-qualified because rjson and jsonlite both export a
# function of that name; the jsonlite version is the one this script relies on.
KO <- jsonlite::fromJSON(
  "https://www.kegg.jp/kegg-bin/download_htext?htext=ko00001&format=json&filedir="
)

# Drop the top-level "name" element so only the nested "children" remain.
KO$name <- NULL

# Flatten the nested hierarchy one level per unnest() call (the key step).
# names_repair = tidyr_legacy resolves the repeated "name" columns as
# name, name1, ... which the later cols= arguments refer to.
KO <- as.data.frame(KO) %>%
  unnest(
    cols = c("children.name", "children.children"),
    names_repair = tidyr_legacy
  ) %>%
  unnest(
    cols = c("children.name", "name", "children"),
    names_repair = tidyr_legacy
  ) %>%
  unnest(
    cols = c("children.name", "name", "name1", "children"),
    names_repair = tidyr_legacy
  )
colnames(KO) <- c("L1", "L2", "L3", "KO")

# Tidy the KEGG Orthology table.
# Plain assignment with %>% is used instead of magrittr's %<>%, because
# library(tidyverse) does not attach magrittr and %<>% would not be found.
KO <- KO %>%
  # Put the KO column first.
  select(last_col(), everything()) %>%
  # "Kxxxxx  description": split on the first double space only, so a
  # description that itself contains a double space is kept whole.
  separate(
    col = "KO", sep = "  ", into = c("KO", "Description"), extra = "merge"
  ) %>%
  separate(col = "L1", sep = " ", into = c("L1_ID", "L1"), extra = "merge") %>%
  # Remove the two top-level classes "BRITE hierarchies" (09180) and
  # "Not Included in Pathway or Brite" (09190).
  filter(!L1_ID %in% c("09180", "09190")) %>%
  separate(col = "L2", sep = " ", into = c("L2_ID", "L2"), extra = "merge") %>%
  separate(col = "L3", sep = " ", into = c("L3_ID", "L3"), extra = "merge") %>%
  # Split the trailing "[PATH:koXXXXX]" tag off the level-3 name.
  separate(col = "L3", sep = " \\[PATH:", into = c("L3", "PathwayID")) %>%
  mutate(PathwayID = str_remove(PathwayID, pattern = "\\]")) %>%
  # Drop every row that has a missing value in any column.
  drop_na()
head(KO)
# A tibble: 6 x 9
# KO     Description                                                L1_ID L1         L2_ID L2                      L3_ID L3                     PathwayID
# <chr>  <chr>                                                      <chr> <chr>      <chr> <chr>                   <chr> <chr>                  <chr>    
# 1 K00844 HK; hexokinase [EC:2.7.1.1]                                09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
# 2 K12407 GCK; glucokinase [EC:2.7.1.2]                              09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
# 3 K00845 glk; glucokinase [EC:2.7.1.2]                              09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
# 4 K25026 glk; glucokinase [EC:2.7.1.2]                              09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
# 5 K01810 GPI, pgi; glucose-6-phosphate isomerase [EC:5.3.1.9]       09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
# 6 K06859 pgi1; glucose-6-phosphate isomerase, archaeal [EC:5.3.1.9] 09100 Metabolism 09101 Carbohydrate metabolism 00010 Glycolysis / Gluconeo~ ko00010  
上一篇 下一篇

猜你喜欢

热点阅读