R基础-向量,dataframe,dplyr操作

2022-12-17  本文已影响0人  余绕

1. 向量操作

A. create a vector

v1<-c(1,2,3,4,5,6,7,8,9) # create a vector

B. Get values

v1[c(2)] # obtain a value
[1] 2

v1[c(3:5)] #obtain values
[1] 3 4 5

C. assign names to the vector elements

names(v1)=c("one","two","three","four","five","six","seven","eight","night")

v1 # content of v1
  one   two three  four  five   six seven eight night 
    1     2     3     4     5     6     7     8     9 

D. get a value from its name

v1["two"] 
two 
  2 
v1[v1>3] # get the elements that satisfy a logical condition
four  five   six seven eight night 
    4     5     6     7     8     9 

2. Dataframe

A. 创建dataframe

df<-data.frame(row.names = c('张三','李四','王武','赵六','田七'),
            性别= c('男','女','女','男','女'),
            年龄= c( 39,65,34,28,76),
            成绩=c(89,99,65,39,10))
df
    性别 年龄 成绩
张三   男   39   89
李四   女   65   99
王武   女   34   65
赵六   男   28   39
田七   女   76   10

B. 1st row, all columns

df[1,]
 性别 年龄 成绩
张三   男   39   89

C. 1st column, all rows

df[,1]
[1] "男" "女" "女" "男" "女"

D. select data based on the score

df[df$成绩>60,]
  性别 年龄 成绩
张三   男   39   89
李四   女   65   99
王武   女   34   65

E. modify values

df
   性别 年龄 成绩
张三   男   39   89
李四   女   65   99
王武   女   34   65
赵六   男   28   39
田七   女   76   10

df$成绩=df$成绩+100

df
    性别 年龄 成绩
张三   男   39  189
李四   女   65  199
王武   女   34  165
赵六   男   28  139
田七   女   76  110

3. dplyr软件包学习

library(dplyr)

A. import data

de_test<-read.table("K:/生信学习/R语言基础绘图/R_basic/data/R_basic/de_result.txt",header=T)
de_test
 gene_id logFC pvalue    FDR
1   gene1   3.0  0.010 0.0300
2   gene2   1.0  0.300 0.3800
3   gene3   2.0  0.002 0.0025
4   gene4  -3.0  0.002 0.0030
5   gene5  -0.4  0.004 0.0048

B. select data based on a specific criterion---------Basic operation in R

de_test[de_test$logFC>=1 &de_test$pvalue<=0.05,]
  gene_id logFC pvalue    FDR
1   gene1     3  0.010 0.0300
3   gene3     2  0.002 0.0025

using dplyr

C. filter function #按行筛选

filter(de_test,logFC>= 1 & pvalue<=0.05)
  gene_id logFC pvalue    FDR
1   gene1     3  0.010 0.0300
2   gene3     2  0.002 0.0025

D. select #按列筛选

select(de_test,gene_id,logFC,pvalue)
gene_id logFC pvalue
1   gene1   3.0  0.010
2   gene2   1.0  0.300
3   gene3   2.0  0.002
4   gene4  -3.0  0.002
5   gene5  -0.4  0.004
select(de_test,-pvalue)
  gene_id logFC    FDR
1   gene1   3.0 0.0300
2   gene2   1.0 0.3800
3   gene3   2.0 0.0025
4   gene4  -3.0 0.0030
5   gene5  -0.4 0.0048

E. add a new column---mutate

mutate(de_test,FC=2**logFC)
gene_id logFC pvalue    FDR        FC
1   gene1   3.0  0.010 0.0300 8.0000000
2   gene2   1.0  0.300 0.3800 2.0000000
3   gene3   2.0  0.002 0.0025 4.0000000
4   gene4  -3.0  0.002 0.0030 0.1250000
5   gene5  -0.4  0.004 0.0048 0.7578583

F. sort the dataframe

arrange(de_test,logFC) #default is ascending order
  gene_id logFC pvalue    FDR
1   gene4  -3.0  0.002 0.0030
2   gene5  -0.4  0.004 0.0048
3   gene2   1.0  0.300 0.3800
4   gene3   2.0  0.002 0.0025
5   gene1   3.0  0.010 0.0300
arrange(de_test,desc(logFC))  #descending order
 gene_id logFC pvalue    FDR
1   gene1   3.0  0.010 0.0300
2   gene3   2.0  0.002 0.0025
3   gene2   1.0  0.300 0.3800
4   gene5  -0.4  0.004 0.0048
5   gene4  -3.0  0.002 0.0030

G. 利用管道(using the pipe %>%)

slected<-de_test %>% filter(logFC>= 1 & pvalue<=0.05) %>% select(-pvalue) %>% mutate(FC=2**logFC) %>% arrange(logFC)
slected
  gene_id logFC    FDR FC
1   gene3     2 0.0025  4
2   gene1     3 0.0300  8

Import data

gene_fuction<-read.table("K:/生信学习/R语言基础绘图/R_basic/data/R_basic/gene_function.txt",header=T,sep="\t")
gene_fuction
  gene_name annotation
1     gene1        aaa
2     gene2        bbb
3     gene3        ccc
4     gene5        ddd

G. Merge dataframes based on columns

left_join(slected,gene_fuction,by = c('gene_id'='gene_name')) #以左表为标准
  gene_id logFC    FDR FC annotation
1   gene3     2 0.0025  4        ccc
2   gene1     3 0.0300  8        aaa
right_join(slected,gene_fuction,by = c('gene_id'='gene_name'))#以右表为标准
  gene_id logFC    FDR FC annotation
1   gene3     2 0.0025  4        ccc
2   gene1     3 0.0300  8        aaa
3   gene2    NA     NA NA        bbb
4   gene5    NA     NA NA        ddd

H. 存在重复的情况

gene_fuction<-edit(gene_fuction)

gene_fuction
gene_name annotation
1     gene1        aaa
2     gene2        bbb
3     gene3        ccc
4     gene5        ddd
5     gene1        xxx
6     gene3        new
left_join(slected,gene_fuction,by = c('gene_id'='gene_name')) #以左表为标准
  gene_id logFC    FDR FC annotation
1   gene3     2 0.0025  4        ccc
2   gene3     2 0.0025  4        new
3   gene1     3 0.0300  8        aaa
4   gene1     3 0.0300  8        xxx
上一篇下一篇

猜你喜欢

热点阅读