学习小组—豹—Day6

2020-02-12 · 本文已影响0人 · 十年是一首歌
R包学习

几个重要包

dplyr

mutate （增加列，格式参考：mutate(test, new = Sepal.Length * Sepal.Width)）

# Build a small demo data frame from the built-in iris data set by picking
# six rows by index (1-2, 51-52, 101-102).
# The former `rm(list = ls())` was removed: wiping the caller's workspace is
# a surprising side effect for a script, and nothing below depends on it.
test <- iris[c(1:2, 51:52, 101:102), ]

# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable via Rscript / in headless environments.
if (interactive()) {
  View(test)
}
colnames(test)

library(dplyr)

# mutate(): add new columns computed from existing ones.
# (Column name spelled `Sepal.volume`; it was previously misspelled
# `Spal.volume`.)
mutate(test, Sepal.volume = Sepal.Length * Sepal.Width)
mutate(test, Petal.volume = Petal.Length * Petal.Width)
# mutate() needs a `name = expression` pair. A bare `new` is evaluated as an
# expression and fails because no object or column called `new` exists.
mutate(test, new = Sepal.Length * Sepal.Width)

# select(): keep columns, either by position or by name.
select(test, c(1, 3))
select(test, Sepal.Length)

# filter(.data, condition_1, condition_2, ...) returns the rows that match.
# Several conditions can be passed as separate arguments
# (condition1, condition2) or joined with `&` (condition1 & condition2).
table(test[["Species"]])
iris %>% filter(Sepal.Length > 5 & Sepal.Width < 3.5)
iris %>% filter(Sepal.Length > 5 & Species == "setosa")
test %>% filter(Species == "setosa")
test %>% filter(Species == "versicolor")
test %>% filter(Species == "virginica")

# Before using filter_all(), filter_if() and filter_at() here, drop the
# Species column (the only non-numeric column).
# NOTE(review): recent dplyr releases mark these scoped verbs as superseded
# in favour of filter() with if_all()/if_any() -- confirm against the dplyr
# version in use before migrating.
iris_data <- select(iris, -Species)

# Rows where every column is below 6.
filter_all(iris_data, all_vars(. < 6))
# Rows where at least one column is above 3.
filter_all(iris_data, any_vars(. > 3))
# Rows where at least one column whose name starts with "Sep" is above 3.
filter_at(iris_data, vars(starts_with("Sep")), any_vars(. > 3))
# Built-in mtcars data set: rows where at least one column is above 150.
mtcars %>% filter_all(any_vars(. > 150))
# Rows where at least one column whose name starts with "d" is divisible by 2.
mtcars %>% filter_at(vars(starts_with("d")), any_vars((. %% 2) == 0))

# Two conditions that must both hold, then membership in a set of values.
test %>% filter(Species == "setosa", Sepal.Length > 5)
test %>% filter(Species %in% c("setosa", "versicolor"))

# arrange(): sort the whole table by one or several columns.
test %>% arrange(Sepal.Length)        # ascending order by default
test %>% arrange(desc(Sepal.Length))  # desc() gives descending order
test %>% arrange(Sepal.Length, desc(Sepal.Width))

# summarise(): collapse data into summary values; most useful together with
# group_by().
# Group by Species first, then compute the mean and standard deviation of
# Sepal.Length within each group.
group_by(.data = test, Species)
summarise(
  .data = group_by(.data = test, Species),
  mean(Sepal.Length),
  sd(Sepal.Length)
)


# ---- Two handy dplyr skills ------------------------------------------------
# 1. The pipe operator %>% (keyboard shortcut: Cmd/Ctrl + Shift + M).
test %>%
  group_by(Species) %>%
  summarise(
    mean(Sepal.Length),
    sd(Sepal.Length)
  )

# 2. count(): tally the unique values of a column.
test %>% count(Species)

# ---- Relational data with dplyr --------------------------------------------
# Keep strings as character vectors rather than factors. `FALSE` is spelled
# out because `F` is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)

# Left-hand table: key column `x` plus a label column `z`.
test1 <- data.frame(
  x = c("b", "e", "f", "x"),
  z = c("A", "B", "C", "D"),
  stringsAsFactors = FALSE
)
test1

# Right-hand table: key column `x` plus a numeric column `y`.
test2 <- data.frame(
  x = c("a", "b", "c", "d", "e", "f"),
  y = c(1, 2, 3, 4, 5, 6),
  stringsAsFactors = FALSE
)
test2
# 1. Inner join: keep only the keys present in both tables (intersection).
test1 %>% inner_join(test2, by = "x")
# 2. Left join.
test1 %>% left_join(test2, by = "x")
# 3. Full join.
F1 <- test1 %>% full_join(test2, by = "x")
# F2 <- full_join(test2, test1, by = "x") gives a result different from F1.
# Semi join: return all records of table x that can be matched in table y.
semi_join(test1, test2, by = "x")
# Anti join: return all records of table x that cannot be matched in table y.
anti_join(test2, test1, by = "x")

# Simple binding of tables (rows stacked / columns placed side by side).
test1 <- data.frame(
  x = c(1, 2, 3, 4),
  y = c(10, 20, 30, 40)
)
test1

test2 <- data.frame(
  x = c(5, 6),
  y = c(50, 60)
)
test2

test3 <- data.frame(
  z = c(100, 200, 300, 400)
)
test3

test1 %>% bind_rows(test2)
test1 %>% bind_cols(test3)
学习小组—豹—Day6

R包学习

几个重要包

dplyr

猜你喜欢

热点阅读