apply家族函数:数据分组统计

2021-11-25  本文已影响0人  冬之心

title: "apply家族函数:数据分组统计"
author: "wintryheart"
date: "2021/11/24"
output: html_document


# Global knitr chunk option: echo = TRUE includes each chunk's source code
# in the rendered document alongside its output.
knitr::opts_chunk$set(echo = TRUE)

摘要

apply

apply(X, MARGIN, FUN, ...)

# Preview the first rows of the built-in `cars` data set.
head(cars)
# MARGIN = 2 applies `sum` to each column, giving the column totals.
apply(X = cars, MARGIN = 2, FUN = sum)

lapply、sapply、vapply和mapply

lapply:相当于list+apply

lapply(X, FUN, ...)

sapply: 同lapply,但默认(simplify = TRUE)会尽量把结果简化为向量或矩阵

sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)

vapply: 类似sapply, 但必须通过FUN.VALUE预先指定每个返回值的类型和长度(可带名称作为标签), 因此结果更可预期

vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)

mapply: 是sapply的多变量版本

mapply(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE)

# A named list holding an integer, a double and a logical vector.
x <- list(
  a = 1:10,
  beta = exp(-3:3),
  logic = c(TRUE, FALSE, FALSE, TRUE)
)
# lapply() computes the mean of every element and returns a list ...
lapply(X = x, FUN = mean)
# ... while sapply() simplifies the same result to a named numeric vector.
sapply(X = x, FUN = mean)

# Compare sapply() and vapply()
# The integer sequences 1:3, 1:4, ..., 1:9, held in a list.
i39 <- lapply(3:9, seq_len)
# Tukey five-number summary of every sequence, simplified to a matrix.
sapply(X = i39, FUN = fivenum)
# Same computation; FUN.VALUE fixes the type and length of each result and
# supplies the row labels.
vapply(
  X = i39,
  FUN = fivenum,
  FUN.VALUE = setNames(
    numeric(5),
    c("Min.", "1st Qu.", "Median", "3rd Qu.", "Max.")
  )
)


# replicate() is built on sapply(): it evaluates an expression repeatedly and
# is typically used for random-number generation.
# Usage: replicate(n, expr, simplify = "array")

# 100 simulated means, each taken over 10 exponential draws.
replicate(n = 100, expr = mean(rexp(n = 10)))
# Histogram of a fresh set of 100 such means (the call is kept verbatim
# because hist() derives the plot title from it).
hist(replicate(100, mean(rexp(10))))


# mapply(): apply a function over several vectors in parallel

# One call per pair: rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1) -> a list.
mapply(FUN = rep, 1:4, 4:1)
# For comparison, the vectorised call returns a single flat vector.
rep(1:4, 4:1)
# The same two calls with the arguments passed by name.
mapply(FUN = rep, x = 1:4, times = 4:1)
rep(x = 1:4, times = 4:1)

head(cars)
# The two columns are walked in parallel, so each call adds up one row.
mapply(FUN = sum, cars[["speed"]], cars[["dist"]])
mapply(FUN = sum, cars[[1]], cars[[2]])
apply(X = cars, MARGIN = 1, FUN = sum)

# A data frame is a list of columns, so a single data-frame argument makes
# mapply() sum each column.
mapply(FUN = sum, cars[, c("speed", "dist")])
apply(X = cars, MARGIN = 2, FUN = sum)

tapply 相当于table+apply

tapply(X, INDEX, FUN = NULL, ..., default = NA, simplify = TRUE)

# Contingency-table style summaries of a data frame with tapply()
head(warpbreaks)
# Total breaks for every wool x tension combination: both factor columns
# are passed together as INDEX.
tapply(
  X = warpbreaks$breaks,
  INDEX = warpbreaks[c("wool", "tension")],
  FUN = sum
)
# Total breaks for each tension level alone.
tapply(X = warpbreaks$breaks, INDEX = warpbreaks[["tension"]], FUN = sum)


# Step-by-step breakdown of what tapply() computes

# 17 observations assigned cyclically to groups 1..3; levels 4 and 5 are
# declared but never occur in the data.
n <- 17
fac <- factor(rep_len(1:3, n), levels = 1:5)
table(fac) # number of observations per factor level
tapply(seq_len(n), fac, length) # empty levels (4 and 5) come back as NA
tapply(seq_len(n), fac, length, default = 0) # == table(fac)


num <- seq_len(n)
# Build the frame with data.frame() rather than as.data.frame(cbind(...)):
# cbind() would coerce the factor to its integer codes, dropping the factor
# class and the unused levels, so the two calls below would disagree.
temp <- data.frame(num = num, fac = fac)
tapply(temp$num, temp$fac, sum)
tapply(temp$num, fac, sum) # sum of the values within each factor level

上一篇 下一篇

猜你喜欢

热点阅读