4.1 一个示例,4.2 创建新变量,4.3 变量的重编码,4.4 变量的重命名
2021-08-20 本文已影响0人
灵活胖子的进步之路
# Example data ----
# Five rows keyed by manager id, with a date string, gender, age and five
# numeric q1-q5 columns. Row 4 has NA in q4 and q5.
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)

q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)

# Assemble the vectors into one data frame; character columns stay character.
leadership <- data.frame(
  manager = manager,
  date = date,
  gender = gender,
  age = age,
  q1 = q1,
  q2 = q2,
  q3 = q3,
  q4 = q4,
  q5 = q5,
  stringsAsFactors = FALSE
)
head(leadership)
![](https://img.haomeiwen.com/i22546862/49c325addcb17b81.png)
# Listing 4.2 - Creating new variables
# Three equivalent ways to add derived columns to an existing data frame.
mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)

# Method 1: reference every column explicitly through `$`.
mydata$sumx <- mydata$x1 + mydata$x2
mydata$meanx <- (mydata$x1 + mydata$x2) / 2

# Method 2: evaluate the expressions inside the data frame with with().
# This replaces the attach()/detach() pair: attach() puts the columns on the
# search path behind the global environment, so a global `x1` or `x2` would
# silently shadow them, and the frame stays attached if an error occurs
# before detach() runs.
mydata$sumx <- with(mydata, x1 + x2)
mydata$meanx <- with(mydata, (x1 + x2) / 2)

# Method 3: transform() computes both columns in a single call.
mydata <- transform(
  mydata,
  sumx = x1 + x2,
  meanx = (x1 + x2) / 2
)
# Recoding variables
# Turn the value 99 in `age` into NA.
leadership$age <- replace(leadership$age, leadership$age == 99, NA)

# Build the `agecat` column band by band. The bands do not overlap, so the
# order of the assignments does not matter. Rows whose age is NA match no
# band and are left as NA.
leadership$agecat[leadership$age < 55] <- "Young"
leadership$agecat[
  leadership$age >= 55 & leadership$age <= 75
] <- "Middle Aged"
leadership$agecat[leadership$age > 75] <- "Elder"
![](https://img.haomeiwen.com/i22546862/7327c63d2fc6e24f.png)
# A more compact alternative: within() evaluates the braces with the columns
# of `leadership` in scope, so `age` and `agecat` need no `leadership$` prefix.
leadership <- within(leadership, {
  # Start from NA, then fill in each non-overlapping age band.
  agecat <- NA
  agecat[age < 55] <- "Young"
  agecat[age >= 55 & age <= 75] <- "Middle Aged"
  agecat[age > 75] <- "Elder"
})
# Rename a column by position: the second column becomes "newdata".
colnames(leadership)[2] <- "newdata"
names(leadership)
![](https://img.haomeiwen.com/i22546862/01594a0b61694c2d.png)
# Renaming variables with the plyr package
names(leadership)
# Positional rename: the second column becomes "testDate".
names(leadership)[2] <- "testDate"
leadership
library(plyr)
# rename() takes a named vector of old = "new" pairs. Only `manager` is
# listed: column 2 was already renamed to "testDate" just above, so a `date`
# entry would match no column and plyr would merely report it as missing.
leadership <- rename(leadership, c(manager = "managerID"))
colnames(leadership)
![](https://img.haomeiwen.com/i22546862/d5bf9659eef46460.png)