4.1 一个示例,4.2 创建新变量,4.3 变量的重编码,4.4 变量的重命名

2021-08-20  本文已影响0人  灵活胖子的进步之路
# Example data ----
# Survey responses for five managers: one vector per variable, then combined
# into the `leadership` data frame used by the examples that follow.
manager <- as.numeric(1:5)
date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)

# Answers to the five survey questions; NA marks an unanswered question.
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)

# Keep date and gender as character columns rather than factors.
leadership <- data.frame(
  manager = manager,
  date = date,
  gender = gender,
  age = age,
  q1 = q1,
  q2 = q2,
  q3 = q3,
  q4 = q4,
  q5 = q5,
  stringsAsFactors = FALSE
)
head(leadership)
示例数据结构:`head(leadership)` 显示该数据框共 5 行、9 列(manager、date、gender、age、q1–q5)。
# Listing 4.2 - Creating new variables
# Three equivalent ways to add the derived columns sumx and meanx to a
# data frame; each method overwrites the columns with the same values.
mydata <- data.frame(x1 = c(2, 2, 6, 4),
                     x2 = c(3, 4, 2, 8))

# Method 1: refer to the columns explicitly with `$`.
mydata$sumx <- mydata$x1 + mydata$x2
mydata$meanx <- (mydata$x1 + mydata$x2) / 2

# Method 2: evaluate the expression inside the data frame with with().
# with() is used instead of attach()/detach(): an attached data frame sits
# behind the global environment on the search path, so a global x1 or x2
# would silently be used instead of the columns, and an error before
# detach() would leave the data frame attached.
mydata$sumx <- with(mydata, x1 + x2)
mydata$meanx <- with(mydata, (x1 + x2) / 2)

# Method 3: transform() creates or replaces the columns in a single call.
mydata <- transform(mydata,
                    sumx = x1 + x2,
                    meanx = (x1 + x2) / 2)
# Recoding variables
# 99 is a code for "age unknown": turn it into NA first so that it is not
# classified as "Elder" below.
is.na(leadership$age) <- which(leadership$age == 99)

# Derive the categorical column agecat from age. Rows whose age is NA match
# none of the conditions and keep NA as their category.
leadership$agecat <- NA_character_
leadership$agecat[which(leadership$age < 55)] <- "Young"
leadership$agecat[which(leadership$age >= 55 &
                          leadership$age <= 75)] <- "Middle Aged"
leadership$agecat[which(leadership$age > 75)] <- "Elder"
注意:重编码之前要先把 age 中的异常值 99 设为缺失值 NA,否则它会被归入 "Elder" 组;各分组通过逻辑下标(如 `leadership$age > 75`)筛选出对应的行后再赋值。
# A more compact alternative: within() evaluates the braces with the columns
# of leadership visible by name and returns the data frame with agecat set.
leadership <- within(leadership, {
  # Start from an all-missing column, then fill in each age group in turn;
  # rows whose age is NA match no condition and stay NA.
  agecat <- rep(NA_character_, length(age))
  agecat[which(age < 55)] <- "Young"
  agecat[which(age >= 55 & age <= 75)] <- "Middle Aged"
  agecat[which(age > 75)] <- "Elder"
})
# Rename a column with names(): the column is looked up by its current name
# instead of a hard-coded position, so the intended column is renamed even
# if the column order changes.
names(leadership)[names(leadership) == "date"] <- "newdata"
colnames(leadership)
使用 names() 函数修改列名:第 2 列 date 被改名为 newdata。
# Renaming variables with the plyr package
names(leadership)
# An earlier step renamed the second column, so give it back its original
# name "date" here. Otherwise the rename() call below has no "date" column
# to act on: plyr reports a missing `from` value and that mapping is a no-op.
names(leadership)[2] <- "date"
leadership

library(plyr)
# rename() takes a named character vector of the form c(old_name = "new_name").
leadership <- rename(leadership,
                     c(manager = "managerID", date = "testDate"))
colnames(leadership)
利用 plyr 包的 rename() 函数重命名列变量(例如将 manager 改为 managerID)。
上一篇 下一篇

猜你喜欢

热点阅读