2020-12-16 第四章基本数据管理2.0

2020-12-16  本文已影响0人  L6511

建立数据框并改变其中的元素

> manager<-c(1,2,3,4,5)
> date<-c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
> country<-c("US","US","UK","UK","UK")
> gender<-c("M","F","F","M","F")
> age<-c(32,45,25,39,99)
> q1<-c(5,3,3,3,2)
> q2<-c(4,5,5,3,2)
> q3<-c(5,2,5,4,1)
> q4<-c(5,5,5,NA,2)
> q5<-c(5,5,2,NA,1)
> date<-c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
> leadership<-data.frame(manager,date,country,gender,age,q1,q2,q3,q4,q5)
> leadership
  manager     date country gender age q1 q2 q3 q4 q5
1       1 10/24/08      US      M  32  5  4  5  5  5
2       2 10/28/08      US      F  45  3  5  2  5  5
3       3  10/1/08      UK      F  25  3  5  5  5  2
4       4 10/12/08      UK      M  39  3  3  4 NA NA
5       5   5/1/09      UK      F  99  2  2  1  2  1
> leadership$age[leadership$age==99]<-NA
> leadership$agecat[leadership$age>75]<-"Elder"
> leadership$agecat[leadership$age>=55&leadership$age<=75]<-"Middle Aged"
> leadership$agecat[leadership$age<55]<-"Young"
> library(reshape)
> leadership<-rename(leadership,
+                    c(manager="managerID",date="testDate")
+ )
> names(leadership)
 [1] "managerID" "testDate"  "country"   "gender"    "age"       "q1"        "q2"       
 [8] "q3"        "q4"        "q5"        "agecat"   
> names(leadership)[2]<-"testDate"
> leadership
  managerID testDate country gender age q1 q2 q3 q4 q5 agecat
1         1 10/24/08      US      M  32  5  4  5  5  5  Young
2         2 10/28/08      US      F  45  3  5  2  5  5  Young
3         3  10/1/08      UK      F  25  3  5  5  5  2  Young
4         4 10/12/08      UK      M  39  3  3  4 NA NA  Young
5         5   5/1/09      UK      F  NA  2  2  1  2  1   <NA>
> names(leadership)[6:10]<-c("item1","item2","item3","item4","item5")
> leadership
  managerID testDate country gender age item1 item2 item3 item4 item5 agecat
1         1 10/24/08      US      M  32     5     4     5     5     5  Young
2         2 10/28/08      US      F  45     3     5     2     5     5  Young
3         3  10/1/08      UK      F  25     3     5     5     5     2  Young
4         4 10/12/08      UK      M  39     3     3     4    NA    NA  Young
5         5   5/1/09      UK      F  NA     2     2     1     2     1   <NA>
> 

在第95页

变量的重命名

> names(leadership)
 [1] "managerID" "testDate"  "country"   "gender"    "age"       "q1"        "q2"       
 [8] "q3"        "q4"        "q5"        "agecat"   
> names(leadership)[2]<-"testDate"
> leadership
  managerID testDate country gender age q1 q2 q3 q4 q5 agecat
1         1 10/24/08      US      M  32  5  4  5  5  5  Young
2         2 10/28/08      US      F  45  3  5  2  5  5  Young
3         3  10/1/08      UK      F  25  3  5  5  5  2  Young
4         4 10/12/08      UK      M  39  3  3  4 NA NA  Young
5         5   5/1/09      UK      F  NA  2  2  1  2  1   <NA>
> names(leadership)[6:10]<-c("item1","item2","item3","item4","item5")
> leadership
  managerID testDate country gender age item1 item2 item3 item4 item5 agecat
1         1 10/24/08      US      M  32     5     4     5     5     5  Young
2         2 10/28/08      US      F  45     3     5     2     5     5  Young
3         3  10/1/08      UK      F  25     3     5     5     5     2  Young
4         4 10/12/08      UK      M  39     3     3     4    NA    NA  Young
5         5   5/1/09      UK      F  NA     2     2     1     2     1   <NA>
> y<-c(1,2,3,NA)
> is.na(y)
[1] FALSE FALSE FALSE  TRUE
> is.na(leadership[,6:10])
     item1 item2 item3 item4 item5
[1,] FALSE FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE FALSE
[3,] FALSE FALSE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE  TRUE  TRUE
[5,] FALSE FALSE FALSE FALSE FALSE
> leadership$age[leadership$age==99]<-NA
> leadership
  managerID testDate country gender age item1 item2 item3 item4 item5 agecat
1         1 10/24/08      US      M  32     5     4     5     5     5  Young
2         2 10/28/08      US      F  45     3     5     2     5     5  Young
3         3  10/1/08      UK      F  25     3     5     5     5     2  Young
4         4 10/12/08      UK      M  39     3     3     4    NA    NA  Young
5         5   5/1/09      UK      F  NA     2     2     1     2     1   <NA>

使用na.omit()删除不完整的观测

> manager<-c(1,2,3,4,5)
> country<-c("US","US","UK","UK","UK")
> gender<-c("M","F","F","M","F")
> age<-c(32,45,25,39,99)
> q1<-c(5,3,3,3,2)
> q2<-c(4,5,5,3,2)
> q3<-c(5,2,5,4,1)
> q4<-c(5,5,5,NA,2)
> q5<-c(5,5,2,NA,1)
> date<-c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
> leadership<-data.frame(manager,date,country,gender,age,q1,q2,q3,q4,q5)
> leadership
  manager     date country gender age q1 q2 q3 q4 q5
1       1 10/24/08      US      M  32  5  4  5  5  5
2       2 10/28/08      US      F  45  3  5  2  5  5
3       3  10/1/08      UK      F  25  3  5  5  5  2
4       4 10/12/08      UK      M  39  3  3  4 NA NA
5       5   5/1/09      UK      F  99  2  2  1  2  1
> newdata<-na.omit(leadership)
> newdata
  manager     date country gender age q1 q2 q3 q4 q5
1       1 10/24/08      US      M  32  5  4  5  5  5
2       2 10/28/08      US      F  45  3  5  2  5  5
3       3  10/1/08      UK      F  25  3  5  5  5  2
5       5   5/1/09      UK      F  99  2  2  1  2  1

关于日期的函数在第99页

第四章主要学习了如何确定一个对象的数据类型、如何将它转换成其他类型,以及缺失值和日期值的存储方式;还学习了如何用公式创建新变量并重编码现有变量,如何对数据进行排序、对变量进行重命名,以及如何对数据集进行横向合并和纵向合并。

上一篇下一篇

猜你喜欢

热点阅读