R语言学习笔记6-因子和表格篇

2015-12-04 本文已影响129人 RudyHe

- Factor
    - x<-c(5,12,13,12)
    - xf<-factor(x)
    - xf
        - 5 12 13 12
        - Levels: 5 12 13
    - str(xf)
        - Factor w/ 3 levels "5","12","13": 1 2 3 2
    - unclass(xf)
        - 1 2 3 2
        - attr(,"levels")
        - "5" "12" "13"
    - length(xf)    # 4: the length is still the number of data elements, not the number of levels
    - xff<-factor(x,levels=c(5,12,13,88))
    - xff
        - 5 12 13 12
        - Levels: 5 12 13 88
    - xff[2]<-88
    - xff
        - 5 88 13 12
        - Levels: 5 12 13 88
    -

- operations
    - ages<-c(25,26,55,37,21,42)
    - affils<-c("R","D","D","R","U","D")
    - tapply(ages,affils,mean)
        - D R U
        - 41 31 21
    - d<-data.frame(list(gender=c("M","M","F","M","F","F"),age=c(47,59,21,32,33,24),income=c(55000,88000,32450,76500,123000,45650)))
    - d$over25<-ifelse(d$age>25,1,0)
    - tapply(d$income,list(d$gender,d$over25),mean)
        -   0 1
        - F 39050 123000.00    # the columns are the two values of d$over25 (0 = 25 or under, 1 = over 25); females appear in both groups
        - M NA 73166.67    # no male is aged 25 or under, so that cell is NA
    - split    # tapply() first splits the data into groups with split(), then applies the function to each group
    - split(d$income,list(d$gender,d$over25))
        - F.0    32450 45650
        - M.0    numeric(0)
        - F.1    123000
        - M.1    55000 88000 76500
    - split(1:7,c("M","F","F","I","M","M","F"))
        - F    2 3 7
        - I    4
        - M    1 5 6
    - aba<-read.csv("xxx.data",header=TRUE)
    - by(aba,aba$Gender,function(m) lm(m[,2]~m[,3]))    # unlike tapply(), by() works on whole objects (here, the rows of a data frame), not just vectors

- table
    - u<-c(22,8,33,6,8,29,-2)
    - fl<-list(c(5,12,13,12,13,5,13),c("a","bc","a","a","bc","a","a"))
    - tapply(u,fl,length)
        -   a bc
        - 5 2 NA    # 5 is paired with "a" twice and never with "bc" (hence NA)
        - 12 1 1
        - 13 2 1
    - table(fl)
        -   fl.2
        - fl.1 a bc
        - 5 2 0
        - 12 1 1
        - 13 2 1
    - ct<-read.table("ct.dat",header=T)
    - cttab<-table(ct)
        -   No Yes
        - No 2 0
        - Not Sure 0 1
        - Yes 1 1
    - table(c(5,12,13,12,8,5))
        - 5 8 12 13
        - 2 1 2 1
    - class(cttab)    # "table"
    - apply(cttab,1,sum)
        - No Not Sure Yes    # each marginal value is the sum over the other variable
        - 2 1 2
    - d<-c(5,12,13,4,3,28,12,12,9,5,5,13,5,4,12)
    - dtab<-table(d)
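    - tabdom(dtab,3)    # tabdom() is not a base R function; it is a user-defined helper (definition not shown in these notes) that returns the 3 most frequent values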
        - d Freq
        - 3 5 4
        - 5 12 4
        - 2 4 2
    - aggregate(aba[,-1],list(aba$Gender),median)
    - binmarks<-seq(from=0.0,to=1.0,by=0.1)
    - z<-c(0.88,0.28,0.59,0.43,0.47,0.24,0.05,0.88)
    - cut(z,binmarks,labels=F)
        - 9 3 6 5 5 3 1 9    # 0.88>0.8, the 9th segment, and so on

R语言学习笔记6-因子和表格篇

猜你喜欢

热点阅读