数据-R语言-图表-决策-Linux-PythonR语言

R语言学习笔记

2018-07-23  本文已影响152人  泥人吴

title: "R basics"
author: "wy"
date: "2018/7/22"
output: html_document
请收藏,持续更新中...


R数据结构:

对象的5种基本属性:

名称属性:


数据结构:

使用函数创建vector的方法:

x<- vector("character",length=10)       
> x1<-1:4       
> x2<-c(1,2,3,4)
x3<- c(TRUE,10,"a")    
> class(x3) 
[1] "character"
as.numeric(x3)      
[1] NA 10 NA
x3<- c(TRUE,10,"a")
class(x3)
[1] "character"
 as.numeric(x3)
[1] NA 10 NA

names函数

x1<- 1:4
names(x1)<-c("a","b","c","d")

数据结构总结:

  1. vector()
  2. c() 说明:使用c()函数,如果其中一个元素是字符,则非字符值强制为字符类型。
  3. as.logical()/as.numeric()/as.character()

矩阵(matrix)

矩阵就是向量+维度:

y<-1:6
dim(y)<-c(2,3)

拼接矩阵

y2<- matrix(1:6,nrow = 2,ncol = 3)

数组array

x<-array(1:24,dim = c(4,6))
x1<-array(1:24,dim = c(2,3,4))

列表(list)

l<-list("a",2,10L,3+4i,TRUE)
l2<-list(a=1,b=2,c=3)
l3<-list(c(1,2,3),c(4,5,6,7))

矩阵赋名

x<-matrix(1:6,nrow = 2,ncol = 3)
dimnames(x)<-list(c("a","b"),c("c","d","e"))
dimnames(x)<-list(c("a","b"),c("c","d","e","f"))  # 会报错:x只有3列,却给了4个列名,dimnames的长度必须与矩阵维度一致

因子(factor)

创建因子(factor)

x<-factor(c("female","man","female"))
x
[1] female man    female
Levels: female man(默认按字母顺序排列,此处以female为基线)

y<-factor(c("female","man","female"),levels = c("man","female"))

y
[1] female man    female

Levels: man female(那么此处就是以man为基线)

可使用table函数对感兴趣的factor进行了解:

table(y)
y
man female 
1      2 

因子=整数向量+标签(label)

unclass(x)
[1] 1 2 1
attr(,"levels")
[1] "female" "man"   
class(unclass(x))

缺失值(missing value)

is.na(x)
[1] FALSE  TRUE FALSE  TRUE FALSE
is.nan(x)
[1] FALSE FALSE FALSE FALSE FALSE

数据框(data frame)

df
id name gender
1  1    a   TRUE
2  2    b  FALSE
3  3    c  FALSE
4  4    d   TRUE
>df
$id
[1] 1 2 3 4
                                    
$name
[1] "a" "b" "c" "d"
                                    
$gender
[1]  TRUE FALSE FALSE  TRUE
df3
id score
1  1    80
2  2    86
3  3    90
4  4   100
data.matrix(df3)
id score
[1,]  1    80
[2,]  2    86
[3,]  3    90
[4,]  4   100

处理缺失值NA

x <- c(1,NA,2,NA,3)
is.na(x)
x[is.na(x)]
x[!is.na(x)]
x <- c(1,NA,2,NA,3)
y<- c("a","b",NA,"c",NA)
z<- complete.cases(x,y)
x[z]
y[z]
head(airquality)
complete.cases(airquality)
g<-complete.cases(airquality)
airquality[g,][1:10,]

向量化操作:

x<-1:5
y<-6:10
x+y
x/y
x<-matrix(1:4,nrow = 2,ncol = 2)
y<-matrix(rep(2,4),nrow =2,ncol=2)
x*y
x/y

重要函数的使用

目录为:

lapply

x<- list(a=1:10,b=c(11,21,31,41,51))
lapply(x,mean)
sapply(x,mean)  
lapply(x,runif)
lapply(x,runif,min=0,max=100)

apply

x<- matrix(1:16,4,4)
apply (x,2,mean)
apply(x,1,mean)
> x
     [,1] [,2] [,3] [,4]
[1,]    1    5    9   13
[2,]    2    6   10   14
[3,]    3    7   11   15
[4,]    4    8   12   16
> apply (x,2,mean)
[1]  2.5  6.5 10.5 14.5
> apply(x,1,mean)
[1]  7  8  9 10

apply

x<- matrix(1:16,4,4)
apply (x,2,mean)
apply(x,1,mean)
> x
     [,1] [,2] [,3] [,4]
[1,]    1    5    9   13
[2,]    2    6   10   14
[3,]    3    7   11   15
[4,]    4    8   12   16
> apply (x,2,mean)
[1]  2.5  6.5 10.5 14.5
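> apply(x,1,mean)
[1]  7  8  9 10

对三维数组使用apply(以下输出对应一个2×3×4的随机数数组,原文缺少创建语句,此处按输出维度补全):

> x<-array(rnorm(2*3*4),dim = c(2,3,4))
> apply(x,c(1,2),mean)
           [,1]      [,2]      [,3]
[1,] -0.6843901 0.1666006 0.6803454
[2,]  1.1253118 0.4653100 0.1141857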
> apply(x,c(1,3),mean)
          [,1]        [,2]      [,3]      [,4]
[1,] 1.2045748 -1.53831670 0.3211516 0.2293316
[2,] 0.8996401  0.09853166 1.1504825 0.1244224
> apply(x,c(2,3),mean)
          [,1]       [,2]      [,3]        [,4]
[1,] 0.7023668 -1.4052478 0.6801564  0.90456808
[2,] 1.6055750 -1.0393436 0.7949597 -0.09736984
[3,] 0.8483805  0.2849139 0.7323351 -0.27656733

mapply:

list(rep(1,4),rep(2,3),rep(3,2),rep(4,1))
[[1]]
[1] 1 1 1 1

[[2]]
[1] 2 2 2

[[3]]
[1] 3 3

[[4]]
[1] 4
mapply(rep,1:4,4:1)
s<- function(n,mean,std){rnorm(n,mean,std)}
s(4,0,1)
mapply(s,1:5,5:1,2)
> mapply(s,1:5,5:1,2)
[[1]]
[1] 1.938297

[[2]]
[1] 4.455111 4.432794

[[3]]
[1] 1.118224 3.751297 5.864922

[[4]]
[1] 4.1675629 1.7385185 0.8987198 0.1048540

[[5]]
[1] -0.3413274  1.2058371 -0.5148753  1.9476646  3.9251892
上面的mapply调用等价于:
list(s(1,5,2),s(2,4,2),s(3,3,2),s(4,2,2),s(5,1,2))

tapply

x<- c(rnorm(5),runif(5),rnorm(5,1))
f<-gl(3,5)
> rnorm
function (n, mean = 0, sd = 1)
> runif
function (n, min = 0, max = 1)
tapply(x,f,mean)
         1          2          3 
0.08431157 0.36912987 0.92869525 

split

split(x,f)
lapply(split(x,f),mean)

排序

wy<-data.frame(v1=1:5,v2=c(10,7,9,6,8),v3=11:15,v4=c(1,1,2,2,1))
sort(wy$v2)
[1]  6  7  8  9 10
sort(wy$v2,decreasing=TRUE)
[1] 10  9  8  7  6
order(wy$v2)
[1] 4 2 5 3 1

wy[order(wy$v2),]
 v1 v2 v3 v4
4  4  6 14  2
2  2  7 12  1
5  5  8 15  1
3  3  9 13  2
1  1 10 11  1
wy[order(wy$v4,wy$v2),]
 v1 v2 v3 v4
2  2  7 12  1
5  5  8 15  1
1  1 10 11  1
4  4  6 14  2
3  3  9 13  2

wy[order(wy$v2,wy$v4),]
 v1 v2 v3 v4
4  4  6 14  2
2  2  7 12  1
5  5  8 15  1
3  3  9 13  2
1  1 10 11  1

总结数据信息summarize data

上一篇下一篇

猜你喜欢

热点阅读