[R语言] 《R语言编程艺术》 第3章 矩阵和数组
Creating Matrices
The internal storage of a matrix is in column-major order
You can set thebyrow
argument in matrix() to true to indicate that the data is coming in row-major order
# 创建
y <- matrix(c(1,2,3,4),nrow=2)
y
# [,1] [,2]
# [1,] 1 3
# [2,] 2 4
# 也可以先声明再创建
y <- matrix(nrow=2,ncol=2)
y[1,1] <- 1
y[2,1] <- 2
y[1,2] <- 3
y[2,2] <- 4
# 取数
y[,2]
# [1] 3 4
# 赋值
y[2,1] <- 7
# 修改储存顺序
y <- matrix(c(1,2,3,4),nrow=2,byrow=TRUE)
y
# [,1] [,2]
# [1,] 1 2
# [2,] 3 4
General Matrix Operations
- Performing Linear Algebra Operationson Matrices
y
# [,1] [,2]
# [1,] 1 3
# [2,] 2 4
y * y
# [,1] [,2]
# [1,] 1 9
# [2,] 4 16
# 用%会修正为线性代数的矩阵运算
y %*% y
# [,1] [,2]
# [1,] 7 15
# [2,] 10 22
- 子矩阵赋值
y
# [,1] [,2]
# [1,] 11 12
# [2,] 21 22
# [3,] 31 32
y[c(1,3),] <- matrix(c(1,1,8,12),nrow=2)
- 拓展案例一
—— Image Manipulation
课本上用的是拉什莫尔山,这里换了个图
附上一个在线转换图片格式的网站:在线图片转换器
library(pixmap)
# pgm格式rio包无法识别
art <- read.pnm('Art.pgm')
art
# # Pixmap image
# Type : pixmapGrey
# Size : 225x225
# Resolution : 1x1
# Bounding box : 0 0 225 225
plot(art)
str(art)
# Formal class 'pixmapGrey' [package "pixmap"] with 6 slots
# ..@ grey : num [1:225, 1:225] 1 1 1 1 1 1 1 1 1 1 ...
# ..@ channels: chr "grey"
# ..@ size : int [1:2] 225 225
# ..@ cellres : num [1:2] 1 1
# ..@ bbox : num [1:4] 0 0 225 225
# ..@ bbcent : logi FALSE
- The class here is of the
S4 type
, whose components are designated by@
- The intensities are stored as numbers ranging from 0.0 (black) to 1.0 (white)
- The random noise itself is a sample from U(0,1), the uniform distribution on the interval (0,1)
- 使用
locator()
点击图片再按finish
获取点的坐标
locator()
# $x
# [1] 53.50925 181.81603
#
# $y
# [1] 150.38467 85.09246
# 注意,用locator获取的坐标x和y是颠倒的
art2 <- art
art2@grey[85:150,53:181] <- 1
plot(art2)
- 添加噪声
(课本上给出的代码有误..)
# q值调节噪声的权重
blurpart <- function(img,rows,cols,q) {
lrows <- length(rows)
lcols <- length(cols)
newimg <- img
randomnoise <- matrix(nrow=lrows, ncol=lcols, runif(lrows * lcols))
newimg@grey[rows,cols] <- (1-q) * img@grey[rows,cols] + q * randomnoise
return(newimg)
}
library(rafalib)
mypar(1,3)
art1 <- blurpart(art,85:150,53:181,0.3)
plot(art1)
art2 <- blurpart(art,85:150,53:181,0.6)
plot(art2)
art3 <- blurpart(art,85:150,53:181,0.9)
plot(art3)
Filtering on Matrices
x <- matrix(c(1,2,2,3,3,4),nrow=3,byrow = TRUE)
z <- c(5,12,13)
x[z %% 2 == 1,]
# [,1] [,2]
# [1,] 1 2
# [2,] 3 4
- 矩阵的提取会自动降维的特性
x[x[,1] > 2 & x[,2] > 3,]
# [1] 3 4
# 矩阵变向量
# 提取子集时加上 drop 参数可以限制降维
x[x[,1] > 2 & x[,2] > 3,,drop = FALSE]
# [,1] [,2]
# [1,] 3 4
- 矩阵本质是长向量
x
# [,1] [,2]
# [1,] 1 2
# [2,] 2 3
# [3,] 3 4
which(x > 2)
# [1] 3 5 6
- 拓展案例二
—— Generating a Covariance Matrix
# 举例row和col函数的用法
x <- matrix(c(1,4,7,8,3,5),nrow=3,byrow = TRUE)
x
# [,1] [,2]
# [1,] 1 4
# [2,] 7 8
# [3,] 3 5
row(x)
# [,1] [,2]
# [1,] 1 1
# [2,] 2 2
# [3,] 3 3
col(x)
# [,1] [,2]
# [1,] 1 2
# [2,] 1 2
# [3,] 1 2
Our matrix will have n rows and n columns, and we wish each of the n variables to have variance 1, with correlation rho between pairs of variables.
makecov <- function(rho,n) {
m <- matrix(nrow=n,ncol=n)
m <- ifelse(row(m) == col(m),1,rho)
return(m)
}
makecov(0.3,4)
# [,1] [,2] [,3] [,4]
# [1,] 1.0 0.3 0.3 0.3
# [2,] 0.3 1.0 0.3 0.3
# [3,] 0.3 0.3 1.0 0.3
# [4,] 0.3 0.3 0.3 1.0
Applying Functions to Matrix Rows and Columns
x
# [,1] [,2]
# [1,] 1 4
# [2,] 7 8
# [3,] 3 5
apply(x, 1, mean)
# [1] 2.5 7.5 4.0
apply(x, 2, mean)
# [1] 3.666667 5.666667
- 使用自定义函数
x
# [,1] [,2]
# [1,] 1 4
# [2,] 7 8
# [3,] 3 5
func <- function(x) x / c(2,1)
apply(x, 1, func)
# [,1] [,2] [,3]
# [1,] 0.5 3.5 1.5
# [2,] 4.0 8.0 5.0
t(apply(x, 1, func))
# [,1] [,2]
# [1,] 0.5 4
# [2,] 3.5 8
# [3,] 1.5 5
apply(x, 2, func)
# [,1] [,2]
# [1,] 0.5 2.0
# [2,] 7.0 8.0
# [3,] 1.5 2.5
# Warning messages:
# 1: In x/c(2, 1) : 长的对象长度不是短的对象长度的整倍数
# 2: In x/c(2, 1) : 长的对象长度不是短的对象长度的整倍数
- function中需多个参数时的apply
copymaj <- function(rw,d) {
maj <- sum(rw[1:d]) / d
return(if(maj > 0.5) 1 else 0)
}
x <- matrix(c(1,0,1,1,0,
1,1,1,1,0,
1,0,0,1,1,
0,1,1,1,0),nrow=4,byrow = TRUE)
apply(x,1,copymaj,2)
# [1] 0 1 0 0
apply(x,1,copymaj,3)
# [1] 1 1 0 1
apply(x,1,copymaj,4)
# [1] 1 1 0 1
- 拓展案例三
—— Finding Outliers
Say we have retail sales data in a matrix rs.
Each row of data is for a different store, and observations within a row are daily sales figures.
# 嵌套函数的玩法
findols <- function(x) {
findol <- function(xrow) {
mdn <- median(xrow)
devs <- abs(xrow-mdn)
# 寻找偏离中位数最大值的索引
return(which.max(devs))
}
return(apply(x,1,findol))
}
Adding and Deleting Matrix Rows and Columns
- Changing the Size of a Matrix
cbind
和rbind
one <- c(1,1,1,1)
one
# [1] 1 1 1 1
z <- matrix(c(1,2,3,4,
1,1,0,0,
1,0,1,0),nrow = 4)
z
# [,1] [,2] [,3]
# [1,] 1 1 1
# [2,] 2 1 0
# [3,] 3 0 1
# [4,] 4 0 0
z <- cbind(one,z);z
# one
# [1,] 1 1 1 1
# [2,] 1 2 1 0
# [3,] 1 3 0 1
# [4,] 1 4 0 0
cbind(1,z)
# one
# [1,] 1 1 1 1 1
# [2,] 1 1 2 1 0
# [3,] 1 1 3 0 1
# [4,] 1 1 4 0 0
- 可以用
rbind
或者cbind
快速创建矩阵
cbind(c(1,2),c(3,4))
# [,1] [,2]
# [1,] 1 3
# [2,] 2 4
rbind(c(1,2),c(3,4))
# [,1] [,2]
# [1,] 1 2
# [2,] 3 4
- 创建和bind形成新矩阵的时间消耗很多,尽可能在一开始分配一个足够大的空矩阵
- 拓展案例四
—— Finding the Closest Pair of Vertices in a Graph
图论中的基本案例,输入一个城市距离矩阵q,由城市i和城市j共同决定,需要寻找矩阵中的最小距离以及对应的两个城市
q <- matrix(c(0 ,12,13, 8,20,
12, 0,15,28,88,
13,15, 0, 6, 9,
8,28, 6, 0,33,
20,88, 9,33, 0),nrow=5,byrow=TRUE)
思路:
apply
找到每一行最小值后再比较全部最小值
由于矩阵是对称的(行数n也等于列数),寻找第i行最小值只需要比较i+1,i+2 ... n这些元素,最后一行无需比较
q <- matrix(c(0,12,13,8,20,
12,0,15,28,88,
13,15,0,6,9,
8,28,6,0,33,
20,88,9,33,0),nrow=5,byrow=TRUE); q
imin <- function(x) {
lx <- length(x)
i <- x[lx]
j <- which.min(x[(i+1):(lx-1)])
k <- i + j
return(c(k,x[k]))
}
mind <- function(d) {
n <- nrow(d)
# 将行号拼到矩阵中,便于imin每一行的判断从行号+1开始
dd <- cbind(d,1:n)
# 由于对称原理可以去掉最后一行不用参与最小值计算
wmins <- apply(dd[-n,],1,imin)
# print(wmins)
# [,1] [,2] [,3] [,4]
# [1,] 4 3 4 5
# [2,] 8 15 6 33
# apply是纵向存储,第一行是列号,返回矩阵的列号是原行号
# 第二行是原矩阵行最小值
i <- which.min(wmins[2,])
j <- wmins[1,i]
return(c(d[i,j],i,j))
}
mind(q)
# [1] 6 3 4
如果矩阵中数据不重复可以用以下代码:
minda <- function(d) {
smallest <- min(d)
ij <- which(d == smallest,arr.ind = TRUE)
return(c(smallest,ij))
}
# 执行速度比上一个代码要慢很多
arr.ind = TRUE
允许返回矩阵的行列索引,否则当作向量处理,细节如下
x <- matrix(c(5,2,7,4,1,6,3,8,9),nrow=3)
x
# [,1] [,2] [,3]
# [1,] 5 4 3
# [2,] 2 1 8
# [3,] 7 6 9
min(x)
# [1] 1
ij <- which(x == min(x));ij
# [1] 5
ij <- which(x == min(x),arr.ind = TRUE);ij
# row col
# [1,] 2 2
c(min(x),ij)
# [1] 1 2 2
More on the Vector/Matrix Distinction
dim()
nrow()
nrow
# function (x)
# dim(x)[1L]
# <bytecode: 0x0000025e707d6600>
# <environment: namespace:base>
ncol()
Avoiding Unintended Dimension Reduction
x
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 0 1 1 0
# [2,] 1 1 1 1 0
# [3,] 1 0 0 1 1
# [4,] 0 1 1 1 0
r <- x[2,];r
# [1] 1 1 1 1 0
dim(x)
# [1] 4 5
dim(r)
# NULL
str(x)
# num [1:4, 1:5] 1 1 1 0 0 1 0 1 1 1 ...
str(r)
# num [1:5] 1 1 1 1 0
# r is a vector, not a matrix.
This is a vector of length 2, rather than a 1-by-2 matrix.
策略:
- 用
drop=FALSE
避免意外降维 - 用
as.matrix()
将vector转换回矩阵
Naming Matrix Rows and Columns
z
# [,1] [,2] [,3] [,4]
# [1,] 1 1 1 1
# [2,] 1 2 1 0
# [3,] 1 3 0 1
# [4,] 1 4 0 0
colnames(z)
# NULL
colnames(z) <- c("a","b","c","d")
z
# a b c d
# [1,] 1 1 1 1
# [2,] 1 2 1 0
# [3,] 1 3 0 1
# [4,] 1 4 0 0
z[2,"c"]
# c
# 1
These names can then be used to reference specific columns. The function
rownames()
works similarly.
Higher-Dimensional Arrays
firsttest
# [,1] [,2]
# [1,] 46 30
# [2,] 21 25
# [3,] 50 50
secondtest
# [,1] [,2]
# [1,] 46 43
# [2,] 41 35
# [3,] 50 50
tests <- array(data=c(firsttest,secondtest),dim=c(3,2,2))
# dim中第三个数字指layer