The Art for R Programming

[R语言] 《R语言编程艺术》 第3章 矩阵和数组

2020-04-24  本文已影响0人  半为花间酒

Creating Matrices

The internal storage of a matrix is in column-major order
You can set the byrow argument in matrix() to true to indicate that the data is coming in row-major order

# 创建
y <- matrix(c(1,2,3,4),nrow=2) 
y
#      [,1] [,2]
# [1,]    1    3
# [2,]    2    4

# 也可以先声明再创建
y <- matrix(nrow=2,ncol=2) 
y[1,1] <- 1 
y[2,1] <- 2 
y[1,2] <- 3 
y[2,2] <- 4

# 取数
y[,2] 
# [1] 3 4

# 赋值
y[2,1] <- 7

# 修改储存顺序
y <- matrix(c(1,2,3,4),nrow=2,byrow=TRUE) 
y
#      [,1] [,2]
# [1,]    1    2
# [2,]    3    4

General Matrix Operations

- Performing Linear Algebra Operationson Matrices

y
#      [,1] [,2]
# [1,]    1    3
# [2,]    2    4

y * y
#      [,1] [,2]
# [1,]    1    9
# [2,]    4   16

# 用%会修正为线性代数的矩阵运算
y %*% y
#      [,1] [,2]
# [1,]    7   15
# [2,]   10   22
y 
#    [,1] [,2] 
# [1,] 11  12 
# [2,] 21  22 
# [3,] 31  32

y[c(1,3),] <- matrix(c(1,1,8,12),nrow=2) 

- 拓展案例一

—— Image Manipulation

课本上用的是拉什莫尔山,这里换了个图

附上一个在线转换图片格式的网站:在线图片转换器

library(pixmap)

# pgm格式rio包无法识别
art <- read.pnm('Art.pgm')
art
# # Pixmap image
#  Type          : pixmapGrey 
#  Size          : 225x225 
#  Resolution    : 1x1 
#  Bounding box  : 0 0 225 225 
plot(art)

str(art)
# Formal class 'pixmapGrey' [package "pixmap"] with 6 slots
# ..@ grey    : num [1:225, 1:225] 1 1 1 1 1 1 1 1 1 1 ...
# ..@ channels: chr "grey"
# ..@ size    : int [1:2] 225 225
# ..@ cellres : num [1:2] 1 1
# ..@ bbox    : num [1:4] 0 0 225 225
# ..@ bbcent  : logi FALSE
  • The class here is of the S4 type, whose components are designated by@
  • The intensities are stored as numbers ranging from 0.0 (black) to 1.0 (white)
  • The random noise itself is a sample from U(0,1), the uniform distribution on the interval (0,1)
locator()
# $x
# [1]  53.50925 181.81603
# 
# $y
# [1] 150.38467  85.09246

# 注意,用locator获取的坐标x和y是颠倒的
art2 <- art
art2@grey[85:150,53:181] <- 1
plot(art2)
# q值调节噪声的权重
blurpart <- function(img,rows,cols,q) { 
  lrows <- length(rows) 
  lcols <- length(cols) 
  newimg <- img 
  randomnoise <- matrix(nrow=lrows, ncol=lcols, runif(lrows * lcols)) 
  newimg@grey[rows,cols] <- (1-q) * img@grey[rows,cols] + q * randomnoise 
  return(newimg) 
  }

library(rafalib)
mypar(1,3)

art1 <- blurpart(art,85:150,53:181,0.3)
plot(art1)
art2 <- blurpart(art,85:150,53:181,0.6)
plot(art2)
art3 <- blurpart(art,85:150,53:181,0.9)
plot(art3)

Filtering on Matrices

x <- matrix(c(1,2,2,3,3,4),nrow=3,byrow = TRUE)
z <- c(5,12,13)
x[z %% 2 == 1,]
#      [,1] [,2]
# [1,]    1    2
# [2,]    3    4
x[x[,1] > 2 & x[,2] > 3,]
# [1] 3 4
# 矩阵变向量

# 提取子集时加上 drop 参数可以限制降维
x[x[,1] > 2 & x[,2] > 3,,drop = FALSE]
#      [,1] [,2]
# [1,]    3    4
x
#      [,1] [,2]
# [1,]    1    2
# [2,]    2    3
# [3,]    3    4
which(x > 2)
# [1] 3 5 6

- 拓展案例二

—— Generating a Covariance Matrix

# 举例row和col函数的用法
x <- matrix(c(1,4,7,8,3,5),nrow=3,byrow = TRUE)
x
#      [,1] [,2]
# [1,]    1    4
# [2,]    7    8
# [3,]    3    5

row(x)
#      [,1] [,2]
# [1,]    1    1
# [2,]    2    2
# [3,]    3    3

col(x)
#      [,1] [,2]
# [1,]    1    2
# [2,]    1    2
# [3,]    1    2

Our matrix will have n rows and n columns, and we wish each of the n variables to have variance 1, with correlation rho between pairs of variables.

makecov <- function(rho,n) { 
  m <- matrix(nrow=n,ncol=n) 
  m <- ifelse(row(m) == col(m),1,rho) 
  return(m) 
}

makecov(0.3,4)
#      [,1] [,2] [,3] [,4]
# [1,]  1.0  0.3  0.3  0.3
# [2,]  0.3  1.0  0.3  0.3
# [3,]  0.3  0.3  1.0  0.3
# [4,]  0.3  0.3  0.3  1.0

Applying Functions to Matrix Rows and Columns

x
#      [,1] [,2]
# [1,]    1    4
# [2,]    7    8
# [3,]    3    5

apply(x, 1, mean)
# [1] 2.5 7.5 4.0

apply(x, 2, mean)
# [1] 3.666667 5.666667
x
#      [,1] [,2]
# [1,]    1    4
# [2,]    7    8
# [3,]    3    5

func <- function(x) x / c(2,1)

apply(x, 1, func)
#      [,1] [,2] [,3]
# [1,]  0.5  3.5  1.5
# [2,]  4.0  8.0  5.0

t(apply(x, 1, func))
#      [,1] [,2]
# [1,]  0.5    4
# [2,]  3.5    8
# [3,]  1.5    5

apply(x, 2, func)
#      [,1] [,2]
# [1,]  0.5  2.0
# [2,]  7.0  8.0
# [3,]  1.5  2.5
# Warning messages:
# 1: In x/c(2, 1) : 长的对象长度不是短的对象长度的整倍数
# 2: In x/c(2, 1) : 长的对象长度不是短的对象长度的整倍数
copymaj <- function(rw,d) { 
  maj <- sum(rw[1:d]) / d 
  return(if(maj > 0.5) 1 else 0) 
  }

x <- matrix(c(1,0,1,1,0,
              1,1,1,1,0,
              1,0,0,1,1,
              0,1,1,1,0),nrow=4,byrow = TRUE)

apply(x,1,copymaj,2)
# [1] 0 1 0 0
apply(x,1,copymaj,3)
# [1] 1 1 0 1
apply(x,1,copymaj,4)
# [1] 1 1 0 1

- 拓展案例三

—— Finding Outliers
Say we have retail sales data in a matrix rs.
Each row of data is for a different store, and observations within a row are daily sales figures.

# 嵌套函数的玩法
findols <- function(x) {
  findol <- function(xrow) { 
    mdn <- median(xrow) 
    devs <- abs(xrow-mdn) 
    # 寻找偏离中位数最大值的索引
    return(which.max(devs)) 
    }  
  return(apply(x,1,findol)) 
  }

Adding and Deleting Matrix Rows and Columns

- Changing the Size of a Matrix

cbindrbind

one <- c(1,1,1,1)
one
# [1] 1 1 1 1

z <- matrix(c(1,2,3,4,
              1,1,0,0,
              1,0,1,0),nrow = 4)
z
#      [,1] [,2] [,3]
# [1,]    1    1    1
# [2,]    2    1    0
# [3,]    3    0    1
# [4,]    4    0    0

z <- cbind(one,z);z
#       one      
# [1,]   1 1 1 1
# [2,]   1 2 1 0
# [3,]   1 3 0 1
# [4,]   1 4 0 0

cbind(1,z) 
#     one      
# [1,] 1   1 1 1 1
# [2,] 1   1 2 1 0
# [3,] 1   1 3 0 1
# [4,] 1   1 4 0 0
cbind(c(1,2),c(3,4))
#      [,1] [,2]
# [1,]    1    3
# [2,]    2    4

rbind(c(1,2),c(3,4))
#      [,1] [,2]
# [1,]    1    2
# [2,]    3    4

- 拓展案例四

—— Finding the Closest Pair of Vertices in a Graph
图论中的基本案例,输入一个城市距离矩阵q,由城市i和城市j共同决定,需要寻找矩阵中的最小距离以及对应的两个城市

q <- matrix(c(0 ,12,13, 8,20,
              12, 0,15,28,88,
              13,15, 0, 6, 9,
               8,28, 6, 0,33,
              20,88, 9,33, 0),nrow=5,byrow=TRUE)

思路:
apply找到每一行最小值后再比较全部最小值

由于矩阵是对称的(行数n也等于列数),寻找第i行最小值只需要比较i+1,i+2 ... n这些元素,最后一行无需比较

q <- matrix(c(0,12,13,8,20,
              12,0,15,28,88,
              13,15,0,6,9,
              8,28,6,0,33,
              20,88,9,33,0),nrow=5,byrow=TRUE); q

imin <- function(x) { 
  lx <- length(x) 
  i <- x[lx]
  j <- which.min(x[(i+1):(lx-1)]) 
  k <- i + j 
  return(c(k,x[k])) 
}

mind <- function(d) {
  n <- nrow(d) 
  # 将行号拼到矩阵中,便于imin每一行的判断从行号+1开始
  dd <- cbind(d,1:n) 
  # 由于对称原理可以去掉最后一行不用参与最小值计算
  wmins <- apply(dd[-n,],1,imin) 
  # print(wmins)
  #      [,1] [,2] [,3] [,4]
  # [1,]    4    3    4    5
  # [2,]    8   15    6   33
  # apply是纵向存储,第一行是列号,返回矩阵的列号是原行号
  # 第二行是原矩阵行最小值
  i <- which.min(wmins[2,]) 
  j <- wmins[1,i] 
  return(c(d[i,j],i,j)) 
  }

mind(q)
# [1] 6 3 4

如果矩阵中数据不重复可以用以下代码:

minda <- function(d) { 
  smallest <- min(d) 
  ij <- which(d == smallest,arr.ind = TRUE) 
  return(c(smallest,ij)) 
}
# 执行速度比上一个代码要慢很多

arr.ind = TRUE允许返回矩阵的行列索引,否则当作向量处理,细节如下

x <- matrix(c(5,2,7,4,1,6,3,8,9),nrow=3)
x
#      [,1] [,2] [,3]
# [1,]    5    4    3
# [2,]    2    1    8
# [3,]    7    6    9

min(x)
# [1] 1

ij <- which(x == min(x));ij
# [1] 5
ij <- which(x == min(x),arr.ind = TRUE);ij
#      row col
# [1,]   2   2

c(min(x),ij)
# [1] 1 2 2

More on the Vector/Matrix Distinction

  1. dim()
  2. nrow()
nrow
# function (x) 
# dim(x)[1L]
# <bytecode: 0x0000025e707d6600>
# <environment: namespace:base>
  1. ncol()

Avoiding Unintended Dimension Reduction

x
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    0    1    1    0
# [2,]    1    1    1    1    0
# [3,]    1    0    0    1    1
# [4,]    0    1    1    1    0

r <- x[2,];r
# [1] 1 1 1 1 0

dim(x)
# [1] 4 5

dim(r)
# NULL

str(x)
# num [1:4, 1:5] 1 1 1 0 0 1 0 1 1 1 ...

str(r)
# num [1:5] 1 1 1 1 0

# r is a vector, not a matrix. 

This is a vector of length 2, rather than a 1-by-2 matrix.

策略:

  1. drop=FALSE避免意外降维
  2. as.matrix()将vector转换回矩阵

Naming Matrix Rows and Columns

z
#      [,1] [,2] [,3] [,4]
# [1,]    1    1    1    1
# [2,]    1    2    1    0
# [3,]    1    3    0    1
# [4,]    1    4    0    0  

colnames(z)
# NULL

colnames(z) <- c("a","b","c","d")
z
#      a b c d
# [1,] 1 1 1 1
# [2,] 1 2 1 0
# [3,] 1 3 0 1
# [4,] 1 4 0 0

z[2,"c"]
# c 
# 1 

These names can then be used to reference specific columns. The function rownames() works similarly.

Higher-Dimensional Arrays

firsttest 
#     [,1] [,2] 
# [1,]  46  30 
# [2,]  21  25 
# [3,]  50  50

secondtest 
#    [,1] [,2] 
# [1,]  46  43 
# [2,]  41  35 
# [3,]  50  50

tests <- array(data=c(firsttest,secondtest),dim=c(3,2,2))
# dim中第三个数字指layer
上一篇下一篇

猜你喜欢

热点阅读