PyTorch Study Notes (Part 1)
1 Creating Tensors
# Create a tensor filled with the value 6
torch.full([2, 3], 6)
torch.full([], 6) # creates a scalar (0-dim tensor)
torch.full([1], 6) # creates a 1-element vector (1-dim tensor), not a scalar
# arange creates an arithmetic sequence
torch.arange(0, 10)
torch.arange(0, 10, 2)
# linspace/logspace split a range into evenly spaced points
torch.linspace(0, 10, steps=4) # evenly spaced points over [0, 10] (endpoint included), whereas arange steps by a fixed increment
torch.logspace(0, 10, steps=4) # exponents evenly spaced over [0, 10], i.e. values from 10^0 to 10^10
# ones/zeros/eye
torch.ones(2, 3)
torch.zeros(2, 3)
torch.eye(3)
torch.ones_like(a) # same shape as an existing tensor a
# randperm creates a random permutation of a range, like random.shuffle
print(torch.randperm(10)) # tensor([7, 1, 8, 2, 9, 0, 5, 4, 3, 6])
2 Tensor Indexing and Slicing
2.1 Indexing
a = torch.rand(4, 3, 28, 28)
print(a[0].shape) # torch.Size([3, 28, 28])
print(a[0, 0].shape) # torch.Size([28, 28])
print(a[0, 0, 0].shape) # torch.Size([28])
print(a[0, 0, 0, 1].shape) # torch.Size([]), a scalar
2.2 select first/last N
a = torch.rand(4, 3, 28, 28)
print(a[:2].shape) # torch.Size([2, 3, 28, 28]) meaning: select the first two images
print(a[:2, :1].shape) # torch.Size([2, 1, 28, 28]) meaning: the first channel of the first two images
print(a[:2, 1:,:,:].shape) # torch.Size([2, 2, 28, 28]) meaning: all channels except the first, for the first two images
print(a[:2, -1:,:,:].shape) # torch.Size([2, 1, 28, 28]) meaning: the last channel of the first two images
2.3 select by steps
a = torch.rand(4, 3, 28, 28)
print(a[:,:,0:28:2,0:28:2].shape) # torch.Size([4, 3, 14, 14]), meaning: sample every other row/column
print(a[:,:,::2,::2].shape) # torch.Size([4, 3, 14, 14]), same as above
Meaning of the colon in PyTorch slicing:
1. A colon on its own selects everything along that dimension;
2. A colon before a number, :N, selects indices 0 through N-1;
3. A colon after a number, N:, selects indices N through len(sequence)-1;
4. start:end selects indices start through end-1;
5. start:end:step selects every step-th index from start through end-1; it can be abbreviated as ::step.
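A minimal sketch of these rules on a 1-D tensor (assuming torch has been imported; values are only for illustration):
x = torch.arange(10)  # tensor([0, 1, 2, ..., 9])
print(x[:4])    # tensor([0, 1, 2, 3]), rule 2
print(x[4:])    # tensor([4, 5, 6, 7, 8, 9]), rule 3
print(x[2:7])   # tensor([2, 3, 4, 5, 6]), rule 4
print(x[2:9:3]) # tensor([2, 5, 8]), rule 5
print(x[::2])   # tensor([0, 2, 4, 6, 8]), abbreviated step form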
2.4 select by specific index
a = torch.rand(4, 3, 28, 28)
print(a.index_select(0, torch.tensor([0, 2])).shape)
# torch.Size([2, 3, 28, 28]) selects images 0 and 2;
print(a.index_select(2, torch.arange(24)).shape) # torch.Size([4, 3, 24, 28]) selects the first 24 rows along dim 2
print(a[...].shape) # torch.Size([4, 3, 28, 28])
print(a[0, ...].shape) # torch.Size([3, 28, 28])
print(a[:,1, ...].shape) # torch.Size([4, 28, 28])
print(a[0,..., ::2].shape) # torch.Size([3, 28, 14])
print(a[..., :2].shape) # torch.Size([4, 3, 28, 2])
"..." stands for any number of dimensions; how many it covers is inferred automatically from the tensor.
2.5 select by mask 【masked_select】
a = torch.randn(3, 4)
print(a)
tensor([[ 0.1641, 1.2368, 0.7215, -0.5228],
[ 0.0288, 0.6919, -1.6339, -0.1283],
[-0.0908, -0.1472, -0.2184, -0.6402]])
mask = a.ge(0.5)
print(mask)
tensor([[0, 1, 1, 0],
[0, 1, 0, 0],
[0, 0, 0, 0]], dtype=torch.uint8)
print(torch.masked_select(a, mask))
tensor([1.2368, 0.7215, 0.6919])
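Note that masked_select always returns a flattened 1-D tensor, regardless of the input's shape; a minimal sketch of that behaviour (threshold 0.5 as above):
x = torch.randn(3, 4)
out = torch.masked_select(x, x.ge(0.5))
print(out.dim())   # 1, the result is always 1-D
print(out.shape)   # number of elements that satisfy the mask (data dependent)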
2.6 select by flatten index 【flatten the tensor first, then select by index】
a = torch.tensor([[4, 3, 5], [6, 7, 8]])
print(torch.take(a, torch.tensor([0, 2, 5]))) # tensor([4, 5, 8])
3 Tensor Dimension Transformations
3.1 view/reshape 【view and reshape are basically equivalent】
a = torch.rand(4, 1, 28, 28)
print(a.shape) # torch.Size([4, 1, 28, 28])
print(a.view(4, 28*28).shape) # torch.Size([4, 784])
print(a.view(4*1, 28, 28).shape) # torch.Size([4, 28, 28])
print(a.view(4*1*28, 28).shape) # torch.Size([112, 28])
view only changes the Tensor's shape; the total number of elements stays the same.
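One practical difference between the two: view requires contiguous memory, while reshape falls back to a copy when needed. A minimal sketch, assuming torch has been imported:
x = torch.rand(4, 3, 32, 32)
y = x.transpose(1, 3)                    # non-contiguous after transpose
# y.view(4, -1)                          # RuntimeError: view size is not compatible with input tensor's size and stride
print(y.reshape(4, -1).shape)            # torch.Size([4, 3072]), reshape copies when necessary
print(y.contiguous().view(4, -1).shape)  # torch.Size([4, 3072])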
3.2 squeeze/unsqueeze 【remove or insert a dimension at a given position】
Correspondence between positive and negative indices:
[0, 1, 2, 3, 4] => [-5, -4, -3, -2, -1]
# Adding dimensions
a = torch.rand(4, 1, 28, 28)
print(a.unsqueeze(0).shape) # torch.Size([1, 4, 1, 28, 28]) inserts a dim at position 0
print(a.unsqueeze(-1).shape) # torch.Size([4, 1, 28, 28, 1]) inserts a dim at the end
print(a.unsqueeze(4).shape) # torch.Size([4, 1, 28, 28, 1]) inserts a dim at position 4
print(a.unsqueeze(-4).shape) # torch.Size([4, 1, 1, 28, 28]) inserts a dim at position -4
print(a.unsqueeze(-5).shape) # torch.Size([1, 4, 1, 28, 28]) inserts a dim at position -5
b = torch.tensor([1.2, 2.3])
print(b) # tensor([1.2000, 2.3000])
print(b.unsqueeze(-1)) # tensor([[1.2000],[2.3000]]) a dim is inserted at the innermost position
print(b.unsqueeze(0)) # tensor([[1.2000, 2.3000]]) a dim is inserted at the outermost position
c = torch.rand(32)
d = c.unsqueeze(1).unsqueeze(2).unsqueeze(0)
print(c.shape) # torch.Size([32])
print(c.unsqueeze(1).shape) # torch.Size([32, 1])
print(c.unsqueeze(1).unsqueeze(2).shape) # torch.Size([32, 1, 1])
print(c.unsqueeze(1).unsqueeze(2).unsqueeze(0).shape) # torch.Size([1, 32, 1, 1])
print(d.shape) # torch.Size([1, 32, 1, 1])
# Removing dimensions. If no dim is given, every dim of size 1 is squeezed out; only dims of size 1 can be squeezed
print(torch.rand(4, 1, 28, 28).squeeze().shape) # all size-1 dims removed, torch.Size([4, 28, 28])
print(torch.rand(1, 32, 1, 1).squeeze().shape) # all size-1 dims removed, torch.Size([32])
print(torch.rand(1, 32, 1, 1).squeeze(0).shape) # only dim 0 removed, torch.Size([32, 1, 1])
print(torch.rand(1, 32, 1, 1).squeeze(-1).shape) # only the last dim removed, torch.Size([1, 32, 1])
print(torch.rand(4, 1, 28, 28).squeeze(0).shape) # squeezing a dim whose size is not 1 does nothing, torch.Size([4, 1, 28, 28])
3.3 expand (broadcasting) / repeat (memory copied) 【dimension expansion】
# Dimension expansion: a dim can only be expanded when its size is 1; -1 means leave that dim unchanged
a = torch.rand(4, 32, 14, 14)
b = torch.rand(1, 32, 1, 1)
print(b.expand(4, 32, 14, 14).shape)
print(torch.rand(2, 32, 1, 1).expand(-1, 32, 14, 14).shape) # -1 leaves that dim unchanged, torch.Size([2, 32, 14, 14])
# print(torch.rand(2, 32, 1, 1).expand(4, 32, 14, 14).shape)
# RuntimeError: The expanded size of the tensor (4) must match the existing size (2)
# Expanding with repeat. repeat's arguments give the number of copies per dim, whereas expand's arguments give the target size of each dim (repeat is not recommended)
b = torch.rand(1, 32, 1, 1)
print(b.repeat(4, 32, 1, 1).shape) # torch.Size([4, 1024, 1, 1])
print(b.repeat(4, 1, 1, 1).shape) # torch.Size([4, 32, 1, 1])
print(b.repeat(4, 1, 14, 14).shape) # torch.Size([4, 32, 14, 14])
- expand does not copy data, so it is recommended; repeat copies the data first;
- repeat's arguments give the number of copies per dim, while expand's arguments give the target size of each dim (repeat is not recommended);
- expand can only expand dims whose size is 1; -1 means leave that dim unchanged;
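A minimal sketch of the memory behaviour (checking data_ptr() is just one way to observe it; assuming torch has been imported):
b = torch.rand(1, 32, 1, 1)
e = b.expand(4, 32, 14, 14)
r = b.repeat(4, 1, 14, 14)
print(e.data_ptr() == b.data_ptr())  # True, expand shares the underlying memory
print(r.data_ptr() == b.data_ptr())  # False, repeat allocates new memory and copies
print(e.shape, r.shape)              # both torch.Size([4, 32, 14, 14])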
3.4 transpose/t/permute 【matrix transpose】
a = torch.rand(4, 3, 32, 32)
a1 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 3, 32, 32) # shape is right, but the data order is now wrong
a2 = a.transpose(1, 3).contiguous().view(4, 3*32*32).view(4, 32, 32, 3).transpose(1, 3) # correct
print(a1.shape)
print(a2.shape)
print(torch.all(torch.eq(a, a1))) # tensor(0, dtype=torch.uint8)
print(torch.all(torch.eq(a, a2))) # tensor(1, dtype=torch.uint8)
# permute specifies the new order of the dims directly
a = torch.rand(4, 3, 32, 28)
print(a.permute(0, 2, 3, 1).shape) # torch.Size([4, 32, 28, 3])
- b.t() only works on 2-D tensors;
- transpose() swaps the two specified dims; call contiguous() before view();
- after a transpose, view() loses the original dimension information, so you must keep track of the layout yourself;
- permute() rearranges the dims into any specified order;
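A minimal sketch contrasting t(), transpose() and permute() (shapes chosen only for illustration):
m = torch.rand(3, 4)
print(m.t().shape)                        # torch.Size([4, 3]), t() is only for 2-D tensors
a = torch.rand(4, 3, 32, 28)
print(a.transpose(1, 3).shape)            # torch.Size([4, 28, 32, 3]), swaps exactly two dims
print(a.permute(0, 3, 1, 2).shape)        # torch.Size([4, 28, 3, 32]), any order in one call
print(a.transpose(1, 3).is_contiguous())  # False, so call .contiguous() before view()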
4 auto-broadcasting
# broadcasting = unsqueeze + expand
# insert 1 dim ahead (insert size-1 dims at the leading/higher positions)
# expand dims with size 1 to same size
# feature maps: [4, 32, 14, 14]
# Bias: [32, 1, 1] => [1, 32, 1, 1] => [4, 32, 14, 14]
Broadcasting examples
Is it broadcastable? (match from the last dim!)
- If the current dim has size 1, expand it to the same size;
- If a dim is missing, insert a size-1 dim there and expand it to the same size;
- Otherwise, the tensors are NOT broadcastable.
When a tensor is missing a dim, its value is treated as shared by every entry along that dim:
[class, student, scores] + [scores]
When a dim has size 1, its value is treated as shared along that dim:
[class, student, scores] + [student, 1]
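A minimal sketch of the two cases above, assuming 4 classes, 32 students and 8 scores (the numbers are illustrative):
scores = torch.rand(4, 32, 8)        # [class, student, score]
bias = torch.rand(8)                 # [score] -> [1, 1, 8] -> [4, 32, 8]
print((scores + bias).shape)         # torch.Size([4, 32, 8])
per_student = torch.rand(32, 1)      # [student, 1] -> [1, 32, 1] -> [4, 32, 8]
print((scores + per_student).shape)  # torch.Size([4, 32, 8])
# scores + torch.rand(4)             # RuntimeError: last dims 8 and 4 do not match, not broadcastable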
5 Tensor Concatenation and Splitting
cat/stack
a = torch.rand(4, 32, 8)
b = torch.rand(5, 32, 8)
c = torch.rand(4, 32, 8)
print(torch.cat([a, b], dim=0).shape) # torch.Size([9, 32, 8])
print(torch.cat([a, c], dim=1).shape) # torch.Size([4, 64, 8])
a = torch.rand(4, 3, 16, 16)
b = torch.rand(4, 3, 16, 16)
print(torch.stack([a, b], dim=2).shape) # torch.Size([4, 3, 2, 16, 16])
cat: only the size of the concatenated dim may differ between tensors; all other dims must match. After concatenation the non-concatenated dims keep their size, and the concatenated dim's size is the sum over the inputs.
stack inserts a new dim at the given position; the shapes of the stacked tensors must match exactly.
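A minimal sketch of the error cases (the error lines are commented out; assuming torch has been imported):
a = torch.rand(4, 32, 8)
b = torch.rand(5, 32, 8)
# torch.cat([a, b], dim=1)               # RuntimeError: sizes must match except in the cat dim (dim 1 here)
# torch.stack([a, b], dim=0)             # RuntimeError: stack expects each tensor to be equal size
c = torch.rand(4, 32, 8)
print(torch.stack([a, c], dim=0).shape)  # torch.Size([2, 4, 32, 8]), a new dim is inserted at position 0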
split/chunk
a = torch.rand(3, 32, 8)
a1, a2 = a.split([2, 1], dim=0) # split into pieces of length 2 and 1
print(a1.shape, a2.shape) # torch.Size([2, 32, 8]) torch.Size([1, 32, 8])
a = torch.rand(7, 32, 8)
a1, a2 = a.chunk(2, dim=0) # split into 2 chunks
print(a1.shape, a2.shape) # torch.Size([4, 32, 8]) torch.Size([3, 32, 8])
split: split by length
chunk: split by number of chunks
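split can also take a single int, which is then the length of each piece, while chunk's int is the number of pieces; a minimal sketch:
a = torch.rand(10, 32, 8)
print([p.shape[0] for p in a.split(3, dim=0)])  # [3, 3, 3, 1], pieces of length 3
print([p.shape[0] for p in a.chunk(3, dim=0)])  # [4, 4, 2], 3 pieces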
6 Basic Tensor Operations
Element-wise addition, subtraction, multiplication and division
a, b = torch.rand(3, 4), torch.rand(4)
print(torch.all(torch.eq(a+b, torch.add(a, b)))) # add vs. the + operator, tensor(1, dtype=torch.uint8)
print(torch.all(torch.eq(a-b, torch.sub(a, b)))) # sub vs. the - operator, tensor(1, dtype=torch.uint8)
print(torch.all(torch.eq(a*b, torch.mul(a, b)))) # mul vs. the * operator, tensor(1, dtype=torch.uint8)
print(torch.all(torch.eq(a/b, torch.div(a, b)))) # div vs. the / operator, tensor(1, dtype=torch.uint8)
Matrix multiplication vs. element-wise multiplication
* means element-wise multiplication
.matmul is matrix multiplication
1st: torch.mm only works for 2-D tensors
2nd: torch.matmul works for any number of dims; @ is the operator form of matmul 【recommended】
a = torch.full([2, 2], 3.)  # float fill value so the results below are float
b = torch.full([2, 2], 4.)
print(a*b) # element-wise multiplication, tensor([[12., 12.],[12., 12.]])
print(a.mm(b)) # matrix multiplication, tensor([[24., 24.], [24., 24.]])
print(a@b) # matrix multiplication, tensor([[24., 24.], [24., 24.]])
print(a.matmul(b)) # matrix multiplication, tensor([[24., 24.], [24., 24.]])
# matmul actually multiplies only the last two dims;
# the remaining dims must have the same size or satisfy the broadcasting rules
a = torch.rand(4, 3, 28, 64)
b = torch.rand(4, 3, 64, 32)
c = torch.rand(4, 1, 64, 32)
d = torch.rand(4, 2, 64, 32)
print((a@b).shape) # torch.Size([4, 3, 28, 32])
print((a@c).shape) # torch.Size([4, 3, 28, 32]), broadcasting
# print((a@d).shape) # RuntimeError: dim 1 sizes 3 and 2 cannot be broadcast
Exponentiation
# pow or the ** operator
a = torch.full([2, 2], 3.)  # float fill value so the results below are float
print(a.pow(2)) # tensor([[9., 9.], [9., 9.]])
print(a**2) # tensor([[9., 9.], [9., 9.]])
# sqrt: square root; rsqrt: reciprocal of the square root
b = a.pow(2)
print(b.sqrt()) # tensor([[3., 3.], [3., 3.]])
print(b.rsqrt()) # tensor([[0.3333, 0.3333], [0.3333, 0.3333]])
# exp log
a = torch.exp(torch.ones(2, 2))
b = torch.ones(2, 2)*100
c = torch.ones(2, 2)*8
print(a) # tensor([[2.7183, 2.7183], [2.7183, 2.7183]])
print(torch.log(a)) # tensor([[1., 1.], [1., 1.]])
print(torch.log10(b)) # tensor([[2., 2.], [2., 2.]])
print(torch.log2(c)) # tensor([[3., 3.], [3., 3.]])
Approximation 【floor/ceil/round/trunc/frac/clamp】
a = torch.tensor(3.14)
print(torch.floor(a)) # round down, tensor(3.)
print(torch.ceil(a)) # round up, tensor(4.)
print(torch.round(a)) # round to the nearest integer, tensor(3.)
print(torch.trunc(a)) # integer part, tensor(3.)
print(torch.frac(a)) # fractional part, tensor(0.1400)
# clamp: clipping, e.g. gradient clipping
grad = torch.rand(2, 3)*15
grad = torch.tensor([[10.8008, 11.9414, 3.9532], [7.5537, 13.9067, 4.6728]])  # fixed values so the outputs below are reproducible
print(grad) # tensor([[10.8008, 11.9414, 3.9532], [7.5537, 13.9067, 4.6728]])
print(grad.max()) # tensor(13.9067)
print(grad.median()) # tensor(7.5537)
print(grad.clamp(10)) # tensor([[10.8008, 11.9414, 10.0000], [10.0000, 13.9067, 10.0000]]), clamp with a minimum of 10
print(grad.clamp(0, 10)) # tensor([[10.0000, 10.0000, 3.9532], [7.5537, 10.0000, 4.6728]]), clamp with minimum 0 and maximum 10
7 Tensor Statistics
norm
# norm computes a norm; it is not normalize
a = torch.full([8], 1.)  # float fill value, since norm expects a floating-point tensor
b = a.view(2, 4)
c = a.view(2, 2, 2)
print(a.norm(1), b.norm(1), c.norm(1)) # tensor(8.) tensor(8.) tensor(8.)
print(a.norm(2), b.norm(2), c.norm(2)) # tensor(2.8284) tensor(2.8284) tensor(2.8284)
print(b.norm(1, dim=1)) # tensor([4., 4.])
print(c.norm(1, dim=1)) # tensor([[2., 2.], [2., 2.]])
mean/sum/min/max/prod
a = torch.arange(8).view(2, 4).float()
print(a) # tensor([[0., 1., 2., 3.], [4., 5., 6., 7.]])
print(a.min(), a.max(), a.mean(), a.prod()) # tensor(0.) tensor(7.) tensor(3.5000) tensor(0.)
print(a.sum(), a.argmax(), a.argmin()) # tensor(28.) tensor(7) tensor(0)
print(a.min(dim=0)) # (tensor([0., 1., 2., 3.]), tensor([0, 0, 0, 0]))
print(a.argmin(dim=0)) # tensor([0, 0, 0, 0])
# dim, keepdim
a = torch.rand(4, 10)
print(a)
print(a.argmin(dim=1)) # tensor([5, 5, 0, 5])
print(a.argmin(dim=1, keepdim=True)) # tensor([[5],[5],[0],[5]])
print(a.argmin(dim=1).unsqueeze(-1)) # tensor([[5],[5],[0],[5]]) 和上式等价
- a.min(dim=...) returns a tuple: the first element holds the minimum values and the second the corresponding indices, i.e. the return value of a.argmin(dim=...).
- Understanding the shape returned by a.argmax(dim=1): dim=1 means the reduction runs along dim 1; after the reduction that dim has size 1 and is squeezed away, so the result has the size of dim 0. Set keepdim=True if you do not want the reduced dim to be squeezed.
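A minimal sketch of dim/keepdim on max (the printed values depend on the random input, so only shapes are shown):
a = torch.rand(4, 10)
values, indices = a.max(dim=1)
print(values.shape, indices.shape)            # torch.Size([4]) torch.Size([4])
print(a.max(dim=1, keepdim=True)[0].shape)    # torch.Size([4, 1]), the reduced dim is kept
print(torch.equal(indices, a.argmax(dim=1)))  # True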
top-k, k-th
a = torch.rand(4, 10)
print(a.topk(3, dim=1)[0].shape) # torch.Size([4, 3])
print(a.kthvalue(8, dim=1)[0].shape) # torch.Size([4])
compare
# compare
a = torch.rand(4, 10)
print(torch.eq(a, a))
# tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
# [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
# [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
# [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=torch.uint8)
print(torch.equal(a, a)) # True, 返回True或False
torch.eq() and torch.equal() return different things: torch.eq() returns a tensor with 1 where the elements are equal and 0 where they are not.
torch.equal() returns a single True or False.
Advanced Tensor operations: where | gather
# torch.where(condition, x, y) -> Tensor
# Return a tensor of elements selected from either x or y, depending on condition.
# out=x if condition else out=y
# x and y must have the same shape (broadcastable shapes also work)
cond = torch.rand(2, 2)
print(cond) # tensor([[0.1623, 0.4277], [0.6705, 0.4220]])
a = torch.zeros(2, 2)
b = torch.ones(2, 2)
print(torch.where(cond > 0.5, a, b)) # tensor([[1., 1.], [0., 1.]])
# gather
# torch.gather(input, dim, index, out=None) -> Tensor
# Gathers values along an axis specified by dim
# gather is essentially a table lookup: input is the table and index holds the indices to look up along dim
prob = torch.randn(4, 10)
idx = prob.topk(dim=1, k=3)
idx = idx[1]
print(idx)
# tensor([[8, 0, 4],
# [4, 8, 3],
# [6, 0, 9],
# [8, 7, 9]])
label = torch.arange(10) + 100
print(torch.gather(label.expand(4, 10), dim=1, index=idx.long()))
# tensor([[108, 100, 104],
# [104, 108, 103],
# [106, 100, 109],
# [108, 107, 109]])
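A common use of gather is picking one value per row by an index, e.g. the probability of the target class for each sample; a minimal sketch (prob and target are made-up names for illustration):
prob = torch.softmax(torch.randn(4, 10), dim=1)         # per-sample class probabilities
target = torch.tensor([3, 0, 9, 7])                     # one class index per sample
picked = prob.gather(dim=1, index=target.unsqueeze(1))  # prob[i, target[i]] for each row i
print(picked.shape)                                     # torch.Size([4, 1])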
Miscellaneous
- Equations that can be solved analytically, such as linear equations, have a closed form solution. Most real-world problems cannot be solved this way and can only be approximated numerically.
- PyTorch has no string tensor type; strings are represented via one-hot encoding or an Embedding, as sketched below.
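A minimal sketch of both representations, assuming the strings have already been mapped to integer ids:
import torch.nn.functional as F
ids = torch.tensor([0, 2, 1])          # e.g. word ids after a string-to-int mapping
print(F.one_hot(ids, num_classes=3))   # tensor([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
emb = torch.nn.Embedding(num_embeddings=3, embedding_dim=4)
print(emb(ids).shape)                  # torch.Size([3, 4]), learnable dense vectors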