莫烦pytorch学习笔记
2019-11-23 本文已影响0人
诗人藏夜里
网址:https://morvanzhou.github.io/tutorials/machine-learning/torch/
强烈推荐:该教程将问题讲解得极其简单易懂
import torch
import torch.nn.functional as F #激活函数都在这
from torch.autograd import Variable #相当于容器,用pytorch都要将tensor放进这个容器中
# Regression inputs: 100 evenly spaced points on [-1, 1], turned into a
# (100, 1) column because nn.Linear expects (batch, features) input.
# The tensor is wrapped in Variable, as these notes do for every tensor.
x = Variable(torch.linspace(-1, 1, 100).unsqueeze(1))
class Net(torch.nn.Module):
    """Basic feed-forward network (used here for a regression problem).

    One hidden linear layer followed by ReLU, then a linear output layer.

    Args:
        n_features: size of each input sample.
        n_hidden: number of units in the hidden layer.
        n_output: size of each output sample.
    """

    def __init__(self, n_features, n_hidden, n_output):
        super(Net, self).__init__()  # initialise torch.nn.Module first
        # The attribute names (hidden / prediction) are the layer names
        # that show up when the network is printed.
        self.hidden = torch.nn.Linear(n_features, n_hidden)
        self.prediction = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Run the forward pass on input ``x`` and return the output."""
        # F.relu is a plain function, not a module, so it is not listed
        # when the network is printed.
        x = F.relu(self.hidden(x))
        return self.prediction(x)
# Arguments follow Net.__init__: (n_features, n_hidden, n_output).
net = Net(1, 10, 1)
print(net)  # show the layers

# Regression targets. The notes never define ``y``; a noisy quadratic of
# ``x`` is used here so the loop below is runnable.
# TODO(review): confirm this matches the data used in the tutorial.
y = x.pow(2) + 0.2 * torch.rand(x.size())

# Optimizer: updates the parameters of ``net``.
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
# Loss: mean squared error.
loss_func = torch.nn.MSELoss()

for t in range(100):  # training iterations
    prediction = net(x)
    loss = loss_func(prediction, y)  # prediction first, target second

    # Optimisation step
    optimizer.zero_grad()  # reset the gradients of all parameters
    loss.backward()        # compute gradients for all parameters
    optimizer.step()       # let the optimizer update the parameters
net结构
用前馈网络实现分类问题
# Classification: synthetic two-class data set.
n_data = torch.ones(100, 2)  # template tensor fixing the (100, 2) sample shape

# Class 0: samples drawn with std 1 around +2 in both coordinates, shape (100, 2).
x0 = torch.normal(2 * n_data, 1)
# Class 1: samples drawn with std 1 around -2 in both coordinates, shape (100, 2).
x1 = torch.normal(-2 * n_data, 1)

# Labels: 100 zeros for class 0 and 100 ones for class 1, each of shape (100,).
y0 = torch.zeros(100)
y1 = torch.ones(100)

# Concatenate both classes along dim 0.
# Features must be 32-bit floats; for classification the labels must be
# 64-bit integers (LongTensor).
x = torch.cat([x0, x1], dim=0).float()
y = torch.cat([y0, y1], dim=0).long()
# Two-class problem: two input features and one output unit per class
# (the output vector plays the role of a one-hot encoding).
net = Net(n_features=2, n_hidden=10, n_output=2)

# CrossEntropyLoss applies softmax to the raw network outputs internally,
# i.e. it works on per-class probabilities.
# Example: for a sample whose one-hot label is [0, 0, 1], the softmax
# probabilities might be [0.1, 0.1, 0.8].
loss_func = torch.nn.CrossEntropyLoss()

print(net)
网络顺序搭建方法
# Second way to build a network: stack the layers in order with Sequential.
# Layers built this way have no custom attribute names.
_layers = [
    torch.nn.Linear(2, 10),
    # nn.ReLU is a module (a class), so unlike F.relu it is listed when
    # the network is printed.
    torch.nn.ReLU(),
    torch.nn.Linear(10, 2),
]
net2 = torch.nn.Sequential(*_layers)
print(net2)
实现网络的保存和提取
# --- Saving ---
# Whole network: first argument is the network, second is the file name.
torch.save(net, 'net.pkl')
# Parameters only (the state_dict).
torch.save(net.state_dict(), 'net_parameters.pkl')

# --- Restoring the whole network ---
# A whole-module checkpoint is a pickle, so recent PyTorch versions need
# weights_only=False to load it.
# SECURITY: unpickling can execute arbitrary code -- only load files you trust.
net = torch.load('net.pkl', weights_only=False)

# --- Restoring from parameters only ---
# First rebuild the same architecture that was saved. The parameters above
# come from a Net instance (keys hidden.* / prediction.*); loading them into
# a Sequential (keys 0.* / 2.*) would fail with a key mismatch.
net2 = Net(2, 10, 2)
net2.load_state_dict(torch.load('net_parameters.pkl'))
批训练
# Mini-batch training
import torch.utils.data as Data

torch.manual_seed(1)  # fixed seed for reproducibility

BATCH_SIZE = 5  # number of samples per mini-batch

x = torch.linspace(1, 10, 10)  # feature data
y = torch.linspace(10, 1, 10)  # label data

# Wrap features (x) and labels (y) in a tensor dataset.
torch_dataset = Data.TensorDataset(x, y)

# The loader serves the dataset one mini-batch at a time.
loader = Data.DataLoader(
    dataset=torch_dataset,   # the dataset to draw from
    batch_size=BATCH_SIZE,   # samples per batch (not the number of batches)
    shuffle=True,            # whether to shuffle the data
    num_workers=2,           # worker subprocesses used to load the data
)


def show_batch():
    """Iterate over ``loader`` for three epochs, one mini-batch at a time."""
    for epoch in range(3):  # each epoch
        for step, (batch_x, batch_y) in enumerate(loader):  # each batch
            b_x = Variable(batch_x)  # wrap in Variable
            b_y = Variable(batch_y)
            # training step goes here ...


# With num_workers > 0 the loader starts worker subprocesses; on platforms
# that spawn processes (e.g. Windows) the iteration has to run under the
# main guard, otherwise the workers re-execute this module on import.
if __name__ == '__main__':
    show_batch()
CNN结构
import torch.nn as nn
class CNN(nn.Module):
    """Two convolution blocks followed by a linear layer with 10 outputs.

    Expects input of shape (batch, 1, 28, 28) and returns (batch, 10).
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(          # input shape (1, 28, 28)
            nn.Conv2d(
                in_channels=1,               # number of input channels
                out_channels=16,             # number of filters
                kernel_size=5,               # filter size
                stride=1,                    # filter movement/step
                # To keep height and width unchanged with stride=1,
                # use padding=(kernel_size-1)/2.
                padding=2,
            ),                               # output shape (16, 28, 28)
            nn.ReLU(),                       # activation
            nn.MaxPool2d(kernel_size=2),     # 2x2 down-sampling, output shape (16, 14, 14)
        )
        self.conv2 = nn.Sequential(          # input shape (16, 14, 14)
            nn.Conv2d(16, 32, 5, 1, 2),      # output shape (32, 14, 14)
            nn.ReLU(),                       # activation
            nn.MaxPool2d(2),                 # output shape (32, 7, 7)
        )
        self.out = nn.Linear(32 * 7 * 7, 10)  # fully connected layer, 10 classes

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)  # shape (batch, 32, 7, 7)
        # The feature maps must be flattened by hand before the Linear layer.
        x = x.view(x.size(0), -1)  # (batch, 32 * 7 * 7)
        return self.out(x)


net = CNN()
print(net)
RNN结构
# RNN
class RNN(nn.Module):
    """Plain RNN followed by a linear layer applied at every time step."""

    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.RNN(        # a plain RNN is sufficient here
            input_size=1,
            hidden_size=32,       # number of RNN hidden units
            num_layers=1,         # number of stacked RNN layers
            batch_first=True,     # input & output are (batch, time_step, input_size)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        """Return per-time-step predictions and the new hidden state.

        The hidden state is continuous across calls, so it is passed in
        and handed back to the caller every time.

        Shapes:
            x:       (batch, time_step, input_size)
            h_state: (n_layers, batch, hidden_size)
            r_out:   (batch, time_step, hidden_size)
        """
        r_out, h_state = self.rnn(x, h_state)  # h_state is an input of the RNN too
        # Apply the output layer at every time step.
        outs = [self.out(r_out[:, t, :]) for t in range(r_out.size(1))]
        # ``outs`` is a list, so stack it into one tensor; h_state is
        # returned for use as the next call's input.
        return torch.stack(outs, dim=1), h_state
import math

rnn = RNN()
print(rnn)
# Printed structure recorded in the notes:
# RNN (
#   (rnn): RNN(1, 32, batch_first=True)
#   (out): Linear (32 -> 1)
# )

# The notes leave the input/target construction blank. A sin -> cos
# sequence of TIME_STEP points per step is used here so the loop runs.
# TODO(review): replace with the real data and sequence length.
TIME_STEP = 10

h_state = None  # no hidden state before the first step (treated as zeros)
for step in range(60):
    start, end = step * math.pi, (step + 1) * math.pi
    steps = torch.linspace(start, end, TIME_STEP)
    x = Variable(torch.sin(steps).view(1, TIME_STEP, 1))  # (batch, time_step, input_size)
    y = Variable(torch.cos(steps).view(1, TIME_STEP, 1))

    prediction, h_state = rnn(x, h_state)
    # Important: re-wrap the hidden state's data in a Variable before
    # feeding it into the next step.
    h_state = Variable(h_state.data)
# Alternative way to write forward(): apply the output layer to all time
# steps in one call instead of looping over them.
def forward(self, x, h_state):
    """Return per-time-step predictions and the new hidden state.

    Reads ``self.rnn`` and ``self.out``. The result has shape
    (batch, time_step, out_features), the same as stacking the output
    layer's result for each time step along dim 1.
    """
    r_out, h_state = self.rnn(x, h_state)
    batch, time_steps, hidden = r_out.size()
    # reshape (not view): the RNN output is not guaranteed to be contiguous.
    flat = r_out.reshape(-1, hidden)          # (batch * time_step, hidden)
    outs = self.out(flat)                     # (batch * time_step, out_features)
    # Restore the (batch, time_step, ...) layout from the actual input
    # sizes rather than from a global TIME_STEP constant.
    return outs.view(batch, time_steps, -1), h_state
LSTM结构
class RNN(nn.Module):
    """RNN used for classification: an LSTM followed by a 10-way linear layer.

    A 28x28 image is read one row per time step, so input_size is 28.
    """

    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(       # LSTM works much better than nn.RNN() here
            input_size=28,        # pixels per image row; one row is scanned per time step
            hidden_size=64,       # number of hidden units
            num_layers=1,         # number of stacked layers
            batch_first=True,     # input & output are (batch, time_step, input_size)
        )
        # Output layer: takes the LSTM's last hidden output, returns class scores.
        self.out = nn.Linear(64, 10)

    def forward(self, x):
        """Return class scores computed from the last time step.

        Shapes:
            x:     (batch, time_step, input_size)
            r_out: (batch, time_step, hidden_size)
            h_n:   (n_layers, batch, hidden_size)
            h_c:   (n_layers, batch, hidden_size)

        An LSTM has two hidden states: h_n (the "branch line") and
        h_c (the "main line").
        """
        # None means the hidden state starts as all zeros.
        r_out, (h_n, h_c) = self.rnn(x, None)
        # Use the output of the last time step; per the notes,
        # r_out[:, -1, :] holds the same values as h_n.
        return self.out(r_out[:, -1, :])


rnn = RNN()
print(rnn)
# Printed structure recorded in the notes:
# RNN (
#   (rnn): LSTM(28, 64, batch_first=True)
#   (out): Linear (64 -> 10)
# )
class RNN(nn.Module):
    """RNN used for regression: an LSTM followed by a single-output linear layer."""

    def __init__(self):
        super(RNN, self).__init__()
        self.rnn = nn.LSTM(       # LSTM works much better than nn.RNN() here
            input_size=1,
            hidden_size=64,       # number of hidden units
            num_layers=1,         # number of stacked layers
            batch_first=True,     # input & output are (batch, time_step, input_size)
        )
        # Output layer: takes the LSTM's last hidden output, returns one value.
        self.out = nn.Linear(64, 1)

    def forward(self, x):
        """Return one predicted value per sequence, from the last time step.

        Shapes:
            x:     (batch, time_step, input_size)
            r_out: (batch, time_step, hidden_size)
            h_n:   (n_layers, batch, hidden_size)
            h_c:   (n_layers, batch, hidden_size)

        An LSTM has two hidden states: h_n (the "branch line") and
        h_c (the "main line").
        """
        # None means the hidden state starts as all zeros.
        r_out, (h_n, h_c) = self.rnn(x, None)
        # Use the output of the last time step; per the notes,
        # r_out[:, -1, :] holds the same values as h_n.
        return self.out(r_out[:, -1, :])


rnn = RNN()
print(rnn)
无监督学习decoder encoder
# Unsupervised learning: encoder/decoder, similar to a bottleneck; the
# target y is the input x itself.
# Compress to get the compressed features, then decompress those features
# back into the original image.
class AutoEncoder(nn.Module):
    """Fully connected autoencoder: 28*28 inputs -> 3 features -> 28*28 outputs."""

    def __init__(self):
        super(AutoEncoder, self).__init__()
        # Compression
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.Tanh(),
            nn.Linear(128, 64),
            nn.Tanh(),
            nn.Linear(64, 12),
            nn.Tanh(),
            nn.Linear(12, 3),  # compress to 3 features, for 3D visualisation
        )
        # Decompression
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.Tanh(),
            nn.Linear(12, 64),
            nn.Tanh(),
            nn.Linear(64, 128),
            nn.Tanh(),
            nn.Linear(128, 28 * 28),
            nn.Sigmoid(),  # keeps the output values in (0, 1)
        )

    def forward(self, x):
        """Return ``(encoded, decoded)`` for a batch of flattened inputs.

        ``encoded`` (the compressed representation) is the part mainly used.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
autoencoder = AutoEncoder()

# NOTE(review): LR, EPOCH and train_loader are not defined anywhere in these
# notes; they must be provided before this runs. train_loader presumably
# yields (image batch, label batch) pairs -- confirm.
optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()

for epoch in range(EPOCH):
    for step, (x, b_label) in enumerate(train_loader):
        b_x = x.view(-1, 28 * 28)  # batch x, shape (batch, 28*28)
        b_y = x.view(-1, 28 * 28)  # batch y, shape (batch, 28*28); the target is x itself

        encoded, decoded = autoencoder(b_x)

        loss = loss_func(decoded, b_y)  # mean square error
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients
使用gpu进行加速
# GPU acceleration
# Use the GPU when one is available and fall back to the CPU otherwise,
# so the snippet does not fail on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the data to the device.
x_b = Variable(x).to(device)
# Convert back to a CPU tensor.
pred = x_b.cpu()
# Move a module to the device.
cnn = CNN()
cnn.to(device)