莫烦pytorch学习笔记

2019-11-23 本文已影响0人诗人藏夜里

网址：https://morvanzhou.github.io/tutorials/machine-learning/torch/

强烈推荐，将问题讲解得极其简单易懂

import torch
import torch.nn.functional as F #激活函数都在这
from torch.autograd import Variable #相当于容器，用pytorch都要将tensor放进这个容器中

# Regression inputs: 100 evenly spaced points in [-1, 1]. unsqueeze turns the
# 1-D tensor of shape (100,) into (100, 1), i.e. one single-feature sample per
# row, and the result is wrapped in a Variable as these notes do for every
# tensor that is fed to a network.
x = Variable(torch.linspace(-1, 1, 100).unsqueeze(dim=1))

'''
基本网络结构（实现回归问题）
'''

class Net(torch.nn.Module):
    """Feed-forward network with one hidden layer.

    Args:
        n_features: size of each input sample.
        n_hidden: number of units in the hidden layer.
        n_output: size of each output sample.
    """

    def __init__(self, n_features, n_hidden, n_output):
        super().__init__()  # required nn.Module initialisation
        # The attribute names are the layer names shown by print(net).
        self.hidden = torch.nn.Linear(in_features=n_features, out_features=n_hidden)
        self.prediction = torch.nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self, x):
        """Run the forward pass and return the raw output of the last layer."""
        # F.relu is a plain function rather than a module, so it is not
        # listed when the network is printed.
        hidden_activation = F.relu(self.hidden(x))
        return self.prediction(hidden_activation)

net = Net(1, 10, 1)  # arguments follow Net.__init__: (n_features, n_hidden, n_output)
print(net)           # show the layer structure

# Regression target: a noisy parabola with the same shape as x.
y = x.pow(2) + 0.2 * torch.rand(x.size())

# Optimizer: plain SGD over every parameter of net.
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
loss_func = torch.nn.MSELoss()  # mean squared error between prediction and target

for t in range(100):  # training iterations
    prediction = net(x)

    # The prediction goes first, the target second.
    loss = loss_func(prediction, y)

    # Optimisation step
    optimizer.zero_grad()  # reset the gradients of all parameters to zero
    loss.backward()        # back-propagate to compute a gradient for every parameter
    optimizer.step()       # let the optimizer update the parameters

net结构

用前馈网络实现分类问题

#分类
# 假数据
# Template tensor that fixes the shape of each cluster: (100, 2).
n_data = torch.ones(100, 2)

# Samples: one Gaussian cluster per class, unit standard deviation.
x0 = torch.normal(n_data * 2, 1)    # class 0 samples, centred on 2, shape (100, 2)
x1 = torch.normal(n_data * -2, 1)   # class 1 samples, centred on -2, shape (100, 2)

# Labels: all zeros for class 0, all ones for class 1.
y0 = torch.zeros(100)               # shape (100,)
y1 = torch.ones(100)                # shape (100,)

# Concatenate both classes along dim 0. For classification the features must
# be 32-bit floats and the labels 64-bit integers.
x = torch.cat([x0, x1], dim=0).float()   # shape (200, 2), float32
y = torch.cat([y0, y1], dim=0).long()    # shape (200,), int64

# Two input features and, for a two-class problem, one output per class.
net = Net(n_features=2, n_hidden=10, n_output=2)
print(net)
# CrossEntropyLoss applies the softmax itself, so the network outputs raw
# scores; the targets are int64 class indices.
loss_func = torch.nn.CrossEntropyLoss()

'''
比如标签值为 [0, 0, 1] 时，网络输出经 softmax 后的各类别预测概率可能是 [0.1, 0.1, 0.8]
'''

网络顺序搭建方法

#第二种搭建网络方法,这种方法没有层的自定义名字
# Second way to build a network: a Sequential container. Layers get positional
# names ("0", "1", "2") instead of custom attribute names.
net2 = torch.nn.Sequential()
net2.add_module('0', torch.nn.Linear(2, 10))
# torch.nn.ReLU is a module (a class instance), so unlike F.relu it shows up
# when the network is printed.
net2.add_module('1', torch.nn.ReLU())
net2.add_module('2', torch.nn.Linear(10, 2))
print(net2)

实现网络的保存和提取

# Saving
torch.save(obj=net, f='net.pkl')                            # whole network: object to save, then file name
torch.save(obj=net.state_dict(), f='net_parameters.pkl')    # parameters only

# Restoring the whole network
# NOTE(review): torch.load unpickles the file, so only load files you trust.
# Newer PyTorch releases may need extra arguments (e.g. weights_only=False) to
# load a fully pickled module - confirm against the installed version.
net = torch.load(f='net.pkl')

# When only the parameters were saved, rebuild the same architecture first and
# then copy the stored parameters into it.
net2 = torch.nn.Sequential()
net2.add_module('0', torch.nn.Linear(2, 10))
net2.add_module('1', torch.nn.ReLU())   # a module, so it appears when the network is printed
net2.add_module('2', torch.nn.Linear(10, 2))
net2.load_state_dict(torch.load(f='net_parameters.pkl'))

批训练

#批训练
import torch.utils.data as Data
torch.manual_seed(1)   # fix the random seed so the shuffling is reproducible

BATCH_SIZE = 5
x = torch.linspace(1, 10, 10)
y = torch.linspace(10, 1, 10)

# Wrap the tensors in a dataset: x holds the features, y holds the labels.
torch_dataset = Data.TensorDataset(x, y)

# The loader serves the dataset one mini-batch at a time.
loader = Data.DataLoader(
    dataset=torch_dataset,    # dataset to draw from
    batch_size=BATCH_SIZE,    # number of samples per batch
    shuffle=True,             # reshuffle the samples every epoch
    num_workers=2,            # load batches in two worker subprocesses
)

# Iterating a multi-worker loader starts subprocesses. On platforms that spawn
# instead of fork, each worker re-imports this module, so the loop has to sit
# behind the __main__ guard or the workers would start the loop again.
if __name__ == '__main__':
    for epoch in range(3):   # each pass over the whole dataset
        for step, (batch_x, batch_y) in enumerate(loader):   # each mini-batch
            b_x = Variable(batch_x)    # wrap the batch in Variables
            b_y = Variable(batch_y)
            # training.....

CNN结构

import torch.nn as nn
class CNN(nn.Module):
    """Two conv/pool stages followed by a linear layer with 10 outputs.

    The shapes in the comments assume a (1, 28, 28) input image.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: (1, 28, 28) -> conv -> (16, 28, 28) -> 2x2 max-pool -> (16, 14, 14).
        # With stride 1, padding = (kernel_size - 1) / 2 keeps height and width unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),  # 1 input channel, 16 filters
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # Stage 2: (16, 14, 14) -> conv -> (32, 14, 14) -> 2x2 max-pool -> (32, 7, 7).
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected layer mapping the flattened features to 10 classes.
        self.out = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images."""
        features = self.conv2(self.conv1(x))   # (batch, 32, 7, 7)
        # The feature maps must be flattened by hand before the linear layer.
        flattened = features.view(features.size(0), -1)   # (batch, 32 * 7 * 7)
        return self.out(flattened)


net = CNN()
print(net)

RNN结构

#RNN
class RNN(nn.Module):
    """Single-layer vanilla RNN that emits one value per time step."""

    def __init__(self):
        super().__init__()
        # A plain nn.RNN is enough here. batch_first=True makes inputs and
        # outputs use the layout (batch, time_step, feature).
        self.rnn = nn.RNN(input_size=1, hidden_size=32, num_layers=1, batch_first=True)
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        """Run the RNN over a sequence and project every step to one output.

        Args:
            x: input of shape (batch, time_step, input_size).
            h_state: hidden state of shape (n_layers, batch, hidden_size),
                carried over from the previous call because the state is
                continuous across calls.

        Returns:
            A tuple ``(outputs, h_state)``: the stacked per-step predictions
            and the hidden state to pass into the next call.
        """
        r_out, h_state = self.rnn(x, h_state)   # r_out: (batch, time_step, hidden_size)
        # Apply the output layer to every time step separately.
        per_step = [self.out(r_out[:, t, :]) for t in range(r_out.size(1))]
        # The per-step results are a list, so stack them back into one tensor.
        return torch.stack(per_step, dim=1), h_state


import math

rnn = RNN()
print(rnn)

TIME_STEP = 10   # number of time steps fed to the RNN per iteration
h_state = None   # no hidden state before the first step

for step in range(60):
    # Each iteration covers the next segment of length pi:
    # the input is sin(t) and the target is cos(t).
    start, end = step * math.pi, (step + 1) * math.pi
    steps = torch.linspace(start, end, TIME_STEP)
    x = Variable(torch.sin(steps).view(1, TIME_STEP, 1))   # (batch, time_step, input_size)
    y = Variable(torch.cos(steps).view(1, TIME_STEP, 1))
    prediction, h_state = rnn(x, h_state)
    # Important: re-wrap the hidden state so the next iteration does not
    # back-propagate through the history of earlier iterations.
    h_state = Variable(h_state.data)

"""
RNN (
  (rnn): RNN(1, 32, batch_first=True)
  (out): Linear (32 -> 1)
)
"""
#另一种定义方式
def forward(self, x, h_state):
    """Alternative forward pass: project all time steps with one linear call.

    Args:
        x: input of shape (batch, time_step, input_size).
        h_state: hidden state to feed into ``self.rnn``.

    Returns:
        A tuple ``(outs, h_state)`` where ``outs`` has shape
        (batch, time_step, out_features) and ``h_state`` is the new hidden
        state.
    """
    r_out, h_state = self.rnn(x, h_state)
    batch, time_steps, hidden = r_out.size()
    # Merge batch and time into one axis so the linear layer handles every
    # step at once; contiguous() guards against a strided RNN output.
    flat = r_out.contiguous().view(batch * time_steps, hidden)
    outs = self.out(flat)
    # Split the merged axis back into (batch, time_step).
    return outs.view(batch, time_steps, -1), h_state

LSTM结构

'''
rnn用于分类
'''
class RNN(nn.Module):
    """LSTM classifier with 10 output classes.

    The notes feed a 28x28 image one row per time step, hence input_size=28.
    """

    def __init__(self):
        super().__init__()
        # An LSTM is used instead of nn.RNN. batch_first=True makes inputs and
        # outputs use the layout (batch, time_step, feature).
        self.rnn = nn.LSTM(28, 64, num_layers=1, batch_first=True)   # input_size=28, hidden_size=64
        # Output layer: last LSTM output in, class scores out.
        self.out = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Classify each sequence from the LSTM output at its last time step.

        Args:
            x: input of shape (batch, time_step, input_size).
        """
        # No initial state is passed, so the LSTM starts from an all-zero state.
        # The second return value is the pair (h_n, h_c), each of shape
        # (n_layers, batch, hidden_size); it is not needed here.
        r_out, _ = self.rnn(x)   # r_out: (batch, time_step, hidden_size)
        # Keep only the last time step; for this LSTM it equals h_n.
        last_step = r_out[:, -1, :]
        return self.out(last_step)


rnn = RNN()
print(rnn)
"""
RNN (
  (rnn): LSTM(28, 64, batch_first=True)
  (out): Linear (64 -> 10)
)
"""

'''
rnn用于回归
'''
class RNN(nn.Module):
    """LSTM regressor: one scalar input per time step, one scalar output per sequence."""

    def __init__(self):
        super().__init__()
        lstm_options = dict(
            input_size=1,
            hidden_size=64,     # number of hidden units
            num_layers=1,       # number of stacked LSTM layers
            batch_first=True,   # tensors use the layout (batch, time_step, feature)
        )
        self.rnn = nn.LSTM(**lstm_options)
        # Output layer: last LSTM output in, a single regression value out.
        self.out = nn.Linear(64, 1)

    def forward(self, x):
        """Predict one value per sequence from the last time step.

        Args:
            x: input of shape (batch, time_step, input_size).
        """
        # Passing None as the state starts the LSTM from an all-zero state.
        # The returned state holds h_n and h_c, each of shape
        # (n_layers, batch, hidden_size).
        r_out, _final_state = self.rnn(x, None)
        # r_out is (batch, time_step, hidden_size); only its last step is used.
        return self.out(r_out[:, -1])


rnn = RNN()
print(rnn)

无监督学习decoder encoder

#无监督学习 encoder decoder  类似bottleneck y标签值为x
#压缩和解压, 压缩后得到压缩的特征值, 再从压缩的特征值解压成原图片.

class AutoEncoder(nn.Module):
    """Fully connected autoencoder that squeezes 28*28 inputs down to 3 features."""

    def __init__(self):
        super().__init__()
        # Compression: 784 -> 128 -> 64 -> 12 -> 3 (three features allow a 3-D plot).
        self.encoder = nn.Sequential(*self._dense_stack([28 * 28, 128, 64, 12, 3]))
        # Decompression: 3 -> 12 -> 64 -> 128 -> 784; the final sigmoid keeps
        # the reconstructed values inside (0, 1).
        self.decoder = nn.Sequential(
            *self._dense_stack([3, 12, 64, 128, 28 * 28]),
            nn.Sigmoid(),
        )

    @staticmethod
    def _dense_stack(widths):
        """Return Linear layers for consecutive widths with a Tanh between them."""
        layers = []
        for index, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            if index > 0:
                layers.append(nn.Tanh())
            layers.append(nn.Linear(fan_in, fan_out))
        return layers

    def forward(self, x):
        """Return ``(encoded, decoded)``; ``encoded`` is the compressed representation."""
        encoded = self.encoder(x)
        return encoded, self.decoder(encoded)

# Train the autoencoder to reproduce its own input.
# NOTE(review): LR, EPOCH and train_loader are not defined in the visible
# notes - presumably a learning rate, an epoch count and a DataLoader that
# yields (image, label) batches; confirm before running.
autoencoder = AutoEncoder()

optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
loss_func = nn.MSELoss()

for epoch in range(EPOCH):
    for step, (x, b_label) in enumerate(train_loader):
        b_x = x.view(-1, 28*28)   # batch x, shape (batch, 28*28)
        b_y = x.view(-1, 28*28)   # batch y, shape (batch, 28*28); the target is the input itself

        encoded, decoded = autoencoder(b_x)

        loss = loss_func(decoded, b_y)      # mean square error
        optimizer.zero_grad()               # clear gradients for this training step
        loss.backward()                     # backpropagation, compute gradients
        optimizer.step()                    # apply gradients

使用gpu进行加速

# GPU acceleration

# .cuda() raises on a machine without a usable CUDA device, so only move
# things to the GPU when one is available.
use_cuda = torch.cuda.is_available()

# Move the data to the GPU
x_b = Variable(x).cuda() if use_cuda else Variable(x)
# Convert back to a CPU tensor
pred = x_b.cpu()

# Move the whole module to the GPU
cnn = CNN()
if use_cuda:
    cnn.cuda()

莫烦pytorch学习笔记

猜你喜欢

热点阅读