PyTorch 下载 MNIST 数据报错:urllib.error.HTTPError: HTTP Error 403

2020-03-05  本文已影响0人  牛奶大泡芙

今天用pytorch写一个MNIST手写数字识别(分类)程序的时候,出现了这样的错误(如下图)


403错误.png

后来找到的解决办法是修改请求的 header(加上 User-Agent),具体参考下面代码中 opener 相关的几行。
另外,用测试数据检查正确率的时候,还遇到了这样的报错:
位置:self.lstm(x, (h0, c0))
错误提示:RuntimeError: Expected hidden[0] size (2, 16, 128), got (2, 32, 128)
这是一个细节处理的问题:这三个维度分别对应 num_layers、batch_size、hidden_size。仔细观察期望的 size 和实际得到的 size,第 1、3 维都是一致的,只有第 2 维(batch_size)不同,说明维度顺序没有错乱,问题出在验证的最后一批数据长度不等于 BATCH_SIZE(MNIST 测试集共 10000 条,每批 32 条,最后一批只剩 16 条)。在循环时加上一个判断条件、跳过长度不等于 BATCH_SIZE 的批次就可以了。如果是维度发生了错乱,就要考虑 h0、c0 的 size 设置是否有问题,把维度顺序调换一下应该就可以了。
这个模型一开始的学习率是0.02,发现几乎没有什么效果,后来提高到了0.1,准确率提高了很多。

import torch
import torchvision
from torchvision import transforms as t
import os
import sys
import torch.nn as nn
import torch.utils.data as tud
from six.moves import urllib
# Work around the HTTP 403 error hit when requesting the MNIST data: install
# a global urllib opener whose requests carry a browser-like User-agent header.
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)

# Hyper-parameters. The scripts below reshape every image to 28 x 28, i.e.
# TIME_STEP rows that each hold INPUT_SIZE pixel values.
INPUT_SIZE = 28
TIME_STEP = 28
BATCH_SIZE = 32
HIDDEN_SIZE = 128
NUM_LAYERS = 2
CLASSIFY = 10  # number of output classes (digits 0-9)
EPOCHES = 1
LEARNING_RATE = 0.1  # the author reports that 0.02 barely trained

# download=True on the training split fetches the MNIST files into ./data;
# the test split is then read from the same directory.
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=t.ToTensor())
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, transform=t.ToTensor())

train_dataloader = tud.DataLoader(
    dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation order does not affect the model, so the test split is not
# shuffled: this keeps the reported accuracy reproducible even when the
# evaluation loop skips the ragged final batch.
test_dataloader = tud.DataLoader(
    dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# h0 = torch.zeros(size=(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE))
# c0 = torch.zeros(size=(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE))


class L(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        batch_size: Nominal batch size. Stored on the instance only;
            ``forward`` sizes its state from the input it actually receives.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, batch_size, hidden_size, num_layers, output_size):
        super(L, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one row of ``output_size`` scores per sequence in ``x``.

        Args:
            x: Tensor of shape (batch, time_steps, input_size); the LSTM is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Build the zero initial state from the input itself, not from module
        # globals: the batch dimension then always matches (including a short
        # final batch) and the state lives on x's device with x's dtype.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)
        output, _ = self.lstm(x, (h0, c0))
        # Only the last time step's hidden output feeds the classifier.
        return self.linear(output[:, -1, :])

# Pick the device once instead of re-checking CUDA on every batch.
# NOTE: the CPU path only works if the model's forward() creates its tensors
# on the input's device rather than calling .cuda() unconditionally.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model a single time, before the optimizer is handed its parameters.
lstm_model = L(INPUT_SIZE, BATCH_SIZE, HIDDEN_SIZE, NUM_LAYERS, CLASSIFY).to(device)
optimizer = torch.optim.SGD(lstm_model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

# ---- training ----
lstm_model.train()
for e in range(EPOCHES):
    for i, (x_label, y_label) in enumerate(train_dataloader):
        # Drop the channel axis: each image becomes TIME_STEP rows of
        # INPUT_SIZE pixels.
        x_label = x_label.view(-1, TIME_STEP, INPUT_SIZE).to(device)
        y_label = y_label.to(device)

        out = lstm_model(x_label)
        loss = loss_fn(out, y_label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 500 == 0:
            # .item() prints the scalar loss instead of the tensor repr.
            print(i, ' ', loss.item())
print(lstm_model)

# ---- evaluation ----
lstm_model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for img, label in test_dataloader:
        # Only full batches are scored, as in the original script; a batch
        # shorter than BATCH_SIZE is skipped.
        if label.size(0) != BATCH_SIZE:
            continue
        img = img.view(-1, TIME_STEP, INPUT_SIZE).to(device)
        label = label.to(device)
        out = lstm_model(img)
        _, predict = torch.max(out, 1)
        total += label.size(0)
        correct += (predict == label).sum().item()
    if total:
        print('Accuracy is :{}'.format(100*correct/total))
    else:
        # Avoid ZeroDivisionError when no batch was evaluated.
        print('Accuracy is undefined: no full batch was evaluated')
torch.save(lstm_model.state_dict(), 'model.ckpt')
上一篇下一篇

猜你喜欢

热点阅读