Pytorch 里面model部分属性简析

2021-05-21  本文已影响0人  十年磨剑_莫回首

class MyNet(nn.Module):
    """Character-level 1-D CNN text classifier (Zhang et al. style).

    Input is expected as a tensor of shape ``(batch, input_dim, input_length)``.

    Args:
        n_classes: number of output classes (size of the final layer).
        input_length: length of the temporal axis of the input.
        input_dim: number of input channels (e.g. alphabet size).
        n_conv_filters: number of filters used by every conv layer.
        n_fc_neurons: kept for interface compatibility; the current
            classifier head uses fixed hidden sizes (50 and 25) and does
            not read this value.
    """

    def __init__(self, n_classes=2, input_length=1014, input_dim=68,
                 n_conv_filters=256,
                 n_fc_neurons=1024):
        super().__init__()

        self.feature1 = nn.Sequential(
            nn.Conv1d(input_dim, n_conv_filters, kernel_size=7, padding=0),
            nn.ReLU(), nn.MaxPool1d(3))
        self.feature2 = nn.Sequential(
            nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=7, padding=0),
            nn.ReLU(), nn.MaxPool1d(3))
        self.feature3 = nn.Sequential(
            nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0),
            nn.ReLU())
        self.feature4 = nn.Sequential(
            nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0),
            nn.ReLU())
        self.feature5 = nn.Sequential(
            nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0),
            nn.ReLU())
        self.feature6 = nn.Sequential(
            nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0),
            nn.ReLU(), nn.MaxPool1d(3))

        self.conv1 = nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0)
        self.conv2 = nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0)
        self.maxpool = nn.MaxPool1d(3)

        # Derive the classifier input size from input_length instead of
        # hard-coding it: the original Linear(265, 50) was wrong (with the
        # defaults the flattened feature size is 256 * 10 = 2560) and the
        # forward pass would raise a shape error at runtime.
        flat_dim = n_conv_filters * self._output_length(input_length)
        self.classifier = nn.Sequential(
            nn.Linear(flat_dim, 50), nn.ReLU(),
            nn.Linear(50, 25), nn.ReLU(),
            nn.Linear(25, n_classes))

    @staticmethod
    def _output_length(length):
        """Temporal length after the conv/pool stack.

        A valid (padding=0, stride=1) Conv1d maps L -> L - k + 1; a
        MaxPool1d(3) (stride defaults to kernel_size) maps L -> L // 3.
        """
        length = (length - 7 + 1) // 3  # feature1: conv k=7, pool 3
        length = (length - 7 + 1) // 3  # feature2: conv k=7, pool 3
        length = length - 3 + 1         # feature3: conv k=3
        length = length - 3 + 1         # feature4: conv k=3
        length = length - 3 + 1         # feature5: conv k=3
        length = (length - 3 + 1) // 3  # feature6: conv k=3, pool 3
        length = length - 3 + 1         # conv1
        length = length - 3 + 1         # conv2
        return length // 3              # maxpool

    def forward(self, x):
        x = self.feature1(x)
        x = self.feature2(x)
        x = self.feature3(x)
        x = self.feature4(x)
        x = self.feature5(x)
        x = self.feature6(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        # Flatten (batch, C, L) -> (batch, C*L) before the Linear head;
        # the original fed the 3-D tensor straight into the classifier.
        x = x.flatten(1)
        return self.classifier(x)

随手定义一个model,首先查看下model 都有哪些属性和方法

model=MyNet()

dir(model)

['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_buffers', '_call_impl', '_forward_hooks', '_forward_pre_hooks', '_get_backward_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_parallel', '_save_to_state_dict', '_slow_forward', '_state_dict_hooks', '_version', 'add_module', 'apply', 'bfloat16', 'buffers', 'children', 'classifier', 'conv1', 'conv2', 'cpu', 'cuda', 'double', 'dump_patches', 'eval', 'extra_repr', 'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'float', 'forward', 'half', 'load_state_dict', 'maxpool', 'modules', 'named_buffers', 'named_children', 'named_modules', 'named_parameters', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_full_backward_hook', 'register_parameter', 'requires_grad_', 'share_memory', 'state_dict', 'to', 'train', 'training', 'type', 'xpu', 'zero_grad']

我们可以看到model有许多的属性,其中有children/named_children, parameters/named_parameters, modules/named_modules 

这里先介绍一下 children 这个方法:它会返回一个生成器,用 for 循环遍历时,会依次得到我们在模型类 __init__ 中定义的一级 module(各个 feature、conv、maxpool 和 classifier);这些一级 module 内部的二级结构不会被进一步展开,是一个很 compact 的表示。至于 named_children(),则会同时给出每个一级 module 的名字,便于描述。

for name,children in model.named_children():

    print('name: ',name)

    print('children: ',children)

print(list(model.named_children()))

name: feature1

children:  Sequential(

  (0): Conv1d(68, 256, kernel_size=(7,), stride=(1,))

  (1): ReLU()

  (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

)

name:  feature2

children:  Sequential(

  (0): Conv1d(256, 256, kernel_size=(7,), stride=(1,))

  (1): ReLU()

  (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

)

name:  feature3

children:  Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

)

name:  feature4

children:  Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

)

name:  feature5

children:  Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

)

name:  feature6

children:  Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

  (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

)

name:  conv1

children:  Conv1d(256, 256, kernel_size=(3,), stride=(1,))

name:  conv2

children:  Conv1d(256, 256, kernel_size=(3,), stride=(1,))

name:  maxpool

children:  MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

name:  classifier

children:  Sequential(

  (0): Linear(in_features=265, out_features=50, bias=True)

  (1): ReLU()

  (2): Linear(in_features=50, out_features=25, bias=True)

  (3): ReLU()

  (4): Linear(in_features=25, out_features=2, bias=True)

)

[('feature1', Sequential(

  (0): Conv1d(68, 256, kernel_size=(7,), stride=(1,))

  (1): ReLU()

  (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

)), ('feature2', Sequential(

  (0): Conv1d(256, 256, kernel_size=(7,), stride=(1,))

  (1): ReLU()

  (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

)), ('feature3', Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

)), ('feature4', Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

)), ('feature5', Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

)), ('feature6', Sequential(

  (0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))

  (1): ReLU()

  (2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)

)), ('conv1', Conv1d(256, 256, kernel_size=(3,), stride=(1,))), ('conv2', Conv1d(256, 256, kernel_size=(3,), stride=(1,))), ('maxpool', MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)), ('classifier', Sequential(

  (0): Linear(in_features=265, out_features=50, bias=True)

  (1): ReLU()

  (2): Linear(in_features=50, out_features=25, bias=True)

  (3): ReLU()

  (4): Linear(in_features=25, out_features=2, bias=True)

))]

至于 modules 这个方法,同样会返回一个生成器。与 children 只遍历一级子模块不同,modules 会递归地展开所有层级:首先得到 model 本身,然后按深度优先的顺序依次给出每个一级 module 及其内部的各级子模块,比如 Sequential 里面的 conv、maxpool 这些。

for name,module in model.named_modules():

    print(name,module)

至于 parameters 这个方法,返回的也是一个生成器,用 for 循环遍历可以得到 model 的所有可学习参数张量。这里的 parameter 需要和 argument 区分一下:前者是模型中待学习的参数,后者是调用函数时具体传入的实参。对 parameters 最常见的操作,就是通过 requires_grad 控制某些参数是否需要计算梯度、是否参与更新。

for params in model.parameters():

    print('params: ',params)  ## 返回model里面所有的参数,不管参数来自于一级module 还是其他各层级的module

至于state_dict 这个方法,会返回一个字典,字典的键值对分别是,参数的名称和参数赋值的tensor,我们每一次load_state_dict 也是加载这样一个字典,来获得预加载模型的各个module和配置的参数。

至于 eval 和 train 这两个方法,它们切换的是模型的 training 状态:train() 将模型设为训练模式,eval() 设为评估模式,影响的是 Dropout、BatchNorm 这类在训练和推理阶段行为不同的层。需要注意,eval() 并不会冻结参数或关闭梯度计算;在 validation 阶段若要停止梯度,需要配合 torch.no_grad() 使用。

至于 to 这个方法,是决定把模型或者输入的数据放在 CPU 还是 GPU 上面,模型和数据必须在同一个设备上面,不然会报错。

device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model.to(device)

上一篇下一篇

猜你喜欢

热点阅读