PyTorch 里面 model 部分属性简析
class MyNet(nn.Module):
    """1-D convolutional classifier: eight Conv1d layers followed by a small MLP head.

    Args:
        n_classes: Number of output logits produced by the last linear layer.
        input_length: Length of the input sequence (last dimension of ``x``).
            It is used to size the first linear layer of the classifier.
        input_dim: Number of input channels expected by the first convolution.
        n_conv_filters: Number of channels produced by every convolution.
        n_fc_neurons: Accepted for interface compatibility; currently unused
            (the classifier keeps its fixed hidden sizes of 50 and 25).

    Raises:
        ValueError: If ``input_length`` is too short to survive the
            unpadded convolutions and the pooling layers.
    """

    def __init__(self, n_classes=2, input_length=1014, input_dim=68,
                 n_conv_filters=256,
                 n_fc_neurons=1024):
        super().__init__()
        self.feature1 = nn.Sequential(nn.Conv1d(input_dim, n_conv_filters, kernel_size=7, padding=0), nn.ReLU(), nn.MaxPool1d(3))
        self.feature2 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=7, padding=0), nn.ReLU(), nn.MaxPool1d(3))
        self.feature3 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU())
        self.feature4 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU())
        self.feature5 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU())
        self.feature6 = nn.Sequential(nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0), nn.ReLU(), nn.MaxPool1d(3))
        self.conv1 = nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0)
        self.conv2 = nn.Conv1d(n_conv_filters, n_conv_filters, kernel_size=3, padding=0)
        self.maxpool = nn.MaxPool1d(3)

        # Follow the sequence length through the stack so the classifier can be
        # sized to the flattened feature map. An unpadded, stride-1 convolution
        # maps L -> L - k + 1 and MaxPool1d(3) maps L -> L // 3.
        # Each entry is (kernel_size, followed_by_pool) in forward order:
        # feature1..feature6, conv1, then conv2 + maxpool.
        seq_len = input_length
        for kernel_size, pooled in ((7, True), (7, True), (3, False), (3, False),
                                    (3, False), (3, True), (3, False), (3, True)):
            seq_len = seq_len - kernel_size + 1
            if pooled:
                seq_len //= 3
            if seq_len < 1:
                raise ValueError(
                    f"input_length={input_length} is too short for this architecture"
                )

        # The previous hard-coded in_features of 265 matched neither the channel
        # count nor the flattened size, so every forward pass failed.
        flat_features = n_conv_filters * seq_len
        self.classifier = nn.Sequential(nn.Linear(flat_features, 50), nn.ReLU(), nn.Linear(50, 25), nn.ReLU(), nn.Linear(25, n_classes))

    def forward(self, x):
        """Return class logits for a batch.

        Args:
            x: Tensor of shape ``(batch, input_dim, input_length)``.

        Returns:
            Tensor of shape ``(batch, n_classes)``.
        """
        x = self.feature1(x)
        x = self.feature2(x)
        x = self.feature3(x)
        x = self.feature4(x)
        x = self.feature5(x)
        x = self.feature6(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        # Collapse (channels, length) into one feature axis for the linear layers.
        x = x.flatten(start_dim=1)
        x = self.classifier(x)
        return x
随手定义一个model,首先查看下model 都有哪些属性和方法
# Instantiate the network with its default hyper-parameters.
model=MyNet()
# List the names of every attribute and method reachable from the instance.
dir(model)
['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_buffers', '_call_impl', '_forward_hooks', '_forward_pre_hooks', '_get_backward_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_parallel', '_save_to_state_dict', '_slow_forward', '_state_dict_hooks', '_version', 'add_module', 'apply', 'bfloat16', 'buffers', 'children', 'classifier', 'conv1', 'conv2', 'cpu', 'cuda', 'double', 'dump_patches', 'eval', 'extra_repr', 'feature1', 'feature2', 'feature3', 'feature4', 'feature5', 'feature6', 'float', 'forward', 'half', 'load_state_dict', 'maxpool', 'modules', 'named_buffers', 'named_children', 'named_modules', 'named_parameters', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_full_backward_hook', 'register_parameter', 'requires_grad_', 'share_memory', 'state_dict', 'to', 'train', 'training', 'type', 'xpu', 'zero_grad']
我们可以看到model有许多的属性,其中有children/named_children, parameters/named_parameters, modules/named_modules
这里先介绍一下children这个方法:它会返回一个生成器,用for循环遍历这个生成器,会依次得到我们在模型类的__init__中定义的一级子模块(这里是feature1~feature6、conv1、conv2、maxpool和classifier),至于这些子模块内部的二级module则不会被进一步展开,是一个很compact的表示。named_children()则会同时返回每个一级module的名字(也就是定义时的属性名)和module本身,这样更好描述。
# Walk only the direct (first-level) submodules; nested layers are not expanded.
for name, child in model.named_children():
    print('name: ', name)
    print('children: ', child)

# The same (name, module) pairs, materialised as a single list.
print(list(model.named_children()))
name: feature1
children: Sequential(
(0): Conv1d(68, 256, kernel_size=(7,), stride=(1,))
(1): ReLU()
(2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
)
name: feature2
children: Sequential(
(0): Conv1d(256, 256, kernel_size=(7,), stride=(1,))
(1): ReLU()
(2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
)
name: feature3
children: Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
)
name: feature4
children: Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
)
name: feature5
children: Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
)
name: feature6
children: Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
(2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
)
name: conv1
children: Conv1d(256, 256, kernel_size=(3,), stride=(1,))
name: conv2
children: Conv1d(256, 256, kernel_size=(3,), stride=(1,))
name: maxpool
children: MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
name: classifier
children: Sequential(
(0): Linear(in_features=265, out_features=50, bias=True)
(1): ReLU()
(2): Linear(in_features=50, out_features=25, bias=True)
(3): ReLU()
(4): Linear(in_features=25, out_features=2, bias=True)
)
[('feature1', Sequential(
(0): Conv1d(68, 256, kernel_size=(7,), stride=(1,))
(1): ReLU()
(2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
)), ('feature2', Sequential(
(0): Conv1d(256, 256, kernel_size=(7,), stride=(1,))
(1): ReLU()
(2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
)), ('feature3', Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
)), ('feature4', Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
)), ('feature5', Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
)), ('feature6', Sequential(
(0): Conv1d(256, 256, kernel_size=(3,), stride=(1,))
(1): ReLU()
(2): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
)), ('conv1', Conv1d(256, 256, kernel_size=(3,), stride=(1,))), ('conv2', Conv1d(256, 256, kernel_size=(3,), stride=(1,))), ('maxpool', MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)), ('classifier', Sequential(
(0): Linear(in_features=265, out_features=50, bias=True)
(1): ReLU()
(2): Linear(in_features=50, out_features=25, bias=True)
(3): ReLU()
(4): Linear(in_features=25, out_features=2, bias=True)
))]
至于modules这个方法,也会返回一个生成器,用for循环遍历这个生成器,首先得到的是model本身,然后按深度优先的顺序依次展开:每遇到一个一级module,会紧接着把它内部的二级module逐个列出,再进入下一个一级module,比如feature1之后紧跟着它里面的Conv1d、ReLU和MaxPool1d。named_modules()则会同时给出每个module的层级名称(如feature1.0)。
# named_modules() yields the model itself first (under the empty name ''),
# then every nested submodule together with its dotted path.
for module_path, submodule in model.named_modules():
    print(module_path, submodule)
至于parameters这个方法,返回的是一个生成器,用for循环遍历,可以得到model中所有可学习的参数张量(各层的weight和bias,类型为nn.Parameter)。这里的parameters需要和函数的argument(调用时传入的实参)区分一下:前者是模型自身持有、在训练中被优化器更新的张量,后者只是调用函数时传入的值。对于parameters可以做的主要操作,就是通过requires_grad控制是否需要计算梯度,从而决定该参数最后是否需要更新(例如冻结某些层)。
# parameters() yields every parameter tensor of the model, whether it belongs
# to a first-level module or to a module nested at any deeper level.
for param in model.parameters():
    print('params: ', param)
至于state_dict这个方法,会返回一个有序字典(OrderedDict),字典的键值对分别是参数(以及buffer)的名称和对应的tensor。我们每一次load_state_dict也是加载这样一个字典,把预训练模型中各个module的参数值载入到当前模型里;注意state_dict里只保存张量,并不保存模型结构,所以加载前需要先用同样的结构实例化model。
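至于 eval 和 train 这两个方法,用来切换模型的运行模式:eval() 用在validation/测试阶段,它把 training 标志设为 False,使 Dropout、BatchNorm 等层按推理方式工作;train() 用在训练阶段,把 training 标志设回 True。需要注意,eval() 本身并不会阻止梯度计算或参数更新,如果验证时不想计算梯度,还需要配合 torch.no_grad() 使用;参数是否更新则取决于是否调用了优化器的 step()。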
至于to这个方法,是决定把模型或者输入的数据放在CPU还是GPU上面,模型和数据必须在同一个设备上面,不然会报错。
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Module.to() moves the module's parameters and buffers in place, so the
# return value does not need to be reassigned.
model.to(device)