GoogLeNet v1 Architecture and Code Reproduction
1 Overall Structure
Top-5 error rate of 6.7%; stacks 9 Inception modules, replacing the purely serial CNN structure with parallel branches, for 22 layers in total; uses average pooling in place of the fully connected layers; has only about 1/12 the parameters of AlexNet; the final predictions come from a softmax classifier.
These architectural updates give it better performance than AlexNet, and it won the 2014 ILSVRC competition.
Its detailed per-layer parameters are given in the table in the original paper.
Apart from the Inception module and the two extra output branches, there is not much else in GoogLeNet that needs special explanation, so the sections below focus on the Inception module and those two auxiliary outputs.
2 The Inception Module
The Inception module runs the four convolution branches described above in parallel, concatenates their outputs along the channel dimension, and passes the result on to the next Inception module, as sketched below.
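A minimal illustration of the channel-wise concatenation (the branch widths here are the inception3a numbers used in the code later):

import torch

# Four branch outputs with the same spatial size but different channel counts
x1 = torch.randn(1, 64, 28, 28)   # 1x1 branch
x2 = torch.randn(1, 128, 28, 28)  # 1x1 -> 3x3 branch
x3 = torch.randn(1, 32, 28, 28)   # 1x1 -> 5x5 branch
x4 = torch.randn(1, 32, 28, 28)   # pool -> 1x1 branch
out = torch.cat([x1, x2, x3, x4], dim=1)
print(out.shape)  # torch.Size([1, 256, 28, 28]), since 64 + 128 + 32 + 32 = 256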
In the original figure, the Inception module is the version on the right. One might wonder what difference the added 1x1 convolutions make. The overall receptive-field effect is about the same; the difference is computational cost: compressing the channels with a 1x1 convolution first, and only then applying the 3x3 convolution, saves a significant amount of computation.
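To make the saving concrete, here is a back-of-the-envelope multiply-add count (a sketch only; the 192-in, 96-reduce, 128-out channel counts are borrowed from inception3a's 3x3 branch):

# Multiply-adds per output position for mapping 192 -> 128 channels (bias ignored)
direct = 3 * 3 * 192 * 128                      # plain 3x3 convolution: 221184
reduced = 1 * 1 * 192 * 96 + 3 * 3 * 96 * 128   # 1x1 down to 96, then 3x3: 129024
print(direct, reduced, round(reduced / direct, 2))  # 221184 129024 0.58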
3 The Two Auxiliary Output Branches
As the network gets deeper, gradients tend to vanish or become very small, so backpropagation works poorly and the model learns slowly. To counter this, two auxiliary branches are added. Because they branch off at layers that are not yet very deep, their gradients are still sizable, and backpropagating through them strengthens the model's ability to learn; a training sketch follows.
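During training, the three outputs are typically combined into a single weighted loss; the 0.3 weight on the auxiliary heads follows the original paper. A minimal sketch, assuming the GoogLeNet class defined in the next section and dummy data:

import torch
import torch.nn as nn

model = GoogLeNet(num_classes=1000, add_aux_stage=True)  # defined in section 4
criterion = nn.CrossEntropyLoss()
images = torch.randn(8, 3, 224, 224)   # dummy batch
labels = torch.randint(0, 1000, (8,))  # dummy targets
scores1, scores2, scores3 = model(images)
# The two auxiliary losses are down-weighted by 0.3, as in the original paper
loss = criterion(scores3, labels) + 0.3 * (criterion(scores1, labels) + criterion(scores2, labels))
loss.backward()

At inference time the auxiliary branches are simply dropped (construct the model with add_aux_stage=False).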
4 Code Reproduction
import torch
import torch.nn as nn


class BasicConv2d(nn.Module):
    """Conv2d followed by ReLU, the basic building block used throughout the network."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.conv(x))
class Inception(nn.Module):
    def __init__(self, in_channels, out_channels):
        """
        :param in_channels: number of input channels, e.g. 192
        :param out_channels: output channels of each branch, e.g. [[64], [96, 128], [16, 32], [32]]
        """
        super(Inception, self).__init__()
        # Branch 1: 1x1 convolution
        self.branch1 = nn.Sequential(
            BasicConv2d(in_channels, out_channels[0][0], kernel_size=1, stride=1, padding=0)
        )
        # Branch 2: 1x1 reduction followed by 3x3 convolution
        self.branch2 = nn.Sequential(
            BasicConv2d(in_channels, out_channels[1][0], kernel_size=1, stride=1, padding=0),
            BasicConv2d(out_channels[1][0], out_channels[1][1], kernel_size=3, stride=1, padding=1)
        )
        # Branch 3: 1x1 reduction followed by 5x5 convolution
        self.branch3 = nn.Sequential(
            BasicConv2d(in_channels, out_channels[2][0], kernel_size=1, stride=1, padding=0),
            BasicConv2d(out_channels[2][0], out_channels[2][1], kernel_size=5, stride=1, padding=2)
        )
        # Branch 4: 3x3 max pooling followed by 1x1 convolution
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(3, 1, padding=1),
            BasicConv2d(in_channels, out_channels[3][0], kernel_size=1, stride=1, padding=0)
        )

    def forward(self, x):
        """
        Inception forward pass
        :param x: [N,C,H,W]
        :return: [N,C1+C2+C3+C4,H,W]
        """
        x1 = self.branch1(x)  # [N,C,H,W] -> [N,C1,H,W]
        x2 = self.branch2(x)  # [N,C,H,W] -> [N,C2,H,W]
        x3 = self.branch3(x)  # [N,C,H,W] -> [N,C3,H,W]
        x4 = self.branch4(x)  # [N,C,H,W] -> [N,C4,H,W]
        x = torch.cat([x1, x2, x3, x4], dim=1)  # [N,C1+C2+C3+C4,H,W]
        return x
class GoogLeNet(nn.Module):
    def __init__(self, num_classes, add_aux_stage=False):
        super(GoogLeNet, self).__init__()
        self.stage1 = nn.Sequential(
            BasicConv2d(3, 64, 7, 2, 3),
            nn.MaxPool2d(3, 2, padding=1),
            BasicConv2d(64, 64, 1, 1, 0),
            BasicConv2d(64, 192, 3, 1, 1),
            nn.MaxPool2d(3, 2, padding=1),
            Inception(192, [[64], [96, 128], [16, 32], [32]]),     # inception3a
            Inception(256, [[128], [128, 192], [32, 96], [64]]),   # inception3b
            nn.MaxPool2d(3, 2, padding=1),
            Inception(480, [[192], [96, 208], [16, 48], [64]])     # inception4a
        )
        self.stage2 = nn.Sequential(
            Inception(512, [[160], [112, 224], [24, 64], [64]]),   # inception4b
            Inception(512, [[128], [128, 256], [24, 64], [64]]),   # inception4c
            Inception(512, [[112], [144, 288], [32, 64], [64]])    # inception4d
        )
        self.stage3 = nn.Sequential(
            Inception(528, [[256], [160, 320], [32, 128], [128]]), # inception4e
            nn.MaxPool2d(3, 2, padding=1),
            Inception(832, [[256], [160, 320], [32, 128], [128]]), # inception5a
            Inception(832, [[384], [192, 384], [48, 128], [128]]), # inception5b
            nn.AdaptiveAvgPool2d(output_size=(1, 1))  # global average pooling replaces the FC layer
        )
        # A 1x1 convolution acts as the final classifier on the [N,1024,1,1] feature map
        self.classify = nn.Conv2d(1024, num_classes, kernel_size=(1, 1), stride=(1, 1), padding=0)
        if add_aux_stage:
            # Auxiliary classifier attached after inception4a (512 input channels)
            self.aux_stage1 = nn.Sequential(
                nn.MaxPool2d(5, 3, padding=0),
                nn.Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), padding=0),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d(output_size=(2, 2)),
                nn.Flatten(1),
                nn.Linear(4096, 2048),
                nn.Dropout(p=0.4),
                nn.ReLU(),
                nn.Linear(2048, num_classes)
            )
            # Auxiliary classifier attached after inception4d (528 input channels)
            self.aux_stage2 = nn.Sequential(
                nn.MaxPool2d(5, 3, padding=0),
                nn.Conv2d(528, 1024, kernel_size=(1, 1), stride=(1, 1), padding=0),
                nn.ReLU(),
                nn.AdaptiveAvgPool2d(output_size=(2, 2)),
                nn.Flatten(1),
                nn.Linear(4096, 2048),
                nn.Dropout(p=0.4),
                nn.ReLU(),
                nn.Linear(2048, num_classes)
            )
        else:
            self.aux_stage1 = None
            self.aux_stage2 = None
    def forward(self, x):
        """
        Forward pass
        :param x: [N,3,H,W]
        :return: scores3, or (scores1, scores2, scores3) when the auxiliary branches exist
        """
        z1 = self.stage1(x)   # [N,3,H,W] -> [N,512,H1,W1]
        z2 = self.stage2(z1)  # [N,512,H1,W1] -> [N,528,H2,W2]
        z3 = self.stage3(z2)  # [N,528,H2,W2] -> [N,1024,1,1]
        # Main decision branch; flatten(1) keeps the batch dimension, whereas a bare
        # torch.squeeze would also drop it when N == 1
        scores3 = self.classify(z3).flatten(1)  # [N,1024,1,1] -> [N,num_classes,1,1] -> [N,num_classes]
        if self.aux_stage1 is not None:
            # Two auxiliary decision branches, used only during training
            scores1 = self.aux_stage1(z1)
            scores2 = self.aux_stage2(z2)
            return scores1, scores2, scores3
        else:
            return scores3
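A quick shape check of the whole network (a minimal smoke test; the 224x224 input size matches the original paper's setting):

if __name__ == "__main__":
    net = GoogLeNet(num_classes=1000, add_aux_stage=True)
    x = torch.randn(2, 3, 224, 224)
    s1, s2, s3 = net(x)
    print(s1.shape, s2.shape, s3.shape)  # each: torch.Size([2, 1000])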