Inception V2 没有改进 Inception V1

问题描述

我正在学习 d2l 一书，GoogLeNet 章节中的一个练习是从 Rethinking the Inception Architecture for Computer Vision 论文中获取模型，所以我这样做并基本上重新创建了基于 Inception V1 架构的 Inception V2 架构在 d2l 书中提出，我唯一没有包括的是论文中描述的网格减少。我希望这会改善我的模型的结果，但如果有的话，它会使它们变得更糟。我正在使用关于 fashion_mnist 数据集的 GoogLeNet 章节中提供的 d2l 代码测试模型，并使用 Inception V1 获得了 0.908 的测试准确率，但是在我的 V2 实现中，我只得到了 0.885 的测试acc，这是我所能达到的最高值得到。我希望 V2 模型会更好，所以我真的不知道我哪里出错了，如果我在我的代码中犯了错误或什么，但是对 V2 架构或论文的任何见解都将不胜感激。

这是前 3 个 inception 块的代码（来自论文中的图 5）

class InceptionB1(nn.Module):
    # `c1`--`c4` are the number of output channels for each path
    def __init__(self,in_channels,c1,c2,c3,c4,**kwargs):
        super(InceptionB1,self).__init__(**kwargs)
        # Path 1 is a single 1 x 1 convolutional layer
        self.p1_1 = nn.Conv2d(in_channels,kernel_size=1)
        # Path 2 is a 1 x 1 convolutional layer followed by a 3 x 3
        # convolutional layer
        self.p2_1 = nn.Conv2d(in_channels,c2[0],kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0],c2[1],kernel_size=3,padding=1)
        # Path 3 is a 1 x 1 convolutional layer followed by 2 3x3 convs (factorization)
        # convolutional layer
        self.p3_1 = nn.Conv2d(in_channels,c3[0],kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0],c3[1],padding=1)
        # Replacing path3_2 with modified path using factorization through smaller convolutions
        self.p3_3 = nn.Conv2d(c3[1],padding = 1)
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3,stride=1,padding=1)
        self.p4_2 = nn.Conv2d(in_channels,kernel_size=1)

    def forward(self,x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_3(self.p3_2(F.relu(self.p3_1(x)))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the outputs on the channel dimension
        return torch.cat((p1,p2,p3,p4),dim=1)

这是我对中间 5 个块的代码（论文中的图 6）

class InceptionB2(nn.Module):
    # `c1`--`c4` are the number of output channels for each path
    def __init__(self,**kwargs):
        super(InceptionB2,kernel_size=1)
        # Path 2 is a 1 x 1 convolutional layer followed by a 1 x 3 then 3x1 conv
        # convolutional layer
        self.p2_1 = nn.Conv2d(in_channels,kernel_size=(1,3),padding=1)
        self.p2_3 = nn.Conv2d(c2[1],kernel_size=(3,1),padding=0)
        # Path 3 is a 1 x 1 conv layer followed by a 1x3 conv then 3x1 then 1x3 then 3x1
        # convolutional layer
        self.p3_1 = nn.Conv2d(in_channels,padding=1)
        self.p3_3 = nn.Conv2d(c3[1],padding=0)
        self.p3_4 = nn.Conv2d(c3[1],padding=0)
        self.p3_5 = nn.Conv2d(c3[1],padding=1)
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3,x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_3(F.relu(self.p2_2(F.relu(self.p2_1(x))))))
        p3 = F.relu(self.p3_5(F.relu(self.p3_4(F.relu(self.p3_3(F.relu(self.p3_2(F.relu(self.p3_1(x))))))))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the outputs on the channel dimension
        return torch.cat((p1,dim=1)

这是我最后 2 个块的代码（论文中的图 7）

class InceptionB3(nn.Module):
    # `c1`--`c4` are the number of output channels for each path
    def __init__(self,**kwargs):
        super(InceptionB3,kernel_size=1)
        # Path 2 is a 1 x 1 convolutional layer followed by a 1x3 and 3x1
        # convolutional layer
        self.p2_1 = nn.Conv2d(in_channels,kernel_size=1)
        self.p2_2_1 = nn.Conv2d(c2[0],padding=0)
        self.p2_2_2 = nn.Conv2d(c2[0],padding=0)
        # Path 3 is a 1 x 1 convolutional layer followed by a 3x3 then 1x3 and 3x1
        # convolutional layer
        self.p3_1 = nn.Conv2d(in_channels,padding=1)
        self.p3_3_1 = nn.Conv2d(c3[1],padding=0)
        self.p3_3_2 = nn.Conv2d(c3[1],padding=0)
        # Path 4 is a 3 x 3 maximum pooling layer followed by a 1 x 1
        # convolutional layer
        self.p4_1 = nn.MaxPool2d(kernel_size=3,x):
        p1 = F.relu(self.p1_1(x))

        p2_1 = F.relu(self.p2_2_1(F.relu(self.p2_1(x))))
        p2_2 = F.relu(elf.p2_2_2(F.relu(self.p2_1(x))))
        p2 = torch.matmul(p2_1,p2_2)

        p3_1 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p3_2_1 = F.relu(self.p3_3_1(p3_1))
        p3_2_2 = F.relu(sself.p3_3_2(p3_1))
        p3 = torch.matmul(p3_2_1,p3_2_2)

        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the outputs on the channel dimension
        return torch.cat((p1,dim=1)

这是d2l章节的链接 https://d2l.ai/chapter_convolutional-modern/googlenet.html

This is the training graph for the v1 model

This is the training graph for the v2 model

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

computer-vision conv-neural-network d2l deep-learning machine-learning