Strides in Conv2d and ConvTranspose2d

Problem description

I am trying to train an autoencoder on CIFAR10. I first wrote the model with nn.Sequential, as in the code below, and it works fine:

import torch.nn as nn

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.Flatten = nn.Flatten()  # note: defined but never used in forward

        # Input size:  [batch, 3, 32, 32]
        # Output size: [batch, 3, 32, 32]
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),   # [batch, 12, 16, 16]
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),  # [batch, 24, 8, 8]
            nn.ReLU(),
            nn.Conv2d(24, 48, 4, stride=2, padding=1),  # [batch, 48, 4, 4]
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1),  # [batch, 24, 8, 8]
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1),  # [batch, 12, 16, 16]
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),   # [batch, 3, 32, 32]
            nn.Sigmoid(),
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded
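For reference (this check is not part of the original post): with kernel_size=4, stride=2, padding=1, each Conv2d exactly halves the spatial size, since floor((H + 2 - 4) / 2) + 1 = H / 2, and each matching ConvTranspose2d exactly doubles it, which is why this Sequential model round-trips 32 -> 16 -> 8 -> 4 and back to 32. A minimal sanity check:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 32, 32)
down = nn.Conv2d(3, 12, 4, stride=2, padding=1)
up = nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1)
print(down(x).shape)      # torch.Size([1, 12, 16, 16]) -- exactly halved
print(up(down(x)).shape)  # torch.Size([1, 3, 32, 32])  -- exactly restored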

I then decided to add a linear layer to reduce the size of the encoded output. To do this, I rewrote the model as follows (this time without nn.Sequential):

import torch.nn as nn
import torch.nn.functional as F

class Autoencoder(nn.Module):

    def __init__(self):
        super(Autoencoder, self).__init__()

        # Encoder architecture
        self.conv_1 = nn.Conv2d(3, 12, kernel_size=3, stride=2, padding=1)
        self.conv_2 = nn.Conv2d(12, 24, kernel_size=3, stride=2, padding=1)
        self.conv_3 = nn.Conv2d(24, 48, kernel_size=3, stride=2, padding=1)
        # Decoder architecture: transposed convolution ("deconvolution") layers
        self.de_conv_3 = nn.ConvTranspose2d(48, 24, kernel_size=3, stride=2, padding=1)
        self.de_conv_2 = nn.ConvTranspose2d(24, 12, kernel_size=3, stride=2, padding=1)
        self.de_conv_1 = nn.ConvTranspose2d(12, 3, kernel_size=3, stride=2, padding=1)

        # Bottleneck: [batch, 48*4*4] -> [batch, 10] and back
        self.linear1 = nn.Linear(48 * 4 * 4, 10)
        self.linear2 = nn.Linear(10, 48 * 4 * 4)
        self.sigmoid = nn.Sigmoid()

    def forward(self, images):
        code = self.encode(images)
        out = self.decode(code)
        return code, out

    def encode(self, images):
        code = self.conv_1(images)
        code = F.relu(code)
        print("conv1 output", code.shape)

        code = self.conv_2(code)
        code = F.relu(code)
        print("conv2 output", code.shape)

        code = self.conv_3(code)
        code = F.relu(code)
        print("conv3 output", code.shape)

        # flatten, then a linear layer down to a 10-dimensional code
        code = code.view(code.size(0), 48 * 4 * 4)
        print("code after view", code.shape)
        code = F.relu(self.linear1(code))
        print("code after linear1", code.shape)
        return code  # [100, 10]


    def decode(self, code):
        code = self.linear2(code)
        print("code after linear2", code.shape)
        # reshape the flat code back into a conv feature map
        code = code.view(code.size(0), 48, 4, 4)
        print("code after view", code.shape)

        # upsample with transposed conv layers, ReLU activations in between
        code = F.relu(self.de_conv_3(code))
        print("deconv3 output", code.shape)
        code = F.relu(self.de_conv_2(code))
        print("deconv2 output", code.shape)

        code = self.de_conv_1(code)
        print("deconv1 output", code.shape)
        out = self.sigmoid(code)
        return out

But I get an error, and when I print the output of each layer I can see that the shapes the decoder produces do not match the shapes the encoder produced. I am confused about why I do not get the same result as with the first, Sequential model. Is it because stride=2 somehow works differently in the transposed convolution layers? But then why does it work in the Sequential version? These are the printed shapes of each layer:

conv1 output torch.Size([100, 12, 16, 16])
conv2 output torch.Size([100, 24, 8, 8])
conv3 output torch.Size([100, 48, 4, 4])
code after view torch.Size([100, 768])
code after linear1 torch.Size([100, 10])
code after linear2 torch.Size([100, 768])
code after view torch.Size([100, 48, 4, 4])
deconv3 output torch.Size([100, 24, 7, 7])
deconv2 output torch.Size([100, 12, 13, 13])
deconv1 output torch.Size([100, 3, 25, 25])
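These sizes follow directly from PyTorch's documented output-size formula for ConvTranspose2d (with dilation=1): H_out = (H_in - 1) * stride - 2 * padding + kernel_size + output_padding. With kernel_size=3, stride=2, padding=1 and the default output_padding=0, that reduces to H_out = 2 * H_in - 1, which produces exactly the 4 -> 7 -> 13 -> 25 progression above. A quick check (the deconv_out helper below is illustrative, not from the original code):

def deconv_out(h_in, kernel_size=3, stride=2, padding=1, output_padding=0):
    # PyTorch's documented ConvTranspose2d output size, assuming dilation=1
    return (h_in - 1) * stride - 2 * padding + kernel_size + output_padding

print(deconv_out(4))   # 7  -> deconv3 output
print(deconv_out(7))   # 13 -> deconv2 output
print(deconv_out(13))  # 25 -> deconv1 output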

Solution

No confirmed fix is recorded for this question in the original post.

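That said, the shape arithmetic above suggests a likely remedy, sketched here as an assumption rather than a confirmed answer: the Sequential model restores the input size exactly because kernel_size=4 with stride=2, padding=1 gives H_out = 2 * H_in, while the rewritten model's kernel_size=3 transposed convolutions give H_out = 2 * H_in - 1. Either switch the transposed convolutions back to kernel_size=4, or keep kernel_size=3 and pass output_padding=1 so each layer doubles the spatial size:

import torch
import torch.nn as nn

# With output_padding=1: (4 - 1) * 2 - 2 * 1 + 3 + 1 = 8, i.e. H_out = 2 * H_in
de_conv_3 = nn.ConvTranspose2d(48, 24, kernel_size=3, stride=2,
                               padding=1, output_padding=1)
x = torch.randn(100, 48, 4, 4)  # shape of the encoder's conv3 output
print(de_conv_3(x).shape)       # torch.Size([100, 24, 8, 8])

Applying the same output_padding=1 to de_conv_2 and de_conv_1 restores the full 4 -> 8 -> 16 -> 32 path, matching the Sequential model.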