"RuntimeError: The expanded size of the tensor (118) must match the existing size (135) at non-singleton dimension 1" PyTorch machine translation

Problem description

Using this German to English PyTorch Seq2Seq Machine Translator, I have been trying to build a Middle English to Modern English seq2seq machine translator with a custom dataset. Apart from creating the custom dataset, I virtually copied and pasted the German template, replacing german with me (Middle English) and english with pde (Present Day English), but I get two error messages when I try to train the model. When I run the German version of the code, it trains fine. My code and the two error messages are below.

My code

# imports
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data,datasets
from torchtext.data import Field,BucketIterator,TabularDataset
import numpy as np # other useful math-related libraries and modules
import spacy # this is where he gets his nlp datasets
import spacy.cli
import random
from utils2 import translate_sentence,bleu,save_checkpoint,load_checkpoint

spacy_eng = spacy.load('en') # loading up the English tokenizer
def tokenizer_eng(text): # English tokenizer function for the fields
  return [tok.text for tok in spacy_eng.tokenizer(text)]

# constructing the fields for pde and me
me = Field(sequential=True,use_vocab=True,tokenize=tokenizer_eng,lower=True,init_token='sos',eos_token='<eos>')
pde = Field(sequential=True,eos_token='<eos>')

fields = {'Middle English': ('me',me),'Present Day English': ('pde',pde)}

train_data,validation_data,test_data = TabularDataset.splits(
    path='/Users/user/Desktop/Code/datasets',train='train.tsv',test='test.tsv',validation='validation.tsv',format='tsv',fields=fields
)

#print(test_data[0].__dict__.keys())
#print(validation_data[1].__dict__.values())

# Building the vocabulary
me.build_vocab(train_data)
pde.build_vocab(train_data)

# Building the seq2seq and encoder decoder models
class Encoder(nn.Module): # first LSTM
    def __init__(self,input_size,embedding_size,hidden_size,num_layers,p):
        super(Encoder,self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = nn.Dropout(p)
        self.embedding = nn.Embedding(input_size,embedding_size)
        self.rnn = nn.LSTM(embedding_size,hidden_size,num_layers,dropout=p)
    def forward(self,x): # defines the vector of indices
        # x vector shape: (seq_length,N)
        embedding = self.dropout(self.embedding(x))
        # embedding vector shape: (seq_length,N,embedding_size)
        outputs,(hidden,cell) = self.rnn(embedding)
        return hidden,cell

class Decoder(nn.Module):
  def __init__(self,input_size,embedding_size,hidden_size,output_size,num_layers,p):
      # output_size should be the same as input_size,because the size of the vocabulary does not change.
      super(Decoder,self).__init__()
      self.hidden_size = hidden_size
      self.num_layers = num_layers

      self.dropout = nn.Dropout(p)
      self.embedding = nn.Embedding(input_size,embedding_size)
      self.rnn = nn.LSTM(embedding_size,hidden_size,num_layers,dropout=p)
      self.fc = nn.Linear(hidden_size,output_size) # fc stands for fully connected

  def forward(self,x,hidden,cell):
      x = x.unsqueeze(0) #this adds another dimension

      embedding = self.dropout(self.embedding(x))
      # embedding shape: (1,N,embedding_size)

      outputs,(hidden,cell) = self.rnn(embedding,(hidden,cell))
      # outputs is what we think this next word should be
      # shape of the outputs: (1,N,hidden_size)

      predictions = self.fc(outputs)
      # shape of predictions: (1,N,length_of_vocab)
      # this will be sent to the loss function

      predictions = predictions.squeeze(0)

      return predictions,hidden,cell

class Seq2Seq(nn.Module): # combines the encoder and decoder
  def __init__(self,encoder,decoder):
      super(Seq2Seq,self).__init__()
      self.encoder = encoder
      self.decoder = decoder

  def forward(self,source,target,teacher_force_ratio=0.5):
      batch_size = source.shape[1]
      # source shape: (src_len,N),so source.shape[1] is the batch size N
      target_len = target.shape[0]
      target_vocab_size = len(pde.vocab) #make sure to change this to pde

      outputs = torch.zeros(target_len,batch_size,target_vocab_size).to(device)
      # predicts one word at a time,but each word predicts an entire batch and every prediction is a vector of the entire vocabulary size

      hidden,cell = self.encoder(source)

      x = target[0]

      for t in range(1,target_len):
          output,hidden,cell = self.decoder(x,hidden,cell)

          outputs[t] = output

          # the output will look like (N,english_vocab_size)
          best_guess = output.argmax(1)

          x = target[t] if random.random() < teacher_force_ratio else best_guess

      return outputs


# hyperparameters for the training model
num_epochs = 2
learning_rate = 3e-4
batch_size = 32 # batch size must be smaller than total amount of data

# model hyperparameters
save_model = True
load_model = False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size_encoder = len(me.vocab)
input_size_decoder = len(pde.vocab)
output_size = len(me.vocab)
encoder_embedding_size = 50
decoder_embedding_size = 50
hidden_size = 100
num_layers = 2
enc_dropout = 0.5
dec_dropout = 0.5

# writing iterators
train_iterator,valid_iterator,test_iterator = BucketIterator.splits(
    (train_data,validation_data,test_data),# make sure these are in same order as declared variables
    batch_size=batch_size,sort_within_batch = True,sort_key = lambda x: len(x.me),# formerly x.src
    device=device
)

# running the encoder decoder models
encoder_net = Encoder(input_size_encoder,encoder_embedding_size,hidden_size,num_layers,enc_dropout).to(device)
decoder_net = Decoder(input_size_decoder,decoder_embedding_size,hidden_size,output_size,num_layers,dec_dropout).to(device)

model = Seq2Seq(encoder_net,decoder_net).to(device)
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

pad_idx = pde.vocab.stoi['<pad>']
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)

if load_model:
    load_checkpoint(torch.load("my_checkpoint2.pth.tar"),model,optimizer)

# This is an example sentence.
sentence = (
    "Is owr dyner dyght?"
)

for epoch in range(num_epochs):
    print(f'Epoch [{epoch} / {num_epochs}]')

    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),"optimizer": optimizer.state_dict(),}
        save_checkpoint(checkpoint)

    model.eval()

    translated_sentence = translate_sentence(
        model,sentence,me,pde,device,max_length=50
    )

    print(f"Translated example sentence: \n {translated_sentence}")

    model.train()

    for batch_idx,batch in enumerate(train_iterator):
        inp_data = batch.me.to(device)
        target = batch.pde.to(device)
        # this was formerly batch.src and batch.trg

        output = model(inp_data,target)
        # Output is of shape (trg_len,batch_size,output_dim) but Cross Entropy Loss
        # doesn't take input in that form. Here we can view it in a similar
        # way so we have output_words * batch_size that we want to send into
        # our cost function,so we need to do some reshaping.
        output = output[1:].reshape(-1,output.shape[2])
        target = target[1:].reshape(-1)

        optimizer.zero_grad()
        loss = criterion(output,target)

        loss.backward()

        optimizer.step()

print("run complete")

Error 1: I think this error mainly has to do with the output not being shaped as expected, but I don't know how to get it into the right shape. I suspect the problem lies in how I created the dataset, how I wrote the Seq2Seq class, or in the final for loop of the program.

Traceback (most recent call last):
  File "/Users/user/PycharmProjects/pythonProject/ME-PDE.py",line 250,in <module>
    output = model(inp_data,target)
  File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/module.py",line 727,in _call_impl
    result = self.forward(*input,**kwargs)
  File "/Users/user/PycharmProjects/pythonProject/ME-PDE.py",line 150,in forward
    outputs[t] = output
RuntimeError: The expanded size of the tensor (118) must match the existing size (135) at non-singleton dimension 1.  Target sizes: [20,118].  Tensor sizes: [20,135]
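
For reference, the two sizes in this message appear to line up with the two vocabularies built above: 118 would be len(pde.vocab), which sizes the outputs buffer in Seq2Seq.forward, and 135 would be len(me.vocab), which (via output_size) sizes the decoder's final linear layer. A minimal standalone sketch, with made-up dimensions and not part of the original script, that reproduces the same error:

import torch

# Hypothetical sizes mirroring the traceback: a batch of 20 sentences, an
# outputs buffer allocated for a 118-word vocabulary, and a decoder step
# that returns scores over a 135-word vocabulary.
target_len, batch_size = 10, 20
outputs = torch.zeros(target_len, batch_size, 118)   # like torch.zeros(target_len,batch_size,len(pde.vocab))
step_output = torch.randn(batch_size, 135)           # like the (N,output_size) tensor the decoder returns

outputs[1] = step_output
# RuntimeError: The expanded size of the tensor (118) must match the existing
# size (135) at non-singleton dimension 1.  Target sizes: [20, 118].  Tensor sizes: [20, 135]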

Error 2: This error sometimes appears at random. I think it might be because my hyperparameters are too large for my sample size, but I'm not sure why this error message only shows up some of the time instead of the first one.

Traceback (most recent call last):
  File "/Users/user/PycharmProjects/pythonProject/ME-PDE.py",line 235,in <module>
    translated_sentence = translate_sentence(
  File "/Users/user/PycharmProjects/pythonProject/utils2.py",line 53,in translate_sentence
    output,cell = model.decoder(prevIoUs_word,cell)
  File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/module.py",line 105,in forward
    embedding = self.dropout(self.embedding(x))
  File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/module.py",**kwargs)
  File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/modules/sparse.py",line 124,in forward
    return F.embedding(
  File "/Users/user/.conda/envs/pythonProject/lib/python3.8/site-packages/torch/nn/functional.py",line 1852,in embedding
    return torch.embedding(weight,input,padding_idx,scale_grad_by_freq,sparse)
IndexError: index out of range in self
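
This IndexError is the generic failure nn.Embedding raises when it is asked to look up a token index that is outside the table it was built with (i.e. an index >= num_embeddings). Since the decoder's embedding is constructed from input_size_decoder = len(pde.vocab), any index at or above that size (for example one taken from the larger me vocabulary) would trigger exactly this error if translate_sentence ever fed one in. A tiny illustration with hypothetical sizes, not part of the original script:

import torch
import torch.nn as nn

# Hypothetical: an embedding table built for a 118-word vocabulary.
emb = nn.Embedding(num_embeddings=118, embedding_dim=50)

print(emb(torch.tensor([0, 117])).shape)   # fine: indices 0..117 exist in the table
emb(torch.tensor([134]))                   # IndexError: index out of range in self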

As of right now I only have 25 translations in test.tsv, 20 in train.tsv, and 5 in validation.tsv. I will add more, but I want the program to run successfully first. A sample from validation.tsv:

Middle English                                        Present Day English
Than make I buter ferther on the day                  Then I make butter later in the day.
Ye wold say, 'they be prowde!'                        You would say, ‘they are proud!’
Whyll yow slepe fulle stylle,                         While you sleep soundly,
Kype wylle owr chelderne and let them not wepe.       Keep our children well and don’t let them weep.
Yet I have not a feyr word whan that I have done.     Yet I don’t get any kind words when I have done that.
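
As a purely illustrative sanity check (not in the original script), it can help to print what TabularDataset actually parsed and how large each vocabulary ended up, to confirm that the TSV columns map onto the me/pde fields and to see which of the two sizes in the tracebacks (118 and 135) belongs to which vocabulary:

# Illustrative checks, placed right after the build_vocab calls in the script above.
print(vars(train_data[0]))              # should show tokenized 'me' and 'pde' token lists
print(len(me.vocab), len(pde.vocab))    # compare with the 118/135 in the error messages
print(me.vocab.itos[:10])               # first entries: specials, then most frequent Middle English tokens
print(pde.vocab.itos[:10])              # first entries: specials, then most frequent Present Day English tokens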

Thank you very much for any help you can provide!

Solution

No working solution to this problem has been found yet.
