无法在 Google AI Platform 上部署 pytorch 文本分类器模型，错误：无法从“prediction_server

问题描述

我遵循了这个 article，尝试在 Google AI 平台上部署 PyTorch 文本分类器，得到以下错误。

创建版本失败。错误模型检测到错误：“加载失败模型：加载模型时出现意外错误：无法获取属性 main' 上的 'TextPreprocessor' 来自 'prediction_server_beta.py'>（错误代码：0）”

我通过GUI部署模型，这里是设置

!gcloud ai-platform versions create {v17} \
    --model {pytorch_text_classfier} \
    --origin=gs://pytorch_text_classfier_package \
    --python-version=3.7 \
    --runtime-version={1.15} \
    --framework ={"Custom prediction routine (BETA)"} \
    --package-uris=gs://pytorch_model_distribution_package_v3/my_package-1.7.tar.gz\
    --machine-type=mls1-c4-m4 \
    --prediction-class=model_prediction.CustomModelPrediction

我向存储桶上传了五个文件，model_prediction.py、preprocess.py、processor_state.pkl、setup.py、torch_model.py、torch_saved_model.pt。 tar.gz 文件在另一个存储桶中。

有没有人知道如何解决这个错误？

下面是model_prediction.py、preprocess.py、torch_model.py、setup.py代码。

model_prediction.py

import os
import pickle
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from tensorflow.python.keras.preprocessing import sequence
from tensorflow.keras.preprocessing import text
from preprocess import TextPreprocessor
from torch_model import TorchTextClassifier

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


class CustomModelPrediction(object):
 def __init__(self,model,processor):
   self._model = model
   self._processor = processor

 def _postprocess(self,predictions):
   labels = ['no confusion','confusion']
   label_indexes = [np.argmax(prediction) for prediction in predictions.detach().numpy()]
   return [labels[label_index] for label_index in label_indexes]

 def predict(self,instances,**kwargs):
   preprocessed_data = self._processor.transform(instances)
   predictions =  self._model(Variable(torch.Tensor(preprocessed_data).long()))
   labels = self._postprocess(predictions)
   return labels

 @classmethod
 def from_path(cls,model_dir):

  import torch
  import torch_model

  state_dict = torch.load(os.path.join(model_dir,'torch_saved_model.pt'),map_location= device)
  print(f'Model loaded from <== {model_dir}')
  model = TorchTextClassifier().to(device)
  model.load_state_dict(state_dict['model_state_dict'])
  model.eval()
  print('set to eval mode')  
  with open(os.path.join(model_dir,'processor_state.pkl'),'rb') as f:
      processor = pickle.load(f)
  print('loaded processor')
  return cls(model,processor)

proprocess.py

from tensorflow.python.keras.preprocessing import sequence
from tensorflow.keras.preprocessing import text


class TextPreprocessor(object):
 def __init__(self,vocab_size,max_sequence_length):
   self._vocab_size = vocab_size
   self._max_sequence_length = max_sequence_length
   self._tokenizer = None

 def fit(self,text_list):       
   # Create vocabulary from input corpus.
   tokenizer = text.Tokenizer(num_words=self._vocab_size)
   tokenizer.fit_on_texts(text_list)
   self._tokenizer = tokenizer

 def transform(self,text_list):       
   # Transform text to sequence of integers
   text_sequence = self._tokenizer.texts_to_sequences(text_list)

   # Fix sequence length to max value. Sequences shorter than the length are
   # padded in the beginning and sequences longer are truncated
   # at the beginning.
   padded_text_sequence = sequence.pad_sequences(text_sequence,maxlen=self._max_sequence_length,padding='post')
   return padded_text_sequence

setup.py

from setuptools import setup

setup(
 name="my_package",version="1.6",include_package_data=True,scripts=["preprocess.py","model_prediction.py","torch_model.py"],install_requires=['torch @ https://download.pytorch.org/whl/cpu/torch-1.7.0%2Bcpu-cp37-cp37m-linux_x86_64.whl']
)

torch_model.py

import torch
import torch.nn as nn
from torch.autograd import Variable
from tensorflow.python.keras.preprocessing import sequence
from tensorflow.keras.preprocessing import text

text_field_vocab_length = 15832  # this is set based on training data


class TorchTextClassifier(nn.Module):

    def __init__(self,dimension=128):
        super(TorchTextClassifier,self).__init__()

        self.embedding = nn.Embedding(text_field_vocab_length,300)
        self.dimension = dimension
        self.lstm = nn.LSTM(input_size=300,hidden_size=dimension,num_layers=1,batch_first=True,bidirectional=True)
        self.drop = nn.Dropout(p=0.5)

        self.fc = nn.Linear(2*dimension,2)

    def forward(self,text,text_len):

        text_emb = self.embedding(text)

        packed_input = pack_padded_sequence(text_emb,text_len,enforce_sorted=False)
        packed_output,_ = self.lstm(packed_input)
        output,_ = pad_packed_sequence(packed_output,batch_first=True)

        out_forward = output[range(len(output)),text_len - 1,:self.dimension]
        out_reverse = output[:,self.dimension:]
        out_reduced = torch.cat((out_forward,out_reverse),1)
        text_fea = self.drop(out_reduced)

        text_fea = self.fc(text_fea)
        text_fea = torch.squeeze(text_fea,1)
        text_out = torch.softmax(text_fea,dim=-1)

        return text_out

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

google-ai-platform pytorch

无法在 Google AI Platform 上部署 pytorch 文本分类器模型，错误：无法从“prediction_server_beta.py”中获取 <module 'main' 上的属性>

问题描述

解决方法