如何将用户用阿拉伯语输入的答案翻译成英语

问题描述

我想先把用户用阿拉伯语输入的答案翻译成英语,然后再送入模型中进行训练,但这个方法不起作用。如果有人能帮帮我就太好了!

from flask import Flask,render_template,url_for,request,jsonify      
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer 
import pickle
import numpy as np
import googletrans
from googletrans import Translator
# The Flask application object; the route decorators below register on it.
app = Flask(__name__)

def _load_pickle(path):
    """Return the object unpickled from the file at *path*.

    NOTE(security): unpickling can execute arbitrary code, so only load
    ``.pkl`` files that come from a trusted source.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


# Load the TF-IDF vocabulary specific to the category

tox = _load_pickle(r"toxic_vect.pkl")
sev = _load_pickle(r"severe_toxic_vect.pkl")
obs = _load_pickle(r"obscene_vect.pkl")
ins = _load_pickle(r"insult_vect.pkl")
thr = _load_pickle(r"threat_vect.pkl")
ide = _load_pickle(r"identity_hate_vect.pkl")

# Load the pickled RDF models

tox_model = _load_pickle(r"toxic_model.pkl")
sev_model = _load_pickle(r"severe_toxic_model.pkl")
obs_model = _load_pickle(r"obscene_model.pkl")
ins_model = _load_pickle(r"insult_model.pkl")
thr_model = _load_pickle(r"threat_model.pkl")
ide_model = _load_pickle(r"identity_hate_model.pkl")

# Render the HTML file for the home page

@app.route("/")
def home():
    """Serve the home page by rendering ``index_toxic.html``."""
    landing_page = render_template('index_toxic.html')
    return landing_page

@app.route("/predict",methods=['POST'])
def predict():
    """Score the submitted text with every per-category classifier.

    Reads the ``text`` field of the POSTed form, runs it through
    ``Translator().translate`` and keeps the translated string, then feeds
    that string to each vectorizer/model pair.  The six scores, rounded to
    two decimals, are rendered into ``index_toxic.html``.
    """
    # Take a string input from user
    submitted = request.form['text']

    # translate data to english (no target language is passed, so this
    # relies on the translator's default destination)
    translated = Translator().translate(submitted).text

    # The vectorizers are given a one-element list rather than a bare string.
    documents = [translated]

    def score(vectorizer, model):
        # Column 1 of predict_proba -- presumably the positive class;
        # confirm against how the models were trained.
        features = vectorizer.transform(documents)
        return model.predict_proba(features)[:, 1]

    pred_tox = score(tox, tox_model)
    pred_sev = score(sev, sev_model)
    pred_obs = score(obs, obs_model)
    pred_thr = score(thr, thr_model)
    pred_ins = score(ins, ins_model)
    pred_ide = score(ide, ide_model)

    out_tox, out_sev, out_obs, out_ins, out_thr, out_ide = (
        round(pred[0], 2)
        for pred in (pred_tox, pred_sev, pred_obs, pred_ins, pred_thr, pred_ide)
    )

    print(out_tox)

    template_values = {
        'pred_tox': 'Prob (Toxic): {}'.format(out_tox),
        'pred_sev': 'Prob (Severe Toxic): {}'.format(out_sev),
        'pred_obs': 'Prob (Obscene): {}'.format(out_obs),
        'pred_ins': 'Prob (Insult): {}'.format(out_ins),
        'pred_thr': 'Prob (Threat): {}'.format(out_thr),
        'pred_ide': 'Prob (Identity Hate): {}'.format(out_ide),
    }
    return render_template('index_toxic.html', **template_values)
 
# Server reloads itself if code changes so no need to keep restarting:

if __name__ == "__main__":
    # Start the development server only when this file is executed as a
    # script, so that merely importing the module does not block on it.
    app.run(debug=True)

这是我在html页面中看到的错误

File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 2464,in __call__
    return self.wsgi_app(environ,start_response)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 2450,in wsgi_app
    response = self.handle_exception(e)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1867,in handle_exception
    reraise(exc_type,exc_value,tb)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\_compat.py",line 39,in reraise
    raise value
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 2447,in wsgi_app
    response = self.full_dispatch_request()
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1952,in full_dispatch_request
    rv = self.handle_user_exception(e)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1821,in handle_user_exception
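    reraise(exc_type,exc_value,tb)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\_compat.py",line 39,in reraise
    raise value
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1950,in full_dispatch_request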
    rv = self.dispatch_request()
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1936,in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
File "C:\Users\elidr\Desktop\PFE - copie\toxic_comments_classifier-master\toxic_comments_classifier-master\Flask app for toxic comments\toxic_app.py",line 66,in predict
   vect = tox.transform(data)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\sklearn\feature_extraction\text.py",line 1898,in transform
    X = super().transform(raw_documents)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\sklearn\feature_extraction\text.py",line 1264,in transform
    raise ValueError(
ValueError: Iterable over raw text documents expected,string object received.

解决方法

错误提示:

Iterable over raw text documents expected,string object received

发生在调用

vect = tox.transform(data)

时,该调用位于您的代码中。

从 sklearn\feature_extraction\text.py 中,我们了解到:

raw_documents : iterable
            An iterable which yields either str,unicode or file objects

因此将data从字符串对象转换为可产生字符串的可迭代对象:

def raw_documents(data):
    """Yield the lines of *data* one at a time, each as its own document.

    *data* is split with ``splitlines()``; every resulting line is asserted
    to be a ``str`` immediately before it is yielded.
    """
    def _checked(document):
        assert isinstance(document, str)
        return document

    # map() is lazy, so each line is checked only when it is consumed.
    yield from map(_checked, data.splitlines())

并像这样使用它:

# Translate the submitted text and keep only the translated string.
translator = Translator()
result = translator.translate(data)
data = result.text
# Hand transform() a generator of lines instead of the bare string it rejects.
vect = tox.transform(raw_documents(data))

相关问答

Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其...
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。...
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbc...