问题描述
我想将用户输入的阿拉伯语内容翻译为英语,然后送入模型进行处理,但这种方法不起作用。有人能帮帮我吗?
from flask import Flask,render_template,url_for,request,jsonify
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
import pickle
import numpy as np
import googletrans
from googletrans import Translator
# Create the Flask application object that the route decorators below attach to.
app = Flask(__name__)
# Load the pickled TF-IDF vectorizer (with its vocabulary) for each category.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# artifacts must come from a trusted source.
with open(r"toxic_vect.pkl", "rb") as f:
    tox = pickle.load(f)
with open(r"severe_toxic_vect.pkl", "rb") as f:
    sev = pickle.load(f)
with open(r"obscene_vect.pkl", "rb") as f:
    obs = pickle.load(f)
with open(r"insult_vect.pkl", "rb") as f:
    ins = pickle.load(f)
with open(r"threat_vect.pkl", "rb") as f:
    thr = pickle.load(f)
with open(r"identity_hate_vect.pkl", "rb") as f:
    ide = pickle.load(f)
# Load the pickled RDF models, one classifier per category. Each one is later
# called through predict_proba() on the matching vectorizer's output.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# artifacts must come from a trusted source.
with open(r"toxic_model.pkl", "rb") as f:
    tox_model = pickle.load(f)
with open(r"severe_toxic_model.pkl", "rb") as f:
    sev_model = pickle.load(f)
with open(r"obscene_model.pkl", "rb") as f:
    obs_model = pickle.load(f)
with open(r"insult_model.pkl", "rb") as f:
    ins_model = pickle.load(f)
with open(r"threat_model.pkl", "rb") as f:
    thr_model = pickle.load(f)
with open(r"identity_hate_model.pkl", "rb") as f:
    ide_model = pickle.load(f)
# Render the HTML file for the home page
@app.route("/")
def home():
    """Return the rendered ``index_toxic.html`` template for the root URL."""
    return render_template('index_toxic.html')
@app.route("/predict", methods=['POST'])
def predict():
    """Translate the submitted text to English and score it for every category.

    Reads the ``text`` field of the POSTed form, translates it with
    googletrans, runs each category's vectorizer/model pair on the translated
    text and renders ``index_toxic.html`` with one labelled probability per
    category.

    Returns:
        The rendered template with ``pred_tox``, ``pred_sev``, ``pred_obs``,
        ``pred_ins``, ``pred_thr`` and ``pred_ide`` set to strings of the form
        ``'Prob (<label>): <value>'``, the value rounded to two decimals.
    """
    # Take a string input from user
    text = request.form['text']

    # Translate the input to English. Blank input is passed through untouched:
    # there is nothing to translate, so the network round-trip is skipped.
    if text.strip():
        text = Translator().translate(text, dest='en').text

    # transform() needs an iterable of documents. Passing the bare string
    # raises "Iterable over raw text documents expected, string object
    # received", so the single comment is wrapped in a one-element list.
    documents = [text]

    # (template variable, display label, vectorizer, model) for each category.
    categories = (
        ('pred_tox', 'Toxic', tox, tox_model),
        ('pred_sev', 'Severe Toxic', sev, sev_model),
        ('pred_obs', 'Obscene', obs, obs_model),
        ('pred_ins', 'Insult', ins, ins_model),
        ('pred_thr', 'Threat', thr, thr_model),
        ('pred_ide', 'Identity Hate', ide, ide_model),
    )

    scores = {}
    for key, _label, vectorizer, model in categories:
        features = vectorizer.transform(documents)
        # Column 1 of predict_proba is the second entry of model.classes_
        # (the positive label for a 0/1 target); row 0 is the only document.
        scores[key] = round(model.predict_proba(features)[:, 1][0], 2)

    # Debug trace of the toxic score, kept from the original implementation.
    print(scores['pred_tox'])

    context = {
        key: 'Prob ({}): {}'.format(label, scores[key])
        for key, label, _vectorizer, _model in categories
    }
    return render_template('index_toxic.html', **context)
# Start the development server only when this file is executed as a script,
# so that importing the module (for example from a WSGI server) does not
# block on app.run().
# debug=True makes the server reload itself when the code changes, so there is
# no need to keep restarting it. It also enables the interactive debugger,
# which must never be exposed in production.
if __name__ == "__main__":
    app.run(debug=True)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 2464,in __call__
return self.wsgi_app(environ,start_response)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 2450,in wsgi_app
response = self.handle_exception(e)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1867,in handle_exception
reraise(exc_type,exc_value,tb)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\_compat.py",line 39,in reraise
raise value
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 2447,in wsgi_app
response = self.full_dispatch_request()
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1952,in full_dispatch_request
rv = self.handle_user_exception(e)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1821,in handle_user_exception
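reraise(exc_type,exc_value,tb)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\_compat.py",line 39,in reraise
raise value
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1950,in full_dispatch_request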
rv = self.dispatch_request()
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\flask\app.py",line 1936,in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "C:\Users\elidr\Desktop\PFE - copie\toxic_comments_classifier-master\toxic_comments_classifier-master\Flask app for toxic comments\toxic_app.py",line 66,in predict
vect = tox.transform(data)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\sklearn\feature_extraction\text.py",line 1898,in transform
X = super().transform(raw_documents)
File "C:\Users\elidr\.conda\envs\toxic\Lib\site-packages\sklearn\feature_extraction\text.py",line 1264,in transform
raise ValueError(
ValueError: Iterable over raw text documents expected,string object received.
解决方法
错误提示:
Iterable over raw text documents expected,string object received
该错误发生在您代码中的这一行调用:
vect = tox.transform(data)
从sklearn\feature_extraction\text.py,我们了解到:
raw_documents : iterable
An iterable which yields either str,unicode or file objects
因此,需要将 data 从字符串对象转换为可产生字符串的可迭代对象:
def raw_documents(data):
    """Yield each line of *data* as a separate raw text document.

    Wraps a single string in the iterable-of-strings form expected by a
    vectorizer's ``transform()``. The text is split with ``str.splitlines()``,
    so multi-line input produces one document per line and an empty string
    produces no documents at all.

    Args:
        data: The text to split.

    Yields:
        Each line of *data*, without its line terminator.

    Raises:
        TypeError: If *data* is not a ``str``. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # Explicit check instead of ``assert``: assertions are stripped under -O.
    if not isinstance(data, str):
        raise TypeError(
            "raw_documents() expects str, got {}".format(type(data).__name__)
        )
    yield from data.splitlines()
并像这样使用它:
# Translate the input, then hand the vectorizer an iterable of strings rather
# than a bare str. raw_documents() yields one document per line, so multi-line
# text produces several rows in vect.
translation = Translator().translate(data)
data = translation.text
vect = tox.transform(raw_documents(data))