问题描述
有一个包含 3 位诗人作品的诗歌数据集,每首诗被标记为 1、2 或 3,分别对应诗人 1、诗人 2 和诗人 3。现在我们要基于这个数据集对这 3 位诗人的诗歌进行分类。下面是我用 sklearn 实现的随机森林分类器(RandomForestClassifier)模型。我的问题是:如何编写一个预测方法,使我可以手动输入一首诗,由模型预测出它的作者是哪位诗人?请帮忙。
代码如下:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
def accuracy_summary(pipeline, X_train, y_train, X_test, y_test):
    """Fit ``pipeline`` on the training split and report its test accuracy.

    Args:
        pipeline: Object exposing ``fit(X, y)`` whose return value exposes
            ``predict(X)``.
        X_train: Training inputs handed to ``pipeline.fit``.
        y_train: Training labels handed to ``pipeline.fit``.
        X_test: Held-out inputs handed to ``predict``.
        y_test: True labels for ``X_test``.

    Returns:
        The value returned by ``accuracy_score(y_test, y_pred)``. The same
        value is also printed, multiplied by 100, as a percentage.
    """
    poet_fit = pipeline.fit(X_train, y_train)
    y_pred = poet_fit.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("accuracy score: {0:.2f}%".format(accuracy*100))
    return accuracy
# Module-level defaults; they are captured as default argument values by
# nfeature_accuracy_checker at the moment that function is defined.
cv = CountVectorizer()
rf = RandomForestClassifier(class_weight="balanced")
# Vocabulary sizes to sweep: 10000, 15000, 20000, 25000 (the stop value
# 25001 keeps 25000 inside the half-open range). Requires numpy as ``np``.
n_features = np.arange(10000, 25001, 5000)
def nfeature_accuracy_checker(vectorizer=cv, n_features=n_features, stop_words=None, ngram_range=(1, 3), classifier=rf):
    """Measure test accuracy for each vocabulary size in ``n_features``.

    For every ``n`` the vectorizer is reconfigured, wrapped together with the
    classifier in a fresh ``Pipeline``, and scored via ``accuracy_summary``.

    Args:
        vectorizer: Text vectorizer supporting ``set_params`` with
            ``stop_words``, ``max_features`` and ``ngram_range``. It is
            modified in place, so the caller's object keeps the settings of
            the last iteration.
        n_features: Iterable of ``max_features`` values to try.
        stop_words: Forwarded to the vectorizer's ``stop_words`` parameter.
        ngram_range: Forwarded to the vectorizer's ``ngram_range`` parameter.
        classifier: Estimator used as the final pipeline step. The same
            object is refit on every iteration.

    Returns:
        A list of ``(n, accuracy)`` tuples, one per entry of ``n_features``,
        in iteration order.

    Note:
        Reads the module-level ``X_train``, ``y_train``, ``X_test`` and
        ``y_test``. They are not defined in this snippet and must exist
        before the function is called.
    """
    result = []
    print(classifier)
    print("\n")
    for n in n_features:
        vectorizer.set_params(stop_words=stop_words, max_features=n, ngram_range=ngram_range)
        checker_pipeline = Pipeline([
            ('vectorizer', vectorizer),
            ('classifier', classifier),
        ])
        print("Test result for {} features".format(n))
        # accuracy_summary needs the full train/test split, not just y_test.
        nfeature_accuracy = accuracy_summary(checker_pipeline, X_train, y_train, X_test, y_test)
        result.append((n, nfeature_accuracy))
    return result
from sklearn.feature_extraction.text import TfidfVectorizer

# Repeat the vocabulary-size sweep with TF-IDF features.
tfidf = TfidfVectorizer(encoding='utf-8')
print("Result for trigram with stop words (Tfidf)\n")
# NOTE(review): the original call was truncated to `vectorizer=tfidf,2))`,
# which is a syntax error. `ngram_range=(1, 2)` is the most likely
# reconstruction; the banner above says "trigram", so confirm whether
# (1, 3) was intended instead.
feature_result_tgt = nfeature_accuracy_checker(vectorizer=tfidf, ngram_range=(1, 2))
from sklearn.metrics import classification_report

# Display names, in the order used for the classification report.
POET_NAMES = ['poet 1', 'poet 2', 'poet 3']

# NOTE(review): the original line was truncated to `max_features=20000,2))`;
# `ngram_range=(1, 2)` is the most likely reconstruction. Confirm.
cv = CountVectorizer(max_features=20000, ngram_range=(1, 2))
# NOTE(review): the original second step was truncated to a bare `rf)`; the
# step name 'classifier' is taken from the pipeline built in
# nfeature_accuracy_checker.
pipeline = Pipeline([
    ('vectorizer', cv),
    ('classifier', rf),
])
poet_fit = pipeline.fit(X_train, y_train)
y_pred = poet_fit.predict(X_test)
print(classification_report(y_test, y_pred, target_names=POET_NAMES))
# Argument order is (y_true, y_pred), matching accuracy_summary.
print('accuracy %s' % accuracy_score(y_test, y_pred))
print(y_pred)


def predict_poet(poem, model=poet_fit, names=POET_NAMES):
    """Predict the author of a single poem given as raw text.

    Args:
        poem: The poem as one string.
        model: Fitted pipeline exposing ``predict`` and ``classes_``;
            defaults to the pipeline fitted above.
        names: Display names paired positionally with ``model.classes_``.

    Returns:
        The display name for the predicted label, or the raw label itself
        when no name is paired with it.
    """
    # predict() expects an iterable of documents, so wrap the single poem in
    # a list and take the only prediction back out.
    label = model.predict([poem])[0]
    return dict(zip(model.classes_, names)).get(label, label)


# Example: predict_poet("text of a poem typed in by hand")
解决方法
暂未找到可以解决该问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)