问题描述
我有一个数据集,我需要对其进行特征选择,因此我将有 4 个不同的模型,我需要使用多数票。在那之前它一直在工作,但现在我需要使用 gridsearch 来检查我的模型的参数,但我遇到了困难。如果有人可以帮助我,我将不胜感激。
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import gridsearchcv
from sklearn.metrics import classification_report
from pandas import read_csv
from mlxtend.classifier import EnsembleVoteClassifier
from sklearn.metrics import accuracy_score
from sklearn2pmml import make_pmml_pipeline
from sklearn2pmml import sklearn2pmml
from sklearn.compose import ColumnTransformer,make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn.ensemble._voting import VotingClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.metrics import roc_auc_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.model_selection import KFold
from sklearn.metrics import precision_score #precision
from sklearn.metrics import recall_score #recall
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from sklearn.ensemble import RandomForestClassifier
print(__doc__)
fold1_train = 'D:/ARTIGO/TREINAMENTO.CSV'
df_fold1_train = read_csv(fold1_train,header=None)
data_fold1_train = df_fold1_train.values
fold1_test = 'D:/ARTIGO/TESTE.CSV'
df_fold1_test = read_csv(fold1_test,header=None)
data_fold1_test = df_fold1_test.values
X_train_fold1 = data_fold1_train[:,:-1]
y_train_fold1 = data_fold1_train[:,-1]
X_test_fold1 = data_fold1_test[:,:-1]
y_test_fold1 = data_fold1_test[:,-1]
#features selection
features1 = [2,5,7]
features2 = [0,1,4,7]
features3 = [0,6]
features4 = [1,4]
numeric_transformer = Pipeline(steps=[('scaler',StandardScaler())])
preprocessor1 = ColumnTransformer(transformers=[('numerical',numeric_transformer,features1)])
preprocessor2 = ColumnTransformer(transformers=[('numerical',features2)])
preprocessor3 = ColumnTransformer(transformers=[('numerical',features3)])
preprocessor4 = ColumnTransformer(transformers=[('numerical',features4)])
#RandomForest
pipeline = PMMLPipeline([
('classifier',VotingClassifier([
("pipe1",Pipeline(steps=[('preprocessor1',preprocessor1),('classifier1',SVC())])),("pipe2",Pipeline(steps=[('preprocessor2',preprocessor2),('classifier2',("pipe3",Pipeline(steps=[('preprocessor3',preprocessor3),('classifier3',("pipe4",Pipeline(steps=[('preprocessor4',preprocessor4),('classifier4',SVC())]))
]))
])
# Set the parameters by cross-validation
tuned_parameters = [{'classifier__kernel': ['rbf','linear']
#,'pipe1__gamma': [1e-3,1e-4]
}]
scores = ['precision']
for score in scores:
print("# Tuning hyper-parameters for %s" % score)
print()
cv = KFold(n_splits=5)
clf = gridsearchcv(
estimator=pipeline,param_grid=tuned_parameters,n_jobs=-1,verbose=1,cv=cv,scoring='%s_macro' % score
)
clf.fit(X_train_fold1,y_train_fold1)
print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean,std,params in zip(means,stds,clf.cv_results_['params']):
print("%0.3f (+/-%0.03f) for %r"
% (mean,std * 2,params))
print()
print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true,y_pred = y_test_fold1,clf.predict(X_test_fold1)
print(classification_report(y_true,y_pred))
print()
ValueError: Invalid parameter kernel for estimator VotingClassifier(estimators=[('pipe1',ColumnTransformer(transformers=[('numerical',Pipeline(steps=[('scaler',StandardScaler())]),[2,7])])),('pipe2',[0,('...())])),('pipe3',6])])),('pipe4',[1,4])])),SVC())]))]). Check the list of available parameters with `estimator.get_params().keys()`.
"""
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)