问题描述
我正在尝试从 sklearn.svm.LinearSVC 创建一个子类,以用作 sklearn.model_selection.GridSearchCV 的估计器(estimator)。该子类有一个额外的方法,在此示例中该方法不执行任何操作。但是,运行这段代码时我遇到了一个错误,而且似乎无法调试。复制并粘贴下面的代码运行,应该可以重现该错误:ValueError: Input contains NaN, infinity or a value too large for dtype('float64')
一旦这段代码能够正常运行,我希望为方法 transform_this() 添加更多功能。
有人可以告诉我哪里出了问题吗?根据this,我起初以为是我的数据存在问题。但是,由于我已使用sklearn内置数据集重现了该错误,因此似乎并非如此。另外,我相信我已按照此前一个问题(here)的回答正确地创建了子类。另外,据我了解,GridSearchCV 似乎并不会以其他方式初始化估计器(我认为它会先使用默认参数,正如我从this post中看到的那样)。
from sklearn.datasets import load_breast_cancer
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

RANDOM_STATE = 123


class LinearSVCSub(LinearSVC):
    """LinearSVC subclass that passes X through ``transform_this`` first.

    NOTE(review): ``fit``, ``predict``, ``score`` and ``decision_function``
    below call the parent implementation but discard its result, so each of
    them returns ``None``. This listing is the reproduction case for the
    reported error and is kept that way deliberately.
    """

    def __init__(
        self,
        penalty='l2',
        loss='squared_hinge',
        additional_parameter1=1,
        additional_parameter2=100,
        dual=True,
        tol=0.0001,
        C=1.0,
        multi_class='ovr',
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        verbose=0,
        random_state=None,
        max_iter=1000,
    ):
        """Forward the LinearSVC parameters and store the two extra ones.

        ``additional_parameter1`` and ``additional_parameter2`` are only
        stored on the instance; no method in this class reads them yet.
        """
        super(LinearSVCSub, self).__init__(
            penalty=penalty,
            loss=loss,
            dual=dual,
            tol=tol,
            C=C,
            multi_class=multi_class,
            fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling,
            class_weight=class_weight,
            verbose=verbose,
            random_state=random_state,
            max_iter=max_iter,
        )
        self.additional_parameter1 = additional_parameter1
        self.additional_parameter2 = additional_parameter2

    def fit(self, X, y, sample_weight=None):
        """Transform X, then fit the parent LinearSVC (result is discarded)."""
        X = self.transform_this(X)
        super(LinearSVCSub, self).fit(X, y, sample_weight)

    def predict(self, X):
        """Transform X, then call the parent ``predict`` (result is discarded)."""
        X = self.transform_this(X)
        super(LinearSVCSub, self).predict(X)

    def score(self, X, y, sample_weight=None):
        """Transform X, then call the parent ``score`` (result is discarded)."""
        X = self.transform_this(X)
        super(LinearSVCSub, self).score(X, y, sample_weight)

    def decision_function(self, X):
        """Transform X, then call the parent ``decision_function`` (result is discarded)."""
        X = self.transform_this(X)
        super(LinearSVCSub, self).decision_function(X)

    def transform_this(self, X):
        """Return X unchanged; placeholder for a future transformation."""
        return X


if __name__ == '__main__':
    data = load_breast_cancer()
    X, y = data.data, data.target

    # Parameter tuning with custom LinearSVC
    param_grid = {
        'C': [0.00001, 0.0001, 0.0005],
        'dual': (True, False),
        'random_state': [RANDOM_STATE],
        'additional_parameter1': [0.90, 0.80, 0.60, 0.30],
        'additional_parameter2': [20, 30],
    }
    gs_model = GridSearchCV(
        estimator=LinearSVCSub(),
        verbose=1,
        param_grid=param_grid,
        scoring='roc_auc',
        n_jobs=-1,
    )
    gs_model.fit(X, y)
解决方法
您遇到了一些问题:
- 定义的方法没有return语句
- LinearSVC 在您选择的数据集上无法收敛
修正这些问题后,代码即可正常运行:
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
RANDOM_STATE = 123
class LinearSVCSub(LinearSVC):
    """LinearSVC subclass that passes X through ``transform_this`` first.

    Every overridden method transforms X, delegates to the parent
    implementation and returns the parent's result.
    """

    def __init__(
        self,
        penalty='l2',
        loss='squared_hinge',
        additional_parameter1=1,
        additional_parameter2=100,
        dual=True,
        tol=0.0001,
        C=1.0,
        multi_class='ovr',
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        verbose=0,
        random_state=None,
        max_iter=100000,
    ):
        """Forward the LinearSVC parameters and store the two extra ones.

        ``additional_parameter1`` and ``additional_parameter2`` are only
        stored on the instance; no method in this class reads them yet.
        """
        super(LinearSVCSub, self).__init__(
            penalty=penalty,
            loss=loss,
            dual=dual,
            tol=tol,
            C=C,
            multi_class=multi_class,
            fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling,
            class_weight=class_weight,
            verbose=verbose,
            random_state=random_state,
            max_iter=max_iter,
        )
        self.additional_parameter1 = additional_parameter1
        self.additional_parameter2 = additional_parameter2

    def fit(self, X, y, sample_weight=None):
        """Transform X, fit the parent LinearSVC on (X, y) and return self."""
        X = self.transform_this(X)
        # The targets must be forwarded; without y the parent cannot fit.
        super(LinearSVCSub, self).fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Transform X and return the parent's predictions."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).predict(X)

    def score(self, X, y, sample_weight=None):
        """Transform X and return the parent's score on (X, y)."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).score(X, y, sample_weight=sample_weight)

    def decision_function(self, X):
        """Transform X and return the parent's decision-function values."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).decision_function(X)

    def transform_this(self, X):
        """Return X unchanged; placeholder for a future transformation."""
        return X
# Generate a synthetic dataset using make_classification's default settings.
X, y = make_classification()

# Parameter tuning with custom LinearSVC
param_grid = {
    'C': [0.00001, 0.0001, 0.0005],
    'dual': (True, False),
    'random_state': [RANDOM_STATE],
    'additional_parameter1': [0.90, 0.80, 0.60, 0.30],
    'additional_parameter2': [20, 30],
}

# Grid search over the custom estimator, scored by ROC AUC in a single job.
gs_model = GridSearchCV(
    estimator=LinearSVCSub(),
    verbose=1,
    param_grid=param_grid,
    scoring='roc_auc',
    n_jobs=1,
)
gs_model.fit(X, y)
Fitting 5 folds for each of 48 candidates,totalling 240 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed: 0.9s finished
GridSearchCV(estimator=LinearSVCSub(), n_jobs=1, param_grid={'C': [1e-05, 0.0001, 0.0005], 'additional_parameter1': [0.9, 0.8, 0.6, 0.3], 'additional_parameter2': [20, 30], 'dual': (True, False), 'random_state': [123]}, scoring='roc_auc', verbose=1)
gs_model.predict(X)
array([0,1,1])