问题描述
我想继承 sklearn.svm.LinearSVC,并将其用作 sklearn.model_selection.GridSearchCV 的估计器(estimator)。我之前遇到过一些关于子类化的问题,我想我已经根据之前的帖子和选定的答案修复了它们。
但是,现在我的目标是创建一个 sklearn.kernel_approximation.RBFSampler 对象作为我的新类的属性。这只是一个示例,我其实有一个更宽泛的问题:在与 GridSearchCV 一起使用时,如何根据传递给构造函数的参数值来创建(或不创建)某个属性?
到目前为止,我已经尝试过以下操作:
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification
from sklearn.kernel_approximation import RBFSampler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
RANDOM_STATE = 123
class LinearSVCSub(LinearSVC):
    """LinearSVC subclass that tries to attach an RBFSampler as an attribute.

    This is the question's original attempt. The constructor accepts every
    LinearSVC hyper-parameter plus ``sampler_gamma`` and ``sampler_n``, and
    tries to build the sampler eagerly inside ``__init__``.
    """

    def __init__(self, penalty='l2', loss='squared_hinge', sampler_gamma=None,
                 sampler_n=None, dual=True, tol=0.0001, C=1.0,
                 multi_class='ovr', fit_intercept=True, intercept_scaling=1,
                 class_weight=None, verbose=0, random_state=None,
                 max_iter=1000):
        super(LinearSVCSub, self).__init__(
            penalty=penalty, loss=loss, dual=dual, tol=tol, C=C,
            multi_class=multi_class, fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling, class_weight=class_weight,
            verbose=verbose, random_state=random_state, max_iter=max_iter)
        self.sampler_gamma = sampler_gamma
        self.sampler_n = sampler_n
        # I have also tried a conditional statement here instead of
        # within a separate function create_sampler()
        # NOTE: the method is called without ``self.``; this bare name is what
        # raises the NameError this question is about, so it is left as asked.
        self.sampler = create_sampler()

    def fit(self, X, y, sample_weight=None):
        """Transform ``X`` with the sampler (if any), then fit the SVC."""
        X = self.transform_this(X)
        super(LinearSVCSub, self).fit(X, y, sample_weight)
        return self

    def predict(self, X):
        """Transform ``X`` with the sampler (if any), then predict labels."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).predict(X)

    def score(self, X, y, sample_weight=None):
        """Transform ``X`` with the sampler (if any), then score against ``y``."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).score(X, y, sample_weight)

    def decision_function(self, X):
        """Transform ``X`` with the sampler (if any), then return decision values."""
        X = self.transform_this(X)
        return super(LinearSVCSub, self).decision_function(X)

    def transform_this(self, X):
        """Return ``X`` passed through the sampler, or unchanged without one."""
        if self.sampler is not None:
            # NOTE: bare ``sampler`` (not ``self.sampler``) is kept from the
            # question as posted; it would raise NameError if reached.
            X = sampler.fit_transform(X)
        return X

    def create_sampler(self):
        """Build an RBFSampler when both sampler parameters are set, else None."""
        # If sampler_gamma and sampler_n have been given, create a sampler
        if (self.sampler_gamma is not None) and (self.sampler_n is not None):
            sampler = RBFSampler(gamma=self.sampler_gamma,
                                 n_components=self.sampler_n)
        else:
            sampler = None
        return sampler
if __name__ == '__main__':
    # Load the breast-cancer data as a feature matrix and a label vector.
    data = load_breast_cancer()
    X, y = data.data, data.target

    # Parameter tuning with custom LinearSVC
    param_grid = {
        'C': [0.00001, 0.0005],
        'dual': (True, False),
        'random_state': [RANDOM_STATE],
        'sampler_gamma': [0.90, 0.60, 0.30],
        'sampler_n': [10, 200],
    }
    # The class is spelled GridSearchCV; the lower-case name does not exist
    # in sklearn.model_selection.
    gs_model = GridSearchCV(estimator=LinearSVCSub(), verbose=1,
                            param_grid=param_grid, scoring='roc_auc',
                            n_jobs=-1, cv=2)
    gs_model.fit(X, y)
    gs_model.cv_results_
但是,据我了解(见此处的链接),GridSearchCV 会先使用默认参数值实例化估计器对象,其实现方式与 sklearn.tree.DecisionTreeClassifier 中的 feature_importances_ 属性类似。
运行上面的代码会得到如下错误:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-6-a11420cc931e> in <module>
66 'sampler_n': [10,200]}
67
---> 68 gs_model = GridSearchCV(estimator=LinearSVCSub(),
     69 scoring='roc_auc',cv=2)
70 gs_model.fit(X,y)
<ipython-input-6-a11420cc931e> in __init__(self,penalty,loss,sampler_gamma,sampler_n,dual,tol,C,multi_class,fit_intercept,intercept_scaling,class_weight,verbose,random_state,max_iter)
21 self.sampler_n = sampler_n
22
---> 23 self.sampler = create_sampler()
24
25
NameError: name 'create_sampler' is not defined
解决方法
- 仅将 `__init__` 构造函数用作存储属性(参数)的容器。
- 在方法中执行所有相应的逻辑。
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.kernel_approximation import RBFSampler
from sklearn.datasets import load_breast_cancer
RANDOM_STATE = 123
class LinearSVCSub(LinearSVC):
    """LinearSVC that can first project its inputs with an RBFSampler.

    ``__init__`` only stores hyper-parameters, so the estimator can be cloned
    and re-parameterised by GridSearchCV; all sampler logic lives in the
    methods.

    Parameters (in addition to those of LinearSVC)
    ----------------------------------------------
    sampler_gamma : passed to ``RBFSampler`` as ``gamma``.
    sampler_n : passed to ``RBFSampler`` as ``n_components``.
    sampler : truthy to enable the RBF feature map, falsy to use raw inputs.

    Attributes
    ----------
    sampler_ : the RBFSampler fitted during ``fit``, or ``None`` when the
        feature map is disabled.
    """

    def __init__(self, penalty='l2', loss='squared_hinge', sampler_gamma=None,
                 sampler_n=None, dual=True, tol=0.0001, C=1.0,
                 multi_class='ovr', fit_intercept=True, intercept_scaling=1,
                 class_weight=None, verbose=0, random_state=None,
                 max_iter=1000, sampler=None):
        super().__init__(
            penalty=penalty, loss=loss, dual=dual, tol=tol, C=C,
            multi_class=multi_class, fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling, class_weight=class_weight,
            verbose=verbose, random_state=random_state, max_iter=max_iter)
        self.sampler_gamma = sampler_gamma
        self.sampler_n = sampler_n
        self.sampler = sampler

    def fit(self, X, y, sample_weight=None):
        """Fit the optional feature map on ``X``, then fit the SVC on its output.

        Returns ``self``.
        """
        if self.sampler:
            # Fit the random feature map exactly once, on the training data.
            # Re-creating it on every call would draw new random weights, so
            # predict-time features would not match what the SVC was trained on.
            self.sampler_ = RBFSampler(
                gamma=self.sampler_gamma,
                n_components=self.sampler_n,
                random_state=self.random_state,
            ).fit(X)
        else:
            self.sampler_ = None
        super().fit(self.transform_this(X), y, sample_weight)
        return self

    def predict(self, X):
        """Predict class labels for ``X`` after applying the fitted feature map."""
        return super().predict(self.transform_this(X))

    def score(self, X, y, sample_weight=None):
        """Score predictions for ``X`` against ``y`` after the feature map."""
        return super().score(self.transform_this(X), y, sample_weight)

    def decision_function(self, X):
        """Return decision values for ``X`` after applying the feature map."""
        return super().decision_function(self.transform_this(X))

    def transform_this(self, X):
        """Map ``X`` through the sampler fitted in ``fit``.

        ``X`` is returned unchanged when no sampler has been fitted (the
        feature map is disabled, or ``fit`` has not been called yet).
        """
        fitted_sampler = getattr(self, 'sampler_', None)
        if fitted_sampler is None:
            return X
        return fitted_sampler.transform(X)
# Load the breast-cancer data as a feature matrix and a label vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Parameter tuning with custom LinearSVC: 'sampler' switches the RBF feature
# map off (0) or on (1); the other keys are searched exhaustively.
param_grid = dict(
    C=[0.00001, 0.0005],
    dual=(True, False),
    random_state=[RANDOM_STATE],
    sampler_gamma=[0.90, 0.60, 0.30],
    sampler_n=[10, 200],
    sampler=[0, 1],
)
gs_model = GridSearchCV(
    LinearSVCSub(sampler=1),
    param_grid,
    scoring='roc_auc',
    n_jobs=-1,
    cv=2,
    verbose=1,
)
gs_model.fit(X, y)
gs_model.cv_results_