问题描述
我正在通过一个玩具示例来学习 `RandomizedSearchCV`。假设我要建立线性模型 y = ax + b。我编写了一个自定义 sklearn 估算器,如下所示:
估算器的定义如下,之后我需要对它进行测试:
import numpy as np
# import the base estimator
from sklearn.base import BaseEstimator,RegressorMixin
class testEstimator(BaseEstimator,RegressorMixin):
    """Toy estimator for the fixed linear model ``y = alpha * x + beta``.

    Nothing is learned from the targets: ``alpha`` and ``beta`` are plain
    constructor parameters, and ``fit`` only evaluates the line on the
    training inputs and caches the result in ``mu_``.
    """

    def __init__(self, alpha=1, beta=0):
        # Stored unmodified under the argument names so that the
        # BaseEstimator parameter machinery (get_params / set_params /
        # clone) can read and reset them.
        self.alpha = alpha
        self.beta = beta

    def fit(self, X, y=None):
        """Evaluate ``alpha * x + beta`` on the training inputs.

        The result is cached as ``self.mu_``. ``y`` is accepted but not
        used. Returns ``self``.
        """
        # Vectorised replacement for the element-by-element loop.
        # Assumes X is a 1-D sequence of numbers -- TODO confirm.
        self.mu_ = self.alpha * np.asarray(X, dtype=float) + self.beta
        return self

    def predict(self, X):
        """Return the vector cached by ``fit``.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.

        NOTE(review): ``X`` is not used. The returned vector always has the
        length of the data passed to ``fit``, so it cannot be compared with
        targets of any other length (e.g. a held-out fold).
        """
        # hasattr only reacts to a missing attribute; the previous bare
        # ``except:`` would also have swallowed unrelated exceptions.
        if not hasattr(self, "mu_"):
            raise RuntimeError("You must train classifer before predicting data!")
        return self.mu_

    def score(self, X, y):
        """Return the mean squared difference between ``y`` and ``predict(X)``.

        ``X`` is part of the signature because the body needs it and because
        scikit-learn invokes ``estimator.score(X_test, y_test)``.

        NOTE(review): this value is an error (lower is better), while
        scikit-learn's search utilities select the highest score -- confirm
        the intended sign before relying on the search result.
        """
        # Debug output kept from the original implementation.
        print("y: ",y)
        print("mu: ",self.mu_)
        return np.dot(y - self.predict(X),y - self.predict(X))/len(X)
然后我用下面的代码对它进行测试,但是得到了错误:
# RandomizedSearchCV is used below but was never imported in this snippet.
from sklearn.model_selection import RandomizedSearchCV

# Estimator instance that the search clones for every candidate.
tempEs = testEstimator()
# Candidate values for each constructor parameter.
params_grid_temp = {'alpha': [0,1,2,3,4,5,6],'beta': [0,4]}
# Randomized search over the candidates above (default settings otherwise).
temp = RandomizedSearchCV(tempEs,params_grid_temp)
# Toy data generated from y = 2x + 4.
X = range(10)
y = np.dot(2,range(10)) + 4
# Run the search.
temp.fit(X,y)
我发现错误信息中出现了形状 (2,) 和 (8,),完整的回溯如下:
ValueError Traceback (most recent call last)
<ipython-input-8-72a46fdf9098> in <module>
9 y = np.dot(2,range(10)) + 4
10 # fit model
---> 11 temp.fit(X,y)
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args,**kwargs)
71 FutureWarning)
72 kwargs.update({k: arg for k,arg in zip(sig.parameters,args)})
---> 73 return f(**kwargs)
74 return inner_f
75
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self,y,groups,**fit_params)
734 return results
735
--> 736 self._run_search(evaluate_candidates)
737
738 # For multi-metric evaluation,store the best_index_,best_params_ and
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self,evaluate_candidates)
1527 def _run_search(self,evaluate_candidates):
1528 """Search n_iter candidates from param_distributions"""
-> 1529 evaluate_candidates(ParameterSampler(
1530 self.param_distributions,self.n_iter,
1531 random_state=self.random_state))
~\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
706 n_splits,n_candidates,n_candidates * n_splits))
707
--> 708 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
709 X,
710 train=train,test=test,
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self,iterable)
1027 # remaining jobs.
1028 self._iterating = False
-> 1029 if self.dispatch_one_batch(iterator):
1030 self._iterating = self._original_iterator is not None
1031
~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self,iterator)
845 return False
846 else:
--> 847 self._dispatch(tasks)
848 return True
849
~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self,batch)
763 with self._lock:
764 job_idx = len(self._jobs)
--> 765 job = self._backend.apply_async(batch,callback=cb)
766 # A job can complete so quickly than its callback is
767 # called before we get here,causing self._jobs to
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self,func,callback)
206 def apply_async(self,callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self,batch)
570 # Don't delay the application,to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
250 # change the default number of processes to -1
251 with parallel_backend(self._backend,n_jobs=self._n_jobs):
--> 252 return [func(*args,**kwargs)
253 for func,args,kwargs in self.items]
254
~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
250 # change the default number of processes to -1
251 with parallel_backend(self._backend,n_jobs=self._n_jobs):
--> 252 return [func(*args,**kwargs)
253 for func,args,kwargs in self.items]
254
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator,scorer,train,test,verbose,parameters,fit_params,return_train_score,return_parameters,return_n_test_samples,return_times,return_estimator,error_score)
558 else:
559 fit_time = time.time() - start_time
--> 560 test_scores = _score(estimator,X_test,y_test,scorer)
561 score_time = time.time() - start_time - fit_time
562 if return_train_score:
~\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator,scorer)
605 scores = scorer(estimator,X_test)
606 else:
--> 607 scores = scorer(estimator,y_test)
608
609 error_msg = ("scoring must return a number,got %s (%s) "
~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in __call__(self,estimator,*args,**kwargs)
88 *args,**kwargs)
89 else:
---> 90 score = scorer(estimator,**kwargs)
91 scores[name] = score
92 return scores
~\Anaconda3\lib\site-packages\sklearn\metrics\_scorer.py in _passthrough_scorer(estimator,**kwargs)
370 def _passthrough_scorer(estimator,**kwargs):
371 """Function that wraps estimator.score"""
--> 372 return estimator.score(*args,**kwargs)
373
374
<ipython-input-7-0c2138d9bf96> in score(self,y)
20 print("y: ",y)
21 print("mu: ",self.mu_)
---> 22 return np.dot(y - self.predict(X),y - self.predict(X))/len(X)
ValueError: operands could not be broadcast together with shapes (2,) (8,)
其中 (2,) 表示 `y` 的大小,而 (8,) 表示 `mu_` 的大小。这是怎么发生的?应该是 10。
解决方法
好的,我发现了问题。您的 `predict` 方法完全错了:它必须返回对传入的 `X` 的预测值,而不是返回训练时保存的 `mu_`。交叉验证时 `mu_` 的长度等于训练折的大小 (8),而评分用的 `y` 只有测试折的大小 (2),所以两者无法广播。
def predict(self,X):
    """Return the prediction ``alpha * x + beta`` for every entry of ``X``.

    ``X`` may be any array-like (list, range, ndarray); the result is a
    NumPy array with one prediction per input value.
    """
    # Convert first: multiplying a plain list by an int repeats the list,
    # and a range does not support multiplication at all.
    return self.alpha * np.asarray(X) + self.beta
就是这样。您也可以顺便优化 `fit` 方法中的代码。