问题描述
我正在尝试使用带有负二项式族(NegativeBinomial)的 statsmodels GLM 对时间序列数据建模。在使用默认的协方差类型 nonrobust 时,模型可以正常工作,但是我想使用 cov_type 选项中的 hac-groupsum。
以下是cov_type选项的链接: https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.OLSResults.get_robustcov_results.html
但是,在定义 cov_kwds 时,我总是遇到错误。我尝试过使用 X 的索引、X 的 Time 变量,还尝试过为 time 关键字创建一个包含 1 到 73 的整数的列表、数组和 DataFrame。
下面先是可以正常运行的代码(默认的 nonrobust 协方差),随后是改用 hac-groupsum 后报错的代码及其错误信息:
neg_bin = sm.GLM(y,X,family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0])).fit()
print(neg_bin.summary())
time = np.array(range(1,74))
time = pd.DataFrame(time,columns = ['Time'])
neg_bin = sm.GLM(y,family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0]
)).fit(cov_kwds = {'time' : time,'maxlags': 5},cov_type = 'hac-groupsum')
print(neg_bin.summary())
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-107-cf6c22076bb1> in <module>
4 )).fit(cov_kwds = {'time' : time
5,'maxlags': 5}
----> 6,cov_type = 'hac-groupsum')
7 print(neg_bin.summary())
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in fit(self,start_params,maxiter,method,tol,scale,cov_type,cov_kwds,use_t,full_output,disp,max_start_irls,**kwargs)
1025 return self._fit_irls(start_params=start_params,maxiter=maxiter,1026 tol=tol,scale=scale,cov_type=cov_type,-> 1027 cov_kwds=cov_kwds,use_t=use_t,**kwargs)
1028 else:
1029 self._optim_hessian = kwargs.get('optim_hessian')
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in _fit_irls(self,**kwargs)
1188 self.scale,1189 cov_type=cov_type,cov_kwds=cov_kwds,-> 1190 use_t=use_t)
1191
1192 glm_results.method = "IRLS"
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in __init__(self,model,params,normalized_cov_params,use_t)
1472 cov_kwds = {}
1473 get_robustcov_results(self,use_self=True,-> 1474 use_t=use_t,**cov_kwds)
1475
1476 @cached_data
~\anaconda3\lib\site-packages\statsmodels\base\covtype.py in get_robustcov_results(self,**kwds)
336 if adjust_df:
337 # need to find number of groups
--> 338 tt = (np.nonzero(time[1:] < time[:-1])[0] + 1)
339 self.n_groups = n_groups = len(tt) + 1
340 res.cov_params_default = sw.cov_nw_groupsum(self,maxlags,time,~\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in f(self,other)
773 if not self._indexed_same(other):
774 raise ValueError(
--> 775 "Can only compare identically-labeled DataFrame objects"
776 )
777 new_data = dispatch_to_series(self,other,op,str_rep)
ValueError: Can only compare identically-labeled DataFrame objects
这是我用 X.index 代替上面的 time 作为 cov_kwds 中的 time 关键字时得到的错误:
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-109-77dde8cd9c9d> in <module>
4 )).fit(cov_kwds = {'time' : X.index
5,**kwds)
340 res.cov_params_default = sw.cov_nw_groupsum(self,341 weights_func=weights_func,--> 342 use_correction=use_correction)
343 res.cov_kwds['description'] = descriptions['HAC-Groupsum']
344 else:
~\anaconda3\lib\site-packages\statsmodels\stats\sandwich_covariance.py in cov_nw_groupsum(results,nlags,weights_func,use_correction)
859 '''
860
--> 861 xu,hessian_inv = _get_sandwich_arrays(results)
862
863 #S_hac = S_nw_panel(xw,weights,groupidx)
~\anaconda3\lib\site-packages\statsmodels\stats\sandwich_covariance.py in _get_sandwich_arrays(results,cov_type)
238 elif hasattr(results.model,'score_obs'):
239 xu = results.model.score_obs(results.params)
--> 240 hessian_inv = np.linalg.inv(results.model.hessian(results.params))
241 else:
242 xu = results.model.wexog * results.wresid[:,None]
~\anaconda3\lib\site-packages\statsmodels\genmod\generalized_linear_model.py in hessian(self,observed)
579
580 factor = self.hessian_factor(params,observed=observed)
--> 581 np.multiply(self.exog.T,factor,out=tmp.T)
582 return -tmp.T.dot(self.exog)
583
UFuncTypeError: Cannot cast ufunc 'multiply' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'
解决方法
暂未找到可以解决该程序问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)