问题描述
我正在尝试使用SciPy的gaussian_kde函数来估计多元数据的密度。
在下面的代码中,如果尺寸数超过4d,则可能会发生以下错误(大约50%)。
如果数字小于3d,则在大多数情况下不会发生此错误。
# Import
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline
# Data
N1 = np.random.normal(size=400)
N2 = np.random.normal(scale=0.5,size=400)
N3 = np.random.normal(scale=0.5,size=400)
N4 = np.random.normal(scale=0.5,size=400)
N5 = np.random.normal(scale=0.5,size=400)
N6 = np.random.normal(scale=0.5,size=400)
a1 = N1+1*N2
a2 = N1-1*N2
a3 = N1+1*N3
a4 = N1-1*N3
a5 = N1+1*N4
a6 = N1-1*N4
a7 = N1+1*N5
a8 = N1-1*N5
a9 = N1+1*N6
a0 = N1-1*N6
# Kernel density
xy = np.vstack([a1,a2,a3,a4,a5,a6,a7,a8,a9,a0])
kernel = stats.gaussian_kde(xy)
z_est = kernel.evaluate(xy)
# Visualization
x = a1
y = a2
plt.scatter(x,y,c=z_est)
错误消息
LinAlgError Traceback (most recent call last)
<ipython-input-11-ce4c335d8dd1> in <module>
2 xy = np.vstack([a1,a5])
3 kernel = stats.gaussian_kde(xy)
----> 4 z_est = kernel.evaluate(xy)
~\program\anaconda\lib\site-packages\scipy\stats\kde.py in evaluate(self,points)
244 result = zeros((m,),dtype=float)
245
--> 246 whitening = linalg.cholesky(self.inv_cov)
247 scaled_dataset = dot(whitening,self.dataset)
248 scaled_points = dot(whitening,points)
~\program\anaconda\lib\site-packages\scipy\linalg\decomp_cholesky.py in cholesky(a,lower,overwrite_a,check_finite)
89 """
90 c,lower = _cholesky(a,lower=lower,overwrite_a=overwrite_a,clean=True,---> 91 check_finite=check_finite)
92 return c
93
~\program\anaconda\lib\site-packages\scipy\linalg\decomp_cholesky.py in _cholesky(a,clean,check_finite)
38 if info > 0:
39 raise LinAlgError("%d-th leading minor of the array is not positive "
---> 40 "definite" % info)
41 if info < 0:
42 raise ValueError('LAPACK reported an illegal value in {}-th argument'
LinAlgError: 1-th leading minor of the array is not positive definite
为什么会出现错误?
解决方法
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline
def measure(n):
a1 = np.random.normal(size=n)
a2= np.random.normal(scale=0.5,size=n)
return a1+a2,a1-a2
a1,a2 = measure(4000)
xmin = a1.min()
xmax = a1.max()
ymin = a2.min()
ymax = a2.max()
X,Y = np.mgrid[xmin:xmax:100j,ymin:ymax:100j]
positions = np.vstack([X.ravel(),Y.ravel()])
values = np.vstack([a1,a2])
kernel = stats.gaussian_kde(values)
z_est = np.reshape(kernel.evaluate(positions).T,X.shape)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(np.rot90(z_est),cmap=plt.cm.gist_earth_r,extent=[xmin,xmax,ymin,ymax])
ax.plot(a1,a2,'k.',markersize=2)
ax.set_xlim([xmin,xmax])
ax.set_ylim([ymin,ymax])
plt.show()