问题描述
我需要绘制我这样计算的特征向量:
def fit(self,X):
    """Eigendecompose the sample covariance of X and store the results.

    X is centered in place, so the caller's array is modified. The
    following attributes are set on the instance:

    cov_matrix_          sample covariance matrix (n - 1 denominator)
    eigvalues_           eigenvalues of cov_matrix_, largest first
    components_          eigenvectors stored as COLUMNS, in the same order
                         as eigvalues_
    variance_            sum of all eigenvalues
    explained_variance_  each eigenvalue divided by variance_
    """
    # Subtract the per-column mean directly from the caller's array.
    X -= np.mean(X,axis=0)
    self.cov_matrix_ = (X.T @ X) / (X.shape[0] - 1)
    # eigh is NumPy's solver for symmetric matrices; column i of the
    # returned eigenvector matrix belongs to eigenvalue i.
    self.eigvalues_, self.components_ = np.linalg.eigh(self.cov_matrix_)
    # Indices that put the eigenvalues in descending order.
    idx = np.argsort(self.eigvalues_)[::-1]
    self.eigvalues_ = self.eigvalues_[idx]
    # Reorder the columns so each eigenvector stays paired with its eigenvalue.
    self.components_ = self.components_[:,idx]
    self.variance_ = self.eigvalues_.sum()
    self.explained_variance_ = self.eigvalues_ / self.variance_
def transform(self,X):
    """Project X onto the stored components and return the projection.

    Computes ``X @ self.components_.T``, keeps the result in
    ``self.projected_`` and returns it. X is not centered here.
    """
    basis = self.components_
    # Shape printout kept as a debugging aid.
    print(basis.shape,X.shape)
    projection = X @ basis.T
    self.projected_ = projection
    return projection
并把它们画在我的数据集前 2 个特征的散点图上。
我的数据集大小为 100x240,因此 self.components_ 中保存了 240 个特征向量,形状为 240x240。取特征值最大的 2 个特征向量,并绘制它们各自的前两个分量后,结果如下:
pca = PCA()
pca.fit(subsample)
#pca.transform(subsample)
plt.scatter(subsample[:,0],subsample[:,1],edgecolor='none',alpha=0.5)
# Arrow built from the first two entries of row 0 of components_,
# drawn in data units (angles/scale_units='xy', scale=1).
plt.quiver(pca.components_[0,0],pca.components_[0,1],angles='xy',scale_units='xy',scale=1,width=0.002 )
# Arrow built from the first two entries of row 1 of components_.
# No scaling arguments are passed here, so matplotlib picks the arrow length.
plt.quiver(pca.components_[1,0],pca.components_[1,1],width=0.002 )
我做错了什么?
解决方法
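np.linalg.eigh 返回的特征向量是按列存放的(第 i 列对应第 i 个特征值),而您的 transform 和绘图代码都把 self.components_ 的每一行当作一个特征向量。因此在按特征值对列排序之后,还需要转置,使每一行成为一个特征向量,即
self.components_ = self.components_[:,idx]
应该改为
self.components_ = self.components_[:,idx].T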
此外,您应确保两个坐标轴使用相同的纵横比进行绘图,否则 quiver 画出的箭头看起来可能与数据的方向不一致:
plt.gca().set_aspect('equal')
提问时附上一个最小可运行示例(minimal working example)是个好习惯,下次请记得:)。这次我只能自行推断您其余的代码,才拼出一个最小可运行示例。无论如何,这是我建议的代码:
import numpy as np
from matplotlib import pyplot as plt
class PCA:
    """Principal component analysis via eigendecomposition of the sample covariance.

    Attributes set by ``fit``:

    cov_matrix_          sample covariance matrix (n - 1 denominator)
    eigvalues_           eigenvalues of cov_matrix_, largest first
    components_          eigenvectors, one per ROW, in the same order as
                         eigvalues_
    variance_            sum of all eigenvalues
    explained_variance_  each eigenvalue divided by variance_

    ``transform`` additionally sets ``projected_``.
    """

    def fit(self,X):
        """Fit the sorted eigenvalues and eigenvectors of the covariance of X.

        X is centered in place, so the caller's array is modified.
        ``transform`` does not center its input, so it expects data that
        has been centered the same way.
        """
        n_samples = X.shape[0]
        # We center the data and compute the sample covariance matrix.
        X -= np.mean(X,axis=0)
        self.cov_matrix_ = np.dot(X.T,X) / (n_samples-1)
        # eigh is NumPy's solver for symmetric matrices. It returns the
        # eigenvectors as COLUMNS: column i pairs with eigenvalue i.
        eigvalues, eigvectors = np.linalg.eigh(self.cov_matrix_)
        idx = eigvalues.argsort()[::-1]
        self.eigvalues_ = eigvalues[idx]
        # Reorder the columns (whole eigenvectors), then transpose so that
        # components_[i] is the i-th eigenvector. Indexing rows with idx
        # instead would shuffle the coordinates inside every eigenvector.
        self.components_ = eigvectors[:,idx].T
        self.variance_ = np.sum(self.eigvalues_)
        self.explained_variance_ = self.eigvalues_ / self.variance_

    def transform(self,X):
        """Project X onto the eigenvectors and return the projection.

        The result is also stored in ``self.projected_``. X is not
        centered here.
        """
        # Shape printout kept as a debugging aid.
        print(self.components_.shape,X.shape)
        # components_ holds one eigenvector per row, hence the transpose.
        self.projected_ = X @ self.components_.T
        return self.projected_
pca = PCA()
# Generate some dummy data
subsample = np.random.randn(69,2)*0.1
subsample[:,0] = subsample[:,0]*8
subsample[:,1] = subsample[:,0]*2 + subsample[:,1] # Add some correlations
# fit centers subsample in place, so the data mean sits at the origin below.
pca.fit(subsample)
plt.scatter(subsample[:,0],subsample[:,1],edgecolor='none',alpha=0.5)
# Draw one arrow per row of components_, starting at the origin and using
# the same data-unit scaling for both so their lengths are comparable.
for component in pca.components_[:2]:
    plt.quiver(0,0,component[0]*2,component[1]*2, # *2 to make arrows larger
               angles='xy',scale_units='xy',scale=1,width=0.006)
# Equal aspect ratio so the plotted directions are not distorted.
plt.gca().set_aspect('equal')
plt.show()