问题描述
我是 Python 新手。我对 numpy 和 array 有一点了解。在我的工作中,我有 1000 行 × 5 列的数据集。前三列作为 X_set,最后一列作为 y_set。但是,当我绘制可视化训练集结果时,我得到了错误。错误如下
ValueError: X.shape[1] = 2 should be equal to 4,the number of features at training time
这是我的脚本 svm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as svm
data = pd.read_csv('overdischarge.csv')
data
#Getting features from dataset
data=data.sample(frac=1)
X=data.iloc[:,:-1].values
y=data.iloc[:,-1].values
X=X.astype(float)
#training and testing set size
train_size=int(0.75*data.shape[0])
test_size=int(0.25*data.shape[0])
print("Training set size : "+ str(train_size))
print("Testing set size : "+str(test_size))
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.25,random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear',random_state = 0)
classifier.fit(X_train,y_train)
#training set split
X_train=X[0:train_size,:]
y_train=y[0:train_size]
#testing set split
X_test=X[train_size:,:]
y_test=y[train_size:]
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))
from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test,y_pred)
print(cm)
accuracy_score(y_test,y_pred)
#visualize the training set
from matplotlib.colors import ListedColormap
X_set,y_set = X_train,y_train
for i,j in enumerate(np.unique(y_set)):
plt.scatter(X_set[y_set == j,0],X_set[y_set == j,1],2],c = ListedColormap(('green','orange','red'))(i),label = j,marker='.')
plt.title('Training set Overdischarge')
plt.xlabel('rpn')
plt.ylabel('ALARM')
plt.legend()
plt.show()
# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set,y_train
X1,X2 = np.meshgrid(np.arange(start = X_set[:,0].min() - 1,stop = X_set[:,0].max() + 1,step = 0.1),np.arange(start = X_set[:,1].min() - 1,1].max() + 1,step = 0.1))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),alpha = 0.85,cmap = ListedColormap(('green','yellow','red')))
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
for i,j in enumerate(np.unique(y_set)):
plt.scatter(X_set[y_set == j,marker='.')
plt.title('Training set')
plt.xlabel('ALARM')
plt.ylabel('rpn')
plt.legend()
plt.show()
ValueError Traceback (most recent call last)
<ipython-input-24-d51e66447620> in <module>()
4 X1,5 np.arange(start = X_set[:,step = 0.1))
----> 6 plt.contourf(X1,7 alpha = 0.85,'red')))
8 plt.xlim(X1.min(),X1.max())
2 frames
/usr/local/lib/python3.6/dist-packages/sklearn/svm/_base.py in _validate_for_predict(self,X)
465 raise ValueError("X.shape[1] = %d should be equal to %d,"
466 "the number of features at training time" %
--> 467 (n_features,self.shape_fit_[1]))
468 return X
469
ValueError: X.shape[1] = 2 should be equal to 4,the number of features at training time
这是我的数据集
SEV OCC DET rpn ALARM
0 1 2 10 20 0
1 2 3 9 54 0
2 3 4 8 96 0
3 4 5 7 140 0
4 5 6 6 180 0
... ... ... ... ... ...
995 8 7 5 280 1
996 9 8 4 288 1
997 10 9 3 270 1
998 1 10 2 20 0
999 2 10 1 20 0
为什么我会出错? numpy 或数组有问题吗?请给我建议。提前致谢。
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)