问题描述
MRE
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Minimal reproducible example: fit a KNN classifier on the two petal
# features of the iris dataset and draw the filled decision regions
# together with the labelled data points.

# prepare data: one row per sample, with both the numeric label and the species name
iris = load_iris()
X = iris.data
y = iris.target
df = pd.DataFrame(X, columns=iris.feature_names)
df['label'] = y
species_map = dict(zip(range(3), iris.target_names))
df['species'] = df.label.map(species_map)
df = df.reindex(
    ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
     'petal width (cm)', 'species', 'label'],
    axis=1,
)

# instantiate model
knn = KNeighborsClassifier(n_neighbors=6)

# fit on 'petal length (cm)' and 'petal width (cm)' (columns 2 and 3 after the reindex)
knn.fit(df.iloc[:, 2:4], df.label)

h = .02  # step size in the mesh

# create colormap for the contour plot (one pastel colour per class)
cmap_light = ListedColormap(list(sns.color_palette('pastel', n_colors=3)))

# Plot the decision boundary.
# For that, we will assign a color to each point in the mesh [x_min, x_max] x [y_min, y_max].
x_min, x_max = df['petal length (cm)'].min() - 1, df['petal length (cm)'].max() + 1
y_min, y_max = df['petal width (cm)'].min() - 1, df['petal width (cm)'].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# predict a class for every mesh node, then restore the 2-D grid shape
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# create plot
fig, ax = plt.subplots()

# add data points
sns.scatterplot(data=df, x='petal length (cm)', y='petal width (cm)',
                hue='species', ax=ax, edgecolor='k')

# add decision boundary contour map
ax.contourf(xx, yy, Z, cmap=cmap_light, alpha=0.4)

# legend, anchored just outside the right edge of the axes
# (the keyword is `bbox_to_anchor`; `bBox_to_anchor` is rejected by matplotlib)
lgd = plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
结果图
所需图
资源
无法回答问题的问题
自我回答
- 我提供了一个解决方案,但我不确定它是否是最佳方案。我当然愿意接受其他方案。
- 也就是说,我不想要那种在 contourf 或 pcolormesh 图中填充颜色的解决方案。
- 最好的解决方案是简洁地仅提取决策边界值。
解决方法
- 这是我想到的一种解决方案:对 Z(即 .predict 的结果)沿两个轴分别使用 np.diff。其思路是:预测结果发生变化的地方就是决策边界。
- 使用 .diff 将 Z 与移位 1 个位置后的自身相减。
- 使用 np.diff(Z) != 0 创建 mask。
- 使用 mask 从 xx 和 yy 中选出对应的 x 和 y 坐标。
- 使用OP中的现有代码
# Extract only the decision-boundary points from the predicted mesh `Z`
# (reuses `xx`, `yy`, `Z` and `df` from the code in the question).

# use diff to create a mask: True wherever the predicted class changes
# between horizontally (axis=1) or vertically (axis=0) adjacent mesh nodes
mask = np.diff(Z, axis=1) != 0
mask2 = np.diff(Z, axis=0) != 0

# apply mask against xx and yy
# np.diff drops one column (axis=1) or one row (axis=0), so the coordinate
# grids are sliced the same way to match each mask's shape
xd = np.concatenate((xx[:, 1:][mask], xx[1:, :][mask2]))
yd = np.concatenate((yy[:, 1:][mask], yy[1:, :][mask2]))

# plot just the decision boundary
fig, ax = plt.subplots()
sns.scatterplot(x=xd, y=yd, color='k', edgecolor='k', s=5, ax=ax,
                label='decision boundary')
plt.show()

# plot the data points with the decision boundary drawn on the same axes
fig, ax = plt.subplots()
sns.scatterplot(data=df, x='petal length (cm)', y='petal width (cm)',
                hue='species', ax=ax, edgecolor='k')
sns.scatterplot(x=xd, y=yd, color='k', edgecolor='k', s=5, ax=ax,
                label='decision boundary')
lgd = plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
xd 和 yd 能够正确地叠加在 plt.contourf 的结果之上