问题描述
首先,我生成了 20 对随机数,并将它们用作 20 个粒子的二维空间位置。
import numpy as np
# Sample 20 points uniformly from the unit square [0, 1) x [0, 1);
# each row is the (x, y) location of one particle.
positions = np.random.random_sample(size=(20, 2))
positions 如下所示:
array([[0.96124789,0.52413156],[0.5186589,0.4300743 ],[0.63357087,0.70130091],[0.8213765,0.29515393],[0.68616945,0.02020544],[0.71924115,0.71630689],[0.92340942,0.56007463],[0.17322848,0.2455891 ],[0.3993029,0.53287478],[0.15887798,0.60968053],[0.45877831,0.88163765],[0.04565275,0.76557075],[0.73800541,0.71257644],[0.02784201,0.10035848],[0.83830731,0.66442518],[0.95518272,0.37313694],[0.14761192,0.8255784 ],[0.83576694,0.18367566],[0.79187776,0.52189936],[0.97585451,0.97077229]])
我想做的是根据它们的空间分布将这些粒子分为不同的簇。为此,我导入 scipy.spatial.cKDTree 构建了一棵 K-d 树,并使用 query_pairs 方法找出这棵树中彼此距离不超过 0.2 的所有点对。
from scipy.spatial import cKDTree as kdtree
# Index the particle coordinates in a k-d tree for fast neighbour queries.
tree = kdtree(positions)

# Every index pair (i, j), i < j, whose points lie within 0.2 of each other,
# returned as an (n_pairs, 2) integer array.
pairs = tree.query_pairs(r=0.2, output_type="ndarray")

# Show the pairs with rows ordered by their first index.
row_order = np.argsort(pairs[:, 0])
pairs[row_order]
array([[ 0,6],[ 0,15],[ 0,18],[ 0,14],[ 1,8],[ 2,5],[ 2,12],[ 3,17],[ 3,15],[ 5,12],[ 5,14],[ 6,14],[ 6,15],[ 6,18],[ 9,11],[11,16],[12,14],[12,18],[14,18]],dtype=int64)
根据 pairs,我们可以直观地将这些粒子分为 3 个簇,即
a) (0,2,3,5,6,12,14,15,17,18)
b) (1,8)
c) (9,11,16)
所以我的问题是,如何通过 Python 本身来进行这种分类?
解决方法
您可以尝试使用 sklearn 中的 KMeans:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import numpy as np
# Hard-coded particle coordinates, one (x, y) row per particle.
positions = np.array([
    [0.96124789, 0.52413156],
    [0.5186589, 0.4300743],
    [0.63357087, 0.70130091],
    [0.8213765, 0.29515393],
    [0.68616945, 0.02020544],
    [0.71924115, 0.71630689],
    [0.92340942, 0.56007463],
    [0.17322848, 0.2455891],
    [0.3993029, 0.53287478],
    [0.15887798, 0.60968053],
    [0.45877831, 0.88163765],
    [0.04565275, 0.76557075],
    [0.73800541, 0.71257644],
    [0.02784201, 0.10035848],
    [0.83830731, 0.66442518],
    [0.95518272, 0.37313694],
    [0.14761192, 0.8255784],
    [0.83576694, 0.18367566],
    [0.79187776, 0.52189936],
    [0.97585451, 0.97077229],
])

# One colour per cluster. Both loops below iterate over this same list so the
# background shading of a region always matches the markers of its cluster.
colors = ['crimson', 'dodgerblue', 'limegreen']

# Partition the particles into one cluster per colour with k-means.
kmeans = KMeans(n_clusters=len(colors)).fit(positions)

fig, ax = plt.subplots()
for i, (color, center) in enumerate(zip(colors, kmeans.cluster_centers_)):
    # Particles that were assigned to cluster i.
    ax.scatter(*positions[kmeans.labels_ == i].T, color=color, label=f'Cluster {i}')
    # Hollow ring marking the centre of cluster i.
    ax.scatter(*center, ec=color, fc='None', s=100)
ax.set_aspect('equal')

# Visualise each cluster's region: label many random points in the unit
# square and draw them as tiny dots in the colour of their predicted cluster.
rnd = np.random.rand(10000, 2)
rnd_labels = kmeans.predict(rnd)
for i, color in enumerate(colors):
    ax.scatter(*rnd[rnd_labels == i].T, ec='none', fc=color, marker='.', s=3)

# Place the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.02, 0.95), loc='upper left')
plt.tight_layout()
plt.show()