问题描述
大家好。我编写了一个 K-means 聚类程序;然而,每次运行程序时,绘制出的图都会发生变化。我不知道为什么会出现这种情况,如果有人能提供帮助,我将不胜感激。
import numpy as np
import matplotlib.pyplot as plt
import random
def cal_centroids(clusters, cluster_array, k, previous_centroids=None):
    """Compute the mean (x, y) position of every cluster.

    Args:
        clusters: Sequence of cluster labels; point ``i`` belongs to cluster
            ``clusters[i]``.  Labels outside ``range(k)`` are ignored.
        cluster_array: Indexable collection of points.  Only the first two
            coordinates of each point are used.
        k: Number of clusters.
        previous_centroids: Optional centroids from an earlier iteration.
            When given, a cluster that currently has no points keeps its
            previous position.

    Returns:
        A list of ``k`` ``[x, y]`` centroids ordered by cluster label.  A
        cluster without points gets its previous centroid if one was
        supplied, otherwise the mean of all labelled points.

    Raises:
        ValueError: If a cluster is empty, no previous centroid was supplied
            and there are no labelled points to fall back on.
    """
    # Single pass over the points: running [sum_x, sum_y, count] per label.
    totals = {label: [0, 0, 0] for label in range(k)}
    for index, label in enumerate(clusters):
        bucket = totals.get(label)
        if bucket is None:
            continue
        point = cluster_array[index]
        bucket[0] += point[0]
        bucket[1] += point[1]
        bucket[2] += 1

    overall_mean = None  # computed lazily, only if some cluster is empty
    new_centroids = []
    for label in range(k):
        sum_x, sum_y, count = totals[label]
        if count:
            new_centroids.append([sum_x / count, sum_y / count])
            continue

        # Empty cluster: dividing by its zero count would crash, so reuse a
        # sensible position instead.
        if previous_centroids is not None:
            previous = previous_centroids[label]
            new_centroids.append([previous[0], previous[1]])
            continue

        if overall_mean is None:
            total_count = sum(bucket[2] for bucket in totals.values())
            if total_count == 0:
                raise ValueError(
                    "cannot compute centroids: no points are assigned to "
                    "any cluster"
                )
            overall_mean = [
                sum(bucket[0] for bucket in totals.values()) / total_count,
                sum(bucket[1] for bucket in totals.values()) / total_count,
            ]
        new_centroids.append(list(overall_mean))
    return new_centroids
def assign_clusters(centroids, cluster_array):
    """Label every point with the index of its nearest centroid.

    Args:
        centroids: Sequence of centroid positions.
        cluster_array: Iterable of points (the rows of the data array).

    Returns:
        A list with one integer label per point.  When several centroids are
        equally close, the one with the lowest index wins.

    Raises:
        ValueError: If ``centroids`` is empty while there is at least one
            point to label.
    """
    clusters = []
    for point in cluster_array:
        distances = [calc_distance(centroid, point) for centroid in centroids]
        # min() returns the first index that holds the smallest distance, so
        # ties resolve to the lowest centroid index without rescanning the
        # list once per element.
        nearest = min(range(len(distances)), key=distances.__getitem__)
        clusters.append(nearest)
    return clusters
def calc_distance(x1, x2):
    """Return the Euclidean distance between two points.

    Args:
        x1: Coordinates of the first point (array, list or tuple).
        x2: Coordinates of the second point, same length as ``x1``.

    Returns:
        The distance as a NumPy float.
    """
    # Convert to float arrays before subtracting: plain lists do not support
    # ``-``, and unsigned or narrow integer dtypes would wrap around instead
    # of producing the true difference.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return (diff ** 2).sum(axis=0) ** 0.5
# Main routine: picks the initial centroids, then alternates between
# recomputing the centroids and reassigning a cluster label to every point.
def kmean(data, no_clusters, iterations, seed=None):
    """Cluster the rows of ``data`` with k-means.

    The initial centroids are ``no_clusters`` distinct rows drawn at random,
    so two runs can start from different rows and end in different
    clusterings unless ``seed`` is given.

    Args:
        data: 2-D array with one point per row.
        no_clusters: Number of clusters to form.
        iterations: Maximum number of update rounds.
        seed: Optional seed for the initial draw.  ``None`` uses the global
            ``random`` state; any other value makes the run reproducible.

    Returns:
        A tuple ``(dict_centroids, centroids, clusters)``:
        ``dict_centroids`` maps each label to the list of rows assigned to
        it, ``centroids`` is the list of centroid positions, and
        ``clusters`` holds one label per row of ``data``.

    Raises:
        ValueError: If ``no_clusters`` exceeds the number of rows.
    """
    rng = random if seed is None else random.Random(seed)
    start_rows = rng.sample(range(data.shape[0]), no_clusters)
    centroids = [data[row, :] for row in start_rows]
    clusters = assign_clusters(centroids, data)

    for _ in range(iterations):
        centroids = cal_centroids(clusters, data, no_clusters)
        new_clusters = assign_clusters(centroids, data)
        converged = list(new_clusters) == list(clusters)
        # Rebind ``clusters`` so the next round works from the new labels;
        # otherwise every round would recompute the same centroids.
        clusters = new_clusters
        if converged:
            # Labels are stable, so further rounds cannot change anything.
            break

    dict_centroids = {label: [] for label in range(no_clusters)}
    for label, row in zip(clusters, data):
        dict_centroids[label].append(row)
    return dict_centroids, centroids, clusters
def extract_file(file_name):
    """Read a text file of comma-separated integers into a NumPy array.

    Args:
        file_name: Path of the file; each non-blank line holds one point as
            integers separated by commas.

    Returns:
        A NumPy array with one row per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field is not a valid integer.
    """
    # The context manager closes the file even when parsing fails.
    with open(file_name, 'r') as file:
        rows = [
            [int(field) for field in line.split(",")]
            for line in file
            if line.strip()  # skip blank lines, e.g. a trailing newline
        ]
    return np.array(rows)
# Cluster the points from backyard.txt into two groups and plot the result.
data = extract_file("backyard.txt")
# kmean returns three values: the points grouped by label, the centroids,
# and one label per point.
dict_centroids, centroids, clusters = kmean(data, 2, 8)
x = data[:, 0]
y = data[:, 1]
fig = plt.figure()
# Colour every point by its cluster label.
scatter = plt.scatter(x, y, c=clusters, s=40)
# Mark each centroid with a red cross.
for i, j in centroids:
    plt.scatter(i, j, s=50, c='red', marker='+')
plt.xlabel("Vitamin C")
plt.ylabel("GLA")
plt.title("File backyard 2 groups displayed")
# plt.show() runs the GUI event loop and blocks until the window is closed;
# Figure.show() returns immediately, so in a plain script the window may
# vanish as soon as the script ends.
plt.show()
backyard.txt 文件的内容如下:
40,40
10,10
200,200
230,231
40,43
15,45
220,190
解决方法
暂未找到可以解决该程序问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)