问题描述
我有 MNIST 数据集的测试集，想把图像输入一个预训练好的编码器，然后用 k-means 对得到的嵌入向量进行聚类，但在调用 fit_predict() 时出现了错误。
这是代码:
trans = transforms.Compose([transforms.ToTensor(),transforms.normalize((0.5,),(1.0,))])
test_set = dset.MNIST(root=root,train=False,transform=trans,download=True)
test_loader = torch.utils.data.DataLoader(
dataset = test_set,batch_size = 10000,shuffle = False)
km = KMeans(k,n_init=20,n_jobs=4)
sil=[]
for data in test_loader:
x,_= data
x = model(x.cuda())
x = x.data.cpu().numpy()
#x = x.astype(int)
y_pred = km.fit_predict(x) # seems we can only get a centre from batch
sil_score = sil(x,y_pred)
print('sil score',sil_score)
sil.append(sil_score)
这是我得到的错误:
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\samin\Anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py",line 418,in _process_worker
r = call_item()
File "C:\Users\samin\Anaconda3\lib\site-packages\joblib\externals\loky\process_executor.py",line 272,in __call__
return self.fn(*self.args,**self.kwargs)
File "C:\Users\samin\Anaconda3\lib\site-packages\joblib\_parallel_backends.py",line 567,in __call__
return self.func(*args,**kwargs)
File "C:\Users\samin\Anaconda3\lib\site-packages\joblib\parallel.py",line 225,in __call__
for func,args,kwargs in self.items]
File "C:\Users\samin\Anaconda3\lib\site-packages\joblib\parallel.py",in <listcomp>
for func,kwargs in self.items]
File "C:\Users\samin\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py",line 437,in _kmeans_single_elkan
x_squared_norms=x_squared_norms)
File "C:\Users\samin\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py",line 749,in _init_centroids
x_squared_norms=x_squared_norms)
File "C:\Users\samin\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py",line 81,in _k_init
centers = np.empty((n_clusters,n_features),dtype=X.dtype)
TypeError: 'float' object cannot be interpreted as an integer
"""
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
<ipython-input-148-6ec8225ad810> in <module>
----> 1 k,sil_score = Test(test_loader,model,Controller)
<ipython-input-147-94d13c371d50> in Test(test_loader_0,Controller)
20 k = sum(k) / len(k)
21 km = KMeans(k,n_jobs=4)
---> 22 y_pred = km.fit_predict(obs) # seems we can only get a centre from batch
23 sil_score = sil(x,y_pred)
24
~\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in fit_predict(self,X,y,sample_weight)
996 Index of the cluster each sample belongs to.
997 """
--> 998 return self.fit(X,sample_weight=sample_weight).labels_
999
1000 def fit_transform(self,y=None,sample_weight=None):
~\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in fit(self,sample_weight)
970 tol=self.tol,random_state=random_state,copy_x=self.copy_x,
971 n_jobs=self.n_jobs,algorithm=self.algorithm,
--> 972 return_n_iter=True)
973 return self
974
~\Anaconda3\lib\site-packages\sklearn\cluster\k_means_.py in k_means(X,n_clusters,sample_weight,init,precompute_distances,n_init,max_iter,verbose,tol,random_state,copy_x,n_jobs,algorithm,return_n_iter)
397 # Change seed to ensure variety
398 random_state=seed)
--> 399 for seed in seeds)
400 # Get results with the lowest inertia
401 labels,inertia,centers,n_iters = zip(*results)
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self,iterable)
932
933 with self._backend.retrieval_context():
--> 934 self.retrieve()
935 # Make sure that we get a last message telling us we are done
936 elapsed_time = time.time() - self._start_time
~\Anaconda3\lib\site-packages\joblib\parallel.py in retrieve(self)
831 try:
832 if getattr(self._backend,'supports_timeout',False):
--> 833 self._output.extend(job.get(timeout=self.timeout))
834 else:
835 self._output.extend(job.get())
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in wrap_future_result(future,timeout)
519 AsyncResults.get from multiprocessing."""
520 try:
--> 521 return future.result(timeout=timeout)
522 except LokyTimeoutError:
523 raise TimeoutError()
~\Anaconda3\lib\concurrent\futures\_base.py in result(self,timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
~\Anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
TypeError: 'float' object cannot be interpreted as an integer
我尝试添加 x = x.astype(int)：
km = KMeans(k,n_jobs=4)
sil=[]
for data in test_loader_0:
x,_= data
x = model(x.cuda())
x = x.data.cpu().numpy()
x = x.astype(int)
y_pred = km.fit_predict(x) # seems we can only get a centre from batch
sil_score = sil(x,sil_score)
sil.append(sil_score)
但仍然出现相同的错误。我觉得这个错误非常奇怪，因为我曾使用同一数据集（训练集）和同一个网络（model 即编码器）来计算由 k-means 产生的标签。我不认为 k-means 的 fit_predict 只接受整数值。请问是否有人对此有线索，或者遇到过同样的问题？非常感谢任何提示。
解决方法
暂未找到可以解决该程序问题的有效方法，小编正在努力寻找整理中！
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)