问题描述
我从单个栅格文件中提取了5个波段,并将它们转换为5个numpy数组,然后把这些数组组合成一个具有5列的pandas数据框。但是,由于数据是从栅格中提取的,该数据框有超过1亿行,数据量太大,clustermap无法处理。
我使用dask将我的pandas数据框转换为具有100个分区的dask数据框。但是,对它调用clustermap函数时报错:
ValueError:无法将字符串转换为浮点数
我认为这是由分区后的dask数据框中的描述性行导致的:每个分区都有一行用来描述各列的数据类型。
import pandas as pd
import seaborn as sns
import dask.dataframe as dd
print(raster_data)
a b c d e
0 -10.991648 -5.194196 -7.814418 -8.581491 -4.685249
1 -11.008073 -5.199173 -7.816316 -8.585398 -4.684309
2 -11.021060 -5.203457 -7.816524 -8.587661 -4.683213
3 -11.029137 -5.206793 -7.814248 -8.587540 -4.681872
4 -11.030838 -5.208930 -7.808703 -8.584308 -4.680197
... ... ... ... ... ...
125207167 -1.239941 -0.706424 -1.459914 -1.473716 -1.457305
125207168 -1.237355 -0.703007 -1.461954 -1.473614 -1.457217
125207169 -1.235120 -0.700016 -1.463818 -1.473586 -1.457193
125207170 -1.232403 -0.707647 -1.432281 -1.452982 -1.439554
125207171 -1.224955 -0.708945 -1.404824 -1.432420 -1.421914
dask_data = dd.from_pandas(raster_data,npartitions = 100)
print(dask_data)
dask DataFrame Structure:
a b c d e
npartitions=100
0 float32 float32 float32 float32 float32
1252072 ... ... ... ... ...
... ... ... ... ... ...
123955128 ... ... ... ... ...
125207171 ... ... ... ... ...
dask Name: from_pandas,100 tasks
sns.clustermap(dask_data,metric="euclidean",method="ward")
'''
'''
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-3b9c411920cd> in <module>
----> 1 sns.clustermap(dask_data,method="ward")
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in clustermap(data,pivot_kws,method,metric,z_score,standard_scale,figsize,cbar_kws,row_cluster,col_cluster,row_linkage,col_linkage,row_colors,col_colors,mask,dendrogram_ratio,colors_ratio,cbar_pos,tree_kws,**kwargs)
1389 row_cluster=row_cluster,col_cluster=col_cluster,
1390 row_linkage=row_linkage,col_linkage=col_linkage,
-> 1391 tree_kws=tree_kws,**kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in plot(self,colorbar_kws,**kws)
1206 self.plot_dendrograms(row_cluster,
1207 row_linkage=row_linkage,
-> 1208 tree_kws=tree_kws)
1209 try:
1210 xind = self.dendrogram_col.reordered_ind
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in plot_dendrograms(self,tree_kws)
1052 self.data2d,metric=metric,method=method,label=False,axis=0,
1053 ax=self.ax_row_dendrogram,rotate=True,linkage=row_linkage,
-> 1054 tree_kws=tree_kws
1055 )
1056 else:
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in dendrogram(data,linkage,axis,label,rotate,ax)
770 plotter = _DendrogramPlotter(data,linkage=linkage,axis=axis,
771 metric=metric,
--> 772 label=label,rotate=rotate)
773 if ax is None:
774 ax = plt.gca()
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in __init__(self,data,rotate)
582
583 if linkage is None:
--> 584 self.linkage = self.calculated_linkage
585 else:
586 self.linkage = linkage
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in calculated_linkage(self)
642
643 try:
--> 644 return self._calculate_linkage_fastcluster()
645 except ImportError:
646 if np.product(self.shape) >= 10000:
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\matrix.py in _calculate_linkage_fastcluster(self)
632 return fastcluster.linkage_vector(self.array,
633 method=self.method,
--> 634 metric=self.metric)
635 else:
636 linkage = fastcluster.linkage(self.array,method=self.method,
C:\ProgramData\Anaconda3\lib\site-packages\fastcluster.py in linkage_vector(X,extraarg)
467 else:
468 assert metric=='euclidean'
--> 469 X = array(X,dtype=double,copy=(method=='ward'),order='C',subok=True)
470 assert X.ndim==2
471 N = len(X)
ValueError: Could not convert string to float: 'a'
解决方法
暂未找到能够解决该问题的有效方法,小编正在努力寻找和整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)