自适应DBSCAN成就

问题描述

我正在用python做DBSCAN集群。我想通过自己计算eps和Minpts参数来实现一种自适应的方法来返回簇数。下面是我的代码。

import math
import copy
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN


def loadDataSet(fileName,splitChar='\t'):

    dataSet = []
    with open(fileName) as fr:
        for line in fr.readlines():
            curline = line.strip().split(splitChar)
            fltline = list(map(float,curline))
            dataSet.append(fltline)
    return dataSet


def dist(a,b):

    return math.sqrt(math.pow(a[0]-b[0],2) + math.pow(a[1]-b[1],2))


def returnDk(matrix,k):

    Dk = []
    for i in range(len(matrix)):
        Dk.append(matrix[i][k])
    return Dk


def returnDkAverage(Dk):

    sum = 0
    for i in range(len(Dk)):
        sum = sum + Dk[i]
    return sum/len(Dk)


def CalculateDistMatrix(dataset):

    DistMatrix = [[0 for j in range(len(dataset))] for i in range(len(dataset))]
    for i in range(len(dataset)):
        for j in range(len(dataset)):
            DistMatrix[i][j] = dist(dataset[i],dataset[j])
    return DistMatrix


def returnEpsCandidate(dataSet):

    DistMatrix = CalculateDistMatrix(dataSet)
    tmp_matrix = copy.deepcopy(DistMatrix)
    for i in range(len(tmp_matrix)):
        tmp_matrix[i].sort()
    EpsCandidate = []
    for k in range(1,len(dataSet)):
        Dk = returnDk(tmp_matrix,k)
        DkAverage = returnDkAverage(Dk)
        EpsCandidate.append(DkAverage)
    return EpsCandidate


def returnMinptsCandidate(DistMatrix,EpsCandidate):

    MinptsCandidate = []
    for k in range(len(EpsCandidate)):
        tmp_eps = EpsCandidate[k]
        tmp_count = 0
        for i in range(len(DistMatrix)):
            for j in range(len(DistMatrix[i])):
                if DistMatrix[i][j] <= tmp_eps:
                    tmp_count = tmp_count + 1
        MinptsCandidate.append(tmp_count/len(dataSet))
    return MinptsCandidate


def returnClusterNumberList(dataset,EpsCandidate,MinptsCandidate):

    np_dataset = np.array(dataset)
    ClusterNumberList = []
    for i in range(len(EpsCandidate)):
        clustering = DBSCAN(eps= EpsCandidate[i],min_samples= MinptsCandidate[i]).fit(np_dataset)
        num_clustering = max(clustering.labels_)
        ClusterNumberList.append(num_clustering)
    return ClusterNumberList

if __name__ == '__main__':
    data = pd.read_csv('/Users/Desktop/Mic/recorder_test1/New folder/MFCCresultsforclustering/MFCCresultsforclustering.csv')
    dataSet = data.iloc[:,0:13].values
    EpsCandidate = returnEpsCandidate(dataSet)
    DistMatrix = CalculateDistMatrix(dataSet)
    MinptsCandidate = returnMinptsCandidate(DistMatrix,EpsCandidate)
    ClusterNumberList = returnClusterNumberList(dataSet,MinptsCandidate)
    print(EpsCandidate)
    print(MinptsCandidate)
    print('cluster number list is')
    print(ClusterNumberList)

但是，带有加载数据集的输出为所有[-1]。我想知道错误在哪里。我适合这个总体方向吗？如果没有，如何实现自适应DBSCAN集群？

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

cluster-analysis dbscan machine-learning python

自适应DBSCAN成就

问题描述

解决方法

相关问答