Python sklearn.cluster module: SpectralClustering() example source code

The following 29 code examples, collected from open-source Python projects, illustrate how to use sklearn.cluster.SpectralClustering().
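Before the project snippets, here is a minimal usage sketch of the pattern most of them share: build an affinity (similarity) matrix, then call fit_predict(). This is a hedged example, not code from any project below; the toy blob data, gamma value, and cluster count are illustrative assumptions.

import numpy as np
from sklearn.cluster import SpectralClustering
from sklearn.datasets import make_blobs
from sklearn.metrics.pairwise import rbf_kernel

# Toy data: two well-separated blobs (illustrative values only).
X, y = make_blobs(n_samples=100, centers=[[0, 0], [5, 5]], random_state=0)

# Many snippets below use affinity='precomputed', which expects a square,
# symmetric *similarity* matrix; an RBF kernel converts distances into one.
A = rbf_kernel(X, gamma=1.0)

labels = SpectralClustering(n_clusters=2, affinity='precomputed',
                            random_state=0).fit_predict(A)
print(labels)

With affinity='precomputed', the input to fit_predict() must be a similarity matrix rather than a distance matrix, which is why several snippets below first convert distances via 1 - D or an exponential kernel.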

Project: CS-SMAF    Author: brian-cleary    | project source | file source
def compare_clusters(X,Y,method='spectral',s=10000):
    A = (X/np.linalg.norm(X,axis=0)).T
    A[np.isnan(A)] = 0
    B = (Y/np.linalg.norm(Y,axis=0)).T
    B[np.isnan(B)] = 0
    random_samples = np.zeros(A.shape[0],dtype=np.bool)
    random_samples[:min(s,A.shape[0])] = True
    np.random.shuffle(random_samples)
    A = A[random_samples]
    B = B[random_samples]
    dA = 1 - A.dot(A.T)
    dA = np.exp(-dA**2/2.)
    dB = 1 - B.dot(B.T)
    dB = np.exp(-dB**2/2.)
    del A,B
    if method == 'spectral':
        n = max(5, min(30, X.shape[1] // 50))  # integer division keeps n_clusters an int under Python 3
        lA = SpectralClustering(n_clusters=n,affinity='precomputed').fit_predict(dA)
        lB = SpectralClustering(n_clusters=n,affinity='precomputed').fit_predict(dB)
    elif method == 'ap':
        lA = AffinityPropagation(affinity='precomputed').fit_predict(dA)
        lB = AffinityPropagation(affinity='precomputed').fit_predict(dB)
    return adjusted_mutual_info_score(lA,lB)
Project: lol-category    Author: vonum    | project source | file source
def spectral(data):
  spectral = SpectralClustering(
      eigen_solver='arpack',
      affinity='rbf',
      assign_labels='discretize'
  ).fit(data)

  print('Spectral')
  print(collections.Counter(spectral.labels_))
  print(metrics.silhouette_score(data, spectral.labels_))

  reduced_data = reduce_with_pca(data, 2)
  plot_2d_data(reduced_data, spectral.labels_)
Project: Deep-subspace-clustering-networks    Author: panji1990    | project source | file source
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5*(C + C.T)
    r = d*K + 1
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed', assign_labels='discretize')
    grp = spectral.fit_predict(L) + 1  # fit_predict() fits and labels in one call
    return grp, L
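
For context, a hedged sketch of how post_proC might be called: C here is a random stand-in for the learned self-expression coefficient matrix, and the K, d, alpha values are illustrative assumptions rather than this repository's tuned settings. It assumes the same imports as the snippet (numpy as np, scipy.sparse.linalg.svds, sklearn.preprocessing.normalize, sklearn.cluster).

# Hypothetical usage (illustrative values, not the repo's tuned settings):
n_samples, K, d, alpha = 200, 4, 5, 8
C = np.random.rand(n_samples, n_samples)  # stand-in for a learned coefficient matrix
grp, L = post_proC(C, K, d, alpha)        # grp: labels in 1..K, L: the affinity that was clustered
print(np.bincount(grp)[1:])               # cluster sizes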
Project: Deep-subspace-clustering-networks    Author: panji1990    | project source | file source
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    # Same as the first post_proC above, except r is clamped so svds() stays within bounds:
    C = 0.5*(C + C.T)
    r = min(d*K + 1, C.shape[0] - 1)
    U, S, _ = svds(C, r, v0=np.ones(C.shape[0]))
    # ... remainder identical to the first post_proC above ...
    return grp, L
Project: Deep-subspace-clustering-networks    Author: panji1990    | project source | file source
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    n = C.shape[0]
    C = 0.5*(C + C.T)
    C = C - np.diag(np.diag(C)) + np.eye(n, n)  # for sparse C, this step will make the algorithm more numerically stable
    r = d*K + 1
    U, S, _ = svds(C, r, v0=np.ones(n))
    U = U[:, ::-1]
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, axis=1)
    Z = U.dot(U.T)
    Z = Z * (Z > 0)
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T)
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed', assign_labels='discretize')
    grp = spectral.fit_predict(L) + 1
    return grp, L
Project: Deep-subspace-clustering-networks    Author: panji1990    | project source | file source
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5*(C + C.T)
    r = d*K + 1
    # ... SVD, normalization, and affinity construction identical to the first post_proC above ...
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack',
                                          affinity='precomputed', assign_labels='discretize')
    grp = spectral.fit_predict(L) + 1
    return grp, L
Project: word2vec_pipeline    Author: NIHOPA    | project source | file source
def compute_Meta_centroid_set(self, C):
        print("Intermediate clusters", C.shape)

        # By eye, it looks like the top 60%-80% of the
        # remaining clusters are stable...

        nc = int(self.subcluster_pcut * self.subcluster_kn)
        clf = SpectralClustering(n_clusters=nc, affinity="precomputed")

        S = cosine_affinity(C)
        labels = clf.fit_predict(S)

        Meta_clusters = []
        Meta_cluster_size = []
        for i in range(labels.max() + 1):
            idx = labels == i
            mu = C[idx].mean(axis=0)
            mu /= np.linalg.norm(mu)
            Meta_clusters.append(mu)
            Meta_cluster_size.append(idx.sum())

        return np.array(Meta_clusters)
Project: DTW_physionet2016    Author: JJGO    | project source | file source
def cluster(aff_matrix, records, n_clusters, medoid_indexes):
    Cluster = SpectralClustering(n_clusters=n_clusters, affinity='precomputed')
    labels = Cluster.fit_predict(aff_matrix)

    medoid_indexes = medoid_indexes.loc[records]

    t_records = []
    indexes = []

    for i in range(n_clusters):
        labels_i = np.where(labels == i)[0]
        sub_aff_matrix = aff_matrix[labels_i, :][:, labels_i]
        medoid_index   = np.argmax(np.prod(sub_aff_matrix, axis=0))
        absolute_index = labels_i[medoid_index]
        r = medoid_indexes.index[absolute_index]
        t_records.append(r)
        i = medoid_indexes.iloc[absolute_index].values[0]
        indexes.append(i)
    return t_records, indexes
Project: deep_share    Author: luyongxi    | project source | file source
def _clusteraffinity(aff, k, imdb, cls_idx):
    """ Cluster error correlation matrix using spectral clustering into k cluster,
        show the class labels in each cluster. 
    """
    # clustering model
    spectral = SpectralClustering(n_clusters=k,
                                  eigen_solver='arpack',
                                  affinity="precomputed")
    print('Performing clustering...')
    labels = spectral.fit_predict(aff)

    # print out all labels
    for i in range(k):
        find_idx = np.where(labels == i)[0]
        print('The list of classes in cluster {}'.format(i))
        print([imdb.classes[id] for id in find_idx])
        print('--------------------------------------------')

    return labels

if __name__ == '__main__':
    # Todo: debug code if necessary

    pass
Project: eezzy    Author: 3Blades    | project source | file source
def cluster_spectral(X_train, model_args=None, gridsearch=True):
    from sklearn.cluster import SpectralClustering
    print('SpectralClustering')

    if gridsearch is True:
        ## Todo:
        # add hyperparameter searching. No scoring method is available for this model,
        # so we can't easily use gridsearching.

        raise NotImplementedError('No hyperparameter optimization available yet for this model. Set gridsearch to False')
        # prune(param_grid,model_args)
    else:
        if 'n_clusters' not in model_args:
            raise KeyError('Need to define n_clusters for SpectralClustering')
        param_grid = None

    return ModelWrapper(SpectralClustering, X=X_train, model_args=model_args, param_grid=param_grid, unsupervised=True)
Project: VASC    Author: wang-research    | project source | file source
def clustering( points, k=2, name='kmeans'):
    '''
    points: N_samples * N_features
    k: number of clusters
    '''
    if name == 'kmeans':
        kmeans = KMeans( n_clusters=k,n_init=100 ).fit(points)
        ## print within_variance
        #cluster_distance = kmeans.transform( points )
        #within_variance = sum( np.min(cluster_distance,axis=1) ) / float( points.shape[0] )
        #print("AvgWithinSS:"+str(within_variance))
        if len( np.unique(kmeans.labels_) ) > 1:
            si = silhouette_score( points, kmeans.labels_ )
            #print("Silhouette:"+str(si))
        else:
            si = 0
            print("Silhouette:"+str(si))
        return kmeans.labels_, si

    if name == 'spec':
        spec = SpectralClustering( n_clusters=k, affinity='cosine' ).fit( points )
        si = silhouette_score( points, spec.labels_ )
        print("Silhouette:" + str(si))
        return spec.labels_, si
Project: contrastive    Author: abidlabs    | project source | file source
def find_spectral_alphas(self, n_alphas, max_log_alpha, n_alphas_to_return):
        self.create_affinity_matrix(max_log_alpha, n_alphas)
        affinity = self.affinity_matrix
        spectral = cluster.SpectralClustering(n_clusters=n_alphas_to_return, affinity='precomputed')
        alphas = np.concatenate(([0],np.logspace(-1,max_log_alpha,n_alphas)))
        spectral.fit(affinity)        
        labels = spectral.labels_
        best_alphas = list()
        for i in range(n_alphas_to_return):
            idx = np.where(labels==i)[0]
            if 0 not in idx:  # because we don't want to include the cluster that contains alpha=0
                affinity_submatrix = affinity[idx][:, idx]
                sum_affinities = np.sum(affinity_submatrix, axis=0)
                exemplar_idx = idx[np.argmax(sum_affinities)]
                best_alphas.append(alphas[exemplar_idx])
        return np.sort(best_alphas), alphas, affinity[0,:], labels
Project: contrastive    Author: abidlabs    | project source | file source
def find_spectral_alphas(self, n_alphas, max_log_alpha, n_alphas_to_return):
        # Second occurrence in this project; the surviving text matches the
        # preceding find_spectral_alphas and returns the same
        # (np.sort(best_alphas), alphas, affinity[0,:], labels).
        ...
Project: dcss_single_cell    Author: srmcc    | project source | file source
def spectral(k, D, rs):
    """
    From clustering_on_transcript_compatibility_counts; see GitHub for the MIT license.
    """
    if D[1, 1] < 1: D = 1 - D  # Convert distance to similarity matrix
    spectral = cluster.SpectralClustering(n_clusters=k, affinity='precomputed', random_state=rs)
    spectral.fit(D)
    labels = spectral.labels_
    return labels

# gets the max-weight matching of a bipartite graph with row_label x column_label
# (weights are given by weight_matrix)
Project: word2vec_pipeline    Author: NIHOPA    | project source | file source
def spectral_clustering(S, X, config):
    '''
    Computes spectral clustering with a cosine affinity over the input
    vectors X (the precomputed similarity matrix S is not used here).
    Returns the labels associated with the clustering.
    '''
    from sklearn.cluster import SpectralClustering

    nk = int(config["n_clusters"])
    clf = SpectralClustering(affinity='cosine', n_clusters=nk)
    return clf.fit_predict(X)
Project: word2vec_pipeline    Author: NIHOPA    | project source | file source
def compute_centroid_set(self):

        INPUT_ITR = subset_iterator(
            X=self.docv,
            m=self.subcluster_m,
            repeats=self.subcluster_repeats,
        )

        kn = self.subcluster_kn
        clf = SpectralClustering(
            n_clusters=kn,
            affinity="precomputed",
        )

        C = []

        for X in INPUT_ITR:
            # Remove any rows that have zero vectors
            bad_row_idx = ((X**2).sum(axis=1) == 0)

            X = X[~bad_row_idx]
            A = cosine_affinity(X)

            # "Force" symmetry due to rounding errors
            A = np.maximum( A, A.transpose() )

            labels = clf.fit_predict(A)

            # Compute the centroids
            (N, dim) = X.shape
            centroids = np.zeros((kn, dim))

            for i in range(kn):
                idx = labels == i
                mu = X[idx].mean(axis=0)
                mu /= np.linalg.norm(mu)
                centroids[i] = mu

            C.append(centroids)

        return np.vstack(C)
Project: w2vec-similarity    Author: jayantj    | project source | file source
def spectral_clustering_clusters(similarity_matrix):
  return SpectralClustering(n_clusters=10, affinity='precomputed').fit(similarity_matrix)
Project: icing    Author: slipguru    | project source | file source
def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
                 n_init=10, gamma=1., affinity='rbf', n_neighbors=10,
                 eigen_tol=0.0, assign_labels='kmeans', degree=3, coef0=1,
                 kernel_params=None, norm_laplacian=True):
        super(SpectralClustering, self).__init__(
            n_clusters=n_clusters, eigen_solver=eigen_solver,
            random_state=random_state, n_init=n_init, gamma=gamma,
            affinity=affinity, n_neighbors=n_neighbors, eigen_tol=eigen_tol,
            assign_labels=assign_labels, degree=degree, coef0=coef0,
            kernel_params=kernel_params)
        self.norm_laplacian = norm_laplacian
Project: TextStageProcessor    Author: mhyhre    | project source | file source
def make_spectral_clustering(self, short_filenames, input_texts):

        output_dir = self.output_dir + 'spectral/'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.need_tf_idf:
            self.signals.PrintInfo.emit("?????? TF-IDF...")
            idf_filename = output_dir + 'tf_idf.csv'
            msg = self.calculate_and_write_tf_idf(idf_filename, input_texts)
            self.signals.PrintInfo.emit(msg)

        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(input_texts)

        svd = TruncatedSVD(2)
        normalizer = Normalizer(copy=False)
        lsa = make_pipeline(svd, normalizer)
        X = lsa.fit_transform(X)

        spectral = SpectralClustering(n_clusters=self.spectral_clusters_count)
        predict_result = spectral.fit_predict(X)
        self.signals.PrintInfo.emit('\nResults by cluster:\n')

        clasters_output = ''
        for claster_index in range(max(predict_result) + 1):
            clasters_output += ('Cluster ' + str(claster_index) + ':\n')
            for predict, document in zip(predict_result, short_filenames):
                if predict == claster_index:
                    clasters_output += ('  ' + str(document) + '\n')
            clasters_output += '\n'
        self.signals.PrintInfo.emit(clasters_output)
        self.signals.PrintInfo.emit('Saved to: ' + str(output_dir + 'clusters.txt'))
        writeStringToFile(clasters_output, output_dir + 'clusters.txt')

        self.draw_clusters_plot(X, predict_result, short_filenames)

    # aa = Affinity Propagation
Project: context_predictive_words    Author: Cogitans    | project source | file source
def SpectralAccuracy():
    clusterer = SpectralClustering(n_clusters=2)
    tdm = pickle.load(open(DATASET_PATH + "BOW_TDIDF.p", "rb"))
    predictions = clusterer.fit_predict(tdm)
    true_labels = pickle.load(open(OUTFILE_STANCE, "rb"))[0]
    numerical_mapped_1 = [0 if i == "Israeli" else 1 for i in true_labels]
    numerical_mapped_2 = [1 if i == "Israeli" else 0 for i in true_labels]
    one = f1_score(numerical_mapped_1, predictions)
    two = f1_score(numerical_mapped_2, predictions)
    print("The F1 score of Spectral Clustering on BOW (w/Tdidf) is: " + str(max(one, two)))
Project: aid    Author: cvjena    | project source | file source
def split(self, node):

        # Perform normalized cut
        try:
            ind = SpectralClustering(2, affinity = 'precomputed', assign_labels = 'discretize').fit_predict(node['affinity'])
        except KeyboardInterrupt:
            raise
        except:
            return None, None, 0

        # Create left and right node
        mask1, mask2 = (ind == 0), (ind == 1)
        if not (np.any(mask1) and np.any(mask2)):
            return None, None, 0
        left = { 'depth' : node['depth'] + 1, 'height' : 0, 'size' : 0, 'leafs' : 1, 'children' : [], 'parent' : node, 'items' : [f for i, f in enumerate(node['items']) if ind[i] == 0], 'affinity' : node['affinity'][np.ix_(mask1, mask1)] }
        right = { 'depth' : node['depth'] + 1, 'height' : 0, 'size' : 0, 'leafs' : 1, 'children' : [], 'parent' : node, 'items' : [f for i, f in enumerate(node['items']) if ind[i] == 1], 'affinity' : node['affinity'][np.ix_(mask2, mask2)] }

        # Force the node with the lower minimum distance to the query to be the left node
        if ind[0] == 1: # items are already sorted when passed to fit(), so we just need to look at the first item instead of re-computing all distances
            left, right = right, left

        # Modify parent
        node['children'] = [left, right]

        # Modify parent chain
        parent = node
        while parent is not None:
            parent['height'] += 1
            parent['size'] += 2
            parent['leafs'] += 1
            parent = parent['parent']

        return left, right, self.ncut_value(node['affinity'], ind)
Project: MasterDegree    Author: Waszker    | project source | file source
def _get_spectral(parameters):
    if parameters is None:
        parameters = {
            'n_clusters': 2,
            'affinity': 'nearest_neighbors'
        }
    return SpectralClustering(**parameters)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_spectral_clustering():
    S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0],
                  [1.0, 1.0, 1.0, 0.2, 0.0],
                  [1.0, 1.0, 1.0, 0.2, 0.0],
                  [0.2, 0.2, 0.2, 1.0, 1.0],
                  [0.0, 0.0, 0.0, 1.0, 1.0]])

    for eigen_solver in ('arpack', 'lobpcg'):
        for assign_labels in ('kmeans', 'discretize'):
            for mat in (S, sparse.csr_matrix(S)):
                model = SpectralClustering(random_state=0, n_clusters=2,
                                           affinity='precomputed',
                                           eigen_solver=eigen_solver,
                                           assign_labels=assign_labels
                                          ).fit(mat)
                labels = model.labels_
                if labels[0] == 0:
                    labels = 1 - labels

                assert_array_equal(labels, [1, 1, 1, 0, 0])

                model_copy = loads(dumps(model))
                assert_equal(model_copy.n_clusters, model.n_clusters)
                assert_equal(model_copy.eigen_solver, model.eigen_solver)
                assert_array_equal(model_copy.labels_, model.labels_)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_spectral_amg_mode():
    # Test the amg mode of SpectralClustering
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    try:
        from pyamg import smoothed_aggregation_solver

        amg_loaded = True
    except ImportError:
        amg_loaded = False
    if amg_loaded:
        labels = spectral_clustering(S, n_clusters=len(centers),
                                     random_state=0, eigen_solver="amg")
        # We don't care too much that it's good, just that it *worked*.
        # There does have to be some lower limit on the performance though.
        assert_greater(np.mean(labels == true_labels), .3)
    else:
        assert_raises(ValueError, spectral_embedding, S,
                      n_components=len(centers),
                      random_state=0, eigen_solver="amg")
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_spectral_unknown_mode():
    # Test that SpectralClustering fails with an unknown mode set.
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, eigen_solver="<unknown>")
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_spectral_unknown_assign_labels():
    # Test that SpectralClustering fails with an unknown assign_labels set.
    # Setup is identical to test_spectral_unknown_mode above.
    centers = np.array([[0., 0., 0.], [10., 10., 10.], [20., 20., 20.]])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # distance matrix
    S = sparse.coo_matrix(np.max(D) - D)  # Similarity matrix
    assert_raises(ValueError, spectral_clustering, S, n_clusters=2,
                  random_state=0, assign_labels="<unknown>")
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_spectral_clustering_sparse():
    X, y = make_blobs(n_samples=20, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)

    S = rbf_kernel(X, gamma=1)
    S = np.maximum(S - 1e-4, 0)
    S = sparse.coo_matrix(S)

    labels = SpectralClustering(random_state=0,
                                affinity='precomputed').fit(S).labels_
    assert_equal(adjusted_rand_score(y, labels), 1)
Project: extract    Author: dblalock    | project source | file source
def makeSpectral(X=None, k=2):
    return cluster.SpectralClustering(n_clusters=k,
                                      eigen_solver='arpack',
                                      affinity="nearest_neighbors")
Project: extract    Author: dblalock    | project source | file source
def makeClusterers(X, k=2):
    return [('MiniBatchKMeans', makeKMeans(X, k)),
            ('AffinityPropagation', makeAffinityProp()),
            ('MeanShift', makeMeanShift(X)),
            ('SpectralClustering', makeSpectral(X, k)),
            ('Ward', makeWard(X, k)),
            ('AgglomerativeAvg', makeAvgLinkage(X, k)),
            ('AgglomerativeMax', makeMaxLinkage(X, k)),
            ('AgglomerativeWard', makeWardLinkage(X, k)),
            ('DBSCAN', makeDBScan())]
