问题描述
我正在应用内核主成分分析 (KPCA) 来降低特征矩阵集的维数,以获得一组数据点。我浏览了 scikit 学习包中 KPCA 中使用的参数,并了解到如果选择其中之一,则有些参数应该可以工作(例如,如果选择了伽马,则不使用度数和系数),但是,当我尝试要将 hyperopt 包用于 KPCA,我不断收到以下错误:
错误信息:
ValueError: Precomputed metric requires shape (n_queries,n_indexed). Got (50,14) for 50 indexed.
我一直在尝试解决这个问题,但是,我一直收到这个错误。有人可以解释这个错误的原因和可能的解决方案吗?请找到以下代码:
代码:
from sklearn.decomposition import PCA,KernelPCA,SparsePCA,IncrementalPCA
from hyperopt import hp,tpe,atpe,fmin,Trials,rand,STATUS_OK
# Implementing Hyperparamater method:
models = {'pca' : PCA,'kpca' : KernelPCA,'spca' : SparsePCA,# 'ipca' : IncrementalPCA
}
def search_space(model):
# Initialising variables:
model = model.lower()
space = {}
# Calling the models:
if model == 'pca':
space = {'svd_solver' : hp.choice('svd_solver',["auto","full","arpack","randomized"]),}
elif model == 'kpca':
space = {'kernel' : hp.choice('kernel',['linear','poly','rbf','sigmoid','cosine','precomputed']),'gamma' : hp.choice('gamma',np.linspace(1,0.1,12)),'degree' : hp.choice('degree',np.linspace(3,10,8)),'coef0' : hp.choice('coef0',12))
}
elif model == 'spca':
space = {'alpha' : hp.choice('alpha',np.arange(1.0,15.0,0.2)),'ridge_alpha' : hp.choice('ridge_alpha',np.linspace(0.01,0.3,30)),'method' : hp.choice('method',['lars','cd']),'max_iter' : hp.choice('max_iter',[1000,1500,2000,2500,3000])
}
# elif model == 'ipca':
# space = {'batch_size' : hp.choice('batch_size',['gini','entropy']),# }
space['model'] = model
return space
def obj_fnc(params):
model = params.get('model').lower()
# X_ = scale_normalize(params,X[:])
del params['model']
clf = models[model](**params)
return (get_acc_status(clf,X))
def get_acc_status(clf,X):
X_reduced = clf.fit_transform(X)
# X_prereduced = clf.fit_inverse_transform(X_reduced)
# acc = -1 * mean_squared_error(X,X_prereduced)
X_prereduced = clf.inverse_transform(X_reduced)
# acc = -1 * mean_absolute_error(X,X_prereduced)
acc = -1 * r2_score(X,X_prereduced)
# acc = cross_val_score(clf,X).mean()
return {'loss': -acc,'status': STATUS_OK}
##### Hyperparameter optimisation:
# Running Bayesian Optimisation to get the best parameters:
start = time.time()
# Create the algorithms
tpe_algo = tpe.suggest
# rand_algo = rand.suggest
# atpe_algo = atpe.suggest
# Assigning model:
model = 'kpca'
# Creating the trial objects:
hypopt_trials = Trials()
# Getting the best parameters:
best_params = fmin(obj_fnc,search_space(model),algo=tpe_algo,max_evals=500,trials=hypopt_trials)
print("Best params: ",best_params)
print('Best accuracy: ',hypopt_trials.best_trial['result']['loss'])
print("[INFO] Baye. Opt. search took {:.2f} seconds".format(time.time() - start))
# Calling parameters:
## PCA:
svd_solver = ["auto","randomized"]
## KPCA:
kernel = ["linear","poly","rbf","sigmoid","cosine","precomputed"]
gamma = np.linspace(1,12)
degree = np.linspace(3,8)
coef0 = np.linspace(1,12)
kernel_gamma = ["poly","sigmoid"]
kernel_degree = "poly"
kernel_coef0 = "sigmoid"
## SPCA:
alpha = np.arange(1.0,0.2)
ridge_alpha = np.linspace(0.01,30)
method = ['lars','cd']
max_iter = [1000,3000]
# Creating the PCA models:
# pca = PCA(n_components=2,svd_solver=svd_solver[best_params['svd_solver'])
if any(x in best_params for x in kernel_gamma):
pca = KernelPCA(n_components=2,kernel=kernel[best_params['kernel']],gamma='{0}'.format(gamma[best_params['gamma']]))
if any(x in best_params for x in kernel_degree):
pca = KernelPCA(n_components=2,gamma='{0}'.format(gamma[best_params['gamma']]),degree='{0}'.format(degree[best_params['degree']]),coef0='{0}'.format(coef0[best_params['coef0']]))
if any(x in best_params for x in kernel_coef0):
pca = KernelPCA(n_components=2,coef0='{0}'.format(coef0[best_params['coef0']]))
# pca = SparsePCA(n_components=2,alpha='{0}'.format(alpha[best_params['alpha']]),ridge_alpha='{0}'.format(ridge_alpha[best_params['ridge_alpha']]),method=method[best_params['method']],max_iter='{0}'.format(max_iter[best_params['max_iter']]))
# pca = IncrementalPCA(n_components=2)
print('Model: ',pca)
PrincipalComponents = pca.fit_transform(X_std)
principalDf = pd.DataFrame(data = PrincipalComponents,columns = ['principal component 1','principal component 2'])
finalDf = pd.concat([principalDf,dataframe[['Label']]],axis = 1)
print('Principal Component Analysis: ')
print(principalDf)
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)