问题描述
我正在尝试使用多输出回归使用 train XGBoost 运行 Python 代码,但出现值错误。感谢您的帮助。
请找到我的数据样本
Layers Model Technique Accuracy-1 Accuracy-2 Latency time
18-27 Net 1 0.96 0.99 334368.0 0.99
38-37 MNet 1 0.76 0.99 313348.0 0.99
以下是我使用 XGBoost 的代码
def optimize(trial,x,y,regressor):
max_depth = trial.suggest_int("max_depth",3,30)
n_estimators = trial.suggest_int("n_estimators",100,3000)
max_leaves= trial.suggest_int("max_leaves",1,10)
colsample_bytree = trial.suggest_uniform('colsample_bytree',0.0,1.0)
gamma = trial.suggest_uniform('gamma',0.05)
min_child_weight = trial.suggest_uniform('min_child_weight',3)
reg_lambda = trial.suggest_uniform('reg_lambda',0.5,1)
model = xgb.XGBRegressor(
objective ='reg:squarederror',n_estimators=n_estimators,max_depth=max_depth,learning_rate=learning_rate,colsample_bytree=colsample_bytree,gamma=gamma,min_child_weight=min_child_weight,reg_lambda=reg_lambda,max_leaves=max_leaves)
kf=model_selection.KFold(n_splits=5)
error=[]
for idx in kf.split(X=x,y=y):
train_idx,test_idx= idx[0],idx[1]
xtrain=x[train_idx]
ytrain=y[train_idx]
xtest=x[test_idx]
ytest=y[test_idx]
model.fit(x,y)
y_pred = model.predict(xtest)
fold_err = metrics.mean_squared_error(ytest,y_pred)
error.append(fold_err)
return np.mean(error)
def optimize_xgb(X,y):
list_of_y = ["Target 1","Target 2","Target 3","Target 4"]
for i,m in zip(range(y.shape[1]),list_of_y):
print("{} optimized Parameters on MSE Error".format(m))
optimization_function = partial(optimize,x=X,y=y[:,i],regressor="random_forest")
study = optuna.create_study(direction="minimize")
study.optimize(optimization_function,n_trials=1)
data["Latency"] = minmax_scale(data["Latency"])
X = data[["Layers ","Model"]]
Y = data[['Accuracy-1','Accuracy-2','Latency','time ']]
encoder = OneHotEncoder(sparse=False)
onehot = encoder.fit_transform(X)
X_encoded = encoder.fit_transform(X)
X_train,X_test,y_train,y_test = train_test_split(
np.array(X_encoded),np.array(Y),test_size=0.3,random_state=42)
def modeling(X,max_depth=10,n_estimators=300,max_leaves=10,learning_rate=0.01,colsample_bytree=0.001,gamma=0.0001,min_child_weight=2,reg_lambda=0.3):
model = xgb.XGBRegressor(objective='reg:squarederror',max_leaves=max_leaves,colsample_bytree=colsample_bytree)
if y.shape[1] ==1:
print(" Apply Xgboost for one single Target....\n")
model_xgb = model.fit(X,y)
else:
print(" Apply Xgboost for {} Targets....".format(y.shape[1]))
model_xgb = MOR(model).fit(X,y)
cv = RepeatedKFold(n_splits=5,n_repeats=3,random_state=1)
scores = []
for i in range(y.shape[1]):
scores.append(np.abs(cross_val_score(model,X,y[:,scoring='neg_mean_squared_error',cv=cv,n_jobs=-1)))
print('Mean MSE of the {} target : {} ({})'.format(i,scores[i].mean(),scores[i].std()) )
return model_xgb
model_xgb = modeling(X_train,optimize="no")
y_estimated = model_xgb.predict(X_test)
mse(y_estimated,y_test)
################
y = np.random.random((1000,1))
model_xgb = modeling(X,optimize="no")
错误检索 ValueError:数据的 DataFrame.dtypes 必须是 int、float 或 bool。 没想到字段 Layers、Model 中的数据类型
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)