检索数据的 ValueError DataFrame.dtypes 必须是 int、float 或 bool

问题描述

我正在尝试使用多输出回归使用 train XGBoost 运行 Python 代码，但出现值错误。感谢您的帮助。

请找到我的数据样本

Layers  Model  Technique    Accuracy-1  Accuracy-2  Latency    time
18-27   Net     1           0.96         0.99       334368.0    0.99
38-37   MNet    1           0.76         0.99       313348.0    0.99

以下是我使用 XGBoost 的代码

def optimize(trial,x,y,regressor):
  max_depth = trial.suggest_int("max_depth",3,30)
  n_estimators = trial.suggest_int("n_estimators",100,3000)
  max_leaves= trial.suggest_int("max_leaves",1,10)
  colsample_bytree = trial.suggest_uniform('colsample_bytree',0.0,1.0) 
  gamma = trial.suggest_uniform('gamma',0.05)  
  min_child_weight = trial.suggest_uniform('min_child_weight',3)
  reg_lambda = trial.suggest_uniform('reg_lambda',0.5,1)
  model = xgb.XGBRegressor(
    objective ='reg:squarederror',n_estimators=n_estimators,max_depth=max_depth,learning_rate=learning_rate,colsample_bytree=colsample_bytree,gamma=gamma,min_child_weight=min_child_weight,reg_lambda=reg_lambda,max_leaves=max_leaves)
  kf=model_selection.KFold(n_splits=5)
  error=[]
  for idx in kf.split(X=x,y=y):
    train_idx,test_idx= idx[0],idx[1]
    xtrain=x[train_idx]
    ytrain=y[train_idx]
    xtest=x[test_idx]
    ytest=y[test_idx]   
    model.fit(x,y)
    y_pred = model.predict(xtest)
    fold_err = metrics.mean_squared_error(ytest,y_pred)
    error.append(fold_err)
  return np.mean(error)

def optimize_xgb(X,y):
  list_of_y = ["Target 1","Target 2","Target 3","Target 4"]
  for i,m in zip(range(y.shape[1]),list_of_y):
    print("{} optimized Parameters on MSE Error".format(m))
    optimization_function = partial(optimize,x=X,y=y[:,i],regressor="random_forest")
    study = optuna.create_study(direction="minimize")
    study.optimize(optimization_function,n_trials=1)
data["Latency"] = minmax_scale(data["Latency"])  
X = data[["Layers ","Model"]]
Y = data[['Accuracy-1','Accuracy-2','Latency','time ']]
encoder = OneHotEncoder(sparse=False)
onehot = encoder.fit_transform(X)
X_encoded  = encoder.fit_transform(X)
X_train,X_test,y_train,y_test = train_test_split(
np.array(X_encoded),np.array(Y),test_size=0.3,random_state=42)
def modeling(X,max_depth=10,n_estimators=300,max_leaves=10,learning_rate=0.01,colsample_bytree=0.001,gamma=0.0001,min_child_weight=2,reg_lambda=0.3):
  
    model = xgb.XGBRegressor(objective='reg:squarederror',max_leaves=max_leaves,colsample_bytree=colsample_bytree)
  if y.shape[1] ==1:
    print(" Apply Xgboost for one single Target....\n")
    model_xgb = model.fit(X,y)
  else:
    print(" Apply Xgboost for {} Targets....".format(y.shape[1]))
    model_xgb = MOR(model).fit(X,y)
  cv = RepeatedKFold(n_splits=5,n_repeats=3,random_state=1)
  scores = []
  for i in range(y.shape[1]):
    scores.append(np.abs(cross_val_score(model,X,y[:,scoring='neg_mean_squared_error',cv=cv,n_jobs=-1)))
    print('Mean MSE of the {} target : {}  ({})'.format(i,scores[i].mean(),scores[i].std()) )
  return model_xgb
model_xgb = modeling(X_train,optimize="no")
y_estimated = model_xgb.predict(X_test)
mse(y_estimated,y_test)
################
y = np.random.random((1000,1))
model_xgb = modeling(X,optimize="no")

错误检索 ValueError：数据的 DataFrame.dtypes 必须是 int、float 或 bool。没想到字段 Layers、Model 中的数据类型

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

categorical-data data-science machine-learning python xgboost