用于python中的XGBoostRegressor参数调整的贝叶斯优化

时间:2018-08-05 15:20:50

标签: python optimization bayesian xgboost

我正在尝试执行贝叶斯优化来优化XGBoost Regressor的参数,遵循以下代码:

def xgboostcv(max_depth,
              learning_rate,
              n_estimators,
              gamma,
              min_child_weight,
              max_delta_step,
              subsample,
              colsample_bytree,
              reg_alpha,
              reg_lambda,
              silent=True,
              nthread=-1,
              random_state=1):
    """Cross-validated objective function for Bayesian optimization of XGBRegressor.

    bayes_opt samples every hyperparameter as a float, so integer-valued
    parameters (max_depth, n_estimators) are cast with int() before being
    handed to the model.

    Returns the mean 5-fold CV score (negated MSE — higher is better,
    which is what BayesianOptimization.maximize() expects).

    NOTE: `train` and `y_train` are read from the enclosing scope and must
    be defined before this function is called.
    """
    # The scoring argument MUST be passed by keyword: the third positional
    # parameter of cross_val_score is `groups`, so passing the scoring
    # string positionally makes sklearn treat its characters as a
    # groups array — producing the "Found input variables with
    # inconsistent numbers of samples: [1456, 1456, 23]" ValueError.
    # "neg_mean_squared_error" is used because sklearn scorers follow the
    # maximize convention (greater is better).
    return cross_val_score(
        xgb.XGBRegressor(max_depth=int(max_depth),
                         learning_rate=learning_rate,
                         n_estimators=int(n_estimators),
                         silent=silent,
                         nthread=nthread,
                         gamma=gamma,
                         min_child_weight=min_child_weight,
                         max_delta_step=max_delta_step,
                         subsample=subsample,
                         colsample_bytree=colsample_bytree,
                         reg_alpha=reg_alpha,
                         reg_lambda=reg_lambda),
        train,
        y_train,
        scoring="neg_mean_squared_error",
        cv=5).mean()

# Search bounds for each hyperparameter. Every tuple must be
# (lower, upper) with lower < upper — bayes_opt samples uniformly
# between them, so a reversed pair silently corrupts the search.
xgboostBO = BayesianOptimization(xgboostcv,
                                 {'max_depth': (2, 5),
                                  'learning_rate': (0.01, 0.3),
                                  'n_estimators': (1000, 2500),
                                  # Fixed: bounds were reversed as (1., 0.01).
                                  'gamma': (0.01, 1.),
                                  'min_child_weight': (1, 10),
                                  'max_delta_step': (0, 0.1),
                                  'subsample': (0.5, 0.8),
                                  'colsample_bytree': (0.1, 0.99),
                                  'reg_alpha': (0.1, 0.5),
                                  'reg_lambda': (0.1, 0.9)
                                  })

# Run the Bayesian optimization loop (default init points + iterations).
xgboostBO.maximize()

print('Final Results')
# Best (maximum) cross-validation score found during the search.
print('XGBOOST: %f' % xgboostBO.res['max']['max_val'])

此优化代替GridSearchCV和RandomizedSearchCV用于查找学习模型的最佳参数。查找最佳参数时,它必须优于GridSearchCV。但是我收到一个错误,无法找出问题所在。有人有建议吗?:

Initialization
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Step |   Time |      Value |   colsample_bytree |     gamma |   learning_rate |   max_delta_step |   max_depth |   min_child_weight |   n_estimators |   reg_alpha |   reg_lambda |   subsample | 
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-90-397f04e73c8f> in <module>()
     12                                   })
     13 
---> 14 xgboostBO.maximize()
     15 
     16 print('Final Results')

/usr/local/lib/python2.7/dist-packages/bayes_opt/bayesian_optimization.pyc in maximize(self, init_points, n_iter, acq, kappa, xi, **gp_params)
    241             if self.verbose:
    242                 self.plog.print_header()
--> 243             self.init(init_points)
    244 
    245         y_max = self.space.Y.max()

/usr/local/lib/python2.7/dist-packages/bayes_opt/bayesian_optimization.pyc in init(self, init_points)
     87         # Evaluate target function at all initialization points
     88         for x in self.init_points:
---> 89             y = self._observe_point(x)
     90 
     91         # Add the points from `self.initialize` to the observations

/usr/local/lib/python2.7/dist-packages/bayes_opt/bayesian_optimization.pyc in _observe_point(self, x)
    102 
    103     def _observe_point(self, x):
--> 104         y = self.space.observe_point(x)
    105         if self.verbose:
    106             self.plog.print_step(x, y)

/usr/local/lib/python2.7/dist-packages/bayes_opt/target_space.pyc in observe_point(self, x)
    137             # measure the target function
    138             params = dict(zip(self.keys, x))
--> 139             y = self.target_func(**params)
    140             self.add_observation(x, y)
    141         return y

<ipython-input-89-3d7f26b78f91> in xgboostcv(max_depth, learning_rate, n_estimators, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree, reg_alpha, reg_lambda, silent, nthread, random_state)
     27                            y_train,
     28                            "root_mean_squared_error",
---> 29                            cv=5).mean()

/usr/local/lib/python2.7/dist-packages/sklearn/model_selection/_validation.pyc in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)
    340                                 n_jobs=n_jobs, verbose=verbose,
    341                                 fit_params=fit_params,
--> 342                                 pre_dispatch=pre_dispatch)
    343     return cv_results['test_score']
    344 

/usr/local/lib/python2.7/dist-packages/sklearn/model_selection/_validation.pyc in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score)
    190 
    191     """
--> 192     X, y, groups = indexable(X, y, groups)
    193 
    194     cv = check_cv(cv, y, classifier=is_classifier(estimator))

/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in indexable(*iterables)
    227         else:
    228             result.append(np.array(X))
--> 229     check_consistent_length(*result)
    230     return result
    231 

/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in check_consistent_length(*arrays)
    202     if len(uniques) > 1:
    203         raise ValueError("Found input variables with inconsistent numbers of"
--> 204                          " samples: %r" % [int(l) for l in lengths])
    205 
    206 

ValueError: Found input variables with inconsistent numbers of samples: [1456, 1456, 23]

1 个答案:

答案 0 :(得分:0)

从代码中删除 “mean_squared_error”。根本原因是:`cross_val_score` 的第三个位置参数是 `groups`,把评分字符串按位置传入会让 sklearn 把它的 23 个字符当成一个 23 个样本的 groups 数组,从而报 “inconsistent numbers of samples: [1456, 1456, 23]”。正确做法是用关键字传参 `scoring="neg_mean_squared_error"`(sklearn 的评分器约定分数越大越好,这也与 `maximize()` 一致)。