Every 'from xgboost import XGBClassifier' code snippet below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.
def test_xgboost_multiclass():
    try:
        import xgboost
    except Exception as e:
        print("Skipping test_xgboost_multiclass!")
        return
    import shap

    # train XGBoost model
    X, Y = shap.datasets.iris()
    model = xgboost.XGBClassifier(objective="binary:logistic", max_depth=4)
    model.fit(X, Y)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)
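The comment above mentions that LightGBM exposes the same idea through pred_contrib. A minimal sketch of that alternative, assuming lightgbm is installed (it is not used anywhere in the original test):

import lightgbm as lgb
import shap

X, Y = shap.datasets.iris()
# use .values to sidestep LightGBM's handling of the iris column names
booster = lgb.train({'objective': 'multiclass', 'num_class': 3},
                    lgb.Dataset(X.values, label=Y), num_boost_round=50)
# per-feature contributions plus a bias column for each class,
# analogous to the SHAP values computed by TreeExplainer above
contribs = booster.predict(X.values, pred_contrib=True)
print(contribs.shape)  # (n_samples, num_class * (n_features + 1))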
def train_model(self, x_train, y_train, x_val, y_val):
    print("Will train XGB for {} rounds, RandomSeed: {}".format(self.rounds, self.params['seed']))

    xg_train = xgb.DMatrix(x_train, label=y_train)

    if y_val is None:
        watchlist = [(xg_train, 'train')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist)
    else:
        early_stop = self.rounds if self.early_stop == 0 else self.early_stop
        xg_val = xgb.DMatrix(x_val, label=y_val)
        watchlist = [(xg_train, 'train'), (xg_val, 'eval')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist,
                          early_stopping_rounds=early_stop)

    self.steps = model.best_iteration
    return model
def train_model(self, train_x, train_y):
    """
    Train an XGBoost model, then fit a logistic regression on the
    one-hot encoded leaf indices of the boosted trees.
    :param train_x: training feature matrix
    :param train_y: training labels
    :return: None
    """
    self.xgb_clf = xgb.XGBClassifier()
    self.xgb_clf.fit(train_x, train_y, eval_metric=self.xgb_eval_metric,
                     eval_set=[(train_x, train_y)])
    xgb_eval_result = self.xgb_clf.evals_result()
    print('Xgb train eval result:', xgb_eval_result)

    train_x_mat = DMatrix(train_x)
    # get the leaf index each sample falls into for every boosted tree
    train_xgb_pred_mat = self.xgb_clf.get_booster().predict(train_x_mat,
                                                            pred_leaf=True)
    # one-hot encode the leaf indices to build features for the LR model
    self.one_hot_encoder = OneHotEncoder()
    train_lr_feature_mat = self.one_hot_encoder.fit_transform(train_xgb_pred_mat)
    print('train_mat:', train_lr_feature_mat.shape)

    # logistic regression on top of the encoded leaf features
    self.lr_clf = LogisticRegression()
    self.lr_clf.fit(train_lr_feature_mat, train_y)
    self.init = True

    # dump the xgboost, lr, and one-hot encoder models
    with open(self.xgb_model_name, 'wb') as f1, open(self.lr_model_name, 'wb') as f2, \
            open(self.one_hot_model_name, 'wb') as f3:
        pickle.dump(self.xgb_clf, f1, True)
        pickle.dump(self.lr_clf, f2, True)
        pickle.dump(self.one_hot_encoder, f3, True)
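The snippet trains the GBDT-plus-LR stack but does not show inference. A minimal sketch of a matching prediction method, assuming the same attribute names are in scope (this predict_proba method is hypothetical, not part of the original class):

def predict_proba(self, test_x):
    # map each sample to the leaf it reaches in every boosted tree,
    # reusing the booster fitted in train_model above
    test_x_mat = DMatrix(test_x)
    test_xgb_pred_mat = self.xgb_clf.get_booster().predict(test_x_mat, pred_leaf=True)
    # encode the leaves with the encoder fitted on the training leaves
    test_lr_feature_mat = self.one_hot_encoder.transform(test_xgb_pred_mat)
    # final probabilities come from the logistic regression on top
    return self.lr_clf.predict_proba(test_lr_feature_mat)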
def train_model(self, train_x, train_y):
    """
    Train an XGBClassifier on the given feature vectors.
    :param train_x: training feature matrix
    :param train_y: training labels
    :return: None
    """
    self.clf = xgb.XGBClassifier()
    self.clf.fit(train_x, train_y, eval_metric=self.eval_metric,
                 eval_set=[(train_x, train_y)])
    self.init = True
    evals_result = self.clf.evals_result()
    print('evals_result:', evals_result)
    with open(self.xgb_model_name, 'wb') as f:
        pickle.dump(self.clf, f, True)
def run_xgb(train_X, test_X, train_y, test_y, depth=6, a=0.0, l=1.5, seed=0):
    param = {'max_depth': depth, 'eta': 0.3, 'silent': 1,
             'objective': 'binary:logistic', 'eval_metric': ['auc', 'error'],
             'alpha': a, 'lambda': l}
    if seed != 0:  # a specific random seed was entered
        param['seed'] = seed
        param['colsample_bytree'] = 0.5
        param['colsample_bylevel'] = 0.5
    train_xgb = xgb.DMatrix(train_X, label=train_y)
    test_xgb = xgb.DMatrix(test_X, label=test_y)
    # pass the round count to xgb.train directly; a 'num_round' key inside
    # the parameter dict is ignored by the library
    bst = xgb.train(param, train_xgb, num_boost_round=20)
    ypred = bst.predict(test_xgb)
    metrics = gen_eval_metrics(test_y, ypred)
    accuracy = metrics[0]

    # cor = sum([int(ypred[i] + 0.5) == test_y[i] for i in range(len(ypred))])
    # accuracy = cor / len(test_y)
    print('Fold accuracy: ' + str(accuracy))
    return metrics
def get_xgb(**kwargs):
    grid = {
        #'colsample_bytree': [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05],
        'colsample_bytree': [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2],
        #'colsample_bytree': [0.1, 0.2, 0.3, 0.5],
        #'colsample_bytree': [0.1, 0.2, 0.5],
        #'max_depth': [2, 3, 4],
        'learning_rate': [0.1],
        'n_estimators': [100],
        'seed': np.arange(kwargs.pop('n_iter', 1)) * 10 + 1,
    }
    args = {
        'subsample': 0.5,
        'colsample_bytree': 0.2,
        'learning_rate': 0.1,
        'seed': 99,
        'n_estimators': 100,
        'max_depth': 3,
        #'silent': False,
    }
    args.update(kwargs)
    pprint.pprint(args)
    p = Pipeline([
        ('scale', StandardScaler()),
        ('fit', XGBRegressor(**args))
    ])
    return p, {'fit__' + k: v for k, v in grid.items()}
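The 'fit__' prefix on the grid keys matches the name of the XGBRegressor step in the Pipeline, which suggests the returned pair is meant to be consumed by a scikit-learn parameter search. A minimal usage sketch under that assumption (the random data is purely illustrative, and it presumes the snippet's own imports of numpy, pprint, Pipeline, StandardScaler and XGBRegressor are in scope):

import numpy as np
from sklearn.model_selection import GridSearchCV

X = np.random.rand(100, 8)
y = np.random.rand(100)

pipeline, param_grid = get_xgb()  # defaults from `args`, grid keys prefixed with 'fit__'
# GridSearchCV routes 'fit__*' parameters to the XGBRegressor step
# while the 'scale' step stays fixed
search = GridSearchCV(pipeline, param_grid, cv=3)
search.fit(X, y)
print(search.best_params_)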
def xgb_for_te(on_tr, te):
    train = on_tr.copy()
    test = te.copy()

    train_y = train['label'].values
    train_x = train.drop(['user_id', 'label'], axis=1).values
    test_x = test.drop(['user_id'], axis=1).values

    dtrain = xgb.DMatrix(train_x, label=train_y)
    dtest = xgb.DMatrix(test_x)

    # model parameters
    params = {'booster': 'gbtree',
              'objective': 'binary:logistic',
              'eval_metric': 'error',
              'eta': 0.03,
              'max_depth': 6,  # 4 3
              'colsample_bytree': 0.8,  # 0.8
              'subsample': 0.8,
              'scale_pos_weight': 1,
              'min_child_weight': 14  # 2 3
              }
    # train
    bst = xgb.train(params, dtrain, num_boost_round=240)
    # predict
    predict = bst.predict(dtest)
    test_xy = test[['user_id']].copy()  # copy to avoid a pandas SettingWithCopyWarning
    test_xy['predicted_score'] = predict
    test_xy.sort_values(['predicted_score'], ascending=False, inplace=True)
    # return predictions sorted by score, highest first
    return test_xy
def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
    """
    Predict with `data`.

    .. note:: This function is not thread safe.

        For each booster object, predict can only be called from one thread.
        If you want to run prediction using multiple threads, call ``xgb.copy()`` to make
        copies of the model object and then call ``predict()``.

    .. note:: Using ``predict()`` with DART booster

        If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
        some of the trees will be evaluated. This will produce incorrect results if ``data`` is
        not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
        a nonzero value, e.g.

        .. code-block:: python

            preds = bst.predict(dtest, ntree_limit=num_round)

    Parameters
    ----------
    data : numpy.array/scipy.sparse
        Data to predict with
    output_margin : bool
        Whether to output the raw untransformed margin value.
    ntree_limit : int
        Limit number of trees in the prediction; defaults to best_ntree_limit if defined
        (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
    validate_features : bool
        When this is True, validate that the Booster's and data's feature_names are identical.
        Otherwise, it is assumed that the feature_names are the same.

    Returns
    -------
    prediction : numpy array
    """
    # pylint: disable=missing-docstring,invalid-name
    test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
    # get ntree_limit to use - if none specified, default to
    # best_ntree_limit if defined, otherwise 0.
    if ntree_limit is None:
        ntree_limit = getattr(self, "best_ntree_limit", 0)
    return self.get_booster().predict(test_dmatrix,
                                      output_margin=output_margin,
                                      ntree_limit=ntree_limit,
                                      validate_features=validate_features)
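As a follow-up to the DART note in the docstring above, a minimal usage sketch of fixing ntree_limit at prediction time. The data is synthetic, and the example assumes an xgboost release matching this signature, where ntree_limit is still accepted (newer releases use iteration_range instead):

import numpy as np
import xgboost as xgb

X = np.random.rand(200, 10)
y = np.random.randint(0, 2, 200)
dtrain = xgb.DMatrix(X, label=y)
dtest = xgb.DMatrix(np.random.rand(50, 10))

num_round = 50
params = {'booster': 'dart', 'objective': 'binary:logistic', 'rate_drop': 0.1}
bst = xgb.train(params, dtrain, num_boost_round=num_round)

# without ntree_limit a DART booster applies dropout at prediction time;
# setting it to num_round evaluates every trained tree, as recommended above
preds = bst.predict(dtest, ntree_limit=num_round)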