Every line of the 'xgboost classifier sklearn' code snippets below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help ensure your Python code is secure.
import xgboost as xgb


def train_model(self, x_train, y_train, x_val, y_val):
    print("Will train XGB for {} rounds, RandomSeed: {}".format(self.rounds, self.params['seed']))

    xg_train = xgb.DMatrix(x_train, label=y_train)

    if y_val is None:
        # no validation set: train for the full number of rounds
        watchlist = [(xg_train, 'train')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist)
    else:
        # with a validation set: enable early stopping
        early_stop = self.rounds if self.early_stop == 0 else self.early_stop
        xg_val = xgb.DMatrix(x_val, label=y_val)
        watchlist = [(xg_train, 'train'), (xg_val, 'eval')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist,
                          early_stopping_rounds=early_stop)

    # remember the best boosting round (set when early stopping is used)
    self.steps = model.best_iteration
    return model
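
A self-contained sketch of the same low-level xgb.train() flow with early stopping; the dataset, parameter values, and round counts here are illustrative assumptions rather than values taken from the snippet:

import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# illustrative data split and parameters
X, y = load_breast_cancer(return_X_y=True)
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

params = {"objective": "binary:logistic", "eval_metric": "logloss", "seed": 42}
xg_train = xgb.DMatrix(x_train, label=y_train)
xg_val = xgb.DMatrix(x_val, label=y_val)
watchlist = [(xg_train, "train"), (xg_val, "eval")]

booster = xgb.train(params, xg_train, num_boost_round=500, evals=watchlist,
                    early_stopping_rounds=20)
print("best iteration:", booster.best_iteration)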

import pickle

import xgboost as xgb
from xgboost import DMatrix
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder


def train_model(self, train_x, train_y):
    """
    Train an XGBoost model, then fit a logistic regression on the
    one-hot encoded leaf indices of its trees (GBDT + LR).
    :param train_x: training feature matrix
    :param train_y: training labels
    :return:
    """
    self.xgb_clf = xgb.XGBClassifier()
    self.xgb_clf.fit(train_x, train_y, eval_metric=self.xgb_eval_metric,
                     eval_set=[(train_x, train_y)])
    xgb_eval_result = self.xgb_clf.evals_result()
    print('Xgb train eval result:', xgb_eval_result)

    # get the leaf index each sample reaches in every boosted tree
    train_x_mat = DMatrix(train_x)
    train_xgb_pred_mat = self.xgb_clf.get_booster().predict(train_x_mat,
                                                            pred_leaf=True)

    # one-hot encode the leaf indices to build sparse features for the LR
    self.one_hot_encoder = OneHotEncoder()
    train_lr_feature_mat = self.one_hot_encoder.fit_transform(train_xgb_pred_mat)
    print('train_mat:', train_lr_feature_mat.shape)

    # logistic regression on the encoded leaf features
    self.lr_clf = LogisticRegression()
    self.lr_clf.fit(train_lr_feature_mat, train_y)
    self.init = True

    # dump the xgboost + lr model components
    with open(self.xgb_model_name, 'wb') as f1, open(self.lr_model_name, 'wb') as f2, \
            open(self.one_hot_model_name, 'wb') as f3:
        pickle.dump(self.xgb_clf, f1, True)
        pickle.dump(self.lr_clf, f2, True)
        pickle.dump(self.one_hot_encoder, f3, True)
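
At prediction time the same three fitted objects must be applied in the same order. A minimal sketch of that inference path (the predict_ctr name and the test_x argument are illustrative, not part of the original snippet):

from xgboost import DMatrix

def predict_ctr(xgb_clf, one_hot_encoder, lr_clf, test_x):
    # map each row to the leaf it reaches in every boosted tree
    leaf_mat = xgb_clf.get_booster().predict(DMatrix(test_x), pred_leaf=True)
    # reuse the encoder fitted on the training leaves (transform, not fit_transform)
    lr_feature_mat = one_hot_encoder.transform(leaf_mat)
    # positive-class probability from the logistic regression
    return lr_clf.predict_proba(lr_feature_mat)[:, 1]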

import pickle

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV


def fit_gradient_boosting(features_train, labels_train):
    """
    Fit a gradient boosting classifier and use cross validation to tune the hyperparameters

    :return: classifier
    """
    param_grid = {
        'min_samples_split': [100, 250, 500],
        'max_depth': [3, 4, 5]
    }

    clf = GradientBoostingClassifier(n_estimators=500, learning_rate=.1, random_state=42, verbose=2)

    print("Fitting Gradient Boosting Classifier")

    # Tune hyperparameters with 10-fold cross validation
    cv_clf = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10)

    # Fit classifier
    cv_clf.fit(features_train, labels_train)

    print("\nGradient Boosting Classifier:", cv_clf)

    # Save model
    with open("gmb_multiclass_xg.pkl", 'wb') as f:
        pickle.dump(cv_clf, f)

    return cv_clf
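
A hypothetical follow-up showing how the pickled search object might be reloaded and inspected; the file name matches the snippet above, while features_test is an assumed held-out matrix:

import pickle

with open("gmb_multiclass_xg.pkl", "rb") as f:
    cv_clf = pickle.load(f)

print("Best parameters:", cv_clf.best_params_)
print("Best CV score:", cv_clf.best_score_)
# labels_pred = cv_clf.predict(features_test)  # features_test: held-out data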

def test_xgboost_multiclass():
    try:
        import xgboost
    except Exception:
        print("Skipping test_xgboost_multiclass!")
        return
    import shap

    # train XGBoost model
    X, Y = shap.datasets.iris()
    model = xgboost.XGBClassifier(objective="binary:logistic", max_depth=4)
    model.fit(X, Y)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)
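
In the SHAP versions this test targets, shap_values is a list with one matrix per class, which is why the test indexes shap_values[0]. Under that same version assumption, an overview plot across all classes could be added as an illustrative extra step (not part of the original test):

import shap

# bar-style overview of mean |SHAP value| per feature, split by class
shap.summary_plot(shap_values, X, show=False)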

import pickle

import xgboost as xgb


def train_model(self, train_x, train_y):
    """
    Train an XGBClassifier on the given feature vectors
    :param train_x: training feature matrix
    :param train_y: training labels
    :return:
    """
    self.clf = xgb.XGBClassifier()
    self.clf.fit(train_x, train_y, eval_metric=self.eval_metric,
                 eval_set=[(train_x, train_y)])
    self.init = True
    evals_result = self.clf.evals_result()
    print('evals_result:', evals_result)

    # persist the trained classifier
    with open(self.xgb_model_name, 'wb') as f:
        pickle.dump(self.clf, f, True)

def predict(self, features, labels=None):
    # predict with the trained booster, using only the trees up to the best early-stopping iteration
    preds = self.model.predict(xgb.DMatrix(features, label=labels),
                               ntree_limit=self.model.best_ntree_limit)
    return preds

def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
    """
    Predict with `data`.

    .. note:: This function is not thread safe.

        For each booster object, predict can only be called from one thread.
        If you want to run prediction using multiple threads, call ``xgb.copy()`` to make
        copies of the model object and then call ``predict()``.

    .. note:: Using ``predict()`` with DART booster

        If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
        some of the trees will be evaluated. This will produce incorrect results if ``data``
        is not the training data. To obtain correct results on test sets, set ``ntree_limit``
        to a nonzero value, e.g.

        .. code-block:: python

            preds = bst.predict(dtest, ntree_limit=num_round)

    Parameters
    ----------
    data : numpy.array/scipy.sparse
        Data to predict with
    output_margin : bool
        Whether to output the raw untransformed margin value.
    ntree_limit : int
        Limit number of trees in the prediction; defaults to best_ntree_limit if defined
        (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
    validate_features : bool
        When this is True, validate that the Booster's and data's feature_names are identical.
        Otherwise, it is assumed that the feature_names are the same.

    Returns
    -------
    prediction : numpy array
    """
    # pylint: disable=missing-docstring,invalid-name
    test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
    # get ntree_limit to use - if none specified, default to
    # best_ntree_limit if defined, otherwise 0.
    if ntree_limit is None:
        ntree_limit = getattr(self, "best_ntree_limit", 0)
    return self.get_booster().predict(test_dmatrix,
                                      output_margin=output_margin,
                                      ntree_limit=ntree_limit,
                                      validate_features=validate_features)
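
A short usage sketch of the ntree_limit defaulting described in this docstring, assuming the older scikit-learn wrapper API it belongs to (eval_set / early_stopping_rounds passed to fit(), ntree_limit still supported); the dataset and parameters are illustrative:

import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)

clf = xgb.XGBClassifier(n_estimators=500)
clf.fit(X_train, y_train, eval_set=[(X_valid, y_valid)],
        early_stopping_rounds=10, verbose=False)

preds_default = clf.predict(X_valid)                   # ntree_limit defaults to best_ntree_limit
preds_all_trees = clf.predict(X_valid, ntree_limit=0)  # explicitly use every tree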

def predict_two_class(self, data_x, y=None):
    # predict the probability of two classes
    prediction_output = two_class_encoding(self.predict(data_x, y))

    return prediction_output
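
two_class_encoding is not defined in the snippet; a plausible sketch of such a helper, assuming self.predict returns the positive-class probability for each sample, might look like this (purely illustrative):

import numpy as np

def two_class_encoding(positive_probs):
    # hypothetical helper: expand positive-class probabilities into an
    # (n_samples, 2) array of [P(class 0), P(class 1)]
    positive_probs = np.asarray(positive_probs).ravel()
    return np.column_stack((1.0 - positive_probs, positive_probs))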