8 examples of 'xgboost classifier sklearn' in Python

Every line of the 'xgboost classifier sklearn' code snippets below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.

def train_model(self, x_train, y_train, x_val, y_val):
    print("Will train XGB for {} rounds, RandomSeed: {}".format(self.rounds, self.params['seed']))

    xg_train = xgb.DMatrix(x_train, label=y_train)

    if y_val is None:
        watchlist = [(xg_train, 'train')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist)
    else:
        early_stop = self.rounds if self.early_stop == 0 else self.early_stop
        xg_val = xgb.DMatrix(x_val, label=y_val)
        watchlist = [(xg_train, 'train'), (xg_val, 'eval')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist,
                          early_stopping_rounds=early_stop)

    self.steps = model.best_iteration
    return model
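The snippet above drives xgboost's low-level xgb.train API: training data goes into a DMatrix, a watchlist reports the eval metric each round, and early stopping caps the number of boosting rounds. Below is a minimal, self-contained sketch of the same pattern; the synthetic data, parameter values, and round counts are illustrative assumptions, not taken from the snippet.

import numpy as np
import xgboost as xgb

# illustrative synthetic data (assumption, not from the original source)
x_train = np.random.rand(200, 10)
y_train = np.random.randint(2, size=200)
x_val = np.random.rand(50, 10)
y_val = np.random.randint(2, size=50)

params = {'objective': 'binary:logistic', 'eval_metric': 'logloss', 'seed': 42}
xg_train = xgb.DMatrix(x_train, label=y_train)
xg_val = xgb.DMatrix(x_val, label=y_val)
watchlist = [(xg_train, 'train'), (xg_val, 'eval')]

# stop once the eval metric has not improved for 10 rounds
model = xgb.train(params, xg_train, 100, watchlist, early_stopping_rounds=10)
print('best iteration:', model.best_iteration)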
def train_model(self, train_x, train_y):
    """
    Train an XGBoost model, then feed its leaf indices through a one-hot
    encoder into a logistic regression (XGBoost + LR stacking).
    :param train_x: training features
    :param train_y: training labels
    :return:
    """
    self.xgb_clf = xgb.XGBClassifier()
    self.xgb_clf.fit(train_x, train_y, eval_metric=self.xgb_eval_metric,
                     eval_set=[(train_x, train_y)])
    xgb_eval_result = self.xgb_clf.evals_result()
    print('Xgb train eval result:', xgb_eval_result)

    train_x_mat = DMatrix(train_x)
    # get the leaf index each sample falls into, for every boosted tree
    train_xgb_pred_mat = self.xgb_clf.get_booster().predict(train_x_mat,
                                                            pred_leaf=True)
    # one-hot encode the leaf indices into sparse features for the LR
    self.one_hot_encoder = OneHotEncoder()
    train_lr_feature_mat = self.one_hot_encoder.fit_transform(train_xgb_pred_mat)
    print('train_mat:', train_lr_feature_mat.shape)

    # logistic regression on the encoded leaf features
    self.lr_clf = LogisticRegression()
    self.lr_clf.fit(train_lr_feature_mat, train_y)
    self.init = True

    # dump the xgboost + lr model
    with open(self.xgb_model_name, 'wb') as f1, open(self.lr_model_name, 'wb') as f2, \
            open(self.one_hot_model_name, 'wb') as f3:
        pickle.dump(self.xgb_clf, f1, True)
        pickle.dump(self.lr_clf, f2, True)
        pickle.dump(self.one_hot_encoder, f3, True)
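This snippet implements the classic XGBoost + LR stacking trick: each sample is represented by the index of the leaf it lands in for every boosted tree, those indices are one-hot encoded, and a logistic regression is trained on the resulting sparse matrix. Inference has to replay the same transforms. The method below is a sketch under the assumption that it lives on the same class as train_model above; it is not part of the original source.

def predict_proba(self, test_x):
    # hedged sketch (assumed method, not from the original snippet)
    test_x_mat = DMatrix(test_x)
    # leaf indices from the already-fitted booster
    test_leaf_mat = self.xgb_clf.get_booster().predict(test_x_mat, pred_leaf=True)
    # transform(), not fit_transform(): reuse the encoding learned in training
    test_lr_feature_mat = self.one_hot_encoder.transform(test_leaf_mat)
    return self.lr_clf.predict_proba(test_lr_feature_mat)

Note that if test data reaches leaves never seen during training, a default OneHotEncoder will raise; constructing it with handle_unknown='ignore' avoids that.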
def fit_gradient_boosting(features_train, labels_train):
    """
    Fit a gradient boosting algorithm and use cross validation to tune the hyperparameters

    :return: classifier
    """
    param_grid = {
        'min_samples_split': [100, 250, 500],
        'max_depth': [3, 4, 5]
    }

    clf = GradientBoostingClassifier(n_estimators=500, learning_rate=.1, random_state=42, verbose=2)

    print("Fitting Gradient Boosting Classifier")

    # Tune hyperparameters
    cv_clf = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10)

    # Fit classifier
    cv_clf.fit(features_train, labels_train)

    print("\nGradient Boosting Classifier:", cv_clf)

    # Save model
    pickle.dump(cv_clf, open("gmb_multiclass_xg.pkl", 'wb'))

    return cv_clf
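GridSearchCV refits the best parameter combination on the full training set by default, so the returned object can be used directly for prediction. An illustrative follow-up, assuming a held-out features_test array (not part of the original snippet):

cv_clf = fit_gradient_boosting(features_train, labels_train)
print(cv_clf.best_params_)  # best combination found in param_grid
print(cv_clf.best_score_)   # mean cross-validated score for that combination
labels_pred = cv_clf.predict(features_test)  # delegates to the refitted best estimator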
def test_xgboost_multiclass():
    try:
        import xgboost
    except Exception:
        print("Skipping test_xgboost_multiclass!")
        return
    import shap

    # train XGBoost model
    X, Y = shap.datasets.iris()
    model = xgboost.XGBClassifier(objective="binary:logistic", max_depth=4)
    model.fit(X, Y)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)
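In the shap versions this test targets, TreeExplainer.shap_values returns one (n_samples, n_features) array per class for a multiclass model, which is why the snippet indexes shap_values[0]. A brief hedged follow-on under that same assumption:

# summary plot for the first class; the class index is arbitrary
shap.summary_plot(shap_values[0], X, show=False)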
def train_model(self, train_x, train_y):
    """
    Train an XGBClassifier on the feature vectors.
    :param train_x: training features
    :param train_y: training labels
    :return:
    """
    self.clf = xgb.XGBClassifier()
    self.clf.fit(train_x, train_y, eval_metric=self.eval_metric,
                 eval_set=[(train_x, train_y)])
    self.init = True
    evals_result = self.clf.evals_result()
    print('evals_result:', evals_result)
    with open(self.xgb_model_name, 'wb') as f:
        pickle.dump(self.clf, f, True)
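The pickled classifier round-trips with pickle.load. An illustrative counterpart to the dump above; the file name and the new_x array are assumptions, not from the original snippet:

import pickle

with open('xgb_model.pkl', 'rb') as f:  # same path the model was dumped to
    clf = pickle.load(f)
pred_y = clf.predict(new_x)           # class labels
pred_prob = clf.predict_proba(new_x)  # per-class probabilities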
def predict(self, features, labels=None):
    preds = self.model.predict(xgb.DMatrix(features, label=labels),
                               ntree_limit=self.model.best_ntree_limit)
    return preds
def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
    """
    Predict with `data`.

    .. note:: This function is not thread safe.

        For each booster object, predict can only be called from one thread.
        If you want to run prediction using multiple threads, call ``xgb.copy()``
        to make copies of the model object and then call ``predict()``.

    .. note:: Using ``predict()`` with DART booster

        If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
        some of the trees will be evaluated. This will produce incorrect results if ``data``
        is not the training data. To obtain correct results on test sets, set ``ntree_limit``
        to a nonzero value, e.g.

        .. code-block:: python

            preds = bst.predict(dtest, ntree_limit=num_round)

    Parameters
    ----------
    data : numpy.array/scipy.sparse
        Data to predict with
    output_margin : bool
        Whether to output the raw untransformed margin value.
    ntree_limit : int
        Limit number of trees in the prediction; defaults to best_ntree_limit if defined
        (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
    validate_features : bool
        When this is True, validate that the Booster's and data's feature_names are
        identical. Otherwise, it is assumed that the feature_names are the same.

    Returns
    -------
    prediction : numpy array
    """
    # pylint: disable=missing-docstring,invalid-name
    test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
    # get ntree_limit to use - if none specified, default to
    # best_ntree_limit if defined, otherwise 0.
    if ntree_limit is None:
        ntree_limit = getattr(self, "best_ntree_limit", 0)
    return self.get_booster().predict(test_dmatrix,
                                      output_margin=output_margin,
                                      ntree_limit=ntree_limit,
                                      validate_features=validate_features)
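As the docstring notes, predict defaults ntree_limit to best_ntree_limit when the model was trained with early stopping, so callers automatically get only the trees up to the best iteration. An illustrative call, assuming training and test arrays, against the older xgboost API this docstring comes from (newer releases moved early stopping into the constructor and replaced ntree_limit with iteration_range):

clf = xgb.XGBClassifier(n_estimators=500)
clf.fit(x_train, y_train, eval_set=[(x_val, y_val)],
        eval_metric='logloss', early_stopping_rounds=10)
preds = clf.predict(x_test)                     # implicitly uses best_ntree_limit
preds_all = clf.predict(x_test, ntree_limit=0)  # force all trained trees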
def predict_two_class(self, data_x, y=None):
    # predict the probability of the two classes
    prediction_output = two_class_encoding(self.predict(data_x, y))

    return prediction_output
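two_class_encoding is not shown in this snippet. A common convention for binary models that emit a single probability p is to expand it into the two-column form [P(class 0), P(class 1)]; the helper below is a sketch of that assumption, not the original implementation:

import numpy as np

def two_class_encoding(pred_prob):
    # assumed helper: expand single-column probabilities to two columns
    p = np.asarray(pred_prob).ravel()
    return np.column_stack([1.0 - p, p])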
