8 examples of 'from xgboost import XGBClassifier' in Python

Every 'from xgboost import XGBClassifier' code snippet below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.

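Before the longer examples, here is a minimal, self-contained sketch of the import the title refers to. Note that the class name is case-sensitive, so it must be written as XGBClassifier; the Iris data and train/test split are only illustrative.

# Minimal usage sketch (assumes xgboost and scikit-learn are installed).
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a classifier with a couple of common hyperparameters and score it.
clf = XGBClassifier(n_estimators=100, max_depth=3)
clf.fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))
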
def test_xgboost_multiclass():
    try:
        import xgboost
    except Exception:
        print("Skipping test_xgboost_multiclass!")
        return
    import shap

    # train XGBoost model
    X, Y = shap.datasets.iris()
    model = xgboost.XGBClassifier(objective="binary:logistic", max_depth=4)
    model.fit(X, Y)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)

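The snippet above comes from SHAP's test suite and only plots the first class. A hedged follow-up sketch, assuming the same shap/xgboost setup and the older SHAP behaviour where shap_values is a list with one array per class, would loop over every class:

# Hypothetical extension of the test above: plot feature 0 for every class.
import shap
import xgboost

X, Y = shap.datasets.iris()
model = xgboost.XGBClassifier(max_depth=4).fit(X, Y)
shap_values = shap.TreeExplainer(model).shap_values(X)

# In older SHAP releases shap_values is a per-class list for multiclass models.
for class_index in range(len(shap_values)):
    shap.dependence_plot(0, shap_values[class_index], X, show=False)
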
def train_model(self, x_train, y_train, x_val, y_val):
    print("Will train XGB for {} rounds, RandomSeed: {}".format(self.rounds, self.params['seed']))

    xg_train = xgb.DMatrix(x_train, label=y_train)

    if y_val is None:
        watchlist = [(xg_train, 'train')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist)
    else:
        early_stop = self.rounds if self.early_stop == 0 else self.early_stop
        xg_val = xgb.DMatrix(x_val, label=y_val)
        watchlist = [(xg_train, 'train'), (xg_val, 'eval')]
        model = xgb.train(self.params, xg_train, self.rounds, watchlist, early_stopping_rounds=early_stop)

    self.steps = model.best_iteration
    return model

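The wrapper above stores model.best_iteration in self.steps, so prediction should be limited to that many trees. A hedged companion sketch (the function name is hypothetical, and in older xgboost releases the cutoff is passed as ntree_limit, typically best_iteration + 1):

import xgboost as xgb

def predict_with_best_iteration(model, steps, x_test):
    # 'model' is the booster returned by train_model above and 'steps' the
    # best_iteration it recorded during early stopping.
    xg_test = xgb.DMatrix(x_test)
    return model.predict(xg_test, ntree_limit=steps + 1)
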
def train_model(self, train_x, train_y):
    """
    Train an XGBoost model, derive leaf-index features, and fit a logistic regression on top.
    :param train_x:
    :param train_y:
    :return:
    """
    self.xgb_clf = xgb.XGBClassifier()
    self.xgb_clf.fit(train_x, train_y, eval_metric=self.xgb_eval_metric,
                     eval_set=[(train_x, train_y)])
    xgb_eval_result = self.xgb_clf.evals_result()
    print('Xgb train eval result:', xgb_eval_result)

    train_x_mat = DMatrix(train_x)
    # get the boosted-tree leaf index of every tree for each training sample
    train_xgb_pred_mat = self.xgb_clf.get_booster().predict(train_x_mat,
                                                            pred_leaf=True)
    # one-hot encode the leaf indices
    self.one_hot_encoder = OneHotEncoder()
    train_lr_feature_mat = self.one_hot_encoder.fit_transform(train_xgb_pred_mat)
    print('train_mat:', train_lr_feature_mat.shape)

    # logistic regression on the encoded leaf features
    self.lr_clf = LogisticRegression()
    self.lr_clf.fit(train_lr_feature_mat, train_y)
    self.init = True

    # dump the xgboost + lr model
    with open(self.xgb_model_name, 'wb') as f1, open(self.lr_model_name, 'wb') as f2, \
            open(self.one_hot_model_name, 'wb') as f3:
        pickle.dump(self.xgb_clf, f1, True)
        pickle.dump(self.lr_clf, f2, True)
        pickle.dump(self.one_hot_encoder, f3, True)

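The routine above only covers training the XGBoost-plus-logistic-regression stack. A hedged sketch of the matching inference path (the method name is assumed; attribute names mirror the snippet) turns new rows into leaf indices, encodes them with the fitted encoder, and scores them with the logistic regression:

from xgboost import DMatrix

def predict_proba(self, test_x):
    # Assumes train_model above has run, so the encoder and both models exist.
    test_x_mat = DMatrix(test_x)
    # Leaf index of every tree for each test row.
    test_leaf_mat = self.xgb_clf.get_booster().predict(test_x_mat, pred_leaf=True)
    # Encode the leaves with the encoder fitted on the training leaves.
    test_lr_feature_mat = self.one_hot_encoder.transform(test_leaf_mat)
    return self.lr_clf.predict_proba(test_lr_feature_mat)
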
def train_model(self, train_x, train_y):
    """
    Train on a feature vector.
    :param train_x:
    :param train_y:
    :return:
    """
    self.clf = xgb.XGBClassifier()
    self.clf.fit(train_x, train_y, eval_metric=self.eval_metric,
                 eval_set=[(train_x, train_y)])
    self.init = True
    evals_result = self.clf.evals_result()
    print('evals_result:', evals_result)
    with open(self.xgb_model_name, 'wb') as f:
        pickle.dump(self.clf, f, True)

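Loading the pickled classifier back for scoring is the natural counterpart; a minimal sketch, assuming the same xgb_model_name path used above (the example file name is hypothetical):

import pickle

def load_model(xgb_model_name):
    # Restore the XGBClassifier that train_model pickled to disk.
    with open(xgb_model_name, 'rb') as f:
        return pickle.load(f)

# clf = load_model('xgb_model.pkl')   # hypothetical file name
# print(clf.predict(test_x))
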
def run_xgb(train_X, test_X, train_y, test_y, depth=6, a=0.0, l=1.5, seed=0):
    param = {'max_depth': depth, 'num_round': 20, 'eta': 0.3, 'silent': 1,
             'objective': 'binary:logistic', 'eval_metric': ['auc', 'error'],
             'alpha': a, 'lambda': l}
    if seed != 0:  # a specific random seed was passed in
        param['seed'] = seed
        param['colsample_bytree'] = 0.5
        param['colsample_bylevel'] = 0.5
    train_xgb = xgb.DMatrix(train_X, label=train_y)
    test_xgb = xgb.DMatrix(test_X, label=test_y)
    bst = xgb.train(param, train_xgb)
    ypred = bst.predict(test_xgb)
    metrics = gen_eval_metrics(test_y, ypred)
    accuracy = metrics[0]

    # cor = sum([int(ypred[i] + 0.5) == test_y[i] for i in range(len(ypred))])
    # accuracy = cor / len(test_y)
    print('Fold accuracy: ' + str(accuracy))
    return metrics

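gen_eval_metrics is defined elsewhere in that project. A hedged stand-in that reproduces the commented-out accuracy calculation and adds AUC via scikit-learn could look like this (the function name is hypothetical):

from sklearn.metrics import accuracy_score, roc_auc_score

def gen_eval_metrics_sketch(y_true, y_score):
    # Hypothetical replacement for gen_eval_metrics: threshold the predicted
    # probabilities at 0.5 for accuracy and keep the raw scores for AUC.
    y_pred = (y_score >= 0.5).astype(int)
    return accuracy_score(y_true, y_pred), roc_auc_score(y_true, y_score)
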
def get_xgb(**kwargs):
    grid = {
        # 'colsample_bytree': [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02,
        #                      0.05],
        'colsample_bytree': [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2],
        # 'colsample_bytree': [0.1, 0.2, 0.3, 0.5],
        # 'colsample_bytree': [0.1, 0.2, 0.5],
        # 'max_depth': [2, 3, 4],
        'learning_rate': [0.1],
        'n_estimators': [100],
        'seed': np.arange(kwargs.pop('n_iter', 1)) * 10 + 1,
    }
    args = {
        'subsample': 0.5,
        'colsample_bytree': 0.2,
        'learning_rate': 0.1,
        'seed': 99,
        'n_estimators': 100,
        'max_depth': 3,
        # 'silent': False,
    }
    args.update(kwargs)
    pprint.pprint(args)
    p = Pipeline([
        ('scale', StandardScaler()),
        ('fit', XGBRegressor(**args))
    ])
    return p, {'fit__' + k: v for k, v in grid.items()}

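The pipeline and prefixed grid returned by get_xgb are shaped for scikit-learn's search utilities; a sketch of wiring them into GridSearchCV, where X_train and y_train are placeholders for the caller's data:

from sklearn.model_selection import GridSearchCV

pipeline, param_grid = get_xgb(n_iter=3)
search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error')
search.fit(X_train, y_train)   # X_train / y_train supplied by the caller
print(search.best_params_)
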
def xgb_for_te(on_tr, te):
    train = on_tr.copy()
    test = te.copy()

    train_y = train['label'].values
    train_x = train.drop(['user_id', 'label'], axis=1).values
    test_x = test.drop(['user_id'], axis=1).values

    dtrain = xgb.DMatrix(train_x, label=train_y)
    dtest = xgb.DMatrix(test_x)

    # model parameters
    params = {'booster': 'gbtree',
              'objective': 'binary:logistic',
              'eval_metric': 'error',
              'eta': 0.03,
              'max_depth': 6,  # 4 3
              'colsample_bytree': 0.8,  # 0.8
              'subsample': 0.8,
              'scale_pos_weight': 1,
              'min_child_weight': 14  # 2 3
              }
    # train
    bst = xgb.train(params, dtrain, num_boost_round=240)
    # predict
    predict = bst.predict(dtest)
    test_xy = test[['user_id']]
    test_xy['predicted_score'] = predict
    test_xy.sort_values(['predicted_score'], ascending=False, inplace=True)
    # return users sorted by predicted score
    return test_xy

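The function returns users ranked by predicted score; in a competition-style workflow the usual next step is to keep the top of the ranking and write it out. A hedged sketch, with the cutoff and file name chosen only as examples:

# Hypothetical follow-up: keep the highest-scoring users and export them.
result = xgb_for_te(on_tr, te)    # on_tr / te are the caller's DataFrames
top_users = result.head(50)       # the cutoff of 50 is an arbitrary example
top_users.to_csv('submission.csv', index=False)
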
def predict(self, data, output_margin=False, ntree_limit=None, validate_features=True):
    """
    Predict with `data`.

    .. note:: This function is not thread safe.

      For each booster object, predict can only be called from one thread.
      If you want to run prediction using multiple threads, call ``xgb.copy()`` to make
      copies of the model object and then call ``predict()``.

    .. note:: Using ``predict()`` with DART booster

      If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
      some of the trees will be evaluated. This will produce incorrect results if ``data`` is
      not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
      a nonzero value, e.g.

      .. code-block:: python

        preds = bst.predict(dtest, ntree_limit=num_round)

    Parameters
    ----------
    data : numpy.array/scipy.sparse
        Data to predict with
    output_margin : bool
        Whether to output the raw untransformed margin value.
    ntree_limit : int
        Limit number of trees in the prediction; defaults to best_ntree_limit if defined
        (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
    validate_features : bool
        When this is True, validate that the Booster's and data's feature_names are identical.
        Otherwise, it is assumed that the feature_names are the same.

    Returns
    -------
    prediction : numpy array
    """
    # pylint: disable=missing-docstring,invalid-name
    test_dmatrix = DMatrix(data, missing=self.missing, nthread=self.n_jobs)
    # get ntree_limit to use - if none specified, default to
    # best_ntree_limit if defined, otherwise 0.
    if ntree_limit is None:
        ntree_limit = getattr(self, "best_ntree_limit", 0)
    return self.get_booster().predict(test_dmatrix,
                                      output_margin=output_margin,
                                      ntree_limit=ntree_limit,
                                      validate_features=validate_features)

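This predict comes from an older release of xgboost's scikit-learn wrapper; its docstring explains that ntree_limit falls back to best_ntree_limit when the estimator was trained with early stopping. A short sketch of relying on that behaviour with that wrapper API, where the train/validation splits are placeholders:

from xgboost import XGBClassifier

# X_train / X_valid / y_train / y_valid come from the caller.
clf = XGBClassifier(n_estimators=500)
clf.fit(X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        early_stopping_rounds=10)

# With ntree_limit left as None, predict() uses best_ntree_limit automatically.
preds = clf.predict(X_valid)
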
