5 examples of 'decisiontreeclassifier' in Python

Every line of these 'decisiontreeclassifier' code snippets is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.

import numpy as np
from sklearn import tree

# Module-level list of class labels (tree species), filled in during training.
classes = []

def decisiontree(data):
    Xt = []
    Yt = []
    Xv = []
    Yv = []
    # Adds 90% of the data to the training set, 10% to the validation set.
    np.random.shuffle(data)
    trainingsize = 0.9 * len(data)
    training = data[:int(trainingsize)]
    validation = data[int(trainingsize):]

    # Creates the X and Y parts of the training and validation sets.
    # Also fills the tree species list (classes) with all different species.
    for line in training:
        if line[-1] not in classes:
            classes.append(line[-1])
        Xt.append(line[0:-1])
        Yt.append(line[-1])
    for line in validation:
        if line[-1] not in classes:
            # A species landed only in the validation set: reshuffle and retry.
            return decisiontree(data)
        Xv.append(line[0:-1])
        Yv.append(line[-1])

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(Xt, Yt)
    return clf, Xt, Yt, Xv, Yv
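
As a quick sanity check, the helper above can be driven with any dataset whose rows end in a class label. A minimal sketch using scikit-learn's bundled Iris data (assuming the imports and `classes` list above are in scope):

from sklearn.datasets import load_iris

iris = load_iris()
# Build rows of [feature1, ..., featureN, label], the layout decisiontree() expects.
data = np.column_stack([iris.data, iris.target]).tolist()

clf, Xt, Yt, Xv, Yv = decisiontree(data)
print("validation accuracy:", clf.score(Xv, Yv))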
def train(self, num_classes=2, categorical_features=None, max_depth=5):
    # Trains a PySpark MLlib decision tree (pyspark.mllib.tree.DecisionTree)
    # on this object's RDD of labeled feature vectors.
    categorical_features = categorical_features or {}
    model = DecisionTree.trainClassifier(
        self._labeled_feature_vector_rdd(),
        numClasses=num_classes,
        categoricalFeaturesInfo=categorical_features,
        maxDepth=max_depth)
    return DecisionTreeModel(model, self.feature_cols)
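
This method wraps PySpark MLlib's `DecisionTree.trainClassifier`; the `_labeled_feature_vector_rdd()` helper and `DecisionTreeModel` wrapper are project-specific. A minimal, standalone sketch of the underlying call (assuming a running SparkContext `sc`):

from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import DecisionTree

# A toy RDD of labeled points: label first, then the feature vector.
points = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(1.0, [1.0, 0.0]),
    LabeledPoint(0.0, [0.1, 0.9]),
    LabeledPoint(1.0, [0.9, 0.2]),
])

model = DecisionTree.trainClassifier(
    points, numClasses=2, categoricalFeaturesInfo={}, maxDepth=5)
print(model.predict([0.0, 1.0]))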
from collections import Counter
from copy import deepcopy

def _create_tree(self, X, Y, feature_name):
    # All remaining samples share one label: return it as a leaf.
    if len(set(Y)) == 1:
        return Y[0]

    most_common_Y = Counter(Y).most_common()[0][0]
    # Only the '__target__' column is left: fall back to the majority label.
    if X.shape[1] == 1 and X.columns[0] == '__target__':
        return most_common_Y

    best_feature, best_info_gain = self._get_best_feature(X, Y)
    if best_feature is None:
        return most_common_Y

    feature_name.remove(best_feature)
    self._shannon.setdefault(best_feature, [best_info_gain, 1])
    sub_columns = list(X.columns)  # pandas Index has no remove(); copy to a list.
    sub_columns.remove(best_feature)
    subX = X[sub_columns]
    subtree = {'???': most_common_Y}  # '???' stores the default (majority) label.
    feature_column = X[best_feature]

    for value in set(feature_column):
        equal_value_index = [i for i, v in enumerate(feature_column) if v == value]
        # Select rows by position and reset the index so Y[0] stays valid.
        x = subX.iloc[equal_value_index].reset_index(drop=True)
        y = x['__target__']
        subtree[value] = self._create_tree(x, y, deepcopy(feature_name))
    return {best_feature: subtree}
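
The method returns a nested dict of the form `{feature: {value: subtree_or_label, '???': default_label}}`. A hypothetical traversal for classifying a single row (not part of the original class) could look like:

def classify(tree_dict, row):
    # Leaves are plain labels; internal nodes are single-key dicts.
    if not isinstance(tree_dict, dict):
        return tree_dict
    feature, branches = next(iter(tree_dict.items()))
    # Fall back to the stored majority label ('???') for unseen values.
    subtree = branches.get(row.get(feature), branches['???'])
    return classify(subtree, row)

Here `row` is assumed to be a dict (or pandas Series) mapping feature names to values.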
def predict(self, X):
    # Route each observation through the fitted tree, starting at the root.
    predictions = np.zeros(X.shape[0])
    for i, observation in enumerate(X):
        predictions[i] = self.single_prediction(observation, self.root)
    return predictions
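
`single_prediction` itself is not shown in this snippet. A hypothetical version, assuming binary-split nodes that carry `feature_index`, `threshold`, `left`/`right` children, and a `value` label at the leaves:

def single_prediction(self, observation, node):
    # Hypothetical node layout: leaves have no children and store a label.
    if node.left is None and node.right is None:
        return node.value
    if observation[node.feature_index] <= node.threshold:
        return self.single_prediction(observation, node.left)
    return self.single_prediction(observation, node.right)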
def train_sgd_forest(X, Y,
                     num_trees=20,
                     max_depth=3,
                     bagging_percent=0.65,
                     randomize_alpha=False,
                     model_args=None,
                     tree_args=None):
    """A random forest whose base classifier is a tree of SGD classifiers.

    Parameters
    ----------
    X : numpy array containing input data.
        Should have samples for rows and features for columns.

    Y : numpy array containing class labels for each sample

    num_trees : number of trees in the forest

    max_depth : maximum depth of each tree

    bagging_percent : fraction of the data each tree is trained on

    randomize_alpha : bool

    model_args : parameters for each SGD classifier

    tree_args : parameters for each tree
    """
    # Avoid mutable default arguments.
    model_args = model_args or {}
    tree_args = tree_args or {}
    bagsize = int(bagging_percent * X.shape[0])
    tree = mk_sgd_tree(bagsize, max_depth, randomize_alpha, model_args, tree_args)
    forest = ClassifierEnsemble(
        base_model=tree,
        num_models=num_trees,
        bagging_percent=bagging_percent)
    forest.fit(X, Y)
    return forest
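
`mk_sgd_tree` and `ClassifierEnsemble` are project-specific helpers. For comparison, scikit-learn offers the same bagging idea out of the box; a rough equivalent that swaps the SGD-tree base model for a plain `SGDClassifier` would be:

from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import SGDClassifier

# 20 base classifiers, each fit on a 65% sample of the data, mirroring
# the num_trees / bagging_percent defaults above.
forest = BaggingClassifier(
    SGDClassifier(),
    n_estimators=20,
    max_samples=0.65,
)
# forest.fit(X, Y); forest.predict(X_new)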
