Every line of the 'decisiontreeclassifier' code snippets below is scanned for vulnerabilities by our powerful machine learning engine, which combs millions of open source libraries to help keep your Python code secure.
import numpy as np
from sklearn import tree

classes = []  # filled with every distinct tree species seen in the data

def decisiontree(data):
    Xt = []
    Yt = []
    Xv = []
    Yv = []
    # Shuffles the data, then puts 90% in the training set and 10% in the validation set.
    np.random.shuffle(data)
    trainingsize = 0.9 * len(data)
    training = data[:int(trainingsize)]
    validation = data[int(trainingsize):]

    # Creates the X and Y parts of the training and validation sets.
    # Also fills the tree species list (classes) with all the different species.
    for line in training:
        if line[-1] not in classes:
            classes.append(line[-1])
        Xt.append(line[0:-1])
        Yt.append(line[-1])
    for line in validation:
        if line[-1] not in classes:
            # A species occurs only in the validation split: reshuffle and try again.
            return decisiontree(data)
        Xv.append(line[0:-1])
        Yv.append(line[-1])

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(Xt, Yt)
    return clf, Xt, Yt, Xv, Yv
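A minimal usage sketch for the snippet above, assuming data is a list of rows with numeric features first and the species label last; the file name trees.csv and its layout are hypothetical, not part of the original snippet.

import csv

# Hypothetical CSV: one row per tree, numeric features first, species label last.
with open('trees.csv') as f:
    data = [[float(v) for v in row[:-1]] + [row[-1]] for row in csv.reader(f)]

clf, Xt, Yt, Xv, Yv = decisiontree(data)
# Accuracy of the fitted classifier on the held-out 10% validation split.
print(clf.score(Xv, Yv))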
def train(self, num_classes=2, categorical_features=None, max_depth=5):
    # Trains a PySpark MLlib decision tree on this object's labelled feature vectors.
    categorical_features = categorical_features or {}
    model = DecisionTree.trainClassifier(
        self._labeled_feature_vector_rdd(),
        numClasses=num_classes,
        categoricalFeaturesInfo=categorical_features,
        maxDepth=max_depth)
    return DecisionTreeModel(model, self.feature_cols)
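For readers without the surrounding wrapper class, here is a minimal standalone sketch of the underlying MLlib call; the SparkContext setup and the toy data are assumptions added for illustration.

from pyspark import SparkContext
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import DecisionTree

sc = SparkContext('local', 'decision-tree-demo')

# Toy two-class dataset: each LabeledPoint is a label plus two numeric features.
points = [LabeledPoint(0.0, [0.0, 1.0]), LabeledPoint(1.0, [5.0, 3.0]),
          LabeledPoint(0.0, [0.5, 0.5]), LabeledPoint(1.0, [4.0, 4.0])]
model = DecisionTree.trainClassifier(sc.parallelize(points),
                                     numClasses=2,
                                     categoricalFeaturesInfo={},
                                     maxDepth=5)
print(model.predict([4.5, 3.5]))  # should predict 1.0 for this toy split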
from collections import Counter
from copy import deepcopy

def _create_tree(self, X, Y, feature_name):
    # Every sample carries the same label: return it as a leaf.
    if len(set(Y)) == 1:
        return list(Y)[0]

    most_common_Y = Counter(Y).most_common()[0][0]
    # Only the target column is left to split on: return the majority label.
    if X.shape[1] == 1 and X.columns[0] == '__target__':
        return most_common_Y

    best_feature, best_info_gain = self._get_best_feature(X, Y)
    if best_feature is None:
        return most_common_Y

    feature_name.remove(best_feature)
    self._shannon.setdefault(best_feature, [best_info_gain, 1])
    sub_columns = list(X.columns)  # X.columns is an Index; copy it to a mutable list
    sub_columns.remove(best_feature)
    subX = X[sub_columns]
    # '???' is the fallback branch used at prediction time for unseen feature values.
    subtree = {'???': most_common_Y}
    feature_column = X[best_feature]

    for value in set(feature_column):
        equal_value_index = [i for i, v in enumerate(feature_column) if v == value]
        x = subX.iloc[equal_value_index]  # select rows by position, not columns
        y = x['__target__']
        subtree[value] = self._create_tree(x, y, deepcopy(feature_name))
    return {best_feature: subtree}
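Because _create_tree returns a nested dict of the form {feature: {value: subtree_or_label, '???': majority_label}}, a small lookup routine can walk it. The helper below (predict_one) is an illustration added here, not part of the original class; it assumes each sample is given as a dict mapping feature names to values.

def predict_one(tree, sample):
    # Walk the nested-dict tree until a leaf label is reached.
    while isinstance(tree, dict):
        feature, branches = next(iter(tree.items()))
        # Unseen feature values fall back to the '???' majority branch.
        tree = branches.get(sample.get(feature), branches['???'])
    return tree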
import numpy as np

def predict(self, X):
    # Predicts a label for each row of X by walking the fitted tree from the root.
    predictions = np.zeros(X.shape[0])
    for i, observation in enumerate(X):
        predictions[i] = self.single_prediction(observation, self.root)
    return predictions
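A brief usage note, assuming numeric class labels (since predictions is a float array) and a fitted tree object here called model, which is a stand-in name:

import numpy as np

# Fraction of validation rows whose predicted label matches the true label.
accuracy = np.mean(model.predict(Xv) == Yv)
print('validation accuracy:', accuracy)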
def train_sgd_forest(X, Y,
                     num_trees=20,
                     max_depth=3,
                     bagging_percent=0.65,
                     randomize_alpha=False,
                     model_args=None,
                     tree_args=None):
    """A random forest whose base classifier is a tree of SGD classifiers.

    Parameters
    ----------
    X : numpy array containing input data.
        Should have samples for rows and features for columns.

    Y : numpy array containing class labels for each sample

    num_trees : how big is the forest?

    max_depth : maximum depth of each SGD tree

    bagging_percent : what subset of the data is each tree trained on?

    randomize_alpha : bool

    model_args : parameters for each SGD classifier

    tree_args : parameters for each tree
    """
    # Avoid mutable default arguments; build fresh dicts per call.
    model_args = model_args or {}
    tree_args = tree_args or {}
    bagsize = int(bagging_percent * X.shape[0])
    tree = mk_sgd_tree(bagsize, max_depth, randomize_alpha, model_args, tree_args)
    forest = ClassifierEnsemble(
        base_model=tree,
        num_models=num_trees,
        bagging_percent=bagging_percent)
    forest.fit(X, Y)
    return forest
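mk_sgd_tree and ClassifierEnsemble come from the snippet's own repository. For readers without that code, a comparable bagged-SGD ensemble can be sketched with scikit-learn; note this swaps the tree-of-SGD-classifiers base model for a plain SGDClassifier, so it is an analogy rather than a drop-in replacement.

from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import SGDClassifier

# 20 SGD classifiers, each fit on a 65% bootstrap sample, combined by voting.
# scikit-learn >= 1.2 names the argument `estimator`; older releases use `base_estimator`.
forest = BaggingClassifier(estimator=SGDClassifier(),
                           n_estimators=20,
                           max_samples=0.65)
# forest.fit(X, Y) followed by forest.predict(X_new) mirrors train_sgd_forest usage.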