Every line of the 'train and test data in machine learning' code snippets below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.
```python
def train(self, Train_data, Validation_data, Test_data):  # fit a dataset
    # Check initial performance before any training
    if self.verbose > 0:
        t2 = time()
        init_train = self.evaluate(Train_data)
        init_valid = self.evaluate(Validation_data)
        print("Init: \t train=%.4f, validation=%.4f [%.1f s]"
              % (init_train, init_valid, time() - t2))

    for epoch in range(self.epoch):  # xrange -> range for Python 3
        t1 = time()
        self.shuffle_in_unison_scary(Train_data['X'], Train_data['Y'])
        total_batch = int(len(Train_data['Y']) / self.batch_size)
        for i in range(total_batch):
            # generate a batch
            batch_xs = self.get_random_block_from_data(Train_data, self.batch_size)
            # Fit training
            self.partial_fit(batch_xs)
        t2 = time()

        # output validation
        train_result = self.evaluate(Train_data)
        valid_result = self.evaluate(Validation_data)

        self.train_rmse.append(train_result)
        self.valid_rmse.append(valid_result)

        if self.verbose > 0 and epoch % self.verbose == 0:
            print("Epoch %d [%.1f s]\ttrain=%.4f, validation=%.4f [%.1f s]"
                  % (epoch + 1, t2 - t1, train_result, valid_result, time() - t2))
        if self.eva_termination(self.valid_rmse):  # early stopping on validation RMSE
            break

    if self.pretrain_flag < 0:
        print("Save model to file as pretrain.")  # print statement -> function for Python 3
        self.saver.save(self.sess, self.save_file)
```
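The snippet relies on project-specific helpers that are not shown, notably `shuffle_in_unison_scary`, which must shuffle features and labels with the same permutation so rows stay aligned. A minimal sketch of that helper, assuming NumPy arrays (the body is an assumption, not the project's actual code):

```python
import numpy as np

def shuffle_in_unison_scary(self, a, b):
    # Sketch (assumption): shuffle two arrays with the same permutation
    # so each feature row stays aligned with its label.
    rng_state = np.random.get_state()
    np.random.shuffle(a)
    np.random.set_state(rng_state)  # replay the identical permutation
    np.random.shuffle(b)
```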
```python
def test_fit_and_predict(self):
    seed = 1709
    for dataset_id in [38]:  # 720 # 31,44,737
        df = pd.read_csv("./tests/data/{0}.csv".format(dataset_id))
        x_cols = [c for c in df.columns if c != "target"]
        X = df[x_cols]
        y = df["target"]

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            X, y, test_size=0.3, random_state=seed
        )
        automl = AutoML(
            total_time_limit=5,
            algorithms=["RF"],  # ["LightGBM", "RF", "NN", "CatBoost", "Xgboost"],
            # start_random_models=5,
            # hill_climbing_steps=3,
            # top_models_to_improve=3,
            train_ensemble=False,
            verbose=True,
        )
        automl.fit(X_train, y_train)
        # print(json.dumps(automl.to_json(), indent=4))
        response = automl.predict(X_test)["p_1"]
        # Compute the logloss on the test dataset
        ll = log_loss(y_test, response)
        print("(*) Dataset id {} logloss {}".format(dataset_id, ll))

        for i, m in enumerate(automl._models):
            response = m.predict(X_test)
            ll = log_loss(y_test, response)
            print("{}) Dataset id {} logloss {}".format(i, dataset_id, ll))
```
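The same holdout pattern works with any scikit-learn estimator; a minimal, self-contained sketch using synthetic data and `RandomForestClassifier` as a stand-in for the AutoML wrapper:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

# Synthetic binary-classification data stands in for the CSV fixtures.
X, y = make_classification(n_samples=500, n_features=10, random_state=1709)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1709
)

clf = RandomForestClassifier(random_state=1709).fit(X_train, y_train)
proba = clf.predict_proba(X_test)[:, 1]  # probability of the positive class
print("logloss:", log_loss(y_test, proba))
```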
```python
def train(training_pandasData, test_pandasData, label_col, feat_cols, n_trees, m_depth,
          learning_rate, loss, training_data_path, test_data_path):

    print("train: " + training_data_path)
    print("test: " + test_data_path)
    print("n_trees: ", n_trees)
    print("m-depth: ", m_depth)
    print("learning-rate: ", learning_rate)
    print("loss: " + loss)
    print("label-col: " + label_col)
    for feat in feat_cols:
        print("feat-cols: " + feat)

    # Split data into training and testing labels and features.
    trainingLabels = training_pandasData[label_col]
    trainingFeatures = training_pandasData[feat_cols]

    testLabels = test_pandasData[label_col]
    testFeatures = test_pandasData[feat_cols]

    # We will use a GBT regressor model.
    xgbr = xgb.XGBRegressor(max_depth=m_depth,
                            learning_rate=learning_rate,
                            n_estimators=n_trees)

    # Train the model.
    xgbr.fit(trainingFeatures, trainingLabels, eval_metric=loss)

    # Calculate the scores of the model.
    test_rmse = mean_squared_error(testLabels, xgbr.predict(testFeatures)) ** 0.5
    r2_score_training = xgbr.score(trainingFeatures, trainingLabels)
    r2_score_test = xgbr.score(testFeatures, testLabels)

    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Log the RMSE and R2 scores.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)

    # Save the model as an artifact (mlflow.sklearn, not the bare sklearn package).
    mlflow.sklearn.log_model(xgbr, "model")

    run_id = mlflow.active_run().info.run_id  # run_uuid is deprecated in favor of run_id
    print("Run with id %s finished" % run_id)
```
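A sketch of how this function might be invoked; the paths, label column, and feature columns are placeholders, and it assumes `import mlflow` plus an active MLflow tracking setup:

```python
import mlflow
import pandas as pd

train_df = pd.read_csv("data/train.csv")   # placeholder paths
test_df = pd.read_csv("data/test.csv")

with mlflow.start_run():  # metrics and the model artifact attach to this run
    train(train_df, test_df,
          label_col="price",                # hypothetical label column
          feat_cols=["sqft", "rooms"],      # hypothetical feature columns
          n_trees=100, m_depth=6, learning_rate=0.1, loss="rmse",
          training_data_path="data/train.csv", test_data_path="data/test.csv")
```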
```python
def train_test(self, train_path, test_path=None):
    # load train and (maybe) test data
    metadata = MetaData(label_column=self.label_column,
                        train_path=train_path,
                        test_path=test_path)
    self.num_classes = metadata.k_classes
    self.num_features = metadata.d_features

    # if necessary, cast judgment metric into its binary/multiary equivalent
    if self.num_classes == 2:
        if self.judgment_metric in [Metrics.F1_MICRO, Metrics.F1_MACRO]:
            self.judgment_metric = Metrics.F1
        elif self.judgment_metric in [Metrics.ROC_AUC_MICRO,
                                      Metrics.ROC_AUC_MACRO]:
            self.judgment_metric = Metrics.ROC_AUC
    else:
        if self.judgment_metric == Metrics.F1:
            self.judgment_metric = Metrics.F1_MACRO
        elif self.judgment_metric == Metrics.ROC_AUC:
            self.judgment_metric = Metrics.ROC_AUC_MACRO

    # load training data
    train_data = self.load_data(train_path)

    # if necessary, generate a permanent train/test split
    if test_path is not None:
        test_data = self.load_data(test_path)
    else:
        train_data, test_data = train_test_split(train_data,
                                                 test_size=self.testing_ratio,
                                                 random_state=self.random_state)

    # extract feature matrix and labels from raw data
    self.encoder = DataEncoder(label_column=self.label_column)
    X_train, y_train = self.encoder.fit_transform(train_data)
    X_test, y_test = self.encoder.transform(test_data)

    # create and cross-validate pipeline
    self.make_pipeline()
    cv_scores = self.cross_validate(X_train, y_train)

    # train and test the final model
    self.pipeline.fit(X_train, y_train)
    test_scores = self.test_final_model(X_test, y_test)
    return {'cv': cv_scores, 'test': test_scores}
```
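`MetaData`, `DataEncoder`, and `cross_validate` are project-specific helpers not shown here. A minimal stand-in for the cross-validation step using scikit-learn directly might look like the following; the pipeline contents and scoring name are assumptions:

```python
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Hypothetical pipeline standing in for self.make_pipeline().
pipeline = Pipeline([("scale", StandardScaler()),
                     ("clf", LogisticRegression(max_iter=1000))])

# Five-fold CV on the training split only; the held-out test set stays untouched.
cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring="f1_macro")
print("CV f1_macro: %.4f +/- %.4f" % (cv_scores.mean(), cv_scores.std()))
```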
```python
def load_test_data(self, input_data_file=''):

    if input_data_file == '':
        # default to the dataset bundled next to this module
        input_data_file = os.path.normpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__),
                         "dataset/logistic_regression_test.dat"))
    elif not os.path.isfile(input_data_file):
        print("Please make sure input_data_file path is correct.")
        return self.test_X, self.test_Y

    self.test_X, self.test_Y = utility.DatasetLoader.load(input_data_file)

    if self.feature_transform_mode in ('polynomial', 'legendre'):
        # strip the leading bias column before applying the transform
        self.test_X = self.test_X[:, 1:]

        self.test_X = utility.DatasetLoader.feature_transform(
            self.test_X,
            self.feature_transform_mode,
            self.feature_transform_degree
        )

    return self.test_X, self.test_Y
```
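`utility.DatasetLoader.feature_transform` is not shown. A minimal sketch of what a degree-d polynomial transform over each feature could look like; this is an assumption for illustration, not the library's actual implementation:

```python
import numpy as np

def polynomial_transform(X, degree):
    # Sketch (assumption): append x, x^2, ..., x^degree for every feature
    # column, then re-attach a leading bias column of ones.
    powers = [X ** d for d in range(1, degree + 1)]
    bias = np.ones((X.shape[0], 1))
    return np.hstack([bias] + powers)
```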
```python
def test_single_class_training_data(self):
    classifier = NaiveBayesClassifier()
    classifier.train((('A', 'a'), ('A', 'a'), ('A', 'a')))

    # failUnless/failUnlessEqual are deprecated unittest aliases
    # (removed in Python 3.12); use the modern assert* spellings.
    self.assertEqual(classifier.label('a'), 'A')
    distribution = classifier.label_distribution('a')
    self.assertEqual(len(distribution), 1)
    self.assertIn('A', distribution)
    # 0.0 here suggests the distribution stores log-probabilities
    # (log 1.0 == 0.0 for the only label).
    self.assertTrue(distribution['A'] == 0.0, distribution)
```
```python
def train(self):
    features, y = self.features_from_citations()
    self.vectorizer = DictVectorizer(sparse=True)
    # bug fix: vectorize the freshly extracted local `features`;
    # the original referenced self.features, which is never set here
    X_fv = self.vectorizer.fit_transform(features)

    self.clf = _get_SVM()

    ##
    # @TODO grid search over c?
    self.clf.fit(X_fv, y)
```
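The @TODO asks about a grid search over C. A minimal sketch with scikit-learn's GridSearchCV, assuming `_get_SVM()` returns an estimator with a `C` parameter (e.g. LinearSVC):

```python
from sklearn.model_selection import GridSearchCV

# Sketch: tune the SVM regularization strength C by cross-validation.
param_grid = {"C": [0.01, 0.1, 1.0, 10.0, 100.0]}
search = GridSearchCV(_get_SVM(), param_grid, cv=5)
search.fit(X_fv, y)
clf = search.best_estimator_  # refit on the full data with the best C
```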
```python
from math import ceil

import numpy as np


def test(model, X_test, y_test, process_X_data_func, process_y_data_func,
         nb_features, nb_classes, process_X_data_func_args={},
         process_y_data_func_args={}, batch_size=512, verbose=True):

    if verbose:
        print('{} test batches'
              .format(int(ceil(float(len(X_test)) / batch_size))))

    test_proba = np.empty([0, nb_classes])

    # testing by batch
    test_losses, test_accs, test_weights = [], [], []

    # itertools.izip is Python 2 only; the built-in zip is lazy in Python 3
    for i, (X, y) in enumerate(zip(chunks(X_test, batch_size),
                                   chunks(y_test, batch_size))):
        if i % 250 == 0 and verbose:
            print('-- test batch {}'.format(i))

        assert len(X) == len(y)  # chunk sizes should be equal
        w = len(X)  # chunk size serves as weight when averaging
        X = process_X_data_func(X, **process_X_data_func_args)
        y = process_y_data_func(y, **process_y_data_func_args)

        batch_loss, batch_acc = model.test_on_batch(X, y)
        batch_proba = model.predict_proba(X, batch_size=batch_size,
                                          verbose=0)

        test_losses.append(batch_loss)
        test_accs.append(batch_acc)
        test_proba = np.append(test_proba, batch_proba, axis=0)
        test_weights.append(w)

    test_loss = np.average(test_losses, weights=test_weights)
    test_acc = np.average(test_accs, weights=test_weights)

    if verbose:
        # '{:5f}' was missing its dot; '{:.5f}' gives five decimal places
        print('Final test loss: {:.5f} / accuracy: {:.15f}'
              .format(test_loss, test_acc))
        print()

    return (test_loss, test_acc), test_proba
```
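`chunks` is assumed by the snippet but not defined. A common sketch (an assumption) yields successive fixed-size slices:

```python
def chunks(seq, size):
    # Sketch (assumption): yield successive `size`-length slices of `seq`;
    # the final chunk may be shorter than `size`.
    for start in range(0, len(seq), size):
        yield seq[start:start + size]
```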
```python
def run(self, train_data, dev_data, test_data=None, n_epoch=20, batch_size=20, learning_rate=None):
    """Train, validate, and test.

    If there is no validation set, the test set is used as the
    validation set.
    """
    if learning_rate is not None:
        self.lr = learning_rate

    train_data = list(train_data)
    dev_data = list(dev_data)
    n_dev = len(dev_data)
    if test_data is not None:
        # list(None) would raise TypeError, so only materialize a real test set
        test_data = list(test_data)
    for e in range(n_epoch):
        self.train(train_data, batch_size, self.lr)
        print("Epoch {} : {} / {}".format(e + 1, self.evaluate(dev_data), n_dev))

    if test_data is not None:
        n_test = len(test_data)
        print("Test : {} / {}".format(self.evaluate(test_data), n_test))
```
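Note that the docstring promises a fallback to the test set when no validation set is supplied, but the body never implements it. A sketch of that guard, placed before the data is materialized (an addition for illustration, not part of the original):

```python
# Sketch: honor the documented fallback before the training loop.
if dev_data is None and test_data is not None:
    dev_data = test_data  # validate on the test set when no dev set exists
```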
```python
def train(self, data):

    if self.estimator is None:
        # logging.warning('msg', UserWarning) treats UserWarning as a
        # %-format argument; warnings.warn is the intended call here
        # (requires `import warnings`).
        warnings.warn('Model estimator not yet specified. '
                      'Please define or load an estimator.', UserWarning)

    self.model = OneVsRestClassifier(self.estimator).fit(data.X_train, data.y_train)
    self.dependent = data.dependent
    self.independent = [{"name": i} for i in data.independent]

    train_results, timestamp, train_time, train_data_balance = Models()._train(
        self.model, data.X_train, data.y_train,
        balance=data.balance,
        encoder=self.encoder)

    self.train_results = train_results
    self.train_timestamp = timestamp
    self.train_time = train_time
    self.train_data_balance = train_data_balance
```
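A minimal, self-contained illustration of the one-vs-rest wrapper used above, with scikit-learn only; the choice of LogisticRegression as the base estimator is an assumption:

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

# Iris has three classes, so OneVsRestClassifier fits three binary classifiers.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

ovr = OneVsRestClassifier(LogisticRegression(max_iter=1000))
ovr.fit(X_train, y_train)
print("test accuracy:", ovr.score(X_test, y_test))
```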