10 examples of 'train and test data in machine learning' in Python

Every line of 'train and test data in machine learning' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
def train(self, Train_data, Validation_data, Test_data):  # fit a dataset
    """Run the epoch loop, recording train/validation scores after each epoch.

    Written so that it runs unchanged on Python 2 and Python 3 (no ``xrange``,
    no ``print`` statement).

    Args:
        Train_data: mapping with at least the keys ``'X'`` and ``'Y'``; it is
            shuffled in place each epoch and sampled for mini-batches.
        Validation_data: data handed to ``self.evaluate`` after every epoch.
        Test_data: accepted for interface compatibility; not used here.

    Side effects:
        Appends one value per epoch to ``self.train_rmse`` and
        ``self.valid_rmse``; saves the session through ``self.saver`` when
        ``self.pretrain_flag`` is negative.
    """
    # Check initial performance before any training step.
    if self.verbose > 0:
        t2 = time()
        init_train = self.evaluate(Train_data)
        init_valid = self.evaluate(Validation_data)
        print("Init: \t train=%.4f, validation=%.4f [%.1f s]" % (init_train, init_valid, time() - t2))

    for epoch in range(self.epoch):
        t1 = time()
        self.shuffle_in_unison_scary(Train_data['X'], Train_data['Y'])
        total_batch = int(len(Train_data['Y']) / self.batch_size)
        for _ in range(total_batch):
            # generate a batch and fit on it
            batch_xs = self.get_random_block_from_data(Train_data, self.batch_size)
            self.partial_fit(batch_xs)
        t2 = time()

        # evaluate after the epoch; (time() - t2) below is the evaluation time
        train_result = self.evaluate(Train_data)
        valid_result = self.evaluate(Validation_data)

        self.train_rmse.append(train_result)
        self.valid_rmse.append(valid_result)

        if self.verbose > 0 and epoch % self.verbose == 0:
            print("Epoch %d [%.1f s]\ttrain=%.4f, validation=%.4f [%.1f s]"
                  % (epoch + 1, t2 - t1, train_result, valid_result, time() - t2))
        # stop early when the termination criterion on validation history fires
        if self.eva_termination(self.valid_rmse):
            break

    if self.pretrain_flag < 0:
        print("Save model to file as pretrain.")
        self.saver.save(self.sess, self.save_file)
def test_fit_and_predict(self):
    """Fit AutoML on a bundled CSV dataset and print test-set log loss.

    For each dataset id the CSV is split 70/30, an AutoML instance restricted
    to the "RF" algorithm is fitted, and the log loss is printed first for the
    AutoML prediction (its "p_1" entry) and then for every model in
    ``automl._models``.
    """
    random_seed = 1709
    for dataset_id in [38]:  # other ids that were tried: 720, 31, 44, 737
        frame = pd.read_csv("./tests/data/{0}.csv".format(dataset_id))
        feature_names = [name for name in frame.columns if name != "target"]
        X = frame[feature_names]
        y = frame["target"]

        split = sklearn.model_selection.train_test_split(
            X, y, test_size=0.3, random_state=random_seed
        )
        X_train, X_test, y_train, y_test = split

        automl = AutoML(
            total_time_limit=5,
            algorithms=["RF"],
            train_ensemble=False,
            verbose=True,
        )
        automl.fit(X_train, y_train)

        # Log loss of the combined AutoML prediction on the held-out part.
        automl_probs = automl.predict(X_test)["p_1"]
        automl_loss = log_loss(y_test, automl_probs)
        print("(*) Dataset id {} logloss {}".format(dataset_id, automl_loss))

        # Log loss of each individual model on the same held-out part.
        for position, model in enumerate(automl._models):
            model_loss = log_loss(y_test, model.predict(X_test))
            print("{}) Dataset id {} logloss {}".format(position, dataset_id, model_loss))
def train(training_pandasData, test_pandasData, label_col, feat_cols, n_trees, m_depth,
          learning_rate, loss, training_data_path, test_data_path):
    """Train an XGBoost regressor, report its scores and log them to MLflow.

    Args:
        training_pandasData: training table, indexed by column name.
        test_pandasData: test table, indexed by column name.
        label_col: name of the label column.
        feat_cols: iterable of feature column names.
        n_trees: passed to the regressor as ``n_estimators``.
        m_depth: passed to the regressor as ``max_depth``.
        learning_rate: passed to the regressor as ``learning_rate``.
        loss: passed to ``fit`` as ``eval_metric``.
        training_data_path: only echoed to stdout.
        test_data_path: only echoed to stdout.
    """
    # Echo the run configuration.
    print("train: " + training_data_path)
    print("test: " + test_data_path)
    print("n_trees: ", n_trees)
    print("m-depth: ", m_depth)
    print("learning-rate: ", learning_rate)
    print("loss: " + loss)
    print("label-col: " + label_col)
    for column in feat_cols:
        print("feat-cols: " + column)

    # Separate labels from features for both tables.
    y_train = training_pandasData[label_col]
    X_train = training_pandasData[feat_cols]
    y_test = test_pandasData[label_col]
    X_test = test_pandasData[feat_cols]

    # Build and fit the gradient-boosted-tree regressor.
    regressor = xgb.XGBRegressor(
        max_depth=m_depth,
        learning_rate=learning_rate,
        n_estimators=n_trees,
    )
    regressor.fit(X_train, y_train, eval_metric=loss)

    # RMSE is the square root of the mean squared error on the test table.
    test_predictions = regressor.predict(X_test)
    test_rmse = mean_squared_error(y_test, test_predictions) ** 0.5
    train_score = regressor.score(X_train, y_train)
    test_score = regressor.score(X_test, y_test)

    print("Test RMSE:", test_rmse)
    print("Training set score:", train_score)
    print("Test set score:", test_score)

    # Record the metrics on the active MLflow run.
    for metric_name, metric_value in (
        ("Test RMSE", test_rmse),
        ("Train R2", train_score),
        ("Test R2", test_score),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Store the fitted model as an artifact.
    sklearn.log_model(regressor, "model")

    run_id = mlflow.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
def train_test(self, train_path, test_path=None):
    """Cross-validate a pipeline on the training data, then score it on test data.

    Args:
        train_path: location of the training data.
        test_path: optional location of separate test data; when omitted, a
            split of the training data (``self.testing_ratio``,
            ``self.random_state``) is held out instead.

    Returns:
        dict with the cross-validation scores under ``'cv'`` and the final
        test scores under ``'test'``.
    """
    # Read dataset-level facts (class count, feature count).
    meta = MetaData(label_column=self.label_column,
                    train_path=train_path,
                    test_path=test_path)
    self.num_classes = meta.k_classes
    self.num_features = meta.d_features

    # Align the judgment metric with the binary / multi-class setting.
    is_binary = self.num_classes == 2
    if is_binary and self.judgment_metric in [Metrics.F1_MICRO, Metrics.F1_MACRO]:
        self.judgment_metric = Metrics.F1
    elif is_binary and self.judgment_metric in [Metrics.ROC_AUC_MICRO,
                                                Metrics.ROC_AUC_MACRO]:
        self.judgment_metric = Metrics.ROC_AUC
    elif not is_binary and self.judgment_metric == Metrics.F1:
        self.judgment_metric = Metrics.F1_MACRO
    elif not is_binary and self.judgment_metric == Metrics.ROC_AUC:
        self.judgment_metric = Metrics.ROC_AUC_MACRO

    # Load the training data, then obtain test data from file or by splitting.
    train_data = self.load_data(train_path)
    if test_path is None:
        train_data, test_data = train_test_split(train_data,
                                                 test_size=self.testing_ratio,
                                                 random_state=self.random_state)
    else:
        test_data = self.load_data(test_path)

    # Fit the encoder on the training part only, then reuse it for the test part.
    self.encoder = DataEncoder(label_column=self.label_column)
    X_train, y_train = self.encoder.fit_transform(train_data)
    X_test, y_test = self.encoder.transform(test_data)

    # Build the pipeline and cross-validate it.
    self.make_pipeline()
    cv_scores = self.cross_validate(X_train, y_train)

    # Fit on the full training part and evaluate on the held-out part.
    self.pipeline.fit(X_train, y_train)
    test_scores = self.test_final_model(X_test, y_test)
    return {'cv': cv_scores, 'test': test_scores}
def load_test_data(self, input_data_file=''):
    """Load the test set into ``self.test_X`` / ``self.test_Y`` and return both.

    Args:
        input_data_file: path of the data file. An empty string selects the
            bundled ``dataset/logistic_regression_test.dat`` next to this module.

    Returns:
        The tuple ``(self.test_X, self.test_Y)``. If a non-empty path does not
        point to an existing file, a hint is printed and the currently stored
        values are returned unchanged.
    """
    if input_data_file == '':
        module_dir = os.path.join(os.getcwd(), os.path.dirname(__file__))
        input_data_file = os.path.normpath(
            os.path.join(module_dir, "dataset/logistic_regression_test.dat"))
    elif not os.path.isfile(input_data_file):
        print("Please make sure input_data_file path is correct.")
        return self.test_X, self.test_Y

    self.test_X, self.test_Y = utility.DatasetLoader.load(input_data_file)

    # NOTE(review): the nesting below was reconstructed from a listing whose
    # indentation was lost; the transform is assumed to run only for these
    # two modes -- confirm against the upstream file.
    if self.feature_transform_mode in ('polynomial', 'legendre'):
        # Drop the first column before re-expanding the features.
        self.test_X = self.test_X[:, 1:]

        self.test_X = utility.DatasetLoader.feature_transform(
            self.test_X,
            self.feature_transform_mode,
            self.feature_transform_degree
        )

    return self.test_X, self.test_Y
def test_single_class_training_data(self):
    """A classifier trained on a single label must always predict that label.

    Uses ``assertEqual`` / ``assertIn`` instead of the ``failUnless*`` aliases,
    which were deprecated for years and are removed in Python 3.12.
    """
    classifier = NaiveBayesClassifier()
    classifier.train((('A', 'a'), ('A', 'a'), ('A', 'a')))

    self.assertEqual(classifier.label('a'), 'A')
    distribution = classifier.label_distribution('a')
    self.assertEqual(len(distribution), 1)
    self.assertIn('A', distribution)
    # The distribution itself is the failure message, to ease debugging.
    self.assertEqual(distribution['A'], 0.0, distribution)
def train(self):
    """Vectorize the citation features and fit the SVM on them.

    Stores the fitted ``DictVectorizer`` on ``self.vectorizer`` and the fitted
    classifier on ``self.clf``.
    """
    # NOTE(review): the feature list returned here is not used; the vectorizer
    # is fitted on self.features instead. Presumably features_from_citations()
    # also populates self.features -- confirm.
    features, labels = self.features_from_citations()

    vectorizer = DictVectorizer(sparse=True)
    self.vectorizer = vectorizer
    design_matrix = vectorizer.fit_transform(self.features)

    classifier = _get_SVM()
    self.clf = classifier

    ##
    # @TODO grid search over c?
    classifier.fit(design_matrix, labels)
def test(model, X_test, y_test, process_X_data_func, process_y_data_func,
         nb_features, nb_classes, process_X_data_func_args=None,
         process_y_data_func_args=None, batch_size=512, verbose=True):
    """Evaluate ``model`` on the test set batch by batch.

    Args:
        model: object providing ``test_on_batch(X, y)`` (returning a
            ``(loss, accuracy)`` pair) and ``predict_proba(X, batch_size, verbose)``.
        X_test: test inputs, chunked with ``chunks(X_test, batch_size)``.
        y_test: test targets, chunked the same way.
        process_X_data_func: callable applied to every input chunk.
        process_y_data_func: callable applied to every target chunk.
        nb_features: accepted for interface compatibility; not used here.
        nb_classes: width of the returned probability matrix.
        process_X_data_func_args: extra keyword arguments for
            ``process_X_data_func`` (``None`` means none).
        process_y_data_func_args: extra keyword arguments for
            ``process_y_data_func`` (``None`` means none).
        batch_size: chunk size.
        verbose: print progress and the final result.

    Returns:
        ``((test_loss, test_acc), test_proba)`` where loss and accuracy are
        averages over batches weighted by chunk size and ``test_proba`` stacks
        the per-batch probabilities row-wise.
    """
    # None instead of a shared mutable {} default.
    x_kwargs = {} if process_X_data_func_args is None else process_X_data_func_args
    y_kwargs = {} if process_y_data_func_args is None else process_y_data_func_args

    if verbose:
        print('{} test batches'
              .format(int(ceil(float(len(X_test)) / batch_size))))

    # Seed with an empty (0, nb_classes) array so that the result keeps that
    # shape even when there are no batches; concatenated once after the loop
    # instead of re-copying the whole array on every batch.
    proba_parts = [np.empty([0, nb_classes])]

    # testing by batch
    test_losses, test_accs, test_weights = [], [], []

    for i, (X, y) in enumerate(izip(chunks(X_test, batch_size),
                                    chunks(y_test, batch_size))):
        if i % 250 == 0 and verbose:
            print('-- test batch {}'.format(i))

        assert len(X) == len(y)  # chunk sizes should be equal
        w = len(X)  # chunk size serves as weight when averaging
        X = process_X_data_func(X, **x_kwargs)
        y = process_y_data_func(y, **y_kwargs)

        batch_loss, batch_acc = model.test_on_batch(X, y)
        batch_proba = model.predict_proba(X, batch_size=batch_size,
                                          verbose=0)

        test_losses.append(batch_loss)
        test_accs.append(batch_acc)
        proba_parts.append(batch_proba)
        test_weights.append(w)

    test_proba = np.concatenate(proba_parts, axis=0)

    test_loss = np.average(test_losses, weights=test_weights)
    test_acc = np.average(test_accs, weights=test_weights)

    if verbose:
        print('Final test loss: {:5f} / accuracy: {:.15f}'
              .format(test_loss, test_acc))
        print()

    return (test_loss, test_acc), test_proba
def run(self, train_data, dev_data, test_data=None, n_epoch=20, batch_size=20, learning_rate=None):
    """Train, validate and (optionally) test.

    If there is no validation set (``dev_data`` is ``None``), the test set is
    used as the validation set.

    Args:
        train_data: iterable of training examples.
        dev_data: iterable of validation examples, or ``None``.
        test_data: optional iterable of test examples.
        n_epoch: number of training epochs.
        batch_size: mini-batch size passed to ``self.train``.
        learning_rate: when given, replaces ``self.lr`` before training.
    """
    if learning_rate is not None:
        self.lr = learning_rate

    train_data = list(train_data)
    # Materialize the test set only when one was supplied; list(None) would
    # raise and made the "is not None" check further down unreachable.
    if test_data is not None:
        test_data = list(test_data)
    # Fall back to the (already materialized) test set for validation.
    if dev_data is None:
        dev_data = test_data
    dev_data = list(dev_data)
    n_dev = len(dev_data)

    for e in range(n_epoch):
        self.train(train_data, batch_size, self.lr)
        print("Epoch {} : {} / {}".format(e + 1, self.evaluate(dev_data), n_dev))

    if test_data is not None:
        n_test = len(test_data)
        print("Test : {} / {}".format(self.evaluate(test_data), n_test))
def train(self, data):
    """Fit a one-vs-rest classifier on ``data`` and record training results.

    Args:
        data: object exposing ``X_train``, ``y_train``, ``dependent``,
            ``independent`` (iterable of variable names) and ``balance``.

    Side effects:
        Sets ``self.model``, ``self.dependent``, ``self.independent`` (a list of
        ``{"name": ...}`` dicts) and the ``self.train_*`` attributes returned
        by ``Models()._train``.
    """
    if self.estimator is None:
        # logging.warning(msg, *args) treats extra positionals as %-format
        # arguments; the message has no placeholders, so passing UserWarning
        # made logging report a formatting error instead of the warning.
        logging.warning('Model estimator not yet specified. Please define or load an estimator.')

    self.model = OneVsRestClassifier(self.estimator).fit(data.X_train, data.y_train)
    self.dependent = data.dependent
    self.independent = [{"name": name} for name in data.independent]

    train_results, timestamp, train_time, train_data_balance = Models()._train(
        self.model,
        data.X_train,
        data.y_train,
        balance=data.balance,
        encoder=self.encoder,
    )

    self.train_results = train_results
    self.train_timestamp = timestamp
    self.train_time = train_time
    self.train_data_balance = train_data_balance

Related snippets