A collection of 'test_train_split' code snippets from open source Python projects, covering train/validation/test splitting in several styles.

import skorch.dataset

def my_train_split(ds, y):
    # `corpus` comes from the enclosing scope in the original project;
    # the first 200 validation sentences become the validation set.
    return ds, skorch.dataset.Dataset(corpus.valid[:200], y=None)

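For context, a callable of this shape is typically passed to skorch's NeuralNet via its train_split argument. A minimal usage sketch, where MyModule is a hypothetical torch module not shown in the snippet:

import torch.nn as nn
from skorch import NeuralNet

net = NeuralNet(
    module=MyModule,             # hypothetical torch.nn.Module, not in the snippet
    criterion=nn.CrossEntropyLoss,
    train_split=my_train_split,  # skorch calls this with (dataset, y)
)
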
from math import floor

def train_test_split(X, y, train_percentage=0.8):
    '''
    Very simple split into train and test data. Works for any
    input shape without dependencies, but is a bit restricted.
    '''
    # Use the train_percentage argument (the original hardcoded 0.80 here).
    cut_idx = int(floor(X.shape[0] * train_percentage))
    X_train, X_test = X[:cut_idx], X[cut_idx:]
    y_train, y_test = y[:cut_idx], y[cut_idx:]
    print("Number of train samples", X_train.shape[0])
    print("Number of test samples", X_test.shape[0])

    return (X_train, y_train), (X_test, y_test)

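A quick usage sketch of this helper on a toy NumPy array; with the default 0.8 fraction, 10 samples split 8/2:

import numpy as np

X = np.arange(20).reshape(10, 2)
y = np.arange(10)
(X_train, y_train), (X_test, y_test) = train_test_split(X, y)
# prints: Number of train samples 8 / Number of test samples 2
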
import os
from glob import glob
from shutil import rmtree, copyfile

import numpy as np

def train_valid_test_split(SOURCE_DATA_DIR, TARGET_DATA_DIR, train_size=0.8, valid_size=0.1,
                           COMBINE_FOLDERS=None, SELECT_FOLDERS=None):
    """
    Usage:
        SOURCE_DATA_DIR = "data/ClothingAttributeDataset/images/"
        TARGET_DATA_DIR = "data/"

        train_valid_test_split(SOURCE_DATA_DIR, TARGET_DATA_DIR)
    """
    if COMBINE_FOLDERS is None:
        COMBINE_FOLDERS = dict()
    # Recreate empty train/test/valid folders under the target directory.
    for folder_name in ["train", "test", "valid"]:
        rmtree(os.path.join(TARGET_DATA_DIR, folder_name), ignore_errors=True)
        os.makedirs(os.path.join(TARGET_DATA_DIR, folder_name))

    # Shuffle the images, then split them 80/10/10 (by default) into
    # train, valid and test.
    filenames = np.random.permutation(glob(os.path.join(SOURCE_DATA_DIR, "*.jpg")))

    train_idx = int(len(filenames) * train_size)
    test_idx = int(len(filenames) * (train_size + valid_size))
    for idx, filename in enumerate(filenames):
        # os.path.basename is portable; the original split on "/".
        target_name = os.path.basename(filename)
        if idx < train_idx:
            target_filepath = os.path.join(TARGET_DATA_DIR, "train", target_name)
        elif idx < test_idx:
            target_filepath = os.path.join(TARGET_DATA_DIR, "valid", target_name)
        else:
            target_filepath = os.path.join(TARGET_DATA_DIR, "test", target_name)
        copyfile(filenames[idx], target_filepath)

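A usage sketch mirroring the docstring, followed by a sanity check of the resulting folder sizes (it assumes the source directory exists and holds .jpg files):

train_valid_test_split("data/ClothingAttributeDataset/images/", "data/")
for split in ("train", "valid", "test"):
    print(split, len(glob(os.path.join("data", split, "*.jpg"))))
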
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as sk_train_test_split

def train_test_split(fileName, type=1):
    header = ['user_id', 'item_id', 'rating', 'timestamp']
    if type == 1:
        df = pd.read_csv(fileName, sep='\t', names=header)
    else:
        # Multi-character separators need the python engine.
        df = pd.read_csv(fileName, sep='::', names=header, engine='python')
    n_users = df.user_id.unique().shape[0]
    users = df.user_id.max()
    n_items = df.item_id.unique().shape[0]
    items = df.item_id.max()

    print('Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items))
    print('The biggest ID of users = ' + str(users) + ' | The biggest ID of movies = ' + str(items))

    # The original used the deprecated sklearn.cross_validation module;
    # model_selection provides the same train_test_split.
    train_data, test_data = sk_train_test_split(df, test_size=0.1)
    train_data = pd.DataFrame(train_data)
    test_data = pd.DataFrame(test_data)
    # Create two user-item matrices, one for training and another for
    # testing. IDs are assumed to be 1-based, hence the -1 offsets.
    train_data_matrix = np.zeros((users, items))
    for line in train_data.itertuples():
        train_data_matrix[line[1] - 1, line[2] - 1] = line[3]

    test_data_matrix = np.zeros((users, items))
    for line in test_data.itertuples():
        test_data_matrix[line[1] - 1, line[2] - 1] = line[3]
    return train_data_matrix, test_data_matrix

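The tab-separated column layout matches the MovieLens 100k u.data file, and the '::' branch matches the MovieLens 1M format; a usage sketch assuming a local copy of the 100k file:

# hypothetical local path to the MovieLens 100k ratings file
train_matrix, test_matrix = train_test_split('ml-100k/u.data', type=1)
print(train_matrix.shape, test_matrix.shape)
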
from torch.utils.data import random_split

def train_valid_split(dataset, validation_amount):
    # validation_amount is a fraction, e.g. 0.2 for an 80/20 split.
    # The snippet's random_split matches torch.utils.data.random_split.
    valid_length = int(validation_amount * len(dataset))
    train_length = len(dataset) - valid_length

    train_dataset, valid_dataset = random_split(dataset, [train_length, valid_length])
    return train_dataset, valid_dataset

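A minimal sketch of calling this on a PyTorch TensorDataset:

import torch
from torch.utils.data import TensorDataset

dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))
train_ds, valid_ds = train_valid_split(dataset, validation_amount=0.2)
print(len(train_ds), len(valid_ds))  # 80 20
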
def test_split(self):
    """
    Apply split to the sample described in the docstring of prepare_time_inhomogeneous_cv_object, with n_splits = 4
    and n_test_splits = 2. The folds are [0 : 6], [6 : 11], [11 : 16], [16 : 21]. We use an embargo of zero.
    Inspection shows that the train-test pairs should respectively be
    [...]
    3. Train: folds 1 and 4, samples [0, 1, 2, 3, 4, 16, 17, 18, 19, 20]. Test: folds 2 and 3, samples [6, 7, 8, 9,
       10, 11, 12, 13, 14, 15]. Sample 5 is purged from the train set.
    4. Train: folds 2 and 3, samples [7, 8, 9, 10, 11, 12, 13, 14, 15]. Test: folds 1 and 4, samples [0, 1, 2, 3, 4,
       5, 16, 17, 18, 19, 20]. Sample 6 is embargoed.
    [...]
    """
    cv = CombPurgedKFoldCV(n_splits=4, n_test_splits=2)
    prepare_time_inhomogeneous_cv_object(cv)
    count = 0
    for train_set, test_set in cv.split(cv.X, pred_times=cv.pred_times, eval_times=cv.eval_times):
        count += 1
        if count == 3:
            result_train = np.array([0, 1, 2, 3, 4, 16, 17, 18, 19, 20])
            result_test = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
            self.assertTrue(np.array_equal(result_train, train_set))
            self.assertTrue(np.array_equal(result_test, test_set))
        if count == 4:
            result_train = np.array([7, 8, 9, 10, 11, 12, 13, 14, 15])
            result_test = np.array([0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20])
            self.assertTrue(np.array_equal(result_train, train_set))
            self.assertTrue(np.array_equal(result_test, test_set))

def train_val_test_split(id_lists, train_fraction, val_fraction, test_fraction):
    train_ids = []
    val_ids = []
    test_ids = []

    for dataset_idx, id_list in enumerate(id_lists):
        # The original printed len(id_lists) (the number of datasets)
        # instead of the size of the current list.
        print('dataset', dataset_idx, 'contains', len(id_list), 'items.')
        train, val, test = make_splits(id_list, [train_fraction, val_fraction, test_fraction])
        train_ids += train
        val_ids += val
        test_ids += test
        print('train_ids', len(train_ids), 'val_ids', len(val_ids), 'test_ids', len(test_ids))

    return {'train': train_ids, 'valid': val_ids, 'test': test_ids}

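make_splits is not shown in the snippet; below is a plausible stand-in (an assumption, not the project's implementation) that cuts a list into consecutive chunks by fraction, followed by a usage example:

def make_splits(ids, fractions):
    # Assumed helper: cut `ids` into len(fractions) consecutive chunks.
    out, start = [], 0
    for frac in fractions[:-1]:
        end = start + int(len(ids) * frac)
        out.append(ids[start:end])
        start = end
    out.append(ids[start:])  # the last chunk takes the remainder
    return out

splits = train_val_test_split([list(range(10)), list(range(10, 30))], 0.8, 0.1, 0.1)
print({k: len(v) for k, v in splits.items()})
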
from sklearn.model_selection import train_test_split

def train_test_split_result(clf, X, y):
    print("This is the random, percentage-split result ...")
    # sklearn.cross_validation was removed; model_selection is the
    # current home of train_test_split.
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = clf.fit(X_train, y_train)
    report_result(clf, X_test, y_test, y_train)

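A usage sketch with a scikit-learn classifier; report_result is an external helper in the original project, so a trivial stand-in is defined here (an assumption):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

def report_result(clf, X_test, y_test, y_train):  # assumed stand-in
    print("test accuracy:", clf.score(X_test, y_test))

X, y = load_iris(return_X_y=True)
train_test_split_result(DecisionTreeClassifier(), X, y)
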
@classmethod
def training_split(cls,
                   dataset_folder,
                   number_of_validation_examples=500,
                   maximum_disparity=255):
    """Returns training and validation datasets.

    An example from the FlyingThings3D dataset is added to the training
    or validation dataset if:

    (1) it is a training example of the FlyingThings3D dataset;
    (2) it does not have rendering artifacts;
    (3) all its disparities are within the range [0, maximum_disparity].

    Args:
        dataset_folder: folder with the FlyingThings3D dataset, which contains
                        a "frames_cleanpass" folder with left and right
                        images and a "disparity" folder with disparities.
        number_of_validation_examples: number of examples from the training
                        set that will be used for validation.
        maximum_disparity: maximum disparity in the training / validation
                        dataset. All training examples with disparity
                        larger than "maximum_disparity" are excluded
                        from the dataset.
    """
    examples = _find_examples(dataset_folder)
    # A fixed random seed guarantees that the splits are the same
    # across different runs.
    random.seed(0)
    random.shuffle(examples)
    examples = _split_examples_into_training_and_test_sets(examples)[0]
    examples = _filter_out_examples_with_rendering_artifacts(examples)
    examples = _filter_out_examples_with_large_disparities(
        examples, maximum_disparity)
    _dataset = FlyingThings3D(examples)
    validation_dataset, training_dataset = _dataset.split_in_two(
        size_of_first_subset=number_of_validation_examples)
    return training_dataset, validation_dataset

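A hedged usage sketch; the folder path is hypothetical, and the filtering helpers and FlyingThings3D class come from the snippet's own module:

training_dataset, validation_dataset = FlyingThings3D.training_split(
    "data/FlyingThings3D",  # hypothetical dataset location
    number_of_validation_examples=500,
    maximum_disparity=255)
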
def _get_adapted_dataset(split):
    """Gets the adapted dataset for the experiments.

    Args:
        split (str): 'train' or 'test'
    Returns:
        (tuple): images and labels
    """
    dataset = _get_dataset()
    key_img = 'x_' + split
    key_lbl = 'y_' + split

    # Only the non-train portion is adapted; the train portion is
    # returned as-is.
    if split != 'train':
        dataset[key_img], dataset[key_lbl] = _adapt(dataset[key_img],
                                                    dataset[key_lbl])

    return (dataset[key_img], dataset[key_lbl])

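Usage is by split name; _get_dataset and _adapt are module-private helpers not shown in the snippet:

x_train, y_train = _get_adapted_dataset('train')
x_test, y_test = _get_adapted_dataset('test')  # the test split gets _adapt applied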