Every line of the 'train_test_split in python' code snippets is scanned for vulnerabilities by our powerful machine-learning engine, which combs millions of open-source libraries to help ensure your Python code is secure.
def my_train_split(ds, y):
    """Pair ``ds`` with a fixed validation dataset.

    ``ds`` is returned untouched as the training part.  The validation part
    is a ``skorch.dataset.Dataset`` wrapping ``corpus.valid[:200]`` with no
    targets (``y=None``).  The ``y`` argument is accepted but not used.
    """
    validation_set = skorch.dataset.Dataset(corpus.valid[:200], y=None)
    return ds, validation_set
def train_test_split(X, y, train_percentage=0.8):
    '''
    Very simple ordered (non-shuffled) split into train and test data.

    Works for any input that exposes ``shape`` and supports slicing along
    its first axis; no shuffling or stratification is performed.

    Args:
        X: samples; ``X.shape[0]`` is taken as the number of samples.
        y: targets, sliced at the same index as ``X``.
        train_percentage: fraction (between 0 and 1) of the leading samples
            that go to the train set; the remainder forms the test set.

    Returns:
        ``(X_train, y_train), (X_test, y_test)``

    Raises:
        ValueError: if ``train_percentage`` is outside ``[0, 1]``.
    '''
    if not 0 <= train_percentage <= 1:
        raise ValueError(
            "train_percentage must be between 0 and 1, got %r" % (train_percentage,))

    # int() truncates, which equals floor() for the non-negative product here.
    cut_idx = int(X.shape[0] * train_percentage)
    X_train, X_test = X[:cut_idx], X[cut_idx:]
    y_train, y_test = y[:cut_idx], y[cut_idx:]
    print("Number of train samples", X_train.shape[0])
    print("Number of test samples", X_test.shape[0])

    return (X_train, y_train), (X_test, y_test)
def train_valid_test_split(SOURCE_DATA_DIR, TARGET_DATA_DIR, train_size=0.8, valid_size=0.1,
                           COMBINE_FOLDERS=None, SELECT_FOLDERS=None):
    """
    Randomly copy the ``*.jpg`` files of one folder into train/valid/test folders.

    Usage:
        SOURCE_DATA_DIR = "data/ClothingAttributeDataset/images/"
        TARGET_DATA_DIR = "data/"

        train_valid_test_split(SOURCE_DATA_DIR, TARGET_DATA_DIR)

    Args:
        SOURCE_DATA_DIR: folder whose ``*.jpg`` files are split.
        TARGET_DATA_DIR: folder under which ``train``, ``valid`` and ``test``
            sub-folders are (re)created. Any existing sub-folder of that name
            is deleted first.
        train_size: fraction of the files copied to ``train``.
        valid_size: fraction of the files copied to ``valid``; whatever is
            left after train and valid goes to ``test``.
        COMBINE_FOLDERS: accepted for backward compatibility; not used here.
        SELECT_FOLDERS: accepted for backward compatibility; not used here.
    """
    for folder_name in ["train", "test", "valid"]:
        rmtree(os.path.join(TARGET_DATA_DIR, folder_name), ignore_errors=True)
        os.makedirs(os.path.join(TARGET_DATA_DIR, folder_name))

    # Shuffle once, then cut the shuffled list at the train and train+valid marks.
    filenames = np.random.permutation(glob(os.path.join(SOURCE_DATA_DIR, "*.jpg")))

    train_idx = int(len(filenames) * train_size)
    test_idx = int(len(filenames) * (train_size + valid_size))
    for idx, filename in enumerate(filenames):
        # basename() copes with whichever path separator glob produced,
        # unlike splitting on a literal "/".
        target_name = os.path.basename(filename)
        if idx < train_idx:
            subset = "train"
        elif idx < test_idx:
            subset = "valid"
        else:
            subset = "test"
        copyfile(filename, os.path.join(TARGET_DATA_DIR, subset, target_name))
def _build_rating_matrix(frame, n_rows, n_cols):
    """Fill a zero (n_rows x n_cols) matrix with the ratings found in ``frame``."""
    matrix = np.zeros((n_rows, n_cols))
    for line in frame.itertuples():
        # itertuples() puts the index at position 0, so positions 1..3 are
        # user_id, item_id and rating. IDs are 1-based, hence the -1.
        matrix[line[1] - 1, line[2] - 1] = line[3]
    return matrix


def train_test_split(fileName, type=1):
    """Read a ratings file and return train/test user-item rating matrices.

    Args:
        fileName: path of the ratings file. Its columns are read as
            user_id, item_id, rating, timestamp (the file has no header row).
        type: 1 for a tab-separated file; any other value for a file whose
            fields are separated by "::".

    Returns:
        ``(train_data_matrix, test_data_matrix)``: two arrays of shape
        (largest user id, largest item id) holding the rating at
        ``[user_id - 1, item_id - 1]`` and 0 elsewhere. 90% of the rows go
        to the train matrix and 10% to the test matrix.
    """
    header = ['user_id', 'item_id', 'rating', 'timestamp']
    if type == 1:
        df = pd.read_csv(fileName, sep='\t', names=header)
    else:
        df = pd.read_csv(fileName, sep='::', names=header, engine='python')
    n_users = df.user_id.unique().shape[0]
    users = df.user_id.max()
    n_items = df.item_id.unique().shape[0]
    items = df.item_id.max()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items))
    print('The biggest ID of users = ' + str(users) + ' | The biggest ID of movies = ' + str(items))

    train_data, test_data = cv.train_test_split(df, test_size=0.1)
    train_data = pd.DataFrame(train_data)
    test_data = pd.DataFrame(test_data)

    # The matrices are sized by the largest IDs (not the number of distinct
    # IDs) so that every id - 1 is a valid index.
    train_data_matrix = _build_rating_matrix(train_data, users, items)
    test_data_matrix = _build_rating_matrix(test_data, users, items)
    return train_data_matrix, test_data_matrix
def train_valid_split(dataset, validation_amount):
    """Randomly divide ``dataset`` into a training and a validation subset.

    Args:
        dataset: object supporting ``len()`` that ``random_split`` accepts.
        validation_amount: fraction of the items assigned to validation; the
            validation size is that fraction of ``len(dataset)`` truncated to
            an integer, and training receives everything else.

    Returns:
        ``(train_dataset, valid_dataset)``
    """
    total = len(dataset)
    n_valid = int(validation_amount * total)
    n_train = total - n_valid

    train_part, valid_part = random_split(dataset, [n_train, n_valid])
    return train_part, valid_part
def test_split(self):
    """
    Check the third and fourth train/test pairs produced by CombPurgedKFoldCV.

    The splitter is built with n_splits = 4 and n_test_splits = 2 and applied to the sample described in the
    docstring of prepare_time_inhomogeneous_cv_object. The folds are [0 : 6], [6 : 11], [11 : 16], [16 : 21] and
    an embargo of zero is used. The expected pairs are:
        3. Train: folds 1 and 4, samples [0, 1, 2, 3, 4, 16, 17, 18, 19, 20]. Test: folds 2 and 3, samples
           [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]. Sample 5 is purged from the train set.
        4. Train: folds 2 and 3, samples [7, 8, 9, 10, 11, 12, 13, 14, 15]. Test: folds 1 and 4, samples
           [0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20]. Sample 6 is embargoed.
    Pairs at any other position are not checked.
    """
    cv = CombPurgedKFoldCV(n_splits=4, n_test_splits=2)
    prepare_time_inhomogeneous_cv_object(cv)

    # 1-based position of the pair -> (expected train indices, expected test indices)
    expected_by_position = {
        3: (np.array([0, 1, 2, 3, 4, 16, 17, 18, 19, 20]),
            np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15])),
        4: (np.array([7, 8, 9, 10, 11, 12, 13, 14, 15]),
            np.array([0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20])),
    }

    pairs = cv.split(cv.X, pred_times=cv.pred_times, eval_times=cv.eval_times)
    for position, (train_set, test_set) in enumerate(pairs, start=1):
        if position not in expected_by_position:
            continue
        result_train, result_test = expected_by_position[position]
        self.assertTrue(np.array_equal(result_train, train_set))
        self.assertTrue(np.array_equal(result_test, test_set))
def split_data(df):
    """Split ``df`` into train and test arrays grouped in a nested dict.

    The ``'Y'`` column is the target and every other column is a feature.
    ``train_test_split`` is called with ``test_size=0.2`` and
    ``random_state=0``.

    Returns:
        ``{"train": {"X": ..., "y": ...}, "test": {"X": ..., "y": ...}}``
    """
    features = df.drop('Y', axis=1).values
    target = df['Y'].values

    parts = train_test_split(features, target, test_size=0.2, random_state=0)
    X_train, X_test, y_train, y_test = parts

    return {
        "train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test},
    }
def split(df):
    '''
    Split a dataframe 80/20 and sort both parts by ``user_id``.

    :param df: Dataframe to be split
    :return: Tuple ``(sorted test set, sorted training set)`` -- note that
             the test set comes first
    '''
    train_part, test_part = train_test_split(df, test_size=0.2)
    train_by_user = train_part.sort_values('user_id')
    test_by_user = test_part.sort_values('user_id')
    return test_by_user, train_by_user
@classmethod
def training_split(cls,
                   dataset_folder,
                   number_of_validation_examples=500,
                   maximum_disparity=255):
    """Build the training and validation datasets.

    An example of the FlyingThings3d dataset ends up in the training or
    validation dataset only if:

    (1) it is a training example of the FlyingThings3d dataset;
    (2) it has no rendering artifacts;
    (3) all of its disparities lie within [0, maximum_disparity].

    Args:
        dataset_folder: folder with the FlyingThings3D dataset, containing
                        a "frames_cleanpass" folder with left and right
                        images and a "disparity" folder with disparities.
        number_of_validation_examples: how many examples of the training
                        set are set aside for validation.
        maximum_disparity: largest disparity allowed in the training /
                        validation dataset. Training examples with a
                        disparity above "maximum_disparity" are dropped.

    Returns:
        Tuple (training_dataset, validation_dataset).
    """
    candidates = _find_examples(dataset_folder)
    # Seeding with a fixed value before shuffling guarantees that the
    # split is the same across different runs.
    random.seed(0)
    random.shuffle(candidates)
    training_part = _split_examples_into_training_and_test_sets(candidates)[0]
    without_artifacts = _filter_out_examples_with_rendering_artifacts(
        training_part)
    within_range = _filter_out_examples_with_large_disparities(
        without_artifacts, maximum_disparity)
    full_dataset = FlyingThings3D(within_range)
    # The first subset returned by split_in_two is the validation one.
    validation_dataset, training_dataset = full_dataset.split_in_two(
        size_of_first_subset=number_of_validation_examples)
    return training_dataset, validation_dataset
def _get_adapted_dataset(split):
    """ Gets the adapted dataset for the experiments

    The 'train' split is returned as stored. For any other split the images
    and labels are passed through ``_adapt`` and the adapted values are
    written back into the dataset before being returned.

    Args :
        split (str): train or test
    Returns :
        (tuple): images and labels
    """
    dataset = _get_dataset()
    key_img = 'x_' + split
    key_lbl = 'y_' + split

    if split == 'train':
        return (dataset[key_img], dataset[key_lbl])

    adapted_images, adapted_labels = _adapt(dataset[key_img],
                                            dataset[key_lbl])
    dataset[key_img] = adapted_images
    dataset[key_lbl] = adapted_labels
    return (dataset[key_img], dataset[key_lbl])