Every line of the 'from sklearn.model_selection import train_test_split' code snippets below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.
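For orientation, here is a minimal, self-contained sketch of the canonical call (the toy shapes and the 80/20 ratio are illustrative choices, not taken from any snippet below):

import numpy as np
from sklearn.model_selection import train_test_split

# toy data: 100 samples, 4 features, binary labels
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

# hold out 20% of the rows; fix random_state for reproducibility and
# stratify on y so both splits keep the original class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)

print(X_train.shape, X_test.shape)  # (80, 4) (20, 4)

The snippets that follow show how real projects wrap this call.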
def train_test(self, train_path, test_path=None):
    # requires: from sklearn.model_selection import train_test_split
    # load train and (maybe) test data
    metadata = MetaData(label_column=self.label_column,
                        train_path=train_path,
                        test_path=test_path)
    self.num_classes = metadata.k_classes
    self.num_features = metadata.d_features

    # if necessary, cast judgment metric into its binary/multiclass equivalent
    if self.num_classes == 2:
        if self.judgment_metric in [Metrics.F1_MICRO, Metrics.F1_MACRO]:
            self.judgment_metric = Metrics.F1
        elif self.judgment_metric in [Metrics.ROC_AUC_MICRO,
                                      Metrics.ROC_AUC_MACRO]:
            self.judgment_metric = Metrics.ROC_AUC
    else:
        if self.judgment_metric == Metrics.F1:
            self.judgment_metric = Metrics.F1_MACRO
        elif self.judgment_metric == Metrics.ROC_AUC:
            self.judgment_metric = Metrics.ROC_AUC_MACRO

    # load training data
    train_data = self.load_data(train_path)

    # if necessary, generate permanent train/test split
    if test_path is not None:
        test_data = self.load_data(test_path)
    else:
        train_data, test_data = train_test_split(train_data,
                                                 test_size=self.testing_ratio,
                                                 random_state=self.random_state)

    # extract feature matrix and labels from raw data
    self.encoder = DataEncoder(label_column=self.label_column)
    X_train, y_train = self.encoder.fit_transform(train_data)
    X_test, y_test = self.encoder.transform(test_data)

    # create and cross-validate pipeline
    self.make_pipeline()
    cv_scores = self.cross_validate(X_train, y_train)

    # train and test the final model
    self.pipeline.fit(X_train, y_train)
    test_scores = self.test_final_model(X_test, y_test)
    return {'cv': cv_scores, 'test': test_scores}
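To see why the metric cast above is needed: scikit-learn's plain binary F1 is undefined for multiclass targets, so an averaging variant has to be selected there. A quick illustration (not from the snippet):

from sklearn.metrics import f1_score

y_true = [0, 1, 2, 1, 0]
y_pred = [0, 2, 2, 1, 0]

# multiclass targets require an explicit averaging mode
print(f1_score(y_true, y_pred, average='macro'))
# f1_score(y_true, y_pred) would raise a ValueError here, because the
# default average='binary' only applies to two-class targets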
def fit(self, X, y):
    # requires: from sklearn.model_selection import check_cv
    #           from sklearn.linear_model import LogisticRegression
    cv = check_cv(self.cv, y, classifier=True)
    self.scores = []
    self.score = 0
    self.estimators_ = []
    for train, valid in cv.split(X, y):
        print("fitting logistic regression...")
        clf = LogisticRegression(**self.lr_params).fit(X[train], y[train])
        print("logistic regression fitted")
        # record the validation accuracy of this fold
        self.scores.append(clf.score(X[valid], y[valid]))
        self.estimators_.append(clf)
    # average validation accuracy across the folds
    self.score = sum(self.scores) / len(self.scores)
    return self
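A usage sketch for the method above, assuming it lives on a small ensemble wrapper; the CVLogisticEnsemble class here is hypothetical scaffolding, invented only to make the example runnable:

from sklearn.datasets import make_classification


# hypothetical host class exposing the cv and lr_params attributes the
# fit method expects; not part of the original project
class CVLogisticEnsemble:
    def __init__(self, cv=5, **lr_params):
        self.cv = cv
        self.lr_params = lr_params


CVLogisticEnsemble.fit = fit  # attach the fit defined above

X, y = make_classification(n_samples=200, random_state=0)
model = CVLogisticEnsemble(cv=5, max_iter=1000)
model.fit(X, y)
print(len(model.estimators_), round(model.score, 3))  # 5 fitted estimators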
import numpy as np
import pandas as pd
from sklearn import model_selection  # the local function below shadows sklearn's train_test_split


def train_test_split(fileName, type=1):
    header = ['user_id', 'item_id', 'rating', 'timestamp']
    if type == 1:
        df = pd.read_csv(fileName, sep='\t', names=header)
    else:
        df = pd.read_csv(fileName, sep='::', names=header, engine='python')
    n_users = df.user_id.unique().shape[0]
    users = df.user_id.max()
    n_items = df.item_id.unique().shape[0]
    items = df.item_id.max()

    print('Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items))
    print('The biggest ID of users = ' + str(users) + ' | The biggest ID of movies = ' + str(items))

    train_data, test_data = model_selection.train_test_split(df, test_size=0.1)
    train_data = pd.DataFrame(train_data)
    test_data = pd.DataFrame(test_data)
    # Create two user-item matrices, one for training and another for testing
    train_data_matrix = np.zeros((users, items))
    for line in train_data.itertuples():
        train_data_matrix[line[1] - 1, line[2] - 1] = line[3]

    test_data_matrix = np.zeros((users, items))
    for line in test_data.itertuples():
        test_data_matrix[line[1] - 1, line[2] - 1] = line[3]
    return train_data_matrix, test_data_matrix
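A hedged usage sketch: assuming a tab-separated, MovieLens-100k-style ratings file (the file name u.data is an assumption, not something the snippet specifies):

# hypothetical input file; any (user, item, rating, timestamp) TSV works
train_mat, test_mat = train_test_split('u.data', type=1)
print(train_mat.shape, test_mat.shape)  # both (max_user_id, max_item_id)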
from sklearn.model_selection import train_test_split


def split_data(df):
    # separate the features from the 'Y' label column
    X = df.drop('Y', axis=1).values
    y = df['Y'].values

    # hold out 20% of the rows as a test set, with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    data = {"train": {"X": X_train, "y": y_train},
            "test": {"X": X_test, "y": y_test}}
    return data
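A quick usage sketch with a toy frame, assuming only that the label column is named 'Y' as the function expects:

import pandas as pd

df = pd.DataFrame({'X1': range(10), 'X2': range(10, 20), 'Y': [0, 1] * 5})
data = split_data(df)
print(data['train']['X'].shape, data['test']['X'].shape)  # (8, 2) (2, 2)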
from sklearn.model_selection import train_test_split  # replaces the removed sklearn.cross_validation module


def train_test_split_result(clf, X, y):
    print("This is the random, percentage-based split result ...")
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = clf.fit(X_train, y_train)
    report_result(clf, X_test, y_test, y_train)
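Usage sketch: any scikit-learn classifier can be passed in; report_result is the project's own helper and is assumed to be importable alongside the function above:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
train_test_split_result(DecisionTreeClassifier(), X, y)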
def test_split(self):
    """
    Apply split to the sample described in the docstring of prepare_time_inhomogeneous_cv_object, with n_splits = 4
    and n_test_splits = 2. The folds are [0 : 6], [6 : 11], [11 : 16], [16 : 21]. We use an embargo of zero.
    Inspection shows that the pairs of train-test sets should respectively be
    [...]
    3. Train: folds 1 and 4, samples [0, 1, 2, 3, 4, 16, 17, 18, 19, 20]. Test: folds 2 and 3, samples [6, 7, 8, 9,
       10, 11, 12, 13, 14, 15]. Sample 5 is purged from the train set.
    4. Train: folds 2 and 3, samples [7, 8, 9, 10, 11, 12, 13, 14, 15]. Test: folds 1 and 4, samples [0, 1, 2, 3, 4,
       5, 16, 17, 18, 19, 20]. Sample 6 is embargoed.
    [...]
    """
    cv = CombPurgedKFoldCV(n_splits=4, n_test_splits=2)
    prepare_time_inhomogeneous_cv_object(cv)
    count = 0
    for train_set, test_set in cv.split(cv.X, pred_times=cv.pred_times, eval_times=cv.eval_times):
        count += 1
        if count == 3:
            result_train = np.array([0, 1, 2, 3, 4, 16, 17, 18, 19, 20])
            result_test = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
            self.assertTrue(np.array_equal(result_train, train_set))
            self.assertTrue(np.array_equal(result_test, test_set))
        if count == 4:
            result_train = np.array([7, 8, 9, 10, 11, 12, 13, 14, 15])
            result_test = np.array([0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20])
            self.assertTrue(np.array_equal(result_train, train_set))
            self.assertTrue(np.array_equal(result_test, test_set))
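A usage sketch grounded in the call pattern above: each sample carries a prediction time and an evaluation time, and the splitter yields purged, embargoed index arrays. The timestamps below are illustrative, and CombPurgedKFoldCV is assumed to come from the same project as the test:

import numpy as np
import pandas as pd

# illustrative data: 21 samples whose labels resolve one day after prediction
X = pd.Series(np.random.rand(21))
pred_times = pd.Series(pd.date_range('2020-01-01', periods=21, freq='D'))
eval_times = pred_times + pd.Timedelta(days=1)

cv = CombPurgedKFoldCV(n_splits=4, n_test_splits=2)
for train_set, test_set in cv.split(X, pred_times=pred_times, eval_times=eval_times):
    pass  # train on X.iloc[train_set], evaluate on X.iloc[test_set]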
from sklearn import datasets
from sklearn.model_selection import train_test_split  # replaces the removed sklearn.cross_validation module


def load_data():
    '''
    Load the iris data from scikit-learn. This dataset has 150 samples and 3 classes.
    Returns:
        train_data, test_data, train_value, test_value for the classification problem.
    '''
    iris = datasets.load_iris()
    X_train = iris.data
    y_train = iris.target
    # hold out 25% of the samples, stratified on the class labels
    return train_test_split(X_train, y_train, test_size=0.25,
                            random_state=0, stratify=y_train)
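Usage sketch: with test_size=0.25, scikit-learn rounds the test set up to 38 of the 150 iris samples:

X_train, X_test, y_train, y_test = load_data()
print(X_train.shape, X_test.shape)  # (112, 4) (38, 4)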
def _define(y, cvtype='skfold', n_folds=10, rndstate=0, rep=10,
            **kwargs):
    # requires: from sklearn.model_selection import (KFold, StratifiedKFold,
    #               ShuffleSplit, StratifiedShuffleSplit)
    # note: y is consumed later, at cvT.split(X, y) time, not at construction
    # Stratified k-fold :
    if cvtype == 'skfold':
        cvT = StratifiedKFold(n_splits=n_folds, shuffle=True,
                              random_state=rndstate, **kwargs)
        cvT.lgStr = str(rep) + '-times, ' + str(n_folds) + ' Stratified k-folds'
        cvT.shStr = str(rep) + ' rep x' + str(n_folds) + ' ' + cvtype

    # k-fold :
    elif cvtype == 'kfold':
        cvT = KFold(n_splits=n_folds, shuffle=True,
                    random_state=rndstate, **kwargs)
        cvT.lgStr = str(rep) + '-times, ' + str(n_folds) + ' k-folds'
        cvT.shStr = str(rep) + ' rep x' + str(n_folds) + ' ' + cvtype

    # Shuffle stratified k-fold :
    elif cvtype == 'sss':
        cvT = StratifiedShuffleSplit(n_splits=n_folds,
                                     test_size=1 / n_folds,
                                     random_state=rndstate, **kwargs)
        cvT.lgStr = str(rep) + '-times, test size 1/' + \
            str(n_folds) + ' Shuffle Stratified Split'
        cvT.shStr = str(rep) + ' rep x' + str(n_folds) + ' ' + cvtype

    # Shuffle split :
    elif cvtype == 'ss':
        cvT = ShuffleSplit(n_splits=rep, test_size=1 / n_folds,
                           random_state=rndstate, **kwargs)
        cvT.lgStr = str(rep) + '-times, test size 1/' + \
            str(n_folds) + ' Shuffle Stratified'
        cvT.shStr = str(rep) + ' rep x' + str(n_folds) + ' ' + cvtype

    else:
        raise ValueError('No cross-validation "' + cvtype + '" found')

    return cvT
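A short usage sketch for the helper (the arrays are illustrative):

import numpy as np

X = np.random.rand(60, 3)
y = np.array([0, 1, 2] * 20)

cvT = _define(y, cvtype='skfold', n_folds=5)
print(cvT.shStr)  # '10 rep x5 skfold'
for train_idx, test_idx in cvT.split(X, y):
    pass  # fit on X[train_idx], evaluate on X[test_idx]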
def my_train_split(ds, y):
    # bypass skorch's default internal split: train on the full dataset and
    # validate on a fixed slice of the held-out corpus
    return ds, skorch.dataset.Dataset(corpus.valid[:200], y=None)
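Context sketch: skorch's NeuralNet family accepts a train_split callable with exactly this (ds, y) signature, so the helper can be wired in as below; MyModule is a hypothetical stand-in network:

import torch
import skorch


class MyModule(torch.nn.Module):  # hypothetical stand-in network
    def __init__(self):
        super().__init__()
        self.lin = torch.nn.Linear(10, 2)

    def forward(self, x):
        return self.lin(x)


net = skorch.NeuralNetClassifier(
    MyModule,
    train_split=my_train_split,  # returns (train dataset, valid dataset)
)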
def _get_adapted_dataset(split):
    """ Gets the adapted dataset for the experiments

    Args:
        split (str): 'train' or 'test'
    Returns:
        (tuple): images and labels
    """
    dataset = _get_dataset()
    key_img = 'x_' + split
    key_lbl = 'y_' + split

    # the test split is additionally adapted before use
    if split != 'train':
        dataset[key_img], dataset[key_lbl] = _adapt(dataset[key_img],
                                                    dataset[key_lbl])

    return (dataset[key_img], dataset[key_lbl])
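Usage sketch, relying on the project's _get_dataset and _adapt helpers being defined alongside the function above:

# fetch the test split, with the project's adaptation step applied
images, labels = _get_adapted_dataset('test')
print(images.shape, labels.shape)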