10 examples of 'from sklearn.model_selection import train_test_split' in Python

Every line of 'from sklearn.model_selection import train_test_split' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
def train_test(self, train_path, test_path=None):
    """Cross-validate and then train/test a pipeline on the given data.

    Args:
        train_path: location of the training data, passed to ``MetaData``
            and ``self.load_data``.
        test_path: optional location of held-out test data. When it is
            ``None`` the loaded training data is split using
            ``self.testing_ratio`` and ``self.random_state``.

    Returns:
        dict with two keys: ``'cv'`` (result of ``self.cross_validate`` on
        the training portion) and ``'test'`` (result of
        ``self.test_final_model`` on the test portion).
    """
    # Dataset-level facts (class and feature counts) come from MetaData.
    meta = MetaData(
        label_column=self.label_column,
        train_path=train_path,
        test_path=test_path,
    )
    self.num_classes = meta.k_classes
    self.num_features = meta.d_features

    # Swap the judgment metric for its binary / multi-class counterpart
    # when it does not match the number of classes.
    is_binary = self.num_classes == 2
    if is_binary:
        if self.judgment_metric in [Metrics.F1_MICRO, Metrics.F1_MACRO]:
            self.judgment_metric = Metrics.F1
        elif self.judgment_metric in [Metrics.ROC_AUC_MICRO,
                                      Metrics.ROC_AUC_MACRO]:
            self.judgment_metric = Metrics.ROC_AUC
    elif self.judgment_metric == Metrics.F1:
        self.judgment_metric = Metrics.F1_MACRO
    elif self.judgment_metric == Metrics.ROC_AUC:
        self.judgment_metric = Metrics.ROC_AUC_MACRO

    # Training data is always loaded; the test set is either loaded from
    # its own path or carved out of the training data.
    train_data = self.load_data(train_path)
    if test_path is None:
        train_data, test_data = train_test_split(
            train_data,
            test_size=self.testing_ratio,
            random_state=self.random_state,
        )
    else:
        test_data = self.load_data(test_path)

    # The encoder is fitted on the training portion only and then reused
    # to transform the test portion.
    self.encoder = DataEncoder(label_column=self.label_column)
    X_train, y_train = self.encoder.fit_transform(train_data)
    X_test, y_test = self.encoder.transform(test_data)

    # Build the pipeline and cross-validate it on the training portion.
    self.make_pipeline()
    cv_scores = self.cross_validate(X_train, y_train)

    # Final fit on all training rows, then score on the held-out rows.
    self.pipeline.fit(X_train, y_train)
    test_scores = self.test_final_model(X_test, y_test)
    return {'cv': cv_scores, 'test': test_scores}
def fit(self, X, y):
    """Fit one logistic-regression model per cross-validation training fold.

    The splitter is obtained from ``check_cv(self.cv, y, classifier=True)``.
    For every split a new ``LogisticRegression(**self.lr_params)`` is fitted
    on the training indices and appended to ``self.estimators_``.

    ``self.scores`` and ``self.score`` are reset to ``[]`` and ``0`` and are
    not populated by this method; the validation indices of each split are
    not used here.

    Args:
        X: feature container indexable by the index arrays the splitter
            yields (``X[train]``).
        y: target container indexable the same way (``y[train]``).

    Returns:
        ``self``, so that calls can be chained.
    """
    cv = check_cv(self.cv, y, classifier=True)
    self.scores = []
    self.score = 0
    self.estimators_ = []
    for train, _valid in cv.split(X, y):
        # Progress message: "logistic regression fitting started".
        print("逻辑回归开始拟合")
        clf = LogisticRegression(**self.lr_params).fit(X[train], y[train])
        # Progress message: "logistic regression fitting finished".
        print("逻辑回归拟合结束")
        self.estimators_.append(clf)
    return self
def _ratings_to_matrix(ratings, n_rows, n_cols):
    """Scatter (user, item, rating) rows into a dense zero-filled matrix."""
    matrix = np.zeros((n_rows, n_cols))
    for row in ratings.itertuples():
        # row[0] is the frame index; row[1], row[2], row[3] are user id,
        # item id and rating. Ids are shifted by one to get array indices.
        matrix[row[1] - 1, row[2] - 1] = row[3]
    return matrix


def train_test_split(fileName, type=1):
    """Read a ratings file and return dense train/test user-item matrices.

    Args:
        fileName: path of the ratings file. It is read without a header row
            into the columns ``user_id, item_id, rating, timestamp``.
        type: ``1`` reads a tab-separated file; any other value reads a
            ``'::'``-separated file with the Python parsing engine. The
            name shadows the builtin but is kept so keyword callers
            continue to work.

    Returns:
        ``(train_data_matrix, test_data_matrix)``: two arrays of shape
        ``(max user_id, max item_id)`` holding the ratings of a 90% / 10%
        split of the rows; cells without a rating are ``0``.
    """
    header = ['user_id', 'item_id', 'rating', 'timestamp']
    if type == 1:
        df = pd.read_csv(fileName, sep='\t', names=header)
    else:
        df = pd.read_csv(fileName, sep='::', names=header, engine='python')
    n_users = df.user_id.unique().shape[0]
    users = df.user_id.max()
    n_items = df.item_id.unique().shape[0]
    items = df.item_id.max()

    # Single-argument print(...) behaves identically on Python 2 and 3,
    # unlike the bare print statement.
    print('Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_items))
    print('The biggest ID of users = ' + str(users) + ' | The biggest ID of movies = ' + str(items))

    train_data, test_data = cv.train_test_split(df, test_size=0.1)
    train_data = pd.DataFrame(train_data)
    test_data = pd.DataFrame(test_data)

    # The matrices are sized by the largest id rather than the number of
    # distinct ids, so every id has a valid index after the shift by one.
    train_data_matrix = _ratings_to_matrix(train_data, users, items)
    test_data_matrix = _ratings_to_matrix(test_data, users, items)
    return train_data_matrix, test_data_matrix
def split_data(df):
    """Split a frame with a ``'Y'`` target column into train and test arrays.

    Every column except ``'Y'`` is used as a feature. 20% of the rows are
    held out for testing with ``random_state=0``.

    Returns:
        ``{"train": {"X": ..., "y": ...}, "test": {"X": ..., "y": ...}}``
    """
    features = df.drop('Y', axis=1).values
    target = df['Y'].values

    parts = train_test_split(features, target, test_size=0.2, random_state=0)
    X_train, X_test, y_train, y_test = parts

    return {
        "train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test},
    }
def train_test_split_result(clf, X, y):
    """Fit ``clf`` on a random train split of ``(X, y)`` and report on the rest.

    The split uses the defaults of ``cross_validation.train_test_split``.
    The fitted estimator, the test portion and the training labels are
    handed to ``report_result``. Nothing is returned.
    """
    print("This is Random and Percentaged Spilt Result ... ")
    split = cross_validation.train_test_split(X, y)
    X_train, X_test, y_train, y_test = split
    fitted = clf.fit(X_train, y_train)
    report_result(fitted, X_test, y_test, y_train)
def test_split(self):
    """
    Apply split to the sample described in the docstring of prepare_time_inhomogeneous_cv_object, with n_splits = 4
    and n_test_splits = 2. The folds are [0 : 6], [6 : 11], [11 : 16], [16 : 21]. We use an embargo of zero.
    Inspection shows that the pairs test-train sets should respectively be
    [...]
    3. Train: folds 1 and 4, samples [0, 1, 2, 3, 4, 16, 17, 18, 19, 20]. Test: folds 2 and 3, samples [6, 7, 8, 9,
     10, 11, 12, 13, 14, 15]. Sample 5 is purged from the train set.
    4. Train: folds 2 and 3, samples [7, 8, 9, 10, 11, 12, 13, 14, 15]. Test: folds 1 and 4, samples [0, 1, 2, 3, 4,
     5, 16, 17, 18, 19, 20]. Sample 6 is embargoed.
    [...]

    Only the 3rd and 4th pairs are checked. The test also fails if the splitter yields fewer than four pairs,
    so the checks can never be skipped silently.
    """
    cv = CombPurgedKFoldCV(n_splits=4, n_test_splits=2)
    prepare_time_inhomogeneous_cv_object(cv)

    # Expected (train, test) index arrays keyed by 1-based split number.
    expected = {
        3: (np.array([0, 1, 2, 3, 4, 16, 17, 18, 19, 20]),
            np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15])),
        4: (np.array([7, 8, 9, 10, 11, 12, 13, 14, 15]),
            np.array([0, 1, 2, 3, 4, 5, 16, 17, 18, 19, 20])),
    }

    count = 0
    for train_set, test_set in cv.split(cv.X, pred_times=cv.pred_times, eval_times=cv.eval_times):
        count += 1
        if count in expected:
            result_train, result_test = expected[count]
            self.assertTrue(np.array_equal(result_train, train_set))
            self.assertTrue(np.array_equal(result_test, test_set))

    # Without this guard the test would pass vacuously if the splitter
    # stopped before reaching the pairs checked above.
    self.assertGreaterEqual(count, 4)
def load_data():
    '''
    Load the iris data from scikit-learn (150 samples, 3 classes) and split
    it for a classification problem.

    25% of the samples are held out for testing; the split is stratified on
    the class labels and uses random_state=0.

    return:
        the result of cross_validation.train_test_split, in the order
        train_data, test_data, train_value, test_value
    '''
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    return cross_validation.train_test_split(
        features,
        labels,
        test_size=0.25,
        random_state=0,
        stratify=labels,
    )
556def _define(y, cvtype='skfold', n_folds=10, rndstate=0, rep=10,
557 **kwargs):
558 # Stratified k-fold :
559 if cvtype == 'skfold':
560 cvT = StratifiedKFold(y, n_folds=n_folds, shuffle=True,
561 random_state=rndstate, **kwargs)
562 cvT.lgStr = str(rep)+'-times, '+str(n_folds)+' Stratified k-folds'
563 cvT.shStr = str(rep)+' rep x'+str(n_folds)+' '+cvtype
564
565 # k-fold :
566 elif cvtype == 'kfold':
567 cvT = KFold(len(y), n_folds=n_folds, shuffle=True,
568 random_state=rndstate, **kwargs)
569 cvT.lgStr = str(rep)+'-times, '+str(n_folds)+' k-folds'
570 cvT.shStr = str(rep)+' rep x'+str(n_folds)+' '+cvtype
571
572 # Shuffle stratified k-fold :
573 elif cvtype == 'sss':
574 cvT = StratifiedShuffleSplit(y, n_iter=n_folds,
575 test_size=1/n_folds,
576 random_state=rndstate, **kwargs)
577 cvT.lgStr = str(rep)+'-times, test size 1/' + \
578 str(n_folds)+' Shuffle Stratified Split'
579 cvT.shStr = str(rep)+' rep x'+str(n_folds)+' '+cvtype
580
581 # Shuffle stratified :
582 elif cvtype == 'ss':
583 cvT = ShuffleSplit(len(y), n_iter=rep, test_size=1/n_folds,
584 random_state=rndstate, **kwargs)
585 cvT.lgStr = str(rep)+'-times, test size 1/' + \
586 str(n_folds)+' Shuffle Stratified'
587 cvT.shStr = str(rep)+' rep x'+str(n_folds)+' '+cvtype
588
589 else:
590 raise ValueError('No cross-validation "'+cvtype+'"" found')
591
592 return cvT
def my_train_split(ds, y):
    """Return ``ds`` unchanged together with a fixed validation dataset.

    The validation dataset wraps ``corpus.valid[:200]`` in a
    ``skorch.dataset.Dataset`` with no targets. ``y`` is accepted but not
    used.
    """
    valid_ds = skorch.dataset.Dataset(corpus.valid[:200], y=None)
    return ds, valid_ds
def _get_adapted_dataset(split):
    """ Gets the dataset for the experiments, adapted for non-train splits

    The entries ``'x_<split>'`` and ``'y_<split>'`` are read from the
    mapping returned by ``_get_dataset()``. For any split other than
    ``'train'`` they are first replaced, in that mapping, by the result of
    ``_adapt``.

    Args :
        split (str): train or test
    Returns :
        (tuple): images and labels
    """
    data = _get_dataset()
    img_key, lbl_key = 'x_' + split, 'y_' + split

    if split != 'train':
        adapted = _adapt(data[img_key], data[lbl_key])
        data[img_key], data[lbl_key] = adapted

    return (data[img_key], data[lbl_key])

Related snippets