4 examples of 'shufflesplit sklearn' in Python

Every line of 'shufflesplit sklearn' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
def split(df):
    """
    Split a dataframe into a train part and a test part, each ordered by ``user_id``.

    :param df: Dataframe to be split; handed to ``train_test_split`` with ``test_size=0.2``
    :return: Tuple ``(test, train)`` (the test part comes first), each part
        sorted by its ``user_id`` column
    """
    parts = train_test_split(df, test_size=0.2)
    # train_test_split yields (train, test); sort both the same way.
    train_sorted, test_sorted = (part.sort_values('user_id') for part in parts)
    # Note the return order: test first, then train.
    return test_sorted, train_sorted
423def _train_val_split(df, validation):
424 train_df = df
425 val_df = None
426 validation_ratio = 0.0
427
428 if isinstance(validation, float) and validation > 0:
429 train_df, val_df = train_df.randomSplit([1.0 - validation, validation])
430 validation_ratio = validation
431 elif isinstance(validation, str):
432 dtype = [field.dataType for field in df.schema.fields if field.name == validation][0]
433 bool_dtype = isinstance(dtype, BooleanType)
434 val_df = train_df.filter(
435 f.col(validation) if bool_dtype else f.col(validation) > 0).drop(validation)
436 train_df = train_df.filter(
437 ~f.col(validation) if bool_dtype else f.col(validation) == 0).drop(validation)
438
439 # Approximate ratio of validation data to training data for proportionate scale
440 # of partitions
441 timeout_ms = 1000
442 confidence = 0.90
443 train_rows = train_df.rdd.countApprox(timeout=timeout_ms, confidence=confidence)
444 val_rows = val_df.rdd.countApprox(timeout=timeout_ms, confidence=confidence)
445 validation_ratio = val_rows / (val_rows + train_rows)
446 elif validation:
447 raise ValueError('Unrecognized validation type: {}'.format(type(validation)))
448
449 return train_df, val_df, validation_ratio
def randomSplit(self, weights, seed=None):
    """
    Placeholder with no implementation: performs no work and returns ``None``.

    :param weights: Not used by this stub.
    :param seed: Not used by this stub; defaults to ``None``.
    :return: ``None``
    """
@staticmethod
def _get_split(X, y):
    """
    Produce a single shuffled train/validate partition of ``X`` and ``y``.

    :param X: Indexable by an index array; partitioned with the same indices as ``y``
    :param y: Indexable by an index array; ``y.shape[0]`` is passed to ``ShuffleSplit``
    :return: Tuple ``(X_train, X_validate, y_train, y_validate)``
    """
    # Only one iteration is requested, so the first pair of index arrays is the split.
    splitter = ShuffleSplit(y.shape[0], n_iter=1)
    train_idx, validate_idx = next(iter(splitter))
    return X[train_idx], X[validate_idx], y[train_idx], y[validate_idx]

Related snippets