Every line of 'shufflesplit sklearn' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
def split(df, test_size=0.2):
    """Randomly split a dataframe and sort each part by ``user_id``.

    :param df: Dataframe to be split. Each resulting part is sorted with
        ``sort_values('user_id')``, so a ``user_id`` column is required.
    :param test_size: Forwarded unchanged to ``train_test_split`` as its
        ``test_size`` argument. Defaults to ``0.2``, the value that was
        previously hard-coded, so existing callers are unaffected.
    :return: A tuple ``(sorted_test_set, sorted_train_set)``. Note the order:
        the test part comes FIRST and the training part second.
    """
    training_set, test_set = train_test_split(df, test_size=test_size)
    sorted_train_set = training_set.sort_values('user_id')
    sorted_test_set = test_set.sort_values('user_id')
    # Test set first, then training set -- callers rely on this order.
    return sorted_test_set, sorted_train_set
423 def _train_val_split(df, validation): 424 train_df = df 425 val_df = None 426 validation_ratio = 0.0 427 428 if isinstance(validation, float) and validation > 0: 429 train_df, val_df = train_df.randomSplit([1.0 - validation, validation]) 430 validation_ratio = validation 431 elif isinstance(validation, str): 432 dtype = [field.dataType for field in df.schema.fields if field.name == validation][0] 433 bool_dtype = isinstance(dtype, BooleanType) 434 val_df = train_df.filter( 435 f.col(validation) if bool_dtype else f.col(validation) > 0).drop(validation) 436 train_df = train_df.filter( 437 ~f.col(validation) if bool_dtype else f.col(validation) == 0).drop(validation) 438 439 # Approximate ratio of validation data to training data for proportionate scale 440 # of partitions 441 timeout_ms = 1000 442 confidence = 0.90 443 train_rows = train_df.rdd.countApprox(timeout=timeout_ms, confidence=confidence) 444 val_rows = val_df.rdd.countApprox(timeout=timeout_ms, confidence=confidence) 445 validation_ratio = val_rows / (val_rows + train_rows) 446 elif validation: 447 raise ValueError('Unrecognized validation type: {}'.format(type(validation))) 448 449 return train_df, val_df, validation_ratio
def randomSplit(self, weights, seed=None):
    """Placeholder with no implementation; always returns ``None``.

    :param weights: Accepted but not used by this stub.
    :param seed: Accepted but not used by this stub. Defaults to ``None``.
    :return: ``None``.
    """
    return None
@staticmethod
def _get_split(X, y):
    """Divide ``X`` and ``y`` into training and validation parts.

    A single shuffle split is drawn over ``y.shape[0]`` samples, and the
    resulting index sets are used to index both ``X`` and ``y``.

    NOTE(review): ``ShuffleSplit`` is called with a positional sample count
    and ``n_iter=1``, which looks like the legacy scikit-learn
    ``cross_validation`` signature -- confirm against the file's imports.

    :param X: Indexable container of samples; indexed with the split indices.
    :param y: Indexable container of targets exposing ``shape``; its first
        dimension gives the number of samples to split.
    :return: ``(X_train, X_validate, y_train, y_validate)``.
    """
    splitter = ShuffleSplit(y.shape[0], n_iter=1)
    # Only one iteration was requested, so the first split is the only one.
    train_idx, validate_idx = next(iter(splitter))
    return X[train_idx], X[validate_idx], y[train_idx], y[validate_idx]