Every line of 'pandas split dataframe into chunks' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
@ray.remote
def _split_df(pandas_df, chunksize):
    """Split a pandas DataFrame into consecutive row partitions.

    Every partition holds at most ``chunksize`` rows, has its index reset
    to start at 0, and is stored with ``ray.put``. An empty input yields a
    single empty partition.

    Args:
        pandas_df: The DataFrame to partition.
        chunksize: Maximum number of rows per partition; must be >= 1.

    Returns:
        remote_df_ids ([ObjectID]): the values returned by ``ray.put`` for
        each partition, in row order.

    Raises:
        ValueError: If ``chunksize`` is smaller than 1 (the loop below
            could never terminate in that case).
    """
    if chunksize < 1:
        raise ValueError("chunksize must be a positive integer")

    dataframes = []

    while len(pandas_df) > chunksize:
        # reset_index returns a new frame; the result must be kept,
        # otherwise the chunk is stored with its original index.
        head = pandas_df[:chunksize].reset_index(drop=True)
        dataframes.append(ray.put(head))
        pandas_df = pandas_df[chunksize:]

    # Whatever is left (possibly an empty frame) becomes the last partition.
    dataframes.append(ray.put(pandas_df.reset_index(drop=True)))

    return dataframes
def _split_dataframe(df, dense_columns):
    """Group ``df`` by ``dense_columns`` and return the groups as a dictionary.

    The mapping from group key to sub-frame is passed through
    ``convert_dictionary_keys_to_dense_indices`` before being returned.
    """
    by_key = {}
    for key, frame in df.groupby(dense_columns):
        by_key[key] = frame

    return convert_dictionary_keys_to_dense_indices(by_key)
@convert.register(pd.DataFrame, chunks(pd.DataFrame), cost=1.0)
def chunks_dataframe_to_dataframe(c, **kwargs):
    """Concatenate an iterable of DataFrame chunks into one DataFrame.

    When ``c`` yields no chunks, an empty DataFrame is returned whose
    columns come from ``kwargs['dshape'].measure.names``.
    """
    frames = list(c)
    if frames:
        return pd.concat(frames, axis=0, ignore_index=True)
    # Empty case: only the column names from the dshape are available.
    return pd.DataFrame(columns=kwargs.get('dshape').measure.names)
@timeit
def split_X_y(df: pd.DataFrame, config: Config) -> (pd.DataFrame, Optional[pd.Series]):
    """Separate the target column from the feature columns.

    The target column name is read from
    ``config['params']['field_target_name']``.

    Returns:
        ``(features, target)`` when the target column is present in ``df``,
        otherwise ``(df, None)`` with ``df`` returned unchanged.
    """
    target_name = config['params']['field_target_name']
    if target_name not in df.columns:
        return df, None
    return df.drop(target_name, axis=1), df[target_name]
def split_into_chunks(iterable, chunk_length):
    """Lazily group ``iterable`` into tuples of ``chunk_length`` items.

    The final tuple is padded with ``None`` when the items run out before
    it is full.
    """
    cursor = iter(iterable)
    # Every slot shares the same iterator, so each output tuple pulls
    # ``chunk_length`` consecutive items from it.
    slots = (cursor,) * chunk_length
    return zip_longest(*slots)
def split(df):
    """Split ``df`` 80/20 into train and test parts, each sorted by ``user_id``.

    :param df: DataFrame to be split
    :return: tuple ``(sorted test set, sorted training set)`` -- note that
        the test set comes first
    """
    train_part, test_part = train_test_split(df, test_size=0.2)
    train_by_user = train_part.sort_values('user_id')
    test_by_user = test_part.sort_values('user_id')
    return test_by_user, train_by_user
def split(self, index_series, proportion, batch_size=None):
    """Partition this `DataFrame` into a 'left' and a 'right' `DataFrame`.

    Rows are assigned by hashing `index_series`, so the split is only as
    deterministic as the underlying hash function (see
    `tf.string_to_hash_bucket_fast`): stable for a given binary, but it may
    change occasionally. Materialize the results if they must be
    guaranteed not to change across runs.

    Each row is assigned independently of the others, so the number of
    rows in each partition is binomially distributed rather than exact.

    Args:
      index_series: a `Series` of unique strings whose hash decides the
        partition of each row, or the name of such a `Series` in this
        `DataFrame`. (Strings are required because TensorFlow provides
        hash ops only for strings, and there are no number-to-string
        converter ops.)
      proportion: the proportion of rows to select for the 'left'
        partition; the remaining (1 - proportion) go to the 'right'.
      batch_size: batch size used to rebatch both partitions. If None
        (default), no rebatching happens and batch sizes vary with the
        rows selected from each original batch.

    Returns:
      Two `DataFrame`s: the left rows and the right rows.
    """
    # A string argument is a column name; resolve it to the Series itself.
    if isinstance(index_series, str):
        index_series = self[index_series]

    (in_left,) = split_mask.SplitMask(proportion)(index_series)
    in_right = ~in_left

    partitions = [self.select_rows(in_left), self.select_rows(in_right)]

    if batch_size:
        partitions = [
            rows.batch(batch_size=batch_size, shuffle=False)
            for rows in partitions
        ]

    left_part, right_part = partitions
    return left_part, right_part
def split_data(df):
    """Split ``df`` into train and test arrays with a fixed 80/20 split.

    Column ``'Y'`` is the target; all other columns are the features.

    Returns:
        A dict of the form
        ``{"train": {"X": ..., "y": ...}, "test": {"X": ..., "y": ...}}``.
    """
    features = df.drop('Y', axis=1).values
    target = df['Y'].values

    parts = train_test_split(features, target, test_size=0.2, random_state=0)
    X_train, X_test, y_train, y_test = parts

    return {
        "train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test},
    }
def split(
    x: ndarray, y: ndarray, num_splits: int
) -> Tuple[List[ndarray], List[ndarray]]:
    """Split ``x`` and ``y`` along axis 0 into ``num_splits`` equal parts.

    Both arrays are split with ``np.split``, which raises ``ValueError``
    when the first axis is not evenly divisible by ``num_splits``.

    Returns:
        ``(x_parts, y_parts)``, each a list of ``num_splits`` sub-arrays.
    """
    x_parts, y_parts = (np.split(arr, num_splits, axis=0) for arr in (x, y))
    return x_parts, y_parts
45 def _split_into_chunks(value: int) -> Iterable[int]: 46 while value >= 32: # 2^5, while there are at least 5 bits 47 48 # first & with 2^5-1, zeros out all the bits other than the first five 49 # then OR with 0x20 if another bit chunk follows 50 yield (value & 31) | 0x20 51 value >>= 5 52 yield value