10 examples of 'split dataframe into multiple dataframes pandas' in Python

Every line of 'split dataframe into multiple dataframes pandas' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
this disclaimer
@ray.remote
def _split_df(pandas_df, chunksize):
    """Split a pandas DataFrame into chunk-sized partitions stored in Ray.

    Args:
        pandas_df (pd.DataFrame): frame to split.
        chunksize (int): maximum number of rows per partition.

    Returns:
        remote_df_ids ([ObjectID]): Ray object IDs, one per partition,
        in row order. The final (tail) partition is always appended,
        even when it is empty or when the whole frame fits in one chunk.
    """
    dataframes = []

    while len(pandas_df) > chunksize:
        t_df = pandas_df[:chunksize]
        # BUG FIX: reset_index returns a new frame; the original code
        # discarded the result, so chunk partitions silently kept their
        # old (non-zero-based) index while only the tail was reset.
        t_df = t_df.reset_index(drop=True)
        dataframes.append(ray.put(t_df))
        pandas_df = pandas_df[chunksize:]

    # Tail partition. The original used `while/else`, which always runs
    # here because the loop has no `break`; a plain statement is clearer.
    pandas_df = pandas_df.reset_index(drop=True)
    dataframes.append(ray.put(pandas_df))

    return dataframes
Important

Use secure code every time

Secure your code as it's written. Use Snyk Code to scan source code in minutes – no build needed – and fix issues immediately. Enable Snyk Code

def _split_dataframe(df, dense_columns):
    """Partition *df* into groups sharing identical values in the dense dims.

    Groups produced by ``groupby`` are keyed by their group name, then the
    keys are converted to dense indices before returning.
    """
    grouped = dict(iter(df.groupby(dense_columns)))
    return convert_dictionary_keys_to_dense_indices(grouped)
@timeit
def split_X_y(df: pd.DataFrame, config: Config) -> "tuple[pd.DataFrame, Optional[pd.Series]]":
    """Split *df* into features X and an optional target y.

    Args:
        df: input frame, possibly containing the target column.
        config: must expose ``config['params']['field_target_name']``.

    Returns:
        ``(X, y)`` where ``y`` is the target Series, or ``(df, None)``
        when the target column is absent (e.g. at inference time).
    """
    # FIX: the original annotated the return as a tuple *literal*
    # `(pd.DataFrame, Optional[pd.Series])`, which is not a valid type
    # hint; a string annotation keeps it lazy and tool-friendly.
    # Also hoist the triple-repeated nested dict lookup.
    target = config['params']['field_target_name']
    if target in df.columns:
        return df.drop(target, axis=1), df[target]
    return df, None
def split(df):
    """Split *df* into train/test partitions, each sorted by user_id.

    :param df: DataFrame to be split
    :return: tuple of (sorted test set, sorted train set)
    """
    train_part, test_part = train_test_split(df, test_size=0.2)
    return (
        test_part.sort_values('user_id'),
        train_part.sort_values('user_id'),
    )
def split_data(df):
    """Split *df* into train/test feature matrices and target vectors.

    Column 'Y' is the target; every other column is a feature. An 80/20
    split with a fixed seed keeps the partition reproducible.
    """
    features = df.drop('Y', axis=1).values
    target = df['Y'].values

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=0)

    return {
        "train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test},
    }
def split_frame(self, index):
    """
    Split the data frame into one DataObject per distinct index value.

    Parameters
    ----------
    index : string
        column used to separate the DataObject into groups

    Returns
    -------
    tuple of DataObjects

    Examples
    --------
    >>> df = DataObject([(6, 'APL'), (2, 'IBM')])
    >>> df1, df2 = df.split_frame('a')
    >>> df1.to_dict()
    {'b': {2: 'IBM'}}
    >>> df2.to_dict()
    {'b': {6: 'APL'}}
    """
    indexed = self.unindexed.set_index(index)
    # One group per distinct index value, wrapped back into DataObjects.
    return tuple(DataObject(sub) for _, sub in indexed.groupby(level=0))
def _split(self, frame):
    """Adjust frame['adjclose'] in place for a share split.

    No-op when the post-split share ratio is 1 (share count unchanged).
    The adjustment ratio applies up to the day before the ex-date and
    reverts to 1.0 from today onward; it is backfilled onto the frame's
    index before dividing.
    """
    if self.share_afterward == 1:
        return

    day_before_ex = self.ex_date - datetime.timedelta(days=1)
    ratio_series = TimeSeries(
        [self.share_afterward, 1.0],
        index=[self.d2t(day_before_ex), self.d2t(datetime.date.today())],
    )
    aligned = ratio_series.reindex(frame.index, method='backfill')

    frame['adjclose'] = frame['adjclose'] / aligned
def split(self, index_series, proportion, batch_size=None):
    """Deterministically split a `DataFrame` into two `DataFrame`s.

    The split is only as deterministic as the underlying hash function
    (`tf.string_to_hash_bucket_fast`): stable within a binary, but it may
    change occasionally. Materialize the results for an absolute guarantee
    that the split does not change across runs.

    Each row is assigned to a partition independently, so the exact row
    count per partition is binomially distributed around `proportion`.

    Args:
      index_series: a `Series` of unique strings whose hash determines the
        partitioning, or the name of such a `Series` in this `DataFrame`.
        (Strings are required because TensorFlow provides hash ops only
        for strings, and there are no number-to-string converter ops.)
      proportion: fraction of rows selected for the 'left' partition; the
        remaining (1 - proportion) rows form the 'right' partition.
      batch_size: if given, rebatch the left and right `DataFrame`s to this
        size. If None (default), no rebatching occurs and batch sizes vary
        according to which rows are selected from each original batch.

    Returns:
      Two `DataFrame`s containing the partitioned rows.
    """
    if isinstance(index_series, str):
        index_series = self[index_series]

    left_mask, = split_mask.SplitMask(proportion)(index_series)
    right_mask = ~left_mask

    left = self.select_rows(left_mask)
    right = self.select_rows(right_mask)

    if batch_size:
        left = left.batch(batch_size=batch_size, shuffle=False)
        right = right.batch(batch_size=batch_size, shuffle=False)

    return left, right
def split_and_expand(df, col, sep):
    """Explode a delimited string column into one row per token.

    Each row whose *col* value contains *sep*-separated tokens is
    replicated once per token, with *col* holding a single token; all
    other columns are duplicated across the replicated rows.

    Args:
        df: input DataFrame (not mutated).
        col: name of the string column to split.
        sep: delimiter to split on.

    Returns:
        A new DataFrame with a fresh RangeIndex; *col* becomes the last
        column.
    """
    # FIX: the original used `.str.split(sep).apply(pd.Series, 1)`, which
    # passes `1` positionally as Series.apply's deprecated `convert_dtype`
    # argument and round-trips through a Python-level apply. `expand=True`
    # builds the token frame directly at C speed.
    tokens = df[col].str.split(sep, expand=True).stack()
    tokens.index = tokens.index.droplevel(-1)  # realign to source rows
    tokens.name = col
    out = df.drop(col, axis=1).join(tokens)
    out.reset_index(drop=True, inplace=True)
    return out
def split(self, *arrays: Union[np.ndarray, pd.DataFrame, pd.Series]):
    """
    Split data.

    Parameters
    ----------
    *arrays
        Datasets to split. The size of dim 0 must equal
        :meth:`~Splitter.size`. With no arguments, the precomputed split
        indices are returned instead.

    Returns
    -------
    tuple
        The split of each input, concatenated into one tuple. With no
        inputs, only the (train, test) indices are returned. If
        ``test_size`` is 0, no test data/index is returned.

    Raises
    ------
    RuntimeError
        If the splitter was configured with ``test_size=None``.
    """
    if self._test is None:
        raise RuntimeError('split action is illegal because `test_size` is none')

    if not arrays:
        return self._train, self._test

    pieces = []
    for data in arrays:
        self._size_check(data)
        pieces += self._split(data, self._train, self._test)
    return tuple(pieces)

Related snippets