8 examples of 'group by two columns pandas' in Python

Every line of these 'group by two columns pandas' code snippets is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help keep your Python code secure.

All examples are scanned by Snyk Code

import pandas as pd

def add_group_id(df, *groupby_cols, gid_colname='gid'):
    groupby_cols = list(groupby_cols)
    # One row per distinct key combination, with the size of each group
    df_group = df.groupby(groupby_cols).apply(lambda g: pd.Series({
        'group_length': g.shape[0]
    })).reset_index()
    # The group id is simply the row position in the per-group table
    df_group[gid_colname] = df_group.index
    df_merge = pd.merge(df, df_group, how='outer', on=groupby_cols)
    # Rows that did not match any group get -1 as a sentinel value
    df_merge['group_length'] = df_merge['group_length'].fillna(-1)
    df_merge[gid_colname] = df_merge[gid_colname].fillna(-1)
    df_merge['group_length'] = df_merge['group_length'].astype(int)
    df_merge[gid_colname] = df_merge[gid_colname].astype(int)
    return df_merge
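A quick usage sketch (the small DataFrame below is made up for illustration): grouping by the two columns 'a' and 'b' tags every row with its group id and group size.

df = pd.DataFrame({'a': ['x', 'x', 'y'], 'b': [1, 1, 2], 'v': [10, 20, 30]})
out = add_group_id(df, 'a', 'b')
# ('x', 1) -> gid 0, group_length 2; ('y', 2) -> gid 1, group_length 1
print(out[['a', 'b', 'gid', 'group_length']])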
from itertools import combinations

import pandas as pd

def group_data(data, cols, degree=3):
    """
    Create higher-order combinations of categorical features.

    Args:
        data - a pandas DataFrame
        cols - a list of column names that will be used for creating
               higher-order groupings
        degree - an integer giving the order of the combinations

    Returns:
        the DataFrame with an additional column for each higher-order column
        combination. The name of each new column is the names of the columns
        used in the combination, joined with an '_'.

        E.g. 2nd-degree combinations of data['col1'] and data['col2'] are
        located in data['col1_col2']
    """
    m, n = data[cols].shape
    for indices in combinations(range(n), degree):
        combo_cols = list(data[cols].columns[list(indices)])
        # ngroup() assigns one integer id per distinct value combination
        data['_'.join(combo_cols)] = data.groupby(combo_cols).ngroup()
    return data
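A hypothetical usage sketch: with degree=2, the three columns below produce the new columns col1_col2, col1_col3 and col2_col3, each holding an integer id per distinct pair of values.

df = pd.DataFrame({'col1': ['a', 'a', 'b', 'b'],
                   'col2': ['x', 'y', 'x', 'y'],
                   'col3': [1, 1, 2, 2]})
df = group_data(df, ['col1', 'col2', 'col3'], degree=2)
print(df.filter(like='_'))  # only the newly added combination columns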
def groups(self, *columns):
    """ Returns a list of tuples (key, value), where key is a tuple
    of coordinates from the root hypercube (and whose length depends
    on the number of parents of this DataCube), and value is a dict
    mapping column names from the **columns** positional arguments
    to Numpy arrays.

    For example, calling groups('age', 'height') on a cube
    that is the result of faceting on 'country' and 'gender' would
    result in a return value of:

        [ (('USA', 'male'),   {'age': ..., 'height': ...}),
          (('USA', 'female'), {'age': ..., 'height': ...}),
          (('CAN', 'male'),   {'age': ..., 'height': ...}),
          (('CAN', 'female'), {'age': ..., 'height': ...}),
          (('MEX', 'male'),   {'age': ..., 'height': ...}),
          (('MEX', 'female'), {'age': ..., 'height': ...}),
          ... ]

    """
    # TODO: This really needs to just use pandas.MultiIndex, stack(),
    # and pivot(). I just need to rework the FactorExprNode stuff
    # to produce a MultiIndex; then, this DataCube can just pass
    # in self._expr.
    raise NotImplementedError
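Since the method itself is unimplemented, here is a minimal standalone sketch of the behaviour the docstring describes, written with a plain pandas groupby over two facet columns. The sample frame and the free-standing helper below are hypothetical and are not part of the DataCube class.

import pandas as pd

df = pd.DataFrame({'country': ['USA', 'USA', 'CAN'],
                   'gender':  ['male', 'female', 'male'],
                   'age':     [30, 25, 40],
                   'height':  [180.0, 165.0, 175.0]})

def groups_sketch(df, facet_cols, *columns):
    # One (key, value) pair per distinct combination of the facet columns
    return [(key, {col: grp[col].to_numpy() for col in columns})
            for key, grp in df.groupby(facet_cols)]

print(groups_sketch(df, ['country', 'gender'], 'age', 'height'))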
def groupTable(table,
               group_column=0,
               group_function=min,
               missing_value="na"):
    '''Group *table* by *group_column*.

    The table need not be sorted.

    Arguments
    ---------
    table : list
        List of rows
    group_column : int
        Column to group on
    group_function : function
        Function to apply to grouped values
    missing_value : string
        String to use for missing values.
    '''
    # Sort on the grouping column so that equal keys become adjacent
    table.sort(key=lambda x: x[group_column])

    rows = []
    last_value = None
    new_table = []

    for row in table:
        if row[group_column] != last_value:
            # Flush the previous group before starting a new one
            if last_value is not None:
                new_table.append(
                    __DoGroup(rows, group_column, group_function,
                              missing_value))
            rows = []
            last_value = row[group_column]

        rows.append(row)

    # Flush the final group
    if last_value is not None:
        new_table.append(
            __DoGroup(rows, group_column, group_function, missing_value))

    return new_table
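Because __DoGroup is a private helper that is not shown in this snippet, the example below is only a rough pandas counterpart of the same idea, extended to two key columns to match the topic of this page; the column names and the min aggregation are assumptions for illustration.

import pandas as pd

table = [('a', 1, 10), ('a', 1, 5), ('b', 2, 7)]
df = pd.DataFrame(table, columns=['k1', 'k2', 'v'])
# Group by the two key columns and reduce each group with min
print(df.groupby(['k1', 'k2'], as_index=False)['v'].min())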
import datatable as dt
from datatable import by, f

def test_groupby_select_all_columns():
    # Check that when selecting all columns, the result has the same number
    # of columns as the original.
    DT = dt.Frame(id2=[1, 2] * 3, id4=[1] * 6, v3=[1, 3, 2, 3, 3, 3])
    # Group by the two columns id2 and id4 while keeping every column
    res = DT[:, :, by(f.id2, f.id4)]
    # assert_equals is a helper from datatable's own test suite
    assert_equals(res, dt.Frame(id2=[1, 1, 1, 2, 2, 2], id4=[1] * 6,
                                v3=[1, 2, 3, 3, 3, 3]))
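For comparison (this is not part of the datatable test): grouping by the two key columns here just reorders the rows by (id2, id4) while keeping the original order within each group, which a stable sort in pandas reproduces.

import pandas as pd

df = pd.DataFrame({'id2': [1, 2] * 3, 'id4': [1] * 6, 'v3': [1, 3, 2, 3, 3, 3]})
# mergesort is stable, so the original order is kept within each group
res = df.sort_values(['id2', 'id4'], kind='mergesort').reset_index(drop=True)
print(res)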
import pandas as pd

def append_data(df1, df2):
    '''
    Append df2 to df1, keeping only the first row for any duplicated index label.
    '''
    df = pd.concat((df1, df2))
    return df.groupby(df.index).first()
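A small usage sketch with made-up frames: the index label 1 appears in both inputs, and groupby(df.index).first() keeps the row that came first, i.e. the one from df1.

df1 = pd.DataFrame({'v': [1, 2]}, index=[0, 1])
df2 = pd.DataFrame({'v': [20, 30]}, index=[1, 2])
print(append_data(df1, df2))  # index 1 keeps v=2 from df1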
def _split_dataframe(df, dense_columns):
    """Split a DataFrame by creating groups of the same values for the dense dims."""
    # One sub-frame per distinct combination of values in the dense columns
    groups = {name: group for name, group in df.groupby(dense_columns)}
    # convert_dictionary_keys_to_dense_indices is a helper defined elsewhere
    # in the source module (not shown here)
    groups = convert_dictionary_keys_to_dense_indices(groups)

    return groups
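Setting the project-specific key conversion aside, the core step is just a dictionary comprehension over a pandas groupby; a self-contained sketch with assumed column names:

import pandas as pd

df = pd.DataFrame({'x': [0, 0, 1, 1], 'y': [0, 1, 0, 1], 'val': [1.0, 2.0, 3.0, 4.0]})
# Grouping by two columns yields tuple keys such as (0, 1)
groups = {name: group for name, group in df.groupby(['x', 'y'])}
print(list(groups))  # [(0, 0), (0, 1), (1, 0), (1, 1)]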
from collections import defaultdict

def groupby(xs, keys):
    # Pure-Python grouping: collect each value under its corresponding key
    result = defaultdict(list)
    for (x, key) in zip(xs, keys):
        result[key].append(x)
    return result
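A short usage example; with (col1, col2) tuples as keys this gives a dependency-free "group by two columns":

xs = [10, 20, 30, 40]
keys = [('a', 1), ('b', 2), ('a', 1), ('b', 2)]
print(dict(groupby(xs, keys)))  # {('a', 1): [10, 30], ('b', 2): [20, 40]}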
