10 examples of 'group by in pandas' in Python

Every line of 'group by in pandas' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets, you agree to this disclaimer.
38def add_group_id(df, *groupby_cols, gid_colname='gid'):
39 groupby_cols = list(groupby_cols)
40 df_group = df.groupby(groupby_cols).apply(lambda g: pd.Series({
41 'group_length': g.shape[0]
42 })).reset_index()
43 df_group[gid_colname] = df_group.index
44 df_merge = pd.merge(df, df_group, how='outer', on=groupby_cols)
45 df_merge['group_length'] = df_merge['group_length'].fillna(-1)
46 df_merge[gid_colname] = df_merge[gid_colname].fillna(-1)
47 df_merge['group_length'] = df_merge['group_length'].astype(int)
48 df_merge[gid_colname] = df_merge[gid_colname].astype(int)
49 return df_merge
Important

Use secure code every time

Secure your code as it's written. Use Snyk Code to scan source code in minutes – no build needed – and fix issues immediately. Enable Snyk Code

def groupby(xs, keys):
    """Bucket the items of ``xs`` under the key at the same position in ``keys``.

    The two iterables are walked in lockstep and iteration stops at the
    shorter one. Returns a ``defaultdict(list)`` mapping each key to the
    list of items paired with it, in encounter order.
    """
    grouped = defaultdict(list)
    for item, label in zip(xs, keys):
        bucket = grouped[label]
        bucket.append(item)
    return grouped
@typecheck(f=func_spec(1, expr_any),
           collection=expr_oneof(expr_set(), expr_array()))
def group_by(f: Callable, collection) -> DictExpression:
    """Bucket the elements of a collection into a dict keyed by ``f(element)``.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']

    >>> hl.eval(hl.group_by(lambda x: hl.len(x), a))
    {5: ['quick', 'brown'], 3: ['The', 'fox']}

    Parameters
    ----------
    f : function ( (arg) -> :class:`.Expression`)
        Called on each element of the collection; its result is the key
        under which that element is filed in the resulting dictionary.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        Collection expression whose elements are grouped.

    Returns
    -------
    :class:`.DictExpression`.
        Dictionary keyed by results of `f`.
    """
    # The grouping itself is delegated to the collection expression.
    grouped = collection.group_by(f)
    return grouped
125def _iter_groups(self, df, y=None):
126 """Iterate over groups of `df`, and, if provided, matching labels."""
127 groups = df.groupby(self.groupby).indices
128 for key, sub_idx in groups.items():
129 sub_df = df.iloc[sub_idx]
130 sub_y = y[sub_idx] if y is not None else None
131 yield key, sub_df, sub_y
def groupby(self, arr, fields):
    """
    Group a remote struct array by the selected fields.

    arr : rarray
        A remote array on the server.
    fields : list of field names
        The fields used for grouping.

    Returns a tuple of the groupby result and the groups, each as an rarray.
    """
    # NOTE(review): the bare name `groupby` here presumably resolves to a
    # module-level helper rather than this method -- confirm against the
    # enclosing class and module.
    response = groupby(self.session_url, arr.url, fields)
    grouped = rarray(response['output_gb'], response['dshape_gb'])
    groups = rarray(response['output_groups'], response['dshape_groups'])
    return (grouped, groups)
def test_filter_groups():
    """
    Return only groups with size > 3
    """
    grouped = create_test_df().groupby('group')

    result = filter_groups(grouped, lambda grp: len(grp) > 3)

    # Expected survivors: the five rows of group 0, at original index
    # positions 0, 1, 2, 3 and 6.
    expected_frame = pandas.DataFrame(
        {
            'group': [0, 0, 0, 0, 0],
            'feature1': [1, 1, 1, 1, 3],
            'feature2': [10.0, 10.5, 9.5, 11.0, 0.0],
        },
        index=[0, 1, 2, 3, 6],
    )
    expected = expected_frame.groupby('group')

    assert_equals(result, expected)
@ApplyToDataframe
def ungroup():
    """Return ``UngroupDF``; the function is wrapped by ``ApplyToDataframe``."""
    operation = UngroupDF
    return operation
def test_groupby_select_all_columns():
    # Selecting every column under a group-by must give a result with the
    # same number of columns as the original frame.
    frame = dt.Frame(id2=[1, 2] * 3, id4=[1] * 6, v3=[1, 3, 2, 3, 3, 3])
    grouped = frame[:, :, by(f.id2, f.id4)]
    expected = dt.Frame(
        id2=[1, 1, 1, 2, 2, 2],
        id4=[1] * 6,
        v3=[1, 2, 3, 3, 3, 3],
    )
    assert_equals(grouped, expected)
def group_func(d):
    """Return the ``time`` attribute of ``d``."""
    key = d.time
    return key
def _groupby_and_apply(expression, probes, info, applyfunc):
    """
    Subsets `expression` based on most representative probe

    Parameters
    ----------
    expression : dict of (P, S) pandas.DataFrame
        Dictionary where keys are donor IDs and values are dataframes with `P`
        rows representing probes and `S` columns representing distinct samples
    probes : pandas.DataFrame
        Dataframe containing information on probes that should be considered in
        representative analysis. Generally, intensity-based-filtering (i.e.,
        `filter_probes()`) should have been used to reduce this list to only
        those probes with good expression signal
    info : pandas.DataFrame
        Dataframe containing information on probe expression information. Index
        should be unique probe IDs and must have at least 'gene_symbol' column
    applyfunc : callable
        Function used to select representative probe ID from those indexing
        the same gene. Must accept a pandas dataframe as input and return a
        string (i.e., the chosen probe ID)

    Returns
    -------
    representative : dict of (S, G) pandas.DataFrame
        Dictionary where keys are donor IDs and values are dataframes with `S`
        rows representing distinct samples and `G` columns representing unique
        genes
    """

    # group probes by gene and get probe corresponding to relevant feature
    retained = info.groupby('gene_symbol').apply(applyfunc).dropna()

    # np.squeeze collapses a single retained probe down to a 0-d scalar, which
    # sorted() cannot iterate; atleast_1d restores a 1-d sequence in that case
    # and leaves the usual multi-probe result unchanged
    probe_ids = np.atleast_1d(np.squeeze(retained.astype(int)))
    probes = probes.loc[sorted(probe_ids)]

    # subset expression dataframes to retain only desired probes and reassign
    # (and sort) index to gene symbols in lieu of probe IDs
    representative = {
        d: e.loc[probes.index].set_index(probes['gene_symbol']).sort_index().T
        for d, e in utils.check_dict(expression).items()
    }

    return representative

Related snippets