4 examples of 'pandas groupby sum multiple columns' in Python

Every line of the 'pandas groupby sum multiple columns' code snippets below is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help ensure your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to this disclaimer.
def get_column_sum(self, column):
    """Return the sum of ``column`` over every row of ``self.spark_df``.

    ``self.spark_df`` is presumably a Spark DataFrame (confirm against the
    enclosing class); only ``select``/``groupBy``/``sum``/``collect`` are
    used on it.
    """
    # Restrict to the requested column, then aggregate with no grouping
    # keys so the whole frame collapses into a single global group.
    selected = self.spark_df.select(column)
    totals = selected.groupBy().sum()
    # The value of interest is the first cell of the first collected row.
    first_row = totals.collect()[0]
    return first_row[0]
Important

Use secure code every time

Secure your code as it's written. Use Snyk Code to scan source code in minutes – no build needed – and fix issues immediately. Enable Snyk Code

38def add_group_id(df, *groupby_cols, gid_colname='gid'):
39 groupby_cols = list(groupby_cols)
40 df_group = df.groupby(groupby_cols).apply(lambda g: pd.Series({
41 'group_length': g.shape[0]
42 })).reset_index()
43 df_group[gid_colname] = df_group.index
44 df_merge = pd.merge(df, df_group, how='outer', on=groupby_cols)
45 df_merge['group_length'] = df_merge['group_length'].fillna(-1)
46 df_merge[gid_colname] = df_merge[gid_colname].fillna(-1)
47 df_merge['group_length'] = df_merge['group_length'].astype(int)
48 df_merge[gid_colname] = df_merge[gid_colname].astype(int)
49 return df_merge
def mean(self):
    """Build the streaming result of a grouped mean.

    The grouper may itself be a ``Streaming`` object, in which case the
    root stream is zipped with the grouper's stream before accumulating;
    otherwise the grouper is handed to the accumulator directly.

    Returns a ``StreamingDataFrame`` when the example result is a pandas
    DataFrame, and a ``StreamingSeries`` otherwise.
    """
    # TODO, we should probably define groupby.aggregate
    grouper_is_streaming = isinstance(self.grouper, Streaming)

    # Compute the example result first, on the example data, so the output
    # container type can be chosen below.
    example_key = self.grouper.example if grouper_is_streaming else self.grouper
    grouped = self.root.example.groupby(example_key)
    if self.index is not None:
        grouped = grouped[self.index]
    example = grouped.mean()

    # Choose the accumulator binding and the upstream for this grouper kind.
    if grouper_is_streaming:
        step = partial(_accumulate_groupby_mean, index=self.index)
        upstream = self.root.stream.zip(self.grouper.stream)
    else:
        step = partial(_accumulate_groupby_mean,
                       grouper=self.grouper, index=self.index)
        upstream = self.root.stream
    # (0, 0) is the initial accumulator state handed to the step function.
    stream = upstream.accumulate(step, start=(0, 0), returns_state=True)

    if isinstance(example, pd.DataFrame):
        return StreamingDataFrame(stream, example)
    return StreamingSeries(stream, example)
def sum(self):
    """Apply a windowed sum, yielding null until enough rows were seen.

    The per-column expression is passed to
    ``self._apply_as_series_or_frame``, whose result is returned.
    """
    # NOTE(review): the inner function keeps the name ``sum`` (shadowing
    # the builtin inside this scope) in case the callee inspects it.
    def sum(scol):
        # True once the row number over the unbounded window reaches the
        # configured minimum number of periods.
        has_min_periods = (
            F.row_number().over(self._unbounded_window) >= self._min_periods
        )
        windowed_sum = F.sum(scol).over(self._window)
        return F.when(has_min_periods, windowed_sum).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(sum)

Related snippets