Every line of 'pandas groupby sum multiple columns' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
def get_column_sum(self, column):
    """Return the sum of a single column of ``self.spark_df``.

    The column is selected on its own, aggregated with an empty
    ``groupBy()`` (i.e. one group covering the whole frame), and the
    result is collected to the driver.

    Args:
        column: Name of the column to sum, passed straight to ``select``.

    Returns:
        The first field of the first collected row.
    """
    # NOTE(review): presumably `column` must be numeric for the aggregated
    # row to contain a field at index 0 -- confirm against callers.
    summed = self.spark_df.select(column).groupBy().sum()
    first_row = summed.collect()[0]
    return first_row[0]
def add_group_id(df, *groupby_cols, gid_colname='gid'):
    """Attach a per-group integer id and the group size to every row of ``df``.

    Rows are grouped by ``groupby_cols``; each distinct group receives a
    consecutive id (its position in the grouped result) and its row count.
    Both values are merged back onto ``df``.

    Args:
        df: Input ``pandas.DataFrame``.
        *groupby_cols: Names of the columns that define a group.
        gid_colname: Name of the id column to add (default ``'gid'``).

    Returns:
        A new DataFrame with the columns of ``df`` plus ``'group_length'``
        and ``gid_colname``, both cast to ``int``. Rows that match no group
        get ``-1`` in both columns.
    """
    groupby_cols = list(groupby_cols)

    # size() counts rows per group without running a Python callback for
    # every group, and yields a well-formed (empty) result for an empty frame.
    df_group = df.groupby(groupby_cols).size().reset_index(name='group_length')
    df_group[gid_colname] = df_group.index

    # Join type kept as 'outer' for backward compatibility with callers.
    # NOTE(review): pandas documents that an outer merge sorts the join keys,
    # so input row order is presumably not preserved -- confirm if it matters.
    df_merge = pd.merge(df, df_group, how='outer', on=groupby_cols)

    # Unmatched rows come back as NaN; mark them with -1 so both columns
    # can be stored as plain integers.
    for col in ('group_length', gid_colname):
        df_merge[col] = df_merge[col].fillna(-1).astype(int)
    return df_merge
def mean(self):
    """Build the streaming group-wise mean for this groupby.

    An example result is computed eagerly by applying
    ``groupby(...)[index].mean()`` to ``self.root.example``. The live
    stream is built by accumulating ``_accumulate_groupby_mean`` over the
    root stream. When the grouper is itself ``Streaming``, the root stream
    is first zipped with the grouper's stream, and the grouper's example
    is used to build the example result.

    Returns:
        ``StreamingDataFrame(stream, example)`` when the example result is
        a ``pd.DataFrame``, otherwise ``StreamingSeries(stream, example)``.
    """
    # TODO, there is a lot of copy-paste with the code above
    # TODO, we should probably define groupby.aggregate
    streaming_grouper = isinstance(self.grouper, Streaming)

    # Example result first, so a failure here leaves the streams untouched.
    example_grouper = self.grouper.example if streaming_grouper else self.grouper
    grouped = self.root.example.groupby(example_grouper)
    if self.index is not None:
        grouped = grouped[self.index]
    example = grouped.mean()

    if streaming_grouper:
        # The grouper values arrive through the zipped stream, so only the
        # index is bound here.
        func = partial(_accumulate_groupby_mean, index=self.index)
        source = self.root.stream.zip(self.grouper.stream)
    else:
        func = partial(_accumulate_groupby_mean,
                       grouper=self.grouper, index=self.index)
        source = self.root.stream

    # NOTE(review): the (0, 0) start state is presumably (running sums,
    # running counts) -- confirm against _accumulate_groupby_mean.
    stream = source.accumulate(func, start=(0, 0), returns_state=True)

    if isinstance(example, pd.DataFrame):
        return StreamingDataFrame(stream, example)
    return StreamingSeries(stream, example)
def sum(self):
    """Return the windowed sum, null until enough rows have been seen.

    For each row the sum of the column over ``self._window`` is emitted
    only when the row number over ``self._unbounded_window`` is at least
    ``self._min_periods``; otherwise the value is a null literal.

    Returns:
        Whatever ``self._apply_as_series_or_frame`` returns for the
        column-level function defined below.
    """
    # The inner helper keeps the name `sum` so the callable's __name__ is
    # unchanged, in case _apply_as_series_or_frame inspects it (not visible
    # here). It shadows the builtin only inside this method.
    def sum(scol):
        enough_rows = (
            F.row_number().over(self._unbounded_window) >= self._min_periods
        )
        windowed_sum = F.sum(scol).over(self._window)
        return F.when(enough_rows, windowed_sum).otherwise(F.lit(None))

    return self._apply_as_series_or_frame(sum)