Every line of the 'group by two columns pandas' code snippets below is scanned for vulnerabilities by our powerful machine learning engine, which combs millions of open source libraries to help ensure your Python code is secure.
def add_group_id(df, *groupby_cols, gid_colname='gid'):
    """Attach a group id and group size to every row of *df*.

    Rows are grouped by *groupby_cols*; each distinct key combination gets
    a sequential integer id (0, 1, ...) in the *gid_colname* column plus
    the group's row count in 'group_length'.  Rows that match no group
    (possible only for NaN keys, which groupby drops) get -1 in both
    columns.

    Args:
        df: input DataFrame.
        *groupby_cols: one or more column names to group by.
        gid_colname: name of the new group-id column (default 'gid').

    Returns:
        A new DataFrame: *df* merged with the 'group_length' and
        *gid_colname* columns.
    """
    groupby_cols = list(groupby_cols)
    # GroupBy.size() counts rows per group directly; the previous
    # groupby().apply(lambda g: pd.Series({...})) round-trip was slower
    # and relied on apply() semantics that newer pandas deprecates.
    # Groups come out sorted by key, so the positional index used as the
    # group id is identical to before.
    df_group = df.groupby(groupby_cols).size().reset_index(name='group_length')
    df_group[gid_colname] = df_group.index
    df_merge = pd.merge(df, df_group, how='outer', on=groupby_cols)
    # The outer merge can leave NaNs (e.g. NaN keys dropped by groupby);
    # mark such rows with -1 so both columns can be integer-typed.
    df_merge['group_length'] = df_merge['group_length'].fillna(-1).astype(int)
    df_merge[gid_colname] = df_merge[gid_colname].fillna(-1).astype(int)
    return df_merge
def group_data(data, cols, degree=3):
    """
    Create higher-order combinations of categorical features.

    Args:
        data - a pandas DataFrame (modified in place)
        cols - a list of column names that will be used for creating
            higher-order groupings
        degree - an integer identifying the order of the combinations

    Returns:
        the same DataFrame with one additional column per higher-order
        column combination, holding an integer group id per row.  The
        name of each new column is the names of the combined columns
        joined by '_'.

    Eg: 2nd degree combinations of data['col1'] and data['col2'] are
    located in data['col1_col2']
    """
    n = len(cols)
    for indices in combinations(range(n), degree):
        subset = [cols[i] for i in indices]
        # BUG FIX: the assignment used to sit outside this loop and wrote
        # a single '_'.join(cols) column from the flattened ids of ALL
        # combinations (wrong length whenever more than one combination
        # exists).  Each combination now gets its own column, as the
        # docstring promises.
        # GroupBy.ngroup() gives each row the integer id of its group and
        # replaces the private .grouper.group_info[0] accessor, which is
        # deprecated/removed in modern pandas.
        data['_'.join(subset)] = data.groupby(subset).ngroup()
    return data
def groups(self, *columns):
    """Return a list of ``(key, value)`` pairs.

    ``key`` is a tuple of coordinates from the root hypercube (its
    length depends on the number of parents of this DataCube) and
    ``value`` is a dict mapping each column name given in *columns*
    to a Numpy array.

    For example, calling ``groups('age', 'height')`` on a cube that is
    the result of faceting on 'country' and 'gender' would return::

        [ (('USA', 'male'),   {'age': ..., 'height': ...}),
          (('USA', 'female'), {'age': ..., 'height': ...}),
          (('CAN', 'male'),   {'age': ..., 'height': ...}),
          (('CAN', 'female'), {'age': ..., 'height': ...}),
          (('MEX', 'male'),   {'age': ..., 'height': ...}),
          (('MEX', 'female'), {'age': ..., 'height': ...}),
          ... ]

    Raises:
        NotImplementedError: always; this method is not implemented yet.
    """
    # TODO: This really needs to just use Pandas.MultiIndex, stack(),
    # and pivot(). I just need to rework the FactorExprNode stuff
    # to produce a MultiIndex; then, this DataCube can just pass
    # in self._expr.
    raise NotImplementedError
def groupTable(table,
               group_column=0,
               group_function=min,
               missing_value="na"):
    '''group table by *group_column*.

    The table need not be sorted; it is sorted in place as a side
    effect (as before).

    Arguments
    ---------
    table : list
        List of rows
    group_column : int
        Column to group on
    group_function : function
        Function to apply on grouped values
    missing_value : string
        String to use for missing values.
    '''
    import itertools

    # Python 3 removed cmp() and comparison-function sorting
    # (table.sort(lambda x, y: cmp(...)) no longer works); sort by key
    # instead, which is equivalent and faster.
    table.sort(key=lambda row: row[group_column])

    # itertools.groupby collects adjacent rows with equal keys; because
    # the table was just sorted on the key, each group is one run.  This
    # also handles the empty-table case cleanly.
    return [
        __DoGroup(list(rows), group_column, group_function, missing_value)
        for _, rows in itertools.groupby(
            table, key=lambda row: row[group_column])
    ]
def test_groupby_select_all_columns():
    """Selecting all columns under a groupby must keep the same number
    of columns as the original frame."""
    frame = dt.Frame(id2=[1, 2] * 3, id4=[1] * 6, v3=[1, 3, 2, 3, 3, 3])
    observed = frame[:, :, by(f.id2, f.id4)]
    expected = dt.Frame(id2=[1, 1, 1, 2, 2, 2],
                        id4=[1] * 6,
                        v3=[1, 2, 3, 3, 3, 3])
    assert_equals(observed, expected)
def append_data(df1, df2):
    '''
    Append df2 below df1 and collapse duplicate index labels,
    keeping the first (i.e. df1's) non-null values for each label.
    '''
    combined = pd.concat([df1, df2])
    deduped = combined.groupby(combined.index).first()
    return deduped
def _split_dataframe(df, dense_columns):
    """Split a DataFrame by creating groups of the same values for the dense dims."""
    # df.groupby yields (name, group) pairs, so dict() builds the same
    # mapping the explicit comprehension did.
    grouped = dict(df.groupby(dense_columns))
    return convert_dictionary_keys_to_dense_indices(grouped)
def groupby(xs, keys):
    """Bucket the items of *xs* by their parallel entries in *keys*.

    Returns a defaultdict(list) mapping each key to the list of items
    (in original order) that share it.  Extra items in the longer of
    the two sequences are ignored, as zip truncates.
    """
    buckets = defaultdict(list)
    for key, item in zip(keys, xs):
        buckets[key].append(item)
    return buckets