Every line of 'group by in pandas' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
def add_group_id(df, *groupby_cols, gid_colname='gid'):
    """Attach a per-group integer id and the group's row count to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to annotate.
    *groupby_cols : str
        Column names that jointly define a group.
    gid_colname : str, optional
        Name of the column that receives the group id (default ``'gid'``).

    Returns
    -------
    pandas.DataFrame
        Outer merge of ``df`` with the per-group table, carrying two extra
        integer columns: ``'group_length'`` and ``gid_colname``. Rows left
        without a match after the merge hold ``-1`` in both columns.
    """
    keys = list(groupby_cols)

    def _count_rows(group):
        # One-entry Series so the aggregated result gets a named column.
        return pd.Series({
            'group_length': group.shape[0]
        })

    per_group = df.groupby(keys).apply(_count_rows).reset_index()
    # After reset_index the positional index numbers the groups 0..n-1.
    per_group[gid_colname] = per_group.index

    merged = pd.merge(df, per_group, how='outer', on=keys)
    for column in ('group_length', gid_colname):
        # Fill gaps first: NaN cannot be cast to int.
        merged[column] = merged[column].fillna(-1).astype(int)
    return merged
def groupby(xs, keys):
    """Bucket the items of ``xs`` by the key at the same position in ``keys``.

    Returns a ``defaultdict(list)`` mapping each key to the items paired with
    it, in the order they were seen. Pairing uses ``zip``, so it stops at the
    shorter of the two inputs.
    """
    buckets = defaultdict(list)
    pairs = zip(xs, keys)
    for item, label in pairs:
        buckets[label].append(item)
    return buckets
@typecheck(f=func_spec(1, expr_any),
           collection=expr_oneof(expr_set(), expr_array()))
def group_by(f: Callable, collection) -> DictExpression:
    """Bucket the elements of a collection into a dict keyed by ``f(element)``.

    This is a free-function form of ``collection.group_by(f)``.

    Examples
    --------

    >>> a = ['The', 'quick', 'brown', 'fox']

    >>> hl.eval(hl.group_by(lambda x: hl.len(x), a))
    {5: ['quick', 'brown'], 3: ['The', 'fox']}

    Parameters
    ----------
    f : function ( (arg) -> :class:`.Expression`)
        Called on each element of the collection; its result is the key
        under which that element is stored.
    collection : :class:`.ArrayExpression` or :class:`.SetExpression`
        The collection whose elements are grouped.

    Returns
    -------
    :class:`.DictExpression`.
        Dictionary keyed by the results of `f`.
    """
    grouped = collection.group_by(f)
    return grouped
def _iter_groups(self, df, y=None):
    """Yield ``(key, sub_df, sub_y)`` for every group of `df`.

    Groups come from ``df.groupby(self.groupby)``. ``sub_df`` holds the
    group's rows, selected by position. ``sub_y`` is `y` indexed with the
    same positions, or ``None`` when `y` is not given.
    """
    positions_by_key = df.groupby(self.groupby).indices
    for group_key in positions_by_key:
        positions = positions_by_key[group_key]
        group_frame = df.iloc[positions]
        if y is None:
            group_labels = None
        else:
            group_labels = y[positions]
        yield group_key, group_frame, group_labels
def groupby(self, arr, fields):
    """
    Group a struct array by the selected fields.

    arr : rarray
        A remote array on the server.
    fields : list of field names
        The fields used for grouping.

    Returns a tuple ``(groupby result, groups)``, both wrapped as rarray.
    """
    # NOTE(review): this is a method, so the bare `groupby` below presumably
    # resolves to a module-level function of the same name - confirm.
    response = groupby(self.session_url, arr.url, fields)
    grouped = rarray(response['output_gb'], response['dshape_gb'])
    groups = rarray(response['output_groups'], response['dshape_groups'])
    return (grouped, groups)
def test_filter_groups():
    """
    Only groups holding more than 3 rows should survive the filter.
    """

    grouped = create_test_df().groupby('group')

    def _more_than_three(rows):
        return len(rows) > 3

    filtered = filter_groups(grouped, _more_than_three)

    # Expected survivors: group 0 only, keeping its original index labels.
    expected_frame = pandas.DataFrame(
        {
            'group': [0, 0, 0, 0, 0],
            'feature1': [1, 1, 1, 1, 3],
            'feature2': [10.0, 10.5, 9.5, 11.0, 0.0],
        },
        index=[0, 1, 2, 3, 6],
    )
    should_be = expected_frame.groupby('group')

    assert_equals(filtered, should_be)
@ApplyToDataframe
def ungroup():
    """Return ``UngroupDF``.

    NOTE(review): ``ApplyToDataframe`` and ``UngroupDF`` are defined outside
    this snippet; presumably the decorator applies the returned object to a
    dataframe - confirm against their definitions.
    """
    return UngroupDF
def test_groupby_select_all_columns():
    # Selecting every column under a group-by must give back a frame with
    # as many columns as the source frame had.
    source = dt.Frame(id2=[1, 2] * 3, id4=[1] * 6, v3=[1, 3, 2, 3, 3, 3])
    grouped = source[:, :, by(f.id2, f.id4)]
    expected = dt.Frame(
        id2=[1, 1, 1, 2, 2, 2],
        id4=[1] * 6,
        v3=[1, 2, 3, 3, 3, 3],
    )
    assert_equals(grouped, expected)
def group_func(d):
    """Return the ``time`` attribute of ``d``."""
    time_value = d.time
    return time_value
def _groupby_and_apply(expression, probes, info, applyfunc):
    """
    Subsets `expression` based on most representative probe

    Parameters
    ----------
    expression : dict of (P, S) pandas.DataFrame
        Dictionary where keys are donor IDs and values are dataframes with `P`
        rows representing probes and `S` columns representing distinct samples
    probes : pandas.DataFrame
        Dataframe containing information on probes that should be considered in
        representative analysis. Generally, intensity-based-filtering (i.e.,
        `filter_probes()`) should have been used to reduce this list to only
        those probes with good expression signal
    info : pandas.DataFrame
        Dataframe containing information on probe expression information. Index
        should be unique probe IDs and must have at least 'gene_symbol' column
    applyfunc : callable
        Function used to select representative probe ID from those indexing
        the same gene. Must accept a pandas dataframe as input and return the
        chosen probe ID; the result is cast to `int` before it is used to
        index `probes`. Genes for which it returns NaN are dropped

    Returns
    -------
    representative : dict of (S, G) pandas.DataFrame
        Dictionary where keys are donor IDs and values are dataframes with `S`
        rows representing distinct samples and `G` columns representing unique
        genes
    """

    # group probes by gene and get probe corresponding to relevant feature
    retained = info.groupby('gene_symbol').apply(applyfunc).dropna()

    # `np.squeeze` collapses a single retained probe into a scalar, which
    # `sorted` cannot iterate; `np.atleast_1d` restores a 1-D sequence so the
    # one-gene case no longer raises TypeError
    probe_ids = np.atleast_1d(np.squeeze(retained.astype(int)))
    probes = probes.loc[sorted(probe_ids)]

    # subset expression dataframes to retain only desired probes and reassign
    # (and sort) index to gene symbols in lieu of probe IDs
    gene_symbols = probes['gene_symbol']
    representative = {
        d: e.loc[probes.index].set_index(gene_symbols).sort_index().T
        for d, e in utils.check_dict(expression).items()
    }

    return representative