Every line of 'pandas filter rows by condition' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
def filter(self, func):
    """
    Return a copy of a DataFrame excluding elements from groups that
    do not satisfy the boolean criterion specified by func.

    Parameters
    ----------
    func : function
        Function to apply to each subframe. Should return True or False.

    Returns
    -------
    filtered : DataFrame

    Raises
    ------
    TypeError
        If ``func`` is not callable.

    Notes
    -----
    Each subframe is endowed the attribute 'name' in case you need to know
    which group you are working on.

    This method takes no ``dropna`` argument: the pandas filter is invoked
    with its defaults, so groups that do not pass the filter are dropped
    rather than filled with NaNs.

    Examples
    --------
    >>> df = ks.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
    ...                           'foo', 'bar'],
    ...                    'B' : [1, 2, 3, 4, 5, 6],
    ...                    'C' : [2.0, 5., 8., 1., 2., 9.]}, columns=['A', 'B', 'C'])
    >>> grouped = df.groupby('A')
    >>> grouped.filter(lambda x: x['B'].mean() > 3.)
         A  B    C
    1  bar  2  5.0
    3  bar  4  1.0
    5  bar  6  9.0
    """
    # Validate up front so a bad argument fails here, not inside the helper.
    if not callable(func):
        # Report the bare type name ("int"), not the repr "<class 'int'>".
        raise TypeError("%s object is not callable" % type(func).__name__)

    # Handed to the group-map helper as its schema argument; presumably the
    # output schema, which matches the input because filtering only drops
    # rows -- TODO confirm against _spark_group_map_apply.
    data_schema = self._kdf._sdf.schema
    groupby_names = [s.name for s in self._groupkeys]

    def pandas_filter(pdf):
        # Re-group the pandas frame by the same key names and let pandas
        # decide which groups survive.
        return pdf.groupby(groupby_names).filter(func)

    sdf = self._spark_group_map_apply(
        pandas_filter, data_schema, retain_index=True)
    # Rebuild the frame's internal metadata on top of the filtered Spark
    # frame, re-resolving each data column against it.
    return DataFrame(self._kdf._internal.copy(
        sdf=sdf,
        column_scols=[scol_for(sdf, col) for col in self._kdf._internal.data_columns]))