Every line of the following 'pandas drop duplicated columns' code snippets is scanned for vulnerabilities by our machine learning engine, which combs millions of open-source libraries to help ensure your Python code is secure.
def remove_duplicates(df_or_series):
    """Drop entries whose index label has already been seen.

    Keeps the first occurrence of each duplicated index value
    (CalTrack 2.3.2.2).

    Parameters
    ----------
    df_or_series : :any:`pandas.DataFrame` or :any:`pandas.Series`
        Pandas object from which to drop duplicate index values.

    Returns
    -------
    deduplicated : :any:`pandas.DataFrame` or :any:`pandas.Series`
        The deduplicated pandas object.
    """
    keep_mask = ~df_or_series.index.duplicated(keep="first")
    return df_or_series.loc[keep_mask]
39 def _drop_col(self, df): 40 ''' 41 Drops last column, which was added in the parsing procedure due to a 42 trailing white space for each sample in the text file 43 Arguments: 44 df: pandas dataframe 45 Return: 46 df: original df with last column dropped 47 ''' 48 return df.drop(df.columns[-1], axis=1)
def dropcols(df, start=None, end=None):
    """Drop any column containing NaN within the [start, end] window.

    Thin wrapper around ``DataFrame.dropna`` that builds the *subset*
    argument for time-series-indexed DataFrames: only NaNs whose index
    label falls between ``start`` and ``end`` (both inclusive) cause a
    column to be dropped; NaNs outside the window are ignored.

    Parameters
    ----------
    df : DataFrame
    start : str or datetime, default None
        Start cutoff date, inclusive.  Defaults to the first index label.
    end : str or datetime, default None
        End cutoff date, inclusive.  Defaults to the last index label.

    Returns
    -------
    DataFrame
        ``df`` without the columns that have NaN inside the window.

    Raises
    ------
    ValueError
        If ``df`` is a Series rather than a DataFrame.
    """
    if isinstance(df, Series):
        raise ValueError("func only applies to `pd.DataFrame`")
    lo = df.index[0] if start is None else start
    hi = df.index[-1] if end is None else end
    window = df.index[(df.index >= lo) & (df.index <= hi)]
    return df.dropna(axis=1, subset=window)
def _unique(df, columns=None):
    """Summarize the distinct non-null values of the given columns.

    Parameters
    ----------
    df : DataFrame to inspect.
    columns : str, list of str, or None
        Column name(s) to summarize; when omitted (or empty) every
        column of ``df`` is summarized.

    Returns
    -------
    dict
        Maps each column name to ``{'count': n, 'values': [...]}``
        where *values* is the sorted list of unique entries after
        flattening any nested values and dropping nulls.
    """
    if isinstance(columns, str):
        columns = [columns]
    cols = list(columns) if columns else df.columns.tolist()
    summary = {}
    for name in cols:
        non_null = df[name].dropna().values
        distinct = np.unique(list(_flatten_list(non_null))).tolist()
        summary[name] = {'count': len(distinct), 'values': distinct}
    return summary
80 def _clean_columns(df, keep_colnames): 81 new_colnames = [] 82 for i,colname in enumerate(df.columns): 83 if colname not in keep_colnames: 84 new_colnames.append(i) 85 else: 86 new_colnames.append(colname) 87 return new_colnames
@property
def drop_columns(self):
    """Rows of ``uni_table`` flagged for dropping.

    A row is flagged when its ``Iv`` value is below ``_iv_threshold``
    or is missing.  ``DataFrame.query`` comparisons are False for NaN,
    so the null-``Iv`` rows are appended explicitly.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the below-threshold rows and the null-Iv rows.
    """
    threshold = self._iv_threshold
    # Reference the local via ``@`` instead of %-formatting the value
    # into the query string; interpolation builds an invalid expression
    # for non-numeric threshold values.
    low_iv = self.uni_table.query('Iv < @threshold')
    null_iv = self.uni_table[self.uni_table['Iv'].isnull()]
    return pd.concat([low_iv, null_iv])
def drop_duplicate_events(df):
    """Collapse a group of rows for one event into a single record.

    All fields come from the most recent row (last after sorting by
    ``event_index``, NaNs last), except ``event_index`` itself, which
    is taken from the earliest row.

    Returns the merged record as a plain dict.
    """
    ordered = df.sort_values('event_index', na_position='last')
    first_event_index = ordered.event_index.iloc[0]
    record = ordered.iloc[-1].to_dict()
    record['event_index'] = first_event_index
    return record
def drop_some(df_: pd.DataFrame, thresh: int) -> pd.DataFrame:
    """Drop columns with too many missing values.

    Parameters
    ----------
    df_ : pd.DataFrame
        Frame to filter.
    thresh : int
        Minimum number of non-NA values a column must contain to be
        kept — this is ``DataFrame.dropna``'s ``thresh`` semantics;
        the original comment described it backwards.

    Returns
    -------
    pd.DataFrame
        Copy of ``df_`` without the under-populated columns.
    """
    # Pass axis by keyword: the positional ``axis`` argument was
    # deprecated in pandas 1.x and removed in pandas 2.0.
    return df_.dropna(axis=1, thresh=thresh)