Every line of 'pandas clean data' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.
def preprocess_data(self, data_df):
    """Expand the categorical columns of ``data_df`` into dummy columns.

    Columns whose dtype is ``object`` are replaced by the indicator columns
    produced by ``pd.get_dummies`` (prefixed with the column name, joined
    with ``self.prefix_sep``). Every other column is copied through as is.

    Args:
        data_df: The ``pandas.DataFrame`` to convert.

    Returns:
        A tuple ``(out, categories)``:

        * ``out`` -- the converted frame, sharing ``data_df``'s index, with
          any remaining missing values replaced by ``'Unknown'``.
        * ``categories`` -- dict mapping each object-dtype column name to the
          list of distinct values found in it, in order of first appearance,
          leaving out missing values and the literal ``'Unknown'``.
    """
    out = pd.DataFrame(index=data_df.index)  # output frame, filled column by column
    categories = {}
    # ``DataFrame.iteritems`` was removed in pandas 2.0; ``items`` yields the
    # same (column name, Series) pairs and exists in older releases as well.
    for col, col_data in data_df.items():
        # Object-dtype columns are converted to one or more dummy variables.
        if col_data.dtype == object:
            # Missing values are folded into the 'Unknown' placeholder, which
            # is then left out of the recorded categories.
            seen = col_data.fillna('Unknown').unique()
            categories[col] = [value for value in seen if value != 'Unknown']
            col_data = pd.get_dummies(col_data, prefix=col, prefix_sep=self.prefix_sep)
        out = out.join(col_data)
    return out.fillna('Unknown'), categories
def clean_data(self):
    """Load and process whichever data files are configured.

    When ``self.mocap_tsv_path`` is set, the mocap data is loaded and run
    through missing-marker identification, time stamp generation and
    missing-marker interpolation; the result is stored in
    ``self.mocap_data``. When ``self.record_tsv_path`` is set, events are
    extracted from the record file and the record data is loaded into
    ``self.raw_record_data_frame``.

    ``self.data`` then becomes:

    * the mocap data joined with the resampled record data (also stored in
      ``self.record_data``) when both paths are set,
    * the raw record data when only the record path is set,
    * the mocap data when only the mocap path is set.

    When neither path is set, ``self.data`` is not assigned here and its
    current value is returned.

    Returns:
        ``self.data``.
    """
    use_mocap = self.mocap_tsv_path is not None
    use_record = self.record_tsv_path is not None

    if use_mocap:
        frame = self._load_mocap_data(ignore_hbm=True)
        # Each stage takes the previous stage's frame and returns a new one.
        for stage in (self._identify_missing_markers,
                      self._generate_cortex_time_stamp,
                      self._interpolate_missing_markers):
            frame = stage(frame)
        self.mocap_data = frame

    if use_record:
        # TODO : A record file that has events but no event mapping given in
        # a meta file should do some default event handling behavior. Keep
        # in mind that D-Flow only allows a certain number of events (A
        # through F) and multiple counts for the events.
        self._extract_events_from_record_file()
        self.raw_record_data_frame = self._load_record_data()

    if use_mocap and use_record:
        self.record_data = self._resample_record_data(
            self.raw_record_data_frame)
        self.data = self.mocap_data.join(self.record_data)
    elif use_record:
        self.data = self.raw_record_data_frame
    elif use_mocap:
        self.data = self.mocap_data

    return self.data
def prep_data(data):
    """Sort ``data`` by ``datetime_full`` and move ``new_brightness`` last.

    Args:
        data: DataFrame with a ``datetime_full`` column.

    Returns:
        A new frame sorted by ``datetime_full``. If a ``new_brightness``
        column is present it becomes the final column; the other columns
        keep their relative order.
    """
    ordered = data.sort_values(by="datetime_full")
    if "new_brightness" not in ordered.columns:
        return ordered

    leading = [name for name in ordered.columns if name != "new_brightness"]
    return ordered.reindex(columns=leading + ["new_brightness"])
def _clean_columns(df, keep_colnames):
    """Build a replacement list of column labels for ``df``.

    Args:
        df: Object exposing a ``columns`` iterable of labels.
        keep_colnames: Container of labels to leave untouched.

    Returns:
        A list with one entry per column of ``df``, in order: the original
        label when it is in ``keep_colnames``, otherwise the column's
        zero-based position.
    """
    return [
        label if label in keep_colnames else position
        for position, label in enumerate(df.columns)
    ]