4 examples of 'pandas clean data' in Python

Every line of 'pandas clean data' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
def preprocess_data(self, data_df):
    """Convert the categorical columns of ``data_df`` to binary dummy columns.

    Columns whose dtype is ``object`` are expanded with ``pd.get_dummies``,
    using the original column name as prefix and ``self.prefix_sep`` as the
    separator. Every other column is carried over unchanged.

    Args:
        data_df: The input ``pandas.DataFrame``. It is not modified.

    Returns:
        A tuple ``(out, categories)``. ``out`` is the converted frame with
        any remaining missing values replaced by ``'Unknown'``.
        ``categories`` maps each original categorical column name to the
        list of its distinct values, with the ``'Unknown'`` placeholder
        (used for missing entries) removed.
    """
    out = pd.DataFrame(index=data_df.index)  # output dataframe, initially empty
    categories = {}
    # DataFrame.iteritems() was removed in pandas 2.0; items() is the
    # long-standing equivalent and yields the same (name, Series) pairs.
    for col, col_data in data_df.items():
        # If non-numeric, convert to one or more dummy variables
        if col_data.dtype == object:
            categories[col] = list(col_data.fillna('Unknown').unique())
            col_data = pd.get_dummies(
                col_data, prefix=col, prefix_sep=self.prefix_sep
            )
        out = out.join(col_data)
    # Missing values were only labelled 'Unknown' so that unique() could
    # handle them; the placeholder is not reported as a real category.
    for values in categories.values():
        if 'Unknown' in values:
            values.remove('Unknown')
    return out.fillna('Unknown'), categories
def clean_data(self):
    """Load and process the configured data files and return the result.

    If ``self.mocap_tsv_path`` is set, the mocap data is loaded, passed
    through the missing-marker identification, time-stamp generation and
    missing-marker interpolation helpers, and stored in ``self.mocap_data``.
    If ``self.record_tsv_path`` is set, events are extracted from the record
    file and the record data is loaded into ``self.raw_record_data_frame``.

    ``self.data`` is then set to:

    * the mocap data joined with the resampled record data when both paths
      are set (the resampled frame is also stored in ``self.record_data``),
    * the raw record data when only the record path is set,
    * the mocap data when only the mocap path is set.

    Returns:
        ``self.data``.
    """
    if self.mocap_tsv_path is not None:
        raw_mocap_data_frame = self._load_mocap_data(ignore_hbm=True)
        mocap_data_frame = self._identify_missing_markers(raw_mocap_data_frame)
        mocap_data_frame = self._generate_cortex_time_stamp(mocap_data_frame)
        mocap_data_frame = self._interpolate_missing_markers(mocap_data_frame)
        self.mocap_data = mocap_data_frame

    if self.record_tsv_path is not None:
        # TODO : A record file that has events but no event mapping given
        # in a meta file should do some default event handling behavior.
        # Keep in mind that D-Flow only allows a certain number of events
        # (A through F) and multiple counts for the events.
        # Event extraction runs before the record data is loaded; keep
        # this order.
        self._extract_events_from_record_file()
        self.raw_record_data_frame = self._load_record_data()

    has_mocap = self.mocap_tsv_path is not None
    has_record = self.record_tsv_path is not None
    if has_mocap and has_record:
        self.record_data = self._resample_record_data(
            self.raw_record_data_frame
        )
        self.data = self.mocap_data.join(self.record_data)
    elif has_record:
        self.data = self.raw_record_data_frame
    elif has_mocap:
        self.data = self.mocap_data
    # NOTE(review): when neither path is set, self.data is not assigned
    # here, so the return relies on it having been set elsewhere - confirm.

    return self.data
def prep_data(data):
    """Sort ``data`` by ``datetime_full`` and move ``new_brightness`` last.

    Args:
        data: A ``pandas.DataFrame`` with a ``"datetime_full"`` column.

    Returns:
        A new frame sorted by ``"datetime_full"``. If a ``"new_brightness"``
        column is present it becomes the last column; the relative order of
        the other columns is unchanged. The input frame is not modified.
    """
    data = data.sort_values(by="datetime_full")
    if "new_brightness" in data.columns:
        col_sort = [x for x in data.columns if x != "new_brightness"]
        col_sort.append("new_brightness")
        data = data.reindex(col_sort, axis=1)
    return data
80def _clean_columns(df, keep_colnames):
81 new_colnames = []
82 for i,colname in enumerate(df.columns):
83 if colname not in keep_colnames:
84 new_colnames.append(i)
85 else:
86 new_colnames.append(colname)
87 return new_colnames

Related snippets