7 examples of 'pandas merge columns with same name' in Python

Every line of the 'pandas merge columns with same name' code snippets is scanned for vulnerabilities by our machine learning engine, which combs millions of open source libraries to help ensure your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets you agree to
def merge(old_cols, new_cols):
    """Return the concatenation of ``old_cols`` followed by ``new_cols``.

    The two arguments are combined with ``+``, so they must be of types
    that support it together (for example two lists or two tuples).
    Neither input is modified.
    """
    return old_cols + new_cols
def merge_columns(self, columns=None, sparse=True, sampling_rate='tr'):
    ''' Merge columns into one DF.

    Args:
        columns (list): Optional list of column names to retain; if None,
            all columns are written out.
        sparse (bool): If True, columns will be kept in a sparse format
            provided they are all internally represented as such. If False,
            a dense matrix (i.e., uniform sampling rate for all events)
            will be exported. Will be ignored if at least one column is
            dense.
        sampling_rate (float): If a dense matrix is written out, the
            sampling rate (in Hz) to use for downsampling. Defaults to the
            value currently set in the instance. The value is resolved
            through ``self._get_sampling_rate`` before use.

    Returns: A pandas DataFrame.
    '''

    # Sparse output is delegated entirely to the parent implementation.
    if sparse and self._none_dense():
        return super(BIDSEventVariableCollection,
                     self).merge_columns(columns)

    sampling_rate = self._get_sampling_rate(sampling_rate)

    # Make sure all columns have the same sampling rate
    _cols = self.resample(sampling_rate, force_dense=True,
                          in_place=False).values()

    # Retain only specific columns if desired
    if columns is not None:
        _cols = [c for c in _cols if c.name in columns]

    _cols = [c for c in _cols if c.name not in ["event_file_id", "time"]]

    # Merge all data into one DF
    dfs = [pd.Series(c.values.iloc[:, 0], name=c.name) for c in _cols]

    # Convert datetime to seconds and add duration column.
    # Read 'time' from the private copy and do not pop it: popping from
    # self.dense_index (as this method previously did) removed the column
    # from the instance, so any later call failed on the missing 'time' key.
    # NOTE(review): the returned frame still carries the 'time' column from
    # dense_index, exactly as before; drop it here if that was never intended.
    dense_index = self.dense_index.copy()
    onsets = dense_index['time'].values.astype(float) / 1e+9
    timing = pd.DataFrame({'onset': onsets})
    timing['duration'] = 1. / sampling_rate
    dfs = [timing] + dfs + [dense_index]
    data = pd.concat(dfs, axis=1)

    return data
def add_group_id(df, *groupby_cols, gid_colname='gid'):
    """Annotate each row of ``df`` with its group's id and size.

    Rows are grouped by ``groupby_cols``. Every group receives an integer
    id (its position in the grouped result) and its row count, and both are
    merged back onto the rows of ``df``.

    Args:
        df (pandas.DataFrame): Input frame; it is not modified.
        *groupby_cols: Labels of the columns to group by.
        gid_colname (str): Name of the group-id column to add.

    Returns:
        pandas.DataFrame: A new frame with the columns of ``df`` plus
        ``'group_length'`` and ``gid_colname``, both of integer dtype.
        Rows that match no group after the merge get ``-1`` in both.
    """
    groupby_cols = list(groupby_cols)

    # size() yields the per-group row count directly, without invoking a
    # Python-level function once per group.
    df_group = (
        df.groupby(groupby_cols)
        .size()
        .reset_index(name='group_length')
    )
    df_group[gid_colname] = df_group.index

    df_merge = pd.merge(df, df_group, how='outer', on=groupby_cols)

    # Unmatched rows come out of the merge as NaN; mark them with -1 so the
    # columns can be stored as integers.
    for col in ('group_length', gid_colname):
        df_merge[col] = df_merge[col].fillna(-1).astype(int)
    return df_merge
def df_column_types_rename(df):
    """Return a type name for each column of ``df``, in column order.

    The pandas dtype names ``object``, ``int64``, ``float64`` and ``bool``
    are translated to ``string``, ``integer``, ``double`` and ``boolean``
    respectively. Any other dtype name is returned unchanged.

    Dtypes are read from ``df.dtypes`` rather than by indexing each label,
    so frames with duplicated column names are handled as well.

    Args:
        df (pandas.DataFrame): Frame whose column dtypes are inspected.

    Returns:
        list[str]: One type name per column.
    """
    renames = {
        'object': 'string',
        'int64': 'integer',
        'float64': 'double',
        'bool': 'boolean',
    }
    return [renames.get(dtype.name, dtype.name) for dtype in df.dtypes]
def matchColumnNames(df):
    """Return a copy of ``df`` with selected columns renamed.

    The columns ``id``, ``heading``, ``secsSinceReport``, ``lat``, ``lon``
    and ``routeTag`` are renamed to ``vehicle_id``, ``direction``,
    ``seconds_since_report``, ``latitude``, ``longitude`` and ``line``.
    Columns not listed are left as they are, and ``df`` itself is not
    modified.

    Args:
        df (pandas.DataFrame): Frame whose columns should be renamed.

    Returns:
        pandas.DataFrame: A new frame with the renamed columns.
    """
    return df.rename(
        columns={
            "id": "vehicle_id",
            "heading": "direction",
            "secsSinceReport": "seconds_since_report",
            "lat": "latitude",
            "lon": "longitude",
            "routeTag": "line",
        }
    )
def extractCols(df, colnames):
    """Remove the given columns from ``df`` and return them.

    ``df`` is modified in place: the selected columns are dropped from it.

    Args:
        df (pandas.DataFrame): Frame to take the columns out of.
        colnames (list or str): Column labels to extract. A single string
            is treated as a one-element list, so the result is always a
            DataFrame.

    Returns:
        pandas.DataFrame: The extracted columns.
    """
    # A bare string would make df[colnames] a Series, which has no
    # `.columns`; normalize it so single-column extraction works too.
    if isinstance(colnames, str):
        colnames = [colnames]
    extracted = df[colnames]
    df.drop(columns=extracted.columns, inplace=True)
    return extracted
def cols_to_cats(df, cat_name, col_cats):
    """
    Turn top-level MultiIndex columns into a categorial column.

    In some cases FERC Form 1 data comes with many different types of related
    values interleaved in the same table -- e.g. current year and previous year
    income -- this can result in DataFrames that are hundreds of columns wide,
    which is unwieldy. This function takes those top level MultiIndex labels
    and turns them into categories in a single column, which can be used to
    select a particular type of report.

    Args:
        df (pandas.DataFrame): the dataframe to be simplified.
        cat_name (str): the label of the column to be created indicating what
            MultiIndex label the values came from.
        col_cats (dict): a dictionary with top level MultiIndex labels as keys,
            and the category to which they should be mapped as values.

    Returns:
        pandas.DataFrame: A re-shaped/re-labeled dataframe with one fewer
        levels of MultiIndex in the columns, and an additional column
        containing the assigned labels.

    """
    pieces = []
    for col, cat in col_cats.items():
        logger.info(f"Col: {col}, Cat: {cat}")
        # Select everything under this top-level label and discard rows
        # that hold no values at all for it.
        tmp_df = df.loc[:, col].copy().dropna(how='all')
        tmp_df.loc[:, cat_name] = cat
        pieces.append(tmp_df)

    # Concatenate once at the end instead of growing a frame inside the
    # loop, which re-copied all accumulated rows on every iteration.
    if not pieces:
        # pd.concat rejects an empty list; keep the empty-frame result for
        # an empty mapping.
        return pd.DataFrame().reset_index()
    return pd.concat(pieces).reset_index()

Related snippets