5 examples of 'pandas merge on multiple columns' in Python

Every line of 'pandas merge on multiple columns' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets, you agree to this disclaimer.
def merge(old_cols, new_cols):
    """Return ``old_cols`` followed by ``new_cols``.

    The two arguments are combined with the ``+`` operator, so the result
    is whatever that operator yields for their type (concatenation for
    lists, tuples and strings). Neither argument is modified here.
    """
    combined = old_cols + new_cols
    return combined
Important

Use secure code every time

Secure your code as it's written. Use Snyk Code to scan source code in minutes – no build needed – and fix issues immediately. Enable Snyk Code

def default_merger(x, y):
    """Concatenate ``x`` and ``y`` with ``pandas.concat`` and its defaults.

    The inputs are stacked in the order given (``x`` first, then ``y``);
    no index reset or other option is applied.
    """
    # pandas is imported inside the function, so it is only loaded when
    # the merger is actually called.
    from pandas import concat

    pieces = [x, y]
    return concat(pieces)
def merge_columns(self, columns=None, sparse=True, sampling_rate='tr'):
    ''' Merge columns into one DF.

    Args:
        columns (list): Optional list of column names to retain; if None,
            all columns are written out.
        sparse (bool): If True, columns will be kept in a sparse format
            provided they are all internally represented as such. If False,
            a dense matrix (i.e., uniform sampling rate for all events)
            will be exported. Will be ignored if at least one column is
            dense.
        sampling_rate (float): If a dense matrix is written out, the
            sampling rate (in Hz) to use for downsampling. The value is
            resolved through ``self._get_sampling_rate`` before use, so
            whatever that helper accepts (the default here is ``'tr'``)
            is valid.

    Returns: A pandas DataFrame. In the dense case it contains an
        ``onset`` column, a ``duration`` column (``1 / sampling_rate``),
        one column per retained variable, and the columns of
        ``self.dense_index``.
    '''

    # All-sparse collections are delegated to the parent implementation.
    if sparse and self._none_dense():
        return super(BIDSEventVariableCollection,
                     self).merge_columns(columns)

    sampling_rate = self._get_sampling_rate(sampling_rate)

    # Make sure all columns have the same sampling rate
    _cols = self.resample(sampling_rate, force_dense=True,
                          in_place=False).values()

    # Retain only specific columns if desired
    if columns is not None:
        _cols = [c for c in _cols if c.name in columns]

    # These two are index-like and are emitted via dense_index instead.
    _cols = [c for c in _cols if c.name not in ["event_file_id", "time"]]

    # Merge all data into one DF
    dfs = [pd.Series(c.values.iloc[:, 0], name=c.name) for c in _cols]

    # Convert datetime to seconds and add duration column.
    # Work on a copy and read 'time' without popping it: popping from
    # self.dense_index would strip the column from the instance and make
    # every later call fail with a KeyError.
    dense_index = self.dense_index.copy()
    onsets = dense_index['time'].values.astype(float) / 1e+9
    timing = pd.DataFrame({'onset': onsets})
    timing['duration'] = 1. / sampling_rate

    dfs = [timing] + dfs + [dense_index]
    return pd.concat(dfs, axis=1)
def random_merge(A, B, N=20, on='AnswerId', key='key', n='n'):
    """Pair all rows of A with 1 matching row on "on" and N-1 random rows from B

    Args:
        A: Left DataFrame; must contain column ``on``.
        B: Right DataFrame; must contain column ``on``.
        N: Pairs per row of ``A`` (1 match + ``N - 1`` random non-matches).
        on: Column whose equality defines a "matching" pair.
        key: Name of a temporary join column; must not already exist in
            ``A`` or ``B``. It is dropped from the result.
        n: Name of the output column numbering each pair: 0 for the
            matching pairs, 1..N-1 for the random ones.

    Returns:
        A DataFrame with a fresh integer index: first the matching pairs
        (inner join of ``A`` and ``B`` on ``on``), then, for each row of
        ``A`` in order, its ``N - 1`` random non-matching pairs.

    Raises:
        AssertionError: if ``key`` is already a column of ``A`` or ``B``.
        ValueError: from ``DataFrame.sample`` when ``B`` has fewer than
            ``N - 1`` rows that differ from a given row of ``A`` on ``on``.
    """
    assert key not in A and key not in B

    # Matching pairs: join on a copy of the `on` column so that both
    # original `on` columns survive (suffixed) in the result.
    X = A.copy()
    X[key] = A[on]
    Y = B.copy()
    Y[key] = B[on]
    match = X.merge(Y, on=key).drop(key, axis=1)
    match[n] = 0
    df_list = [match]

    # Iterate by position, not by label: with duplicate index labels,
    # A.loc[[label]] would return several rows and break the 1-row
    # assumption behind `range(1, N)` below.
    for pos in range(len(A)):
        # Explicit copies so adding the join column neither touches A/B
        # nor triggers SettingWithCopyWarning.
        X = A.iloc[[pos]].copy()
        Y = B[B[on] != X[on].iloc[0]].sample(N - 1).copy()
        # A constant key on both sides turns the merge into a cross join.
        X[key] = 1
        Y[key] = 1
        Z = X.merge(Y, how='outer', on=key).drop(key, axis=1)
        Z[n] = range(1, N)
        df_list.append(Z)

    return pd.concat(df_list, ignore_index=True)
def merge_datasets(self, other):
    """
    This operation combines two dataframes into one new DataFrame.
    If the operation is combining two SpatialDataFrames, the
    geometry_type must match.

    Behavior by type of ``other``:

    * ``SpatialDataFrame``: returns the row-wise concatenation of
      ``self`` and ``other``; raises ``ValueError`` if the geometry
      types differ.
    * ``DataFrame``: returns the row-wise concatenation of ``self``
      and ``other``.
    * ``Series``: stored in place as the ``'merged_datasets'`` column
      of ``self``; nothing is returned.
    * anything else: raises ``ValueError``.
    """
    # The SpatialDataFrame check must come before the generic DataFrame
    # check: if SpatialDataFrame subclasses DataFrame (presumably so,
    # since it is passed to pd.concat; TODO confirm), testing DataFrame
    # first would swallow mismatched geometries and the error below
    # could never fire.
    if isinstance(other, SpatialDataFrame):
        if other.geometry_type != self.geometry_type:
            raise ValueError("Spatial DataFrames must have the same geometry type.")
        return pd.concat(objs=[self, other], axis=0)
    if isinstance(other, DataFrame):
        return pd.concat(objs=[self, other], axis=0)
    if isinstance(other, Series):
        # In-place: unlike the DataFrame branches, this mutates self.
        self['merged_datasets'] = other
        return None
    raise ValueError("Merge datasets cannot merge types %s" % type(other))

Related snippets