1. 通过key(一个)合并两个DataFrame
import pandas as pd

# Merge two DataFrames on a single key column.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# Merge on the shared 'key' column; rows with equal keys are combined.
res = pd.merge(left, right, on='key')
print(res)
2. 通过key(多个)进行合并
import pandas as pd

# Merge two DataFrames on multiple key columns.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# Merge on (key1, key2).
# on:  the column name(s) used as the join key.
# how: selects the join type; the values demonstrated here are
#      'inner', 'outer', 'left' and 'right'.
#   how='inner' keeps only key combinations present in both DataFrames.
#   how='outer' keeps the union of keys; cells with no data become NaN.
#   how='left'  keeps every key from the left DataFrame; columns coming
#               from the right are NaN where there is no match.
#   how='right' keeps every key from the right DataFrame; columns coming
#               from the left are NaN where there is no match.
res = pd.merge(left, right, on=['key1', 'key2'], how='inner')
print(res)

res2 = pd.merge(left, right, on=['key1', 'key2'], how='outer')
print(res2)

res3 = pd.merge(left, right, on=['key1', 'key2'], how='left')
print(res3)

res4 = pd.merge(left, right, on=['key1', 'key2'], how='right')
print(res4)
3. indicator 显示合并方式
import pandas as pd

df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})

# indicator=True adds a '_merge' column telling where each row came from:
# 'left_only', 'right_only' or 'both'.
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
print(res)

# Passing a string instead of True uses it as the indicator column's name.
res2 = pd.merge(df1, df2, on='col1', how='outer', indicator='my_merge')
print(res2)
4. index合并
import pandas as pd

left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)

# left_index / right_index: use each DataFrame's index as the join key
# instead of a column.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
print(res)

res2 = pd.merge(left, right, left_index=True, right_index=True, how='inner')
print(res2)
5. suffixes 合并两个名称相同的列
import pandas as pd

boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})

# Both frames have an 'age' column that is not part of the join key;
# suffixes are appended to the overlapping names so the left and right
# columns stay distinguishable ('age_boy' / 'age_girl').
res = pd.merge(boys, girls, on='k', suffixes=('_boy', '_girl'), how='inner')
print(res)