"""De-duplicate the sheets of an Excel workbook by their ``xid`` column.

Reads two sheets from ``merge.xlsx``, keeps the first row for every distinct
``xid`` value in each sheet, and writes the results to ``xid_dedup.xlsx``
under the same sheet names.
"""

import pandas as pd

# Source workbook and destination for the de-duplicated copy.
input_path = "merge.xlsx"
output_path = "xid_dedup.xlsx"

# Sheets to process; each one is de-duplicated independently of the other.
sheet_names = ["视频信息", "用户信息"]

# Passing a list of sheet names reads the workbook in a single call and
# returns a dict mapping each requested sheet name to its DataFrame.
sheets = pd.read_excel(input_path, sheet_name=sheet_names)

# De-duplicate by xid, keeping the first record. This is done before the
# writer is opened so that a failure here (e.g. a sheet without an "xid"
# column) happens before any output file is created.
deduped_sheets = {
    name: sheets[name].drop_duplicates(subset="xid", keep="first")
    for name in sheet_names
}

# Write the de-duplicated sheets to a new workbook, without the index column.
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    for name in sheet_names:
        deduped_sheets[name].to_excel(writer, sheet_name=name, index=False)

print(f"去重完成,结果保存为:{output_path}")