# Crawler/web/dailymotion_com/merge_video_user_data.py
#
# 38 lines
# 1.2 KiB
# Python

import os
import pandas as pd
# Sheet names expected in every input workbook, and the provenance column
# appended to each row so merged rows can be traced back to their file.
VIDEO_SHEET = "视频信息"
USER_SHEET = "用户信息"
SOURCE_COLUMN = "来源文件"

# Input folder scanned for workbooks and the merged workbook that is written.
folder_path = "out_put_US"
output_path = "合并视频用户信息.xlsx"


def list_workbooks(folder: str) -> list:
    """Return the ``.xlsx`` filenames found directly inside *folder*.

    Names are sorted so the merged row order is reproducible
    (``os.listdir`` returns entries in arbitrary order).  Excel lock files
    (``~$name.xlsx``), which exist while a workbook is open in Excel and
    are not readable workbooks, are skipped.

    Raises:
        OSError: if *folder* does not exist or cannot be listed.
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if name.endswith(".xlsx") and not name.startswith("~$")
    )


def read_workbook(file_path: str, source_name: str) -> tuple:
    """Read the video and user sheets of one workbook.

    Both sheets are loaded with a single ``read_excel`` call so the file is
    opened and parsed only once.  A ``SOURCE_COLUMN`` column holding
    *source_name* is added to each frame; the frames' indexes are left
    untouched.

    Returns:
        A ``(video_df, user_df)`` tuple.

    Raises:
        Exception: whatever ``pandas.read_excel`` raises for an unreadable
            file or a missing sheet.
    """
    sheets = pd.read_excel(file_path, sheet_name=[VIDEO_SHEET, USER_SHEET])
    video_df = sheets[VIDEO_SHEET]
    user_df = sheets[USER_SHEET]
    video_df[SOURCE_COLUMN] = source_name
    user_df[SOURCE_COLUMN] = source_name
    return video_df, user_df


def load_workbooks(folder: str) -> tuple:
    """Load every workbook in *folder*.

    A workbook that cannot be read is reported on stdout and skipped, so a
    single bad file does not abort the whole merge.

    Returns:
        A ``(video_frames, user_frames)`` tuple of lists with one frame per
        successfully read workbook; both lists always have equal length.
    """
    video_frames = []
    user_frames = []
    for filename in list_workbooks(folder):
        file_path = os.path.join(folder, filename)
        try:
            video_df, user_df = read_workbook(file_path, filename)
        except Exception as e:  # deliberate best-effort: report and continue
            print(f"❌ 读取失败: {filename}, 错误信息: {e}")
            continue
        # Append only after both sheets loaded, keeping the lists in step.
        video_frames.append(video_df)
        user_frames.append(user_df)
    return video_frames, user_frames


def combine_frames(frames: list) -> pd.DataFrame:
    """Concatenate *frames* into one frame with a fresh 0..n-1 index.

    An empty list yields an empty ``DataFrame`` instead of the
    ``ValueError`` that ``pd.concat`` raises when given nothing.
    """
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def main() -> None:
    """Merge all workbooks in ``folder_path`` into ``output_path``."""
    video_data_list, user_data_list = load_workbooks(folder_path)

    if not video_data_list:
        # Nothing was read (no workbooks, or all of them failed): report it
        # rather than crashing in pd.concat or writing an empty workbook.
        print(f"⚠️ 没有可合并的数据: {folder_path}")
        return

    # Merge the per-file frames.
    all_video_df = combine_frames(video_data_list)
    all_user_df = combine_frames(user_data_list)

    # Write both merged tables into one Excel file as two sheets.
    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        all_video_df.to_excel(writer, sheet_name=VIDEO_SHEET, index=False)
        all_user_df.to_excel(writer, sheet_name=USER_SHEET, index=False)
    print(f"✅ 合并完成,文件保存为:{output_path}")


if __name__ == "__main__":
    main()