Crawler/web/dailymotion_com/conversion_time.py

40 lines
1.2 KiB
Python

import pandas as pd
import pytz
input_path = "xid_dedup.xlsx"
output_path = "id_dedup_时间格式化.xlsx"
video_df = pd.read_excel(input_path, sheet_name="视频信息")
user_df = pd.read_excel(input_path, sheet_name="用户信息")
def convert_to_east8(dt):
try:
dt = pd.to_datetime(dt, errors='coerce', utc=True)
return dt.tz_convert("Asia/Shanghai") if pd.notna(dt) else None
except Exception:
return None
# 1. 转东八区
video_df["添加时间"] = video_df["添加时间"].apply(convert_to_east8)
# 2. 去除时区并格式化为字符串
video_df["添加时间"] = video_df["添加时间"].dt.tz_localize(None).dt.strftime("%Y-%m-%d %H:%M:%S")
# 3. 转换时长为 mm:ss
def format_duration(seconds):
try:
seconds = int(seconds)
minutes = seconds // 60
remain = seconds % 60
return f"{minutes}:{remain:02d}"
except Exception:
return None
video_df["时长 (秒)"] = video_df["时长 (秒)"].apply(format_duration)
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
video_df.to_excel(writer, sheet_name="视频信息", index=False)
user_df.to_excel(writer, sheet_name="用户信息", index=False)
print(f"✅ 处理完成,结果保存为:{output_path}")