40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
import pandas as pd
|
|
import pytz
|
|
|
|
input_path = "xid_dedup.xlsx"
|
|
output_path = "id_dedup_时间格式化.xlsx"
|
|
|
|
video_df = pd.read_excel(input_path, sheet_name="视频信息")
|
|
user_df = pd.read_excel(input_path, sheet_name="用户信息")
|
|
|
|
def convert_to_east8(dt):
|
|
try:
|
|
dt = pd.to_datetime(dt, errors='coerce', utc=True)
|
|
return dt.tz_convert("Asia/Shanghai") if pd.notna(dt) else None
|
|
except Exception:
|
|
return None
|
|
|
|
# 1. 转东八区
|
|
video_df["添加时间"] = video_df["添加时间"].apply(convert_to_east8)
|
|
|
|
# 2. 去除时区并格式化为字符串
|
|
video_df["添加时间"] = video_df["添加时间"].dt.tz_localize(None).dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
# 3. 转换时长为 mm:ss
|
|
def format_duration(seconds):
|
|
try:
|
|
seconds = int(seconds)
|
|
minutes = seconds // 60
|
|
remain = seconds % 60
|
|
return f"{minutes}:{remain:02d}"
|
|
except Exception:
|
|
return None
|
|
|
|
video_df["时长 (秒)"] = video_df["时长 (秒)"].apply(format_duration)
|
|
|
|
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
|
|
video_df.to_excel(writer, sheet_name="视频信息", index=False)
|
|
user_df.to_excel(writer, sheet_name="用户信息", index=False)
|
|
|
|
print(f"✅ 处理完成,结果保存为:{output_path}")
|