"""Export rows of ``sh_dm_video_op_v2`` (joined with ``sh_dm_video_v2``) to Excel.

One ``.xlsx`` file is written per distinct ``rn`` value found in
``sh_dm_video_op_v2``, restricted to a fixed set of batch ids.
"""

import os
import re
from collections.abc import Sequence
from datetime import datetime
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text

# SECURITY: these credentials were committed in source.  They remain here only
# as fallback defaults so existing invocations keep working; prefer supplying
# the VIDCON_DB_* environment variables and rotate the embedded password.
DB_CONFIG = {
    "host": os.environ.get("VIDCON_DB_HOST", "192.144.230.75"),
    "port": int(os.environ.get("VIDCON_DB_PORT", "3306")),
    "user": os.environ.get("VIDCON_DB_USER", "db_vidcon"),
    "password": os.environ.get("VIDCON_DB_PASSWORD", "rexdK4fhCCiRE4BZ"),
    "database": os.environ.get("VIDCON_DB_NAME", "db_vidcon"),
    "charset": "utf8mb4",
}

# User and password are percent-encoded so that characters such as "@", ":"
# or "/" in a credential cannot corrupt the URL structure.
URL = (
    f"mysql+pymysql://{quote_plus(DB_CONFIG['user'])}:{quote_plus(DB_CONFIG['password'])}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
    f"?charset={DB_CONFIG['charset']}"
)

engine = create_engine(URL, pool_pre_ping=True)

# Batch ids exported when the caller does not specify any.
DEFAULT_BATCHES: tuple[int, ...] = (1747324254, 1747323990)

# Number of rows fetched per chunk while streaming a query result.
_CHUNK_SIZE = 10000

# Characters replaced by "_" when a region value is used inside a file name:
# the space (as before) plus characters that are path separators or are
# rejected in file names on common filesystems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[ \\/:*?"<>|]')

# ``%s`` placeholders are filled in by the DB driver; ``{batch_placeholders}``
# is replaced with a comma-separated list of ``%s`` (never with raw values).
_FETCH_SQL_TEMPLATE = """
    SELECT
        op.id          AS ID,
        v.v_name       AS 片名,
        v.link         AS 视频连接,
        v.is_piracy    AS 是否盗版,
        op.`level`     AS 优先级,
        op.rn          AS 地区,
        NULL           AS 投诉日期,
        NULL           AS 下线日期,
        op.keyword     AS 关键词,
        v.title        AS 标题,
        v.duration     AS 时长,
        v.watch_number AS 观看数量,
        v.public_time  AS 上传时间,
        v.u_pic        AS 头像,
        v.is_repeat    AS 是否重复,
        op.sort        AS 排序,
        op.batch       AS 批次,
        op.machine     AS 机器号,
        v.u_id         AS 用户id,
        v.u_xid        AS u_xid,
        v.u_name       AS 用户名称
    FROM sh_dm_video_op_v2 AS op
    LEFT JOIN sh_dm_video_v2 AS v
        ON op.v_xid = v.v_xid
    WHERE op.rn = %s
      AND op.batch IN ({batch_placeholders})
    ORDER BY op.id
"""


def get_rn_list() -> list[str]:
    """Return the distinct non-NULL ``rn`` values in ``sh_dm_video_op_v2``."""
    sql = "SELECT DISTINCT rn FROM sh_dm_video_op_v2;"
    with engine.connect() as conn:
        result = conn.execute(text(sql))
        # NULL is dropped: ``op.rn = NULL`` never matches any row, so querying
        # for it would only waste a round trip.
        return [row[0] for row in result if row[0] is not None]


def fetch_all_data_for_rn(
    rn: str,
    batches: Sequence[int] = DEFAULT_BATCHES,
) -> pd.DataFrame:
    """Fetch every exported column for one ``rn`` value.

    Args:
        rn: Value matched against ``op.rn``.
        batches: Batch ids matched against ``op.batch``.  Defaults to the
            two batch ids the script has always exported.

    Returns:
        All matching rows ordered by ``op.id``; an empty ``DataFrame`` when
        nothing matches or when ``batches`` is empty.
    """
    batch_ids = tuple(int(batch) for batch in batches)
    if not batch_ids:
        # ``IN ()`` is not valid SQL, and an empty filter matches nothing.
        return pd.DataFrame()

    placeholders = ", ".join(["%s"] * len(batch_ids))
    sql = _FETCH_SQL_TEMPLATE.format(batch_placeholders=placeholders)

    # Note: params must be a list or tuple.
    chunks = pd.read_sql_query(
        sql,
        engine,
        params=(rn, *batch_ids),
        chunksize=_CHUNK_SIZE,
    )

    dfs = []
    for i, chunk in enumerate(chunks, start=1):
        print(f"[{rn}] 正在拉取第 {i} 块数据,行数:{len(chunk)}")
        dfs.append(chunk)

    df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
    print(f"[{rn}] 全部拉取完成,共 {len(df)} 行")
    return df


def _safe_filename_part(value: object) -> str:
    """Return ``value`` as text with file-name-unsafe characters replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", str(value))


def export_all():
    """Write one ``<YYYYMMDD>_T0T1_<rn>.xlsx`` file per ``rn`` that has data.

    Files are written to the current working directory; ``rn`` values whose
    query returns no rows are skipped.
    """
    rn_list = get_rn_list()
    timestamp = datetime.now().strftime("%Y%m%d")

    for rn in rn_list:
        print(f"开始处理地区:{rn}")
        df = fetch_all_data_for_rn(rn)
        if df.empty:
            print(f"[{rn}] 无数据,跳过导出")
            continue

        safe_rn = _safe_filename_part(rn)
        filename = f"{timestamp}_T0T1_{safe_rn}.xlsx"
        print(f"[{rn}] 导出到文件:{filename} …")
        df.to_excel(filename, index=False)
        print(f"[{rn}] 导出完成\n")


if __name__ == "__main__":
    export_all()