feat: 导出修改

This commit is contained in:
晓丰 2025-05-17 02:36:41 +08:00
parent e258e19949
commit af14e27e4c

View File

@ -1,9 +1,8 @@
from datetime import datetime
from urllib.parse import quote

import pandas as pd
import pymysql
from sqlalchemy import create_engine, text
# 数据库连接配置 DB_CONFIG = {
db_config = {
"host": "192.144.230.75", "host": "192.144.230.75",
"port": 3306, "port": 3306,
"user": "db_vidcon", "user": "db_vidcon",
@ -12,18 +11,24 @@ db_config = {
"charset": "utf8mb4", "charset": "utf8mb4",
} }
# SQLAlchemy connection URL assembled from DB_CONFIG.
# The user name and password are percent-encoded (safe="" also encodes "/")
# so that characters such as "@", ":", "/" or "%" inside the credentials are
# not mistaken for URL delimiters when the URL is parsed.
URL = (
    f"mysql+pymysql://{quote(str(DB_CONFIG['user']), safe='')}"
    f":{quote(str(DB_CONFIG['password']), safe='')}"
    f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
    f"?charset={DB_CONFIG['charset']}"
)

# Shared engine used by every query in this script. pool_pre_ping=True asks
# the pool to test a connection for liveness when it is checked out.
engine = create_engine(URL, pool_pre_ping=True)
"""针对指定 rn 拉取数据"""
# 注意:这里把 SQL 中的 rn 和 level 参数化
def get_rn_list() -> list[str]:
    """Return the distinct ``rn`` values stored in ``sh_dm_video_op_v2``.

    Runs a single ``SELECT DISTINCT`` through the module-level ``engine`` and
    collects the first column of every returned row.
    """
    query = text("SELECT DISTINCT rn FROM sh_dm_video_op_v2;")
    values: list[str] = []
    with engine.connect() as connection:
        for record in connection.execute(query):
            values.append(record[0])
    return values
def fetch_all_data_for_rn(rn: str) -> pd.DataFrame:
sql = """
SELECT SELECT
op.id AS ID, op.id AS ID,
v.v_name AS 片名, v.v_name AS 片名,
@ -39,7 +44,7 @@ def get_data_for_rn(rn: str) -> pd.DataFrame:
v.watch_number AS 观看数量, v.watch_number AS 观看数量,
v.public_time AS 上传时间, v.public_time AS 上传时间,
v.u_pic AS 头像, v.u_pic AS 头像,
v.is_repeat AS 是否重复, -- 直接用字段 v.is_repeat AS 是否重复,
op.sort AS 排序, op.sort AS 排序,
op.batch AS 批次, op.batch AS 批次,
op.machine AS 机器号, op.machine AS 机器号,
@ -51,27 +56,39 @@ def get_data_for_rn(rn: str) -> pd.DataFrame:
ON op.v_xid = v.v_xid ON op.v_xid = v.v_xid
WHERE op.rn = %s WHERE op.rn = %s
AND op.batch IN (1747324254, 1747323990) AND op.batch IN (1747324254, 1747323990)
ORDER BY op.id; ORDER BY op.id
""" """
conn = pymysql.connect(**db_config) # 注意params 用列表或元组
df = pd.read_sql(sql, conn, params=(rn)) chunks = pd.read_sql_query(
conn.close() sql,
engine,
params=(rn,),
chunksize=10000
)
dfs = []
for i, chunk in enumerate(chunks, start=1):
print(f"[{rn}] 正在拉取第 {i} 块数据,行数:{len(chunk)}")
dfs.append(chunk)
df = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
print(f"[{rn}] 全部拉取完成,共 {len(df)}")
return df return df
def export_all():
    """Export one Excel workbook per region (``rn``).

    The region list comes from ``get_rn_list``; each region's rows are loaded
    with ``fetch_all_data_for_rn`` and written to
    ``<YYYYMMDD>_T0T1_<region>.xlsx`` (a relative path, so the file lands in
    the current working directory). Regions whose result is empty are
    skipped. Progress messages are printed to stdout.
    """
    # Characters that must not end up in a file name: the space the original
    # code already replaced, plus path separators and the characters that
    # Windows rejects. Each one is mapped to "_" individually, so names that
    # only contain spaces produce exactly the same file name as before.
    unsafe_chars = str.maketrans({ch: "_" for ch in ' \\/:*?"<>|'})

    rn_list = get_rn_list()
    # Computed once so every file of a run shares the same date prefix.
    timestamp = datetime.now().strftime("%Y%m%d")

    for rn in rn_list:
        print(f"开始处理地区:{rn}")
        df = fetch_all_data_for_rn(rn)
        if df.empty:
            print(f"[{rn}] 无数据,跳过导出")
            continue

        safe_rn = rn.translate(unsafe_chars)
        filename = f"{timestamp}_T0T1_{safe_rn}.xlsx"
        print(f"[{rn}] 导出到文件:{filename}")
        df.to_excel(filename, index=False)
        print(f"[{rn}] 导出完成\n")
# Script entry point: run the full export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    export_all()