fix: 添加更新 is_repeat 字段的功能并修改数据获取函数以支持批次参数

This commit is contained in:
晓丰 2025-05-18 23:54:14 +08:00
parent 0a1d4492ee
commit 90ad9c28ff

View File

@ -19,6 +19,27 @@ URL = (
engine = create_engine(URL, pool_pre_ping=True)
# 在导出前执行更新 is_repeat 的 SQL
def update_is_repeat(batches: list[int]):
    """Recompute the ``is_repeat`` flag for all rows in the given batches.

    Rows of ``sh_dm_video_op_v2`` whose ``batch`` is in *batches* are grouped
    by ``v_xid``. A row gets ``is_repeat = 1`` when its ``v_xid`` occurs
    exactly once within those batches, and ``is_repeat = 2`` otherwise.
    Rows belonging to other batches are neither counted nor modified.

    The statement runs inside ``engine.begin()``, so it is committed when the
    block exits without an error and rolled back if an exception is raised.

    Args:
        batches: Batch identifiers to process. When empty, nothing is
            executed and the function returns immediately.
    """
    if not batches:
        # An empty IN list is not valid SQL on MySQL; there is nothing to
        # update anyway, so skip the round trip entirely.
        print("未指定批次，跳过 is_repeat 更新。")
        return

    # Local import: the file's top-level import block is not visible from
    # here, so the dependency is brought into scope inside the function.
    from sqlalchemy import bindparam

    # An "expanding" bind parameter makes SQLAlchemy render one placeholder
    # per element of the sequence, instead of relying on the DBAPI driver to
    # turn a Python tuple into a parenthesised SQL list.
    sql = text("""
        UPDATE sh_dm_video_op_v2 AS op
        JOIN (
            SELECT v_xid, COUNT(*) AS cnt
            FROM sh_dm_video_op_v2
            WHERE batch IN :batches
            GROUP BY v_xid
        ) AS agg
        ON op.v_xid = agg.v_xid
        SET op.is_repeat = CASE
            WHEN agg.cnt = 1 THEN 1
            ELSE 2
        END
        WHERE op.batch IN :batches;
    """).bindparams(bindparam("batches", expanding=True))

    with engine.begin() as conn:
        conn.execute(sql, {"batches": list(batches)})
    # Reported after the transaction block so the message follows the commit.
    print(f"已更新批次 {batches} 的 is_repeat 字段。")
def get_rn_list() -> list[str]:
sql = "SELECT DISTINCT rn FROM sh_dm_video_op_v2;"
@ -27,8 +48,9 @@ def get_rn_list() -> list[str]:
return [row[0] for row in result]
def fetch_all_data_for_rn(rn: str) -> pd.DataFrame:
sql = """
def fetch_all_data_for_rn(rn: str, batches: list[int]) -> pd.DataFrame:
sql = text(
"""
SELECT
op.id AS ID,
v.v_name AS 片名,
@ -54,15 +76,15 @@ def fetch_all_data_for_rn(rn: str) -> pd.DataFrame:
FROM sh_dm_video_op_v2 AS op
LEFT JOIN sh_dm_video_v2 AS v
ON op.v_xid = v.v_xid
WHERE op.rn = %s
AND op.batch IN (1747324254, 1747323990)
WHERE op.rn = :rn
AND op.batch IN :batches
ORDER BY op.id
"""
# 注意params 用列表或元组
)
chunks = pd.read_sql_query(
sql,
engine,
params=(rn,),
params={"rn": rn, "batches": tuple(batches)},
chunksize=10000
)
dfs = []
@ -75,11 +97,16 @@ def fetch_all_data_for_rn(rn: str) -> pd.DataFrame:
def export_all():
# 指定要处理的批次
batches = [1747324254, 1747323990]
# 先更新 is_repeat
update_is_repeat(batches)
rn_list = get_rn_list()
timestamp = datetime.now().strftime("%Y%m%d")
for rn in rn_list:
print(f"开始处理地区:{rn}")
df = fetch_all_data_for_rn(rn)
df = fetch_all_data_for_rn(rn, batches)
if df.empty:
print(f"[{rn}] 无数据,跳过导出")
continue