From 90ad9c28ff41c022eb4bca696f0059d4d663a707 Mon Sep 17 00:00:00 2001
From: Franklin-F <dewujie64@gmail.com>
Date: Sun, 18 May 2025 23:54:14 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E6=9B=B4=E6=96=B0=20i?=
 =?UTF-8?q?s=5Frepeat=20=E5=AD=97=E6=AE=B5=E7=9A=84=E5=8A=9F=E8=83=BD?=
 =?UTF-8?q?=E5=B9=B6=E4=BF=AE=E6=94=B9=E6=95=B0=E6=8D=AE=E8=8E=B7=E5=8F=96?=
 =?UTF-8?q?=E5=87=BD=E6=95=B0=E4=BB=A5=E6=94=AF=E6=8C=81=E6=89=B9=E6=AC=A1?=
 =?UTF-8?q?=E5=8F=82=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mysql_to_xlsx.py | 95 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 61 insertions(+), 34 deletions(-)

diff --git a/mysql_to_xlsx.py b/mysql_to_xlsx.py
index 1b61078..0a55ed0 100644
--- a/mysql_to_xlsx.py
+++ b/mysql_to_xlsx.py
@@ -19,6 +19,48 @@ URL = (
 
 engine = create_engine(URL, pool_pre_ping=True)
 
+def update_is_repeat(batches: list[int]) -> None:
+    """Refresh ``is_repeat`` on ``sh_dm_video_op_v2`` for the given batches.
+
+    Meant to run before the export. Rows are counted per ``v_xid`` across the
+    given batches; each row in those batches is then set to 1 when its
+    ``v_xid`` occurs exactly once there, and to 2 otherwise.
+
+    Args:
+        batches: Batch ids whose rows are recounted and updated.
+
+    NOTE(review): this writes ``op.is_repeat``, while the export query in
+    ``fetch_all_data_for_rn`` selects ``v.is_repeat`` from ``sh_dm_video_v2``;
+    confirm the export reads the column that is updated here.
+    """
+    # Local import: nothing else in this function's scope needs bindparam.
+    from sqlalchemy import bindparam
+
+    if not batches:
+        # An empty IN list is invalid SQL, and there is nothing to update.
+        return
+
+    # An expanding bind renders one placeholder per batch id, so the IN
+    # clauses do not depend on driver-specific escaping of a tuple value.
+    sql = text("""
+    UPDATE sh_dm_video_op_v2 AS op
+    JOIN (
+      SELECT v_xid, COUNT(*) AS cnt
+      FROM sh_dm_video_op_v2
+      WHERE batch IN :batches
+      GROUP BY v_xid
+    ) AS agg
+      ON op.v_xid = agg.v_xid
+    SET op.is_repeat = CASE
+      WHEN agg.cnt = 1 THEN 1
+      ELSE 2
+    END
+    WHERE op.batch IN :batches;
+    """).bindparams(bindparam("batches", expanding=True))
+    with engine.begin() as conn:
+        conn.execute(sql, {"batches": list(batches)})
+    print(f"已更新批次 {batches} 的 is_repeat 字段。")
+
 
 def get_rn_list() -> list[str]:
     sql = "SELECT DISTINCT rn FROM sh_dm_video_op_v2;"
@@ -27,42 +48,43 @@ def get_rn_list() -> list[str]:
         return [row[0] for row in result]
 
 
-def fetch_all_data_for_rn(rn: str) -> pd.DataFrame:
-    sql = """
-    SELECT
-        op.id                AS ID,
-        v.v_name             AS 片名,
-        v.link               AS 视频连接,
-        v.is_piracy          AS 是否盗版,
-        op.`level`           AS 优先级,
-        op.rn                AS 地区,
-        NULL                 AS 投诉日期,
-        NULL                 AS 下线日期,
-        op.keyword           AS 关键词,
-        v.title              AS 标题,
-        v.duration           AS 时长,
-        v.watch_number       AS 观看数量,
-        v.public_time        AS 上传时间,
-        v.u_pic              AS 头像,
-        v.is_repeat          AS 是否重复,
-        op.sort              AS 排序,
-        op.batch             AS 批次,
-        op.machine           AS 机器号,
-        v.u_id               AS 用户id,
-        v.u_xid              AS u_xid,
-        v.u_name             AS 用户名称
-    FROM sh_dm_video_op_v2 AS op
-    LEFT JOIN sh_dm_video_v2 AS v
-      ON op.v_xid = v.v_xid
-    WHERE op.rn = %s
-      AND op.batch IN (1747324254, 1747323990)
-    ORDER BY op.id
-    """
-    # 注意：params 用列表或元组
+def fetch_all_data_for_rn(rn: str, batches: list[int]) -> pd.DataFrame:
+    sql = text(
+        """
+        SELECT
+            op.id                AS ID,
+            v.v_name             AS 片名,
+            v.link               AS 视频连接,
+            v.is_piracy          AS 是否盗版,
+            op.`level`           AS 优先级,
+            op.rn                AS 地区,
+            NULL                 AS 投诉日期,
+            NULL                 AS 下线日期,
+            op.keyword           AS 关键词,
+            v.title              AS 标题,
+            v.duration           AS 时长,
+            v.watch_number       AS 观看数量,
+            v.public_time        AS 上传时间,
+            v.u_pic              AS 头像,
+            v.is_repeat          AS 是否重复,
+            op.sort              AS 排序,
+            op.batch             AS 批次,
+            op.machine           AS 机器号,
+            v.u_id               AS 用户id,
+            v.u_xid              AS u_xid,
+            v.u_name             AS 用户名称
+        FROM sh_dm_video_op_v2 AS op
+        LEFT JOIN sh_dm_video_v2 AS v
+          ON op.v_xid = v.v_xid
+        WHERE op.rn = :rn
+          AND op.batch IN :batches
+        ORDER BY op.id
+        """
+    )
     chunks = pd.read_sql_query(
         sql,
         engine,
-        params=(rn,),
+        params={"rn": rn, "batches": tuple(batches)},
         chunksize=10000
     )
     dfs = []
@@ -75,11 +97,16 @@ def fetch_all_data_for_rn(rn: str) -> pd.DataFrame:
 
 
 def export_all():
+    # 指定要处理的批次
+    batches = [1747324254, 1747323990]
+    # 先更新 is_repeat
+    update_is_repeat(batches)
+
     rn_list = get_rn_list()
     timestamp = datetime.now().strftime("%Y%m%d")
     for rn in rn_list:
         print(f"开始处理地区：{rn}")
-        df = fetch_all_data_for_rn(rn)
+        df = fetch_all_data_for_rn(rn, batches)
         if df.empty:
             print(f"[{rn}] 无数据，跳过导出")
             continue