From 4f3051b1005d1204225f286d02d622ca83b73c4f Mon Sep 17 00:00:00 2001 From: Franklin-F Date: Tue, 20 May 2025 22:07:09 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84=20dump=5Fkeyword=5Ft?= =?UTF-8?q?itle.py=EF=BC=8C=E4=BC=98=E5=8C=96=E6=95=B0=E6=8D=AE=E5=BA=93?= =?UTF-8?q?=E6=9F=A5=E8=AF=A2=E5=92=8C=E6=95=B0=E6=8D=AE=E6=8E=A8=E9=80=81?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dump_keyword_title.py | 92 ++++++++++++------------------------------- 1 file changed, 25 insertions(+), 67 deletions(-) diff --git a/dump_keyword_title.py b/dump_keyword_title.py index 257db2d..81e77ed 100644 --- a/dump_keyword_title.py +++ b/dump_keyword_title.py @@ -1,48 +1,6 @@ -#!/usr/bin/env python3 -""" -用法示例 --------- -# 用默认 level=1 -python3 dump_keyword_title.py - -# 指定 level=3 -python3 dump_keyword_title.py -l 3 -""" -import json, time, pymysql, redis +import json, time import argparse - -# ======= 配置区 ======= -MYSQL_CONFIG = { - "host": "192.144.230.75", "port": 3306, - "user": "db_vidcon", "password": "rexdK4fhCCiRE4BZ", - "database": "db_vidcon", "charset": "utf8mb4", - "cursorclass": pymysql.cursors.DictCursor -} -REDIS_CONFIG = { - "host": "192.144.230.75", "port": 6379, - "password": "qwert@$123!&", "decode_responses": True -} -LIST_KEY = "video_kw_queue" -BATCH_SIZE = 1000 -SQL = """ -SELECT - k.keyword, - k.rn, - t.title AS v_name, - ANY_VALUE(t.level) AS level -FROM - sh_dm_keyword k -LEFT JOIN - sh_dm_title t ON k.title = t.title -WHERE - k.status = 1 - AND t.status = 1 - AND NOT EXISTS ( - SELECT 1 FROM sh_dm_black_keyword b WHERE b.title = t.title - ) - AND t.level = %s -GROUP BY k.keyword, k.rn -""" +from DB import DBVidcon def parse_args(): parser = argparse.ArgumentParser( @@ -50,35 +8,35 @@ def parse_args(): ) parser.add_argument("-l", "--level", type=int, default=99, help="value for t.level (default: 99)") - parser.add_argument("-u", "--urgent", type=int, default=0, - help="加急标记") return parser.parse_args() def main(): - args = parse_args() - batch_ts = int(time.time()) - conn = pymysql.connect(**MYSQL_CONFIG) - cur = conn.cursor() - cur.execute(SQL, (args.level,)) - r = redis.Redis(**REDIS_CONFIG) - pipe = r.pipeline() - total = 0 - start = time.time() - global LIST_KEY - if args.urgent == 1: - LIST_KEY = "video_urgent_queue" + args = parse_args() + batch = int(time.time()) + db = DBVidcon() - for row in cur: - row["batch"] = batch_ts - pipe.lpush(LIST_KEY, json.dumps(row, ensure_ascii=False)) - total += 1 - if total % BATCH_SIZE == 0: - pipe.execute() + rows = db.fetch_keyword_title(level=args.level) + payload_list = [] + push = None + if args.level == 0: + push = db.push_l0 + elif args.level == 1: + push = db.push_l1 + elif args.level == 2: + push = db.push_l2 + else: + return - if pipe.command_stack: - pipe.execute() + for row in rows: + payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False)) + if len(payload_list) >= 10000: + push(payload_list) + payload_list.clear() + if payload_list: # 收尾 + push(payload_list) - print(f"✔ 推送 {total} 行(level={args.level}, batch={batch_ts})→ Redis '{LIST_KEY}',耗时 {time.time()-start:.2f}s") + print(f"✔ 推送 {len(rows)} 行(batch={batch})到 {push.__name__}队列完毕") + db.close() if __name__ == "__main__": main()