feat: 优化 DB.py 和 dump_keyword_title.py,增强数据处理和日志记录功能
This commit is contained in:
parent
9b74bdf312
commit
217d8c7ed7
44
DB.py
44
DB.py
@ -64,7 +64,6 @@ video = Table("sh_dm_video_v2", _meta,
|
||||
)
|
||||
|
||||
|
||||
|
||||
def mysql_retry(max_retries: int = 3, base_delay: float = 2.0):
|
||||
"""
|
||||
装饰器工厂:捕获 InterfaceError 后断线重连并重试,
|
||||
@ -370,6 +369,35 @@ class DBVidcon:
|
||||
self.cursor.execute(sql, (level,))
|
||||
return self.cursor.fetchall()
|
||||
|
||||
@mysql_retry()
|
||||
def log_batch_start(self, info: Dict) -> int or None:
|
||||
batch = info.get("batch")
|
||||
level = info.get("level")
|
||||
if batch is None or level is None:
|
||||
raise ValueError("info 字典必须包含 'batch' 和 'level'")
|
||||
count = info.get("count", 0)
|
||||
if level == 0:
|
||||
t0, t1, t2 = count, 0, 0
|
||||
elif level == 1:
|
||||
t0, t1, t2 = 0, count, 0
|
||||
elif level == 9:
|
||||
level = 2
|
||||
t0, t1, t2 = 0, 0, count
|
||||
|
||||
start_ts = int(time.time())
|
||||
sql = """
|
||||
INSERT INTO sh_dm_batch_log
|
||||
(batch, info, t0, t1, t2, starttime)
|
||||
VALUES (%s, %s, %s, %s, %s, %s)
|
||||
"""
|
||||
try:
|
||||
self.cursor.execute(sql, (batch, level, t0, t1, t2, start_ts))
|
||||
self.conn.commit()
|
||||
return self.cursor.lastrowid
|
||||
except Exception as e:
|
||||
print(f"[log_batch_start] 插入失败:{e}")
|
||||
return None
|
||||
|
||||
@mysql_retry()
|
||||
def flush(self):
|
||||
"""批量执行完后手动提交。"""
|
||||
@ -411,16 +439,26 @@ class DBVidcon:
|
||||
@redis_retry(max_retries=3)
|
||||
def queues_empty(self) -> bool:
|
||||
"""
|
||||
判断 urgent_list_key 和 list_key 两个队列是否都为空。
|
||||
如果都空,返回 True;只要有一个不空,就返回 False。
|
||||
"""
|
||||
# 注意:redis.llen 返回 int
|
||||
return (
|
||||
self.redis.llen(self.l0_list_key) == 0
|
||||
and self.redis.llen(self.l1_list_key) == 0
|
||||
and self.redis.llen(self.l2_list_key) == 0
|
||||
)
|
||||
|
||||
@redis_retry()
|
||||
def l0_empty(self) -> bool:
|
||||
return self.redis.llen(self.l0_list_key) == 0
|
||||
|
||||
@redis_retry()
|
||||
def l1_empty(self) -> bool:
|
||||
return self.redis.llen(self.l1_list_key) == 0
|
||||
|
||||
@redis_retry()
|
||||
def l2_empty(self) -> bool:
|
||||
return self.redis.llen(self.l2_list_key) == 0
|
||||
|
||||
@redis_retry(max_retries=3)
|
||||
def pop_error_item(self):
|
||||
"""
|
||||
|
@ -14,19 +14,26 @@ def main():
|
||||
args = parse_args()
|
||||
batch = int(time.time())
|
||||
db = DBVidcon()
|
||||
|
||||
rows = db.fetch_keyword_title(level=args.level)
|
||||
payload_list = []
|
||||
push = None
|
||||
empty = None
|
||||
|
||||
if args.level == 0:
|
||||
push = db.push_l0
|
||||
empty = db.l0_empty
|
||||
elif args.level == 1:
|
||||
push = db.push_l1
|
||||
empty = db.l1_empty
|
||||
elif args.level == 9:
|
||||
push = db.push_l2
|
||||
empty = db.l2_empty
|
||||
else:
|
||||
return
|
||||
|
||||
if empty():
|
||||
rows = db.fetch_keyword_title(level=args.level)
|
||||
payload_list = []
|
||||
|
||||
|
||||
for row in rows:
|
||||
payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False))
|
||||
if len(payload_list) >= 10000:
|
||||
@ -35,6 +42,12 @@ def main():
|
||||
if payload_list: # 收尾
|
||||
push(payload_list)
|
||||
|
||||
data = {
|
||||
"level": args.level,
|
||||
"batch": batch,
|
||||
"count": len(rows),
|
||||
}
|
||||
db.log_batch_start(data)
|
||||
print(f"✔ 推送 {len(rows)} 行(batch={batch})到 {push.__name__}队列完毕")
|
||||
db.close()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user