feat: 优化 DB.py 和 dump_keyword_title.py,增强数据处理和日志记录功能
This commit is contained in:
parent
9b74bdf312
commit
217d8c7ed7
48
DB.py
48
DB.py
@ -40,7 +40,7 @@ video_op = Table("sh_dm_video_op_v2", _meta,
|
|||||||
Column("updatetime", Integer),
|
Column("updatetime", Integer),
|
||||||
Column("batch", BigInteger),
|
Column("batch", BigInteger),
|
||||||
Column("machine", Integer),
|
Column("machine", Integer),
|
||||||
)
|
)
|
||||||
|
|
||||||
video = Table("sh_dm_video_v2", _meta,
|
video = Table("sh_dm_video_v2", _meta,
|
||||||
Column("v_id", BigInteger, primary_key=True),
|
Column("v_id", BigInteger, primary_key=True),
|
||||||
@ -61,8 +61,7 @@ video = Table("sh_dm_video_v2", _meta,
|
|||||||
Column("status", Integer),
|
Column("status", Integer),
|
||||||
Column("createtime", Integer),
|
Column("createtime", Integer),
|
||||||
Column("updatetime", Integer),
|
Column("updatetime", Integer),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def mysql_retry(max_retries: int = 3, base_delay: float = 2.0):
|
def mysql_retry(max_retries: int = 3, base_delay: float = 2.0):
|
||||||
@ -370,6 +369,35 @@ class DBVidcon:
|
|||||||
self.cursor.execute(sql, (level,))
|
self.cursor.execute(sql, (level,))
|
||||||
return self.cursor.fetchall()
|
return self.cursor.fetchall()
|
||||||
|
|
||||||
|
@mysql_retry()
|
||||||
|
def log_batch_start(self, info: Dict) -> int or None:
|
||||||
|
batch = info.get("batch")
|
||||||
|
level = info.get("level")
|
||||||
|
if batch is None or level is None:
|
||||||
|
raise ValueError("info 字典必须包含 'batch' 和 'level'")
|
||||||
|
count = info.get("count", 0)
|
||||||
|
if level == 0:
|
||||||
|
t0, t1, t2 = count, 0, 0
|
||||||
|
elif level == 1:
|
||||||
|
t0, t1, t2 = 0, count, 0
|
||||||
|
elif level == 9:
|
||||||
|
level = 2
|
||||||
|
t0, t1, t2 = 0, 0, count
|
||||||
|
|
||||||
|
start_ts = int(time.time())
|
||||||
|
sql = """
|
||||||
|
INSERT INTO sh_dm_batch_log
|
||||||
|
(batch, info, t0, t1, t2, starttime)
|
||||||
|
VALUES (%s, %s, %s, %s, %s, %s)
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
self.cursor.execute(sql, (batch, level, t0, t1, t2, start_ts))
|
||||||
|
self.conn.commit()
|
||||||
|
return self.cursor.lastrowid
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[log_batch_start] 插入失败:{e}")
|
||||||
|
return None
|
||||||
|
|
||||||
@mysql_retry()
|
@mysql_retry()
|
||||||
def flush(self):
|
def flush(self):
|
||||||
"""批量执行完后手动提交。"""
|
"""批量执行完后手动提交。"""
|
||||||
@ -411,16 +439,26 @@ class DBVidcon:
|
|||||||
@redis_retry(max_retries=3)
|
@redis_retry(max_retries=3)
|
||||||
def queues_empty(self) -> bool:
|
def queues_empty(self) -> bool:
|
||||||
"""
|
"""
|
||||||
判断 urgent_list_key 和 list_key 两个队列是否都为空。
|
|
||||||
如果都空,返回 True;只要有一个不空,就返回 False。
|
如果都空,返回 True;只要有一个不空,就返回 False。
|
||||||
"""
|
"""
|
||||||
# 注意:redis.llen 返回 int
|
|
||||||
return (
|
return (
|
||||||
self.redis.llen(self.l0_list_key) == 0
|
self.redis.llen(self.l0_list_key) == 0
|
||||||
and self.redis.llen(self.l1_list_key) == 0
|
and self.redis.llen(self.l1_list_key) == 0
|
||||||
and self.redis.llen(self.l2_list_key) == 0
|
and self.redis.llen(self.l2_list_key) == 0
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@redis_retry()
|
||||||
|
def l0_empty(self) -> bool:
|
||||||
|
return self.redis.llen(self.l0_list_key) == 0
|
||||||
|
|
||||||
|
@redis_retry()
|
||||||
|
def l1_empty(self) -> bool:
|
||||||
|
return self.redis.llen(self.l1_list_key) == 0
|
||||||
|
|
||||||
|
@redis_retry()
|
||||||
|
def l2_empty(self) -> bool:
|
||||||
|
return self.redis.llen(self.l2_list_key) == 0
|
||||||
|
|
||||||
@redis_retry(max_retries=3)
|
@redis_retry(max_retries=3)
|
||||||
def pop_error_item(self):
|
def pop_error_item(self):
|
||||||
"""
|
"""
|
||||||
|
@ -14,19 +14,26 @@ def main():
|
|||||||
args = parse_args()
|
args = parse_args()
|
||||||
batch = int(time.time())
|
batch = int(time.time())
|
||||||
db = DBVidcon()
|
db = DBVidcon()
|
||||||
|
|
||||||
rows = db.fetch_keyword_title(level=args.level)
|
|
||||||
payload_list = []
|
|
||||||
push = None
|
push = None
|
||||||
|
empty = None
|
||||||
|
|
||||||
if args.level == 0:
|
if args.level == 0:
|
||||||
push = db.push_l0
|
push = db.push_l0
|
||||||
|
empty = db.l0_empty
|
||||||
elif args.level == 1:
|
elif args.level == 1:
|
||||||
push = db.push_l1
|
push = db.push_l1
|
||||||
|
empty = db.l1_empty
|
||||||
elif args.level == 9:
|
elif args.level == 9:
|
||||||
push = db.push_l2
|
push = db.push_l2
|
||||||
|
empty = db.l2_empty
|
||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if empty():
|
||||||
|
rows = db.fetch_keyword_title(level=args.level)
|
||||||
|
payload_list = []
|
||||||
|
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False))
|
payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False))
|
||||||
if len(payload_list) >= 10000:
|
if len(payload_list) >= 10000:
|
||||||
@ -35,6 +42,12 @@ def main():
|
|||||||
if payload_list: # 收尾
|
if payload_list: # 收尾
|
||||||
push(payload_list)
|
push(payload_list)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"level": args.level,
|
||||||
|
"batch": batch,
|
||||||
|
"count": len(rows),
|
||||||
|
}
|
||||||
|
db.log_batch_start(data)
|
||||||
print(f"✔ 推送 {len(rows)} 行(batch={batch})到 {push.__name__}队列完毕")
|
print(f"✔ 推送 {len(rows)} 行(batch={batch})到 {push.__name__}队列完毕")
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user