DailyMotion/DB.py

225 lines
7.4 KiB
Python

import json
import os
import time
from typing import Optional

import pymysql
import redis
class DBVidcon:
    """MySQL + Redis access layer used by the video keyword crawler.

    One instance owns a single PyMySQL connection/cursor and a single Redis
    client.  Redis lists act as work queues (keyword tasks, urgent tasks and
    result records); MySQL receives the crawled video rows.  MySQL writes are
    not committed until :meth:`flush` is called.

    The instance can be used as a context manager; leaving the ``with`` block
    calls :meth:`close`.
    """

    # SECURITY: these credentials were hard-coded in the original source.  The
    # literals are kept only as fallbacks so existing deployments keep working;
    # prefer supplying the VIDCON_* environment variables and rotating the
    # secrets that have been committed to version control.
    _MYSQL_CONF = {
        "host": os.environ.get("VIDCON_MYSQL_HOST", "192.144.230.75"),
        "port": int(os.environ.get("VIDCON_MYSQL_PORT", "3306")),
        "user": os.environ.get("VIDCON_MYSQL_USER", "db_vidcon"),
        "password": os.environ.get("VIDCON_MYSQL_PASSWORD", "rexdK4fhCCiRE4BZ"),
        "database": os.environ.get("VIDCON_MYSQL_DATABASE", "db_vidcon"),
        "charset": "utf8mb4",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    _REDIS_CONF = {
        "host": os.environ.get("VIDCON_REDIS_HOST", "192.144.230.75"),
        "port": int(os.environ.get("VIDCON_REDIS_PORT", "6379")),
        "password": os.environ.get("VIDCON_REDIS_PASSWORD", "qwert@$123!&"),
        "decode_responses": True,
    }

    # Retry policy for fetch_records(): attempts before giving up, and the
    # pause (seconds) between attempts.
    _MAX_RETRIES = 5
    _RETRY_DELAY = 2

    # Savepoint name used to undo a single failed upsert_video() call.
    _UPSERT_SAVEPOINT = "upsert_video"

    _SQL_INSERT_OP = """
        INSERT INTO sh_dm_video_op_v2 (
            v_id, v_xid, a_id, level, name_title,
            keyword, rn, history_status, is_repeat,
            sort, createtime, updatetime, batch, machine
        ) VALUES (
            %(v_id)s, %(v_xid)s, %(a_id)s, %(level)s, %(v_name)s,
            %(keyword)s, %(rn)s, %(history_status)s, %(is_repeat)s,
            %(sort)s, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), %(batch)s, %(machine_id)s
        )
    """
    _SQL_DELETE_VIDEO = """
        DELETE FROM sh_dm_video_v2
        WHERE rn = %(rn)s
          AND v_xid = %(v_xid)s
    """
    _SQL_INSERT_VIDEO = """
        INSERT INTO sh_dm_video_v2 (
            v_id, v_xid, rn, v_name, title, link,
            is_piracy, edition, duration,
            watch_number, follow_number, video_number,
            public_time, cover_pic, sort,
            u_xid, u_id, u_pic, u_name,
            status, createtime, updatetime
        ) VALUES (
            %(v_id)s, %(v_xid)s, %(rn)s, %(v_name)s, %(title)s, %(link)s,
            3, '', %(duration)s,
            %(watch_number)s, %(fans)s, %(videos)s,
            %(create_time)s, %(cover_pic)s, %(sort)s,
            %(u_xid)s, %(u_id)s, %(u_pic)s, %(u_name)s,
            1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP()
        )
    """

    def __init__(self):
        """Open the MySQL connection and create the Redis client."""
        self.list_key = "video_kw_queue"
        self.record_list_key = "record_kw_queue"
        self.urgent_list_key = "video_urgent_queue"
        self.conn = pymysql.connect(**self._MYSQL_CONF)
        self.cursor = self.conn.cursor()
        self._connect_redis()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()

    # ------------------------------------------------------------------
    # Redis plumbing
    # ------------------------------------------------------------------
    def _connect_redis(self):
        """Create (or re-create) the Redis client."""
        self.redis = redis.Redis(**self._REDIS_CONF)

    def reconnect_redis(self):
        """Rebuild the Redis client after a ``ConnectionError``.

        Failures while building the client are printed and followed by a short
        pause; they are not raised.
        """
        try:
            self._connect_redis()
        except Exception as e:
            print("[Redis reconnect error]", e)
            time.sleep(2)

    def _push(self, key, values, *, tail=False, error_tag=None):
        """Push *values* onto list *key*, reconnecting and retrying once.

        ``tail=False`` uses LPUSH, ``tail=True`` uses RPUSH.  A second
        ``ConnectionError`` propagates to the caller.
        """
        command = "rpush" if tail else "lpush"
        try:
            getattr(self.redis, command)(key, *values)
        except redis.exceptions.ConnectionError as e:
            if error_tag is not None:
                print(error_tag, e)
            self.reconnect_redis()
            # Look the method up again: reconnect_redis() replaced self.redis.
            getattr(self.redis, command)(key, *values)

    def _pop_many(self, key, count):
        """LPOP up to *count* raw items from *key* and return them as a list.

        Redis connection errors propagate to the caller.
        """
        try:
            raws = self.redis.lpop(key, count)
        except TypeError:
            # This redis-py version has no ``count`` argument on lpop(); pop
            # one at a time from the same (left) end as the primary path.
            raws = []
            for _ in range(count):
                item = self.redis.lpop(key)
                if item is None:
                    break
                raws.append(item)
        if not raws:
            return []
        if isinstance(raws, str):
            return [raws]
        return list(raws)

    @staticmethod
    def _decode_raws(raws):
        """Turn raw JSON strings into ``(raw, parsed)`` tuples.

        Payloads that are not valid JSON are skipped, but reported so the loss
        is visible (they have already been removed from Redis).
        """
        decoded = []
        for raw in raws:
            try:
                decoded.append((raw, json.loads(raw)))
            except json.JSONDecodeError as e:
                print("[Redis invalid JSON dropped]", e, raw)
        return decoded

    @staticmethod
    def _as_list(raws):
        """Wrap a single string payload in a list; pass other values through."""
        return [raws] if isinstance(raws, str) else raws

    # ------------------------------------------------------------------
    # Record queue
    # ------------------------------------------------------------------
    def push_record(self, data: dict):
        """Serialize *data* as JSON and LPUSH it onto the record queue."""
        raw = json.dumps(data, ensure_ascii=False)
        self._push(self.record_list_key, [raw])

    def fetch_records(self, count: int = 100) -> list:
        """Pop up to *count* items from the record queue.

        Returns:
            A list of ``(raw_json, parsed)`` tuples; empty if the queue is
            empty.

        Raises:
            redis.exceptions.ConnectionError: if Redis is still unreachable
                after ``_MAX_RETRIES`` attempts.
        """
        for attempt in range(1, self._MAX_RETRIES + 1):
            try:
                raws = self._pop_many(self.record_list_key, count)
            except redis.exceptions.ConnectionError:
                if attempt == self._MAX_RETRIES:
                    raise
                self.reconnect_redis()
                time.sleep(self._RETRY_DELAY)
            else:
                return self._decode_raws(raws)
        return []

    def fetch_from_redis(self, count: int = 100, list_key: Optional[str] = None) -> list:
        """Pop up to *count* items from *list_key*.

        Args:
            count: maximum number of items to pop.
            list_key: Redis list to read; defaults to ``self.list_key``.

        Returns:
            A list of ``(raw_json, parsed)`` tuples.  On a Redis connection
            error the client is rebuilt and an empty list is returned.
        """
        key = self.list_key if list_key is None else list_key
        try:
            raws = self._pop_many(key, count)
        except redis.exceptions.ConnectionError as e:
            print("[Redis pop error]", e)
            self.reconnect_redis()
            return []
        return self._decode_raws(raws)

    def rollback_records(self, raws):
        """LPUSH raw JSON payload(s) back onto the record queue."""
        if not raws:
            return
        self._push(
            self.record_list_key,
            self._as_list(raws),
            error_tag="[Redis record rollback error]",
        )

    def rollback_urgent(self, raws):
        """Return raw JSON payload(s) to the urgent list.

        Used when the database write or the urgent task itself failed.
        """
        if not raws:
            return
        self._push(
            self.urgent_list_key,
            self._as_list(raws),
            error_tag="[Redis urgent rollback error]",
        )

    # ------------------------------------------------------------------
    # Keyword queues
    # ------------------------------------------------------------------
    def item_keyword(self, count: int = 100):
        """Fetch the next batch of keyword tasks, urgent queue first.

        Returns:
            ``(items, 1)`` when urgent tasks were found, ``(items, 2)`` for
            the normal queue (``items`` may be empty), or ``([], 0)`` when
            reading the normal queue raised.
        """
        try:
            urgent_items = self.fetch_from_redis(count, list_key=self.urgent_list_key)
        except Exception as e:
            print("[Redis urgent pop error]", e)
            self.reconnect_redis()
            urgent_items = []
        if urgent_items:
            return urgent_items, 1

        try:
            items = self.fetch_from_redis(count, list_key=self.list_key)
        except Exception as e:
            print("[Redis normal pop error]", e)
            self.reconnect_redis()
            return [], 0
        return items, 2

    def rollback(self, payloads):
        """RPUSH payload(s) back onto the normal keyword queue."""
        if not payloads:
            return
        payloads = self._as_list(payloads)
        self._push(self.list_key, payloads, tail=True, error_tag="[Redis rollback error]")
        print(f"[回滚] 已退回 {len(payloads)}")

    # ------------------------------------------------------------------
    # MySQL
    # ------------------------------------------------------------------
    def upsert_video(self, data: dict):
        """Write one crawled video to MySQL (uncommitted until :meth:`flush`).

        Steps:
            1. INSERT into ``sh_dm_video_op_v2``.
            2. DELETE from ``sh_dm_video_v2`` WHERE ``rn`` and ``v_xid`` match.
            3. INSERT into ``sh_dm_video_v2``.

        *data* is modified in place: ``a_id``, ``history_status`` and
        ``is_repeat`` receive defaults when missing, and ``sort`` is set from
        ``index`` (0 when absent).

        If any step fails, the error and the data are printed, the statements
        of this call are undone via a savepoint (earlier uncommitted upserts
        in the same batch are kept), and the exception is re-raised.
        """
        # Fallback field values.
        data.setdefault("a_id", 0)
        data.setdefault("history_status", "")
        data.setdefault("is_repeat", 3)
        data["sort"] = data.get("index", 0)

        savepoint_set = False
        try:
            self.cursor.execute(f"SAVEPOINT {self._UPSERT_SAVEPOINT}")
            savepoint_set = True
            self.cursor.execute(self._SQL_INSERT_OP, data)
            self.cursor.execute(self._SQL_DELETE_VIDEO, data)
            self.cursor.execute(self._SQL_INSERT_VIDEO, data)
        except Exception as e:
            print("[数据库写入异常]", str(e))
            print("[出错数据]", data)
            # Only roll back to a savepoint created by *this* call; an older
            # savepoint of the same name would undo a previous, good upsert.
            if savepoint_set:
                try:
                    self.cursor.execute(
                        f"ROLLBACK TO SAVEPOINT {self._UPSERT_SAVEPOINT}"
                    )
                except Exception as rollback_error:
                    print("[数据库回滚异常]", rollback_error)
            raise

    def flush(self):
        """Commit everything written since the last commit."""
        self.conn.commit()

    def close(self):
        """Close the cursor, the MySQL connection and the Redis connections.

        Each resource is released even if closing an earlier one raises.
        """
        try:
            self.cursor.close()
        finally:
            try:
                self.conn.close()
            finally:
                self.redis.connection_pool.disconnect()

    # ------------------------------------------------------------------
    # Proxies
    # ------------------------------------------------------------------
    def get_proxy(
        self,
        region_code: str,
        poll_interval: float = 10,
        timeout: Optional[float] = None,
    ) -> str:
        """Pop one proxy from the Redis list ``proxy_queue:<region_code>``.

        Args:
            region_code: suffix of the proxy list key.
            poll_interval: seconds to sleep between polls while the list is
                empty.
            timeout: maximum seconds to wait.  ``None`` (default) waits until
                a proxy becomes available.

        Returns:
            The proxy string, or ``""`` if *timeout* elapsed with the list
            still empty.

        Raises:
            redis.exceptions.ConnectionError: if Redis is unreachable even
                after one reconnect attempt.
        """
        key = f"proxy_queue:{region_code}"
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            try:
                proxy = self.redis.lpop(key)
            except redis.exceptions.ConnectionError as e:
                print("[Redis proxy pop error]", e)
                self.reconnect_redis()
                proxy = self.redis.lpop(key)
            if proxy is not None:
                return proxy
            if deadline is None:
                time.sleep(poll_interval)
                continue
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return ""
            time.sleep(min(poll_interval, remaining))