feat: 更新视频插入逻辑以包括is_repeat字段并改进代理池为空时的阻塞处理

This commit is contained in:
晓丰 2025-05-17 00:33:55 +08:00
parent 12bbd2340b
commit dd90cc3c91
4 changed files with 73 additions and 72 deletions

34
DB.py
View File

@ -146,18 +146,25 @@ class DBVidcon:
print(f"[回滚] 已退回 {len(payloads)}")
def upsert_video(self, data: dict):
"""
1) 插入到 sh_dm_video_op_v2
2) DELETE sh_dm_video_v2 WHERE rn = AND v_xid =
3) INSERT INTO sh_dm_video_v2 () VALUES ()
"""
# 保底字段
data.setdefault("a_id", 0)
data.setdefault("history_status", "")
data.setdefault("is_repeat", 3)
data["sort"] = data.get("index", 0)
try:
select_repeat = """
SELECT is_repeat
FROM sh_dm_video_v2
WHERE rn = %(rn)s
AND v_xid = %(v_xid)s
LIMIT 1
"""
self.cursor.execute(select_repeat, data)
row = self.cursor.fetchone()
if row:
data['is_repeat'] = row[0]
# 2. 插入到 op 表
sql_op = """
INSERT INTO sh_dm_video_op_v2 (
v_id, v_xid, a_id, level, name_title,
@ -170,12 +177,16 @@ class DBVidcon:
)
"""
self.cursor.execute(sql_op, data)
# 3. 删除旧表中的那行
sql_del = """
DELETE FROM sh_dm_video_v2
WHERE rn = %(rn)s
AND v_xid = %(v_xid)s
"""
self.cursor.execute(sql_del, data)
# 4. 带上 is_repeat 再插入新数据
sql_ins = """
INSERT INTO sh_dm_video_v2 (
v_id, v_xid, rn, v_name, title, link,
@ -183,18 +194,19 @@ class DBVidcon:
watch_number, follow_number, video_number,
public_time, cover_pic, sort,
u_xid, u_id, u_pic, u_name,
status, createtime, updatetime
status, createtime, updatetime,
is_repeat
) VALUES (
%(v_id)s, %(v_xid)s, %(rn)s, %(v_name)s, %(title)s, %(link)s,
3, '', %(duration)s,
%(is_piracy)s, '', %(duration)s,
%(watch_number)s, %(fans)s, %(videos)s,
%(create_time)s, %(cover_pic)s, %(sort)s,
%(u_xid)s, %(u_id)s, %(u_pic)s, %(u_name)s,
1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP()
1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(),
%(is_repeat)s
)
"""
self.cursor.execute(sql_ins, data)
except Exception as e:
# 打印错误并回滚
print("[数据库写入异常]", str(e))
@ -212,7 +224,7 @@ class DBVidcon:
def get_proxy(self, region_code: str) -> str:
"""
Redis 队列 proxy_queue:<region_code> 弹出一个代理并返回
如果队列为空返回空字符串
如果队列为空阻塞
"""
proxy = ""
while True:

View File

@ -15,12 +15,12 @@ proxies_address = {
"印度尼西亚": "ID",
"马来": "MY",
"加拿大": "CA",
"台湾": "TW", #"CN_city_TW",
"台湾": "CN_city_TW", # "TW", #
"泰国": "TH",
"美国": "US",
"西班牙": "ES",
"韩国": "KR",
"香港": "HK", #"CN_city_HK",
"香港": "CN_city_HK", # "HK", #
"越南": "VN",
}
MACHINE_ID = None
@ -137,6 +137,7 @@ def post_with_retry(url, json_payload=None, data=None, headers=None, proxies=Non
for attempt in range(1, retries + 1):
try:
proxy_str = db.get_proxy(Gproxies)
proxies = {"http": proxy_str, "https": proxy_str}
resp = requests.post(

View File

@ -28,12 +28,12 @@ PROXIES_ADDRESS = {
"印度尼西亚": "ID",
"马来": "MY",
"加拿大": "CA",
"台湾": "TW", #"CN_city_TW",
"台湾": "CN_city_TW", #"TW", #
"泰国": "TH",
"美国": "US",
"西班牙": "ES",
"韩国": "KR",
"香港": "HK", #"CN_city_HK",
"香港": "CN_city_HK", #"HK", #
"越南": "VN",
}

View File

@ -39,10 +39,7 @@ def get_data_for_rn(rn: str) -> pd.DataFrame:
v.watch_number AS 观看数量,
v.public_time AS 上传时间,
v.u_pic AS 头像,
CASE
WHEN dup.cnt > 1 THEN 1
ELSE 2
END AS 是否重复,
v.is_repeat AS 是否重复, -- 直接用字段
op.sort AS 排序,
op.batch AS 批次,
op.machine AS 机器号,
@ -50,15 +47,6 @@ def get_data_for_rn(rn: str) -> pd.DataFrame:
v.u_xid AS u_xid,
v.u_name AS 用户名称
FROM sh_dm_video_op_v2 AS op
LEFT JOIN (
SELECT
t.v_xid,
COUNT(*) AS cnt
FROM sh_dm_video_op_v2 AS t
WHERE t.batch IN (1747324254, 1747323990)
GROUP BY t.v_xid
) AS dup
ON op.v_xid = dup.v_xid
LEFT JOIN sh_dm_video_v2 AS v
ON op.v_xid = v.v_xid
WHERE op.rn = %s