feat: 更新视频插入逻辑以包括is_repeat字段并改进代理池为空时的阻塞处理
This commit is contained in:
parent
12bbd2340b
commit
dd90cc3c91
82
DB.py
82
DB.py
@ -146,55 +146,67 @@ class DBVidcon:
|
||||
print(f"[回滚] 已退回 {len(payloads)} 条")
|
||||
|
||||
def upsert_video(self, data: dict):
|
||||
"""
|
||||
1) 插入到 sh_dm_video_op_v2
|
||||
2) DELETE sh_dm_video_v2 WHERE rn = … AND v_xid = …
|
||||
3) INSERT INTO sh_dm_video_v2 (…) VALUES (…)
|
||||
"""
|
||||
# 保底字段
|
||||
data.setdefault("a_id", 0)
|
||||
data.setdefault("history_status", "")
|
||||
data.setdefault("is_repeat", 3)
|
||||
data["sort"] = data.get("index", 0)
|
||||
|
||||
try:
|
||||
select_repeat = """
|
||||
SELECT is_repeat
|
||||
FROM sh_dm_video_v2
|
||||
WHERE rn = %(rn)s
|
||||
AND v_xid = %(v_xid)s
|
||||
LIMIT 1
|
||||
"""
|
||||
self.cursor.execute(select_repeat, data)
|
||||
row = self.cursor.fetchone()
|
||||
if row:
|
||||
data['is_repeat'] = row[0]
|
||||
|
||||
# 2. 插入到 op 表
|
||||
sql_op = """
|
||||
INSERT INTO sh_dm_video_op_v2 (
|
||||
v_id, v_xid, a_id, level, name_title,
|
||||
keyword, rn, history_status, is_repeat,
|
||||
sort, createtime, updatetime, batch, machine
|
||||
) VALUES (
|
||||
%(v_id)s, %(v_xid)s, %(a_id)s, %(level)s, %(v_name)s,
|
||||
%(keyword)s, %(rn)s, %(history_status)s, %(is_repeat)s,
|
||||
%(sort)s, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), %(batch)s, %(machine_id)s
|
||||
)
|
||||
INSERT INTO sh_dm_video_op_v2 (
|
||||
v_id, v_xid, a_id, level, name_title,
|
||||
keyword, rn, history_status, is_repeat,
|
||||
sort, createtime, updatetime, batch, machine
|
||||
) VALUES (
|
||||
%(v_id)s, %(v_xid)s, %(a_id)s, %(level)s, %(v_name)s,
|
||||
%(keyword)s, %(rn)s, %(history_status)s, %(is_repeat)s,
|
||||
%(sort)s, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), %(batch)s, %(machine_id)s
|
||||
)
|
||||
"""
|
||||
self.cursor.execute(sql_op, data)
|
||||
|
||||
# 3. 删除旧表中的那行
|
||||
sql_del = """
|
||||
DELETE FROM sh_dm_video_v2
|
||||
WHERE rn = %(rn)s
|
||||
AND v_xid = %(v_xid)s
|
||||
DELETE FROM sh_dm_video_v2
|
||||
WHERE rn = %(rn)s
|
||||
AND v_xid = %(v_xid)s
|
||||
"""
|
||||
self.cursor.execute(sql_del, data)
|
||||
|
||||
# 4. 带上 is_repeat 再插入新数据
|
||||
sql_ins = """
|
||||
INSERT INTO sh_dm_video_v2 (
|
||||
v_id, v_xid, rn, v_name, title, link,
|
||||
is_piracy, edition, duration,
|
||||
watch_number, follow_number, video_number,
|
||||
public_time, cover_pic, sort,
|
||||
u_xid, u_id, u_pic, u_name,
|
||||
status, createtime, updatetime
|
||||
) VALUES (
|
||||
%(v_id)s, %(v_xid)s, %(rn)s, %(v_name)s, %(title)s, %(link)s,
|
||||
3, '', %(duration)s,
|
||||
%(watch_number)s, %(fans)s, %(videos)s,
|
||||
%(create_time)s, %(cover_pic)s, %(sort)s,
|
||||
%(u_xid)s, %(u_id)s, %(u_pic)s, %(u_name)s,
|
||||
1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP()
|
||||
)
|
||||
INSERT INTO sh_dm_video_v2 (
|
||||
v_id, v_xid, rn, v_name, title, link,
|
||||
is_piracy, edition, duration,
|
||||
watch_number, follow_number, video_number,
|
||||
public_time, cover_pic, sort,
|
||||
u_xid, u_id, u_pic, u_name,
|
||||
status, createtime, updatetime,
|
||||
is_repeat
|
||||
) VALUES (
|
||||
%(v_id)s, %(v_xid)s, %(rn)s, %(v_name)s, %(title)s, %(link)s,
|
||||
%(is_piracy)s, '', %(duration)s,
|
||||
%(watch_number)s, %(fans)s, %(videos)s,
|
||||
%(create_time)s, %(cover_pic)s, %(sort)s,
|
||||
%(u_xid)s, %(u_id)s, %(u_pic)s, %(u_name)s,
|
||||
1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(),
|
||||
%(is_repeat)s
|
||||
)
|
||||
"""
|
||||
self.cursor.execute(sql_ins, data)
|
||||
|
||||
except Exception as e:
|
||||
# 打印错误并回滚
|
||||
print("[数据库写入异常]", str(e))
|
||||
@ -212,7 +224,7 @@ class DBVidcon:
|
||||
def get_proxy(self, region_code: str) -> str:
|
||||
"""
|
||||
从 Redis 队列 proxy_queue:<region_code> 弹出一个代理并返回。
|
||||
如果队列为空,返回空字符串。
|
||||
如果队列为空,阻塞
|
||||
"""
|
||||
proxy = ""
|
||||
while True:
|
||||
|
5
main.py
5
main.py
@ -15,12 +15,12 @@ proxies_address = {
|
||||
"印度尼西亚": "ID",
|
||||
"马来": "MY",
|
||||
"加拿大": "CA",
|
||||
"台湾": "TW", #"CN_city_TW",
|
||||
"台湾": "CN_city_TW", # "TW", #
|
||||
"泰国": "TH",
|
||||
"美国": "US",
|
||||
"西班牙": "ES",
|
||||
"韩国": "KR",
|
||||
"香港": "HK", #"CN_city_HK",
|
||||
"香港": "CN_city_HK", # "HK", #
|
||||
"越南": "VN",
|
||||
}
|
||||
MACHINE_ID = None
|
||||
@ -137,6 +137,7 @@ def post_with_retry(url, json_payload=None, data=None, headers=None, proxies=Non
|
||||
for attempt in range(1, retries + 1):
|
||||
try:
|
||||
proxy_str = db.get_proxy(Gproxies)
|
||||
|
||||
proxies = {"http": proxy_str, "https": proxy_str}
|
||||
|
||||
resp = requests.post(
|
||||
|
@ -28,12 +28,12 @@ PROXIES_ADDRESS = {
|
||||
"印度尼西亚": "ID",
|
||||
"马来": "MY",
|
||||
"加拿大": "CA",
|
||||
"台湾": "TW", #"CN_city_TW",
|
||||
"台湾": "CN_city_TW", #"TW", #
|
||||
"泰国": "TH",
|
||||
"美国": "US",
|
||||
"西班牙": "ES",
|
||||
"韩国": "KR",
|
||||
"香港": "HK", #"CN_city_HK",
|
||||
"香港": "CN_city_HK", #"HK", #
|
||||
"越南": "VN",
|
||||
}
|
||||
|
||||
|
@ -25,40 +25,28 @@ def get_data_for_rn(rn: str) -> pd.DataFrame:
|
||||
# 注意:这里把 SQL 中的 rn 和 level 参数化
|
||||
sql = f"""
|
||||
SELECT
|
||||
op.id AS ID,
|
||||
v.v_name AS 片名,
|
||||
v.link AS 视频连接,
|
||||
v.is_piracy AS 是否盗版,
|
||||
op.`level` AS 优先级,
|
||||
op.rn AS 地区,
|
||||
NULL AS 投诉日期,
|
||||
NULL AS 下线日期,
|
||||
op.keyword AS 关键词,
|
||||
v.title AS 标题,
|
||||
v.duration AS 时长,
|
||||
v.watch_number AS 观看数量,
|
||||
v.public_time AS 上传时间,
|
||||
v.u_pic AS 头像,
|
||||
CASE
|
||||
WHEN dup.cnt > 1 THEN 1
|
||||
ELSE 2
|
||||
END AS 是否重复,
|
||||
op.sort AS 排序,
|
||||
op.batch AS 批次,
|
||||
op.machine AS 机器号,
|
||||
v.u_id AS 用户id,
|
||||
v.u_xid AS u_xid,
|
||||
v.u_name AS 用户名称
|
||||
op.id AS ID,
|
||||
v.v_name AS 片名,
|
||||
v.link AS 视频连接,
|
||||
v.is_piracy AS 是否盗版,
|
||||
op.`level` AS 优先级,
|
||||
op.rn AS 地区,
|
||||
NULL AS 投诉日期,
|
||||
NULL AS 下线日期,
|
||||
op.keyword AS 关键词,
|
||||
v.title AS 标题,
|
||||
v.duration AS 时长,
|
||||
v.watch_number AS 观看数量,
|
||||
v.public_time AS 上传时间,
|
||||
v.u_pic AS 头像,
|
||||
v.is_repeat AS 是否重复, -- 直接用字段
|
||||
op.sort AS 排序,
|
||||
op.batch AS 批次,
|
||||
op.machine AS 机器号,
|
||||
v.u_id AS 用户id,
|
||||
v.u_xid AS u_xid,
|
||||
v.u_name AS 用户名称
|
||||
FROM sh_dm_video_op_v2 AS op
|
||||
LEFT JOIN (
|
||||
SELECT
|
||||
t.v_xid,
|
||||
COUNT(*) AS cnt
|
||||
FROM sh_dm_video_op_v2 AS t
|
||||
WHERE t.batch IN (1747324254, 1747323990)
|
||||
GROUP BY t.v_xid
|
||||
) AS dup
|
||||
ON op.v_xid = dup.v_xid
|
||||
LEFT JOIN sh_dm_video_v2 AS v
|
||||
ON op.v_xid = v.v_xid
|
||||
WHERE op.rn = %s
|
||||
|
Loading…
x
Reference in New Issue
Block a user