diff --git a/DB.py b/DB.py index 2c9aecd..d31a496 100644 --- a/DB.py +++ b/DB.py @@ -146,55 +146,67 @@ class DBVidcon: print(f"[回滚] 已退回 {len(payloads)} 条") def upsert_video(self, data: dict): - """ - 1) 插入到 sh_dm_video_op_v2 - 2) DELETE sh_dm_video_v2 WHERE rn = … AND v_xid = … - 3) INSERT INTO sh_dm_video_v2 (…) VALUES (…) - """ - # 保底字段 data.setdefault("a_id", 0) data.setdefault("history_status", "") data.setdefault("is_repeat", 3) data["sort"] = data.get("index", 0) try: + select_repeat = """ + SELECT is_repeat + FROM sh_dm_video_v2 + WHERE rn = %(rn)s + AND v_xid = %(v_xid)s + LIMIT 1 + """ + self.cursor.execute(select_repeat, data) + row = self.cursor.fetchone() + if row: + data['is_repeat'] = row[0] + + # 2. 插入到 op 表 sql_op = """ - INSERT INTO sh_dm_video_op_v2 ( - v_id, v_xid, a_id, level, name_title, - keyword, rn, history_status, is_repeat, - sort, createtime, updatetime, batch, machine - ) VALUES ( - %(v_id)s, %(v_xid)s, %(a_id)s, %(level)s, %(v_name)s, - %(keyword)s, %(rn)s, %(history_status)s, %(is_repeat)s, - %(sort)s, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), %(batch)s, %(machine_id)s - ) + INSERT INTO sh_dm_video_op_v2 ( + v_id, v_xid, a_id, level, name_title, + keyword, rn, history_status, is_repeat, + sort, createtime, updatetime, batch, machine + ) VALUES ( + %(v_id)s, %(v_xid)s, %(a_id)s, %(level)s, %(v_name)s, + %(keyword)s, %(rn)s, %(history_status)s, %(is_repeat)s, + %(sort)s, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), %(batch)s, %(machine_id)s + ) """ self.cursor.execute(sql_op, data) + + # 3. 删除旧表中的那行 sql_del = """ - DELETE FROM sh_dm_video_v2 - WHERE rn = %(rn)s - AND v_xid = %(v_xid)s + DELETE FROM sh_dm_video_v2 + WHERE rn = %(rn)s + AND v_xid = %(v_xid)s """ self.cursor.execute(sql_del, data) + + # 4. 带上 is_repeat 再插入新数据 sql_ins = """ - INSERT INTO sh_dm_video_v2 ( - v_id, v_xid, rn, v_name, title, link, - is_piracy, edition, duration, - watch_number, follow_number, video_number, - public_time, cover_pic, sort, - u_xid, u_id, u_pic, u_name, - status, createtime, updatetime - ) VALUES ( - %(v_id)s, %(v_xid)s, %(rn)s, %(v_name)s, %(title)s, %(link)s, - 3, '', %(duration)s, - %(watch_number)s, %(fans)s, %(videos)s, - %(create_time)s, %(cover_pic)s, %(sort)s, - %(u_xid)s, %(u_id)s, %(u_pic)s, %(u_name)s, - 1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP() - ) + INSERT INTO sh_dm_video_v2 ( + v_id, v_xid, rn, v_name, title, link, + is_piracy, edition, duration, + watch_number, follow_number, video_number, + public_time, cover_pic, sort, + u_xid, u_id, u_pic, u_name, + status, createtime, updatetime, + is_repeat + ) VALUES ( + %(v_id)s, %(v_xid)s, %(rn)s, %(v_name)s, %(title)s, %(link)s, + %(is_piracy)s, '', %(duration)s, + %(watch_number)s, %(fans)s, %(videos)s, + %(create_time)s, %(cover_pic)s, %(sort)s, + %(u_xid)s, %(u_id)s, %(u_pic)s, %(u_name)s, + 1, UNIX_TIMESTAMP(), UNIX_TIMESTAMP(), + %(is_repeat)s + ) """ self.cursor.execute(sql_ins, data) - except Exception as e: # 打印错误并回滚 print("[数据库写入异常]", str(e)) @@ -212,7 +224,7 @@ class DBVidcon: def get_proxy(self, region_code: str) -> str: """ 从 Redis 队列 proxy_queue: 弹出一个代理并返回。 - 如果队列为空,返回空字符串。 + 如果队列为空,阻塞 """ proxy = "" while True: diff --git a/main.py b/main.py index 5f58a92..e7b51ed 100644 --- a/main.py +++ b/main.py @@ -15,12 +15,12 @@ proxies_address = { "印度尼西亚": "ID", "马来": "MY", "加拿大": "CA", - "台湾": "TW", #"CN_city_TW", + "台湾": "CN_city_TW", # "TW", # "泰国": "TH", "美国": "US", "西班牙": "ES", "韩国": "KR", - "香港": "HK", #"CN_city_HK", + "香港": "CN_city_HK", # "HK", # "越南": "VN", } MACHINE_ID = None @@ -137,6 +137,7 @@ def post_with_retry(url, json_payload=None, data=None, headers=None, proxies=Non for attempt in range(1, retries + 1): try: proxy_str = db.get_proxy(Gproxies) + proxies = {"http": proxy_str, "https": proxy_str} resp = requests.post( diff --git a/multi_proxy_refill.py b/multi_proxy_refill.py index e61972d..88b252b 100644 --- a/multi_proxy_refill.py +++ b/multi_proxy_refill.py @@ -28,12 +28,12 @@ PROXIES_ADDRESS = { "印度尼西亚": "ID", "马来": "MY", "加拿大": "CA", - "台湾": "TW", #"CN_city_TW", + "台湾": "CN_city_TW", #"TW", # "泰国": "TH", "美国": "US", "西班牙": "ES", "韩国": "KR", - "香港": "HK", #"CN_city_HK", + "香港": "CN_city_HK", #"HK", # "越南": "VN", } diff --git a/mysql_to_xlsx.py b/mysql_to_xlsx.py index 15581ba..85584d0 100644 --- a/mysql_to_xlsx.py +++ b/mysql_to_xlsx.py @@ -25,40 +25,28 @@ def get_data_for_rn(rn: str) -> pd.DataFrame: # 注意:这里把 SQL 中的 rn 和 level 参数化 sql = f""" SELECT - op.id AS ID, - v.v_name AS 片名, - v.link AS 视频连接, - v.is_piracy AS 是否盗版, - op.`level` AS 优先级, - op.rn AS 地区, - NULL AS 投诉日期, - NULL AS 下线日期, - op.keyword AS 关键词, - v.title AS 标题, - v.duration AS 时长, - v.watch_number AS 观看数量, - v.public_time AS 上传时间, - v.u_pic AS 头像, - CASE - WHEN dup.cnt > 1 THEN 1 - ELSE 2 - END AS 是否重复, - op.sort AS 排序, - op.batch AS 批次, - op.machine AS 机器号, - v.u_id AS 用户id, - v.u_xid AS u_xid, - v.u_name AS 用户名称 + op.id AS ID, + v.v_name AS 片名, + v.link AS 视频连接, + v.is_piracy AS 是否盗版, + op.`level` AS 优先级, + op.rn AS 地区, + NULL AS 投诉日期, + NULL AS 下线日期, + op.keyword AS 关键词, + v.title AS 标题, + v.duration AS 时长, + v.watch_number AS 观看数量, + v.public_time AS 上传时间, + v.u_pic AS 头像, + v.is_repeat AS 是否重复, -- 直接用字段 + op.sort AS 排序, + op.batch AS 批次, + op.machine AS 机器号, + v.u_id AS 用户id, + v.u_xid AS u_xid, + v.u_name AS 用户名称 FROM sh_dm_video_op_v2 AS op - LEFT JOIN ( - SELECT - t.v_xid, - COUNT(*) AS cnt - FROM sh_dm_video_op_v2 AS t - WHERE t.batch IN (1747324254, 1747323990) - GROUP BY t.v_xid - ) AS dup - ON op.v_xid = dup.v_xid LEFT JOIN sh_dm_video_v2 AS v ON op.v_xid = v.v_xid WHERE op.rn = %s