feat: introduce a custom logger and replace print statements with logger calls

commit b4db51a8e7
parent 68d79c84b0

 DB.py     | 66 changed lines
 logger.py | 57 added lines (new file)
 main.py   | 37 changed lines
 3 files changed
--- a/DB.py
+++ b/DB.py
@@ -11,6 +11,8 @@ from sqlalchemy import (
     BigInteger, Integer, String, Text
 )
 from sqlalchemy.dialects.mysql import insert as mysql_insert
+from logger import logger
+
 
 MYSQL_URL = (
     "mysql+pymysql://db_vidcon:rexdK4fhCCiRE4BZ"
@@ -80,11 +82,11 @@ def mysql_retry(max_retries: int = 3, base_delay: float = 2.0):
                 return fn(self, *args, **kwargs)
             except pymysql.InterfaceError as e:
                 wait = base_delay * (2 ** (attempt - 1))
-                print(f"[MySQL][{fn.__name__}] 第{attempt}次 InterfaceError:{e},等待 {wait:.1f}s 后重连…")
+                logger.info(f"[MySQL][{fn.__name__}] 第{attempt}次 InterfaceError:{e},等待 {wait:.1f}s 后重连…")
                 time.sleep(wait)
                 self._reconnect_mysql()
                 if attempt == max_retries:
-                    print("[MySQL] 重试多次仍失败,抛出异常")
+                    logger.info("[MySQL] 重试多次仍失败,抛出异常")
                     raise
 
         return wrapper
@@ -104,12 +106,12 @@ def redis_retry(max_retries: int = 3):
             try:
                 return fn(self, *args, **kwargs)
             except (ConnectionError, TimeoutError) as e:
-                print(f"[Redis][{fn.__name__}] 第 {attempt} 次失败:{e}")
+                logger.info(f"[Redis][{fn.__name__}] 第 {attempt} 次失败:{e}")
                 self.reconnect_redis()
                 if attempt == max_retries:
-                    print("[Redis] 连接彻底失败")
+                    logger.info("[Redis] 连接彻底失败")
                     raise
-                print(f"[Redis] 重连后第 {attempt + 1} 次重试…")
+                logger.info(f"[Redis] 重连后第 {attempt + 1} 次重试…")
 
         return wrapper
 
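The two hunks above only show the loop bodies of mysql_retry and redis_retry. For context, a minimal sketch of how such a decorator is presumably structured around those lines; the decorator/wrapper scaffolding here is an assumption, not part of the diff:

    import functools
    import time

    import pymysql

    from logger import logger

    def mysql_retry(max_retries: int = 3, base_delay: float = 2.0):
        def decorator(fn):
            @functools.wraps(fn)
            def wrapper(self, *args, **kwargs):
                for attempt in range(1, max_retries + 1):
                    try:
                        return fn(self, *args, **kwargs)
                    except pymysql.InterfaceError as e:
                        # exponential backoff: 2s, 4s, 8s, ...
                        wait = base_delay * (2 ** (attempt - 1))
                        logger.info(f"[MySQL][{fn.__name__}] attempt {attempt}: {e}, retrying in {wait:.1f}s")
                        time.sleep(wait)
                        self._reconnect_mysql()  # assumed to exist on the decorated class
                        if attempt == max_retries:
                            raise
            return wrapper
        return decorator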
@@ -151,7 +153,7 @@ class DBVidcon:
         try:
             self._connect_redis()
         except Exception as e:
-            print("[Redis reconnect error]", e)
+            logger.info("[Redis reconnect error] %s", e)
             time.sleep(2)
 
     @redis_retry(max_retries=3)
@@ -172,7 +174,7 @@ class DBVidcon:
                     break
                 raws.append(item)
         except redis.exceptions.ConnectionError as e:
-            print("[Redis pop error]", e)
+            logger.info("[Redis pop error] %s", e)
             self.reconnect_redis()
             return []
         if not raws:
@@ -193,7 +195,7 @@ class DBVidcon:
         if isinstance(raws, str):
             raws = [raws]
         self.redis.lpush(self.l0_list_key, *raws)
-        print(f"[写入l0] 已推入 {len(raws)} 条")
+        logger.info(f"[写入l0] 已推入 {len(raws)} 条")
 
     @redis_retry(max_retries=3)
     def push_l1(self, payloads):
@@ -201,7 +203,7 @@ class DBVidcon:
         if isinstance(payloads, str):
             payloads = [payloads]
         self.redis.rpush(self.l1_list_key, *payloads)
-        print(f"[写入l1] 已推入 {len(payloads)} 条")
+        logger.info(f"[写入l1] 已推入 {len(payloads)} 条")
 
     @redis_retry(max_retries=3)
     def push_l2(self, raws):
@@ -209,7 +211,7 @@ class DBVidcon:
         if isinstance(raws, str):
             raws = [raws]
         self.redis.lpush(self.l2_list_key, *raws)
-        print(f"[写入l2] 已推入 {len(raws)} 条")
+        logger.info(f"[写入l2] 已推入 {len(raws)} 条")
 
     @mysql_retry()
     def get_proxy_agent_dict(self) -> dict:
@@ -224,7 +226,7 @@ class DBVidcon:
         sql = "SELECT parameter FROM proxy_agent WHERE rn = %s LIMIT 1"
         self.cursor.execute(sql, (rn,))
         result = self.cursor.fetchone()
-        print(result)
+        logger.info(result)
         return result['parameter'] if result else None
 
     @redis_retry(max_retries=3)
@@ -232,7 +234,7 @@ class DBVidcon:
         try:
             items = self.fetch_from_redis(count, list_key=self.l0_list_key)
         except Exception as e:
-            print("[Redis l0 pop error]", e)
+            logger.info("[Redis l0 pop error] %s", e)
             self.reconnect_redis()
             items = []
 
@@ -241,7 +243,7 @@ class DBVidcon:
         try:
             items = self.fetch_from_redis(count, list_key=self.l1_list_key)
         except Exception as e:
-            print("[Redis l1 pop error]", e)
+            logger.info("[Redis l1 pop error] %s", e)
             self.reconnect_redis()
             items = []
 
@@ -250,7 +252,7 @@ class DBVidcon:
         try:
             items = self.fetch_from_redis(count, list_key=self.l2_list_key)
         except Exception as e:
-            print("[Redis l2 pop error]", e)
+            logger.info("[Redis l2 pop error] %s", e)
             self.reconnect_redis()
             items = []
         return items, 99
@@ -260,25 +262,25 @@ class DBVidcon:
         if isinstance(payloads, str):
             payloads = [payloads]
         self.redis.rpush(self.l1_list_key, *payloads)
-        print(f"[回滚l1] 已退回 {len(payloads)} 条")
+        logger.info(f"[回滚l1] 已退回 {len(payloads)} 条")
 
     @redis_retry(max_retries=3)
     def rollback_l0(self, raws):
         if isinstance(raws, str):
             raws = [raws]
         self.redis.lpush(self.l0_list_key, *raws)
-        print(f"[回滚l0] 已退回 {len(raws)} 条")
+        logger.info(f"[回滚l0] 已退回 {len(raws)} 条")
 
     @redis_retry(max_retries=3)
     def rollback_l2(self, raws):
         if isinstance(raws, str):
             raws = [raws]
         self.redis.lpush(self.l2_list_key, *raws)
-        print(f"[回滚l2] 已退回 {len(raws)} 条")
+        logger.info(f"[回滚l2] 已退回 {len(raws)} 条")
 
     @mysql_retry()
     def upsert_video(self, data: dict):
-        print(fr"DB处理->{data.get('v_xid')},\tlevel->{data.get('level')}")
+        logger.info(fr"DB处理->{data.get('v_xid')},\tlevel->{data.get('level')}")
         data.setdefault("a_id", 0)
         data.setdefault("history_status", "")
         data.setdefault("is_piracy", 3)
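A note on the list operations in this hunk: the l1 queue is written with rpush while l0 and l2 use lpush. Assuming the consumer side pops from the left (which the fetch_from_redis calls elsewhere suggest), rpush appends to the tail as a plain FIFO enqueue, while lpush places items at the head so they are consumed first. A runnable sketch of that pattern against a local Redis (the key name is hypothetical):

    import json

    import redis  # redis-py, matching the redis.exceptions usage in DB.py

    r = redis.Redis()  # assumes a local Redis server
    r.rpush("demo_list", json.dumps({"keyword": "demo"}))  # FIFO enqueue at the tail
    item = r.lpop("demo_list")                             # consumer takes from the head
    if item is not None:
        r.lpush("demo_list", item)                         # rollback: return to the head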
@@ -331,18 +333,18 @@ class DBVidcon:
 
                 # 回滚这次未提交的改动
                 self.conn.rollback()
-                print("[数据库写入异常]", str(e))
-                print("[出错数据]:", data)
+                logger.info("[数据库写入异常] %s", e)
+                logger.info("[出错数据]: %s", data)
 
                 if attempt < max_retries:
                     attempt += 1
-                    print(f"第 {attempt + 1} 次重试…")
+                    logger.info(f"第 {attempt + 1} 次重试…")
                     continue
                 else:
                     # 重试过后依然失败,推入 Redis 备用
-                    print("重试失败,将数据写入 Redis 以便后续处理")
+                    logger.info("重试失败,将数据写入 Redis 以便后续处理")
                     self.push_record(data)
-                    print("[交由Redis处理]")
+                    logger.info("[交由Redis处理]")
                     break
 
     @mysql_retry()
@@ -395,7 +397,7 @@ class DBVidcon:
             self.conn.commit()
             return self.cursor.lastrowid
         except Exception as e:
-            print(f"[log_batch_start] 插入失败:{e}")
+            logger.info(f"[log_batch_start] 插入失败:{e}")
             return None
 
     @mysql_retry()
@@ -418,7 +420,7 @@ class DBVidcon:
             else:
                 self.redis.connection_pool.disconnect()
         except Exception as e:
-            print("[Redis close error]", e)
+            logger.info("[Redis close error] %s", e)
 
     @redis_retry(max_retries=3)
     def get_proxy(self, region_code: str) -> str:
@@ -481,7 +483,7 @@ class DBSA:
     _lock = threading.Lock()
 
     push_record_many = staticmethod(
-        lambda rows: print("[退回Redis] cnt=", len(rows))
+        lambda rows: logger.info("[退回Redis] cnt=%d", len(rows))
     )
 
     @classmethod
@@ -516,7 +518,7 @@ class DBSA:
         cls._buf_vid.append(vid_row)
         cls._buf_payload.append(data)  # 保存原始
         buf_len = len(cls._buf_vid)
-        print(f"DB缓冲 -> xid={data['v_xid']}, level={data['level']}, buffer={buf_len}")
+        logger.info(f"DB缓冲 -> xid={data['v_xid']}, level={data['level']}, buffer={buf_len}")
 
         need_flush = False
         flush_reason = ""
@@ -528,7 +530,7 @@ class DBSA:
             flush_reason = "TIME"
 
         if need_flush:
-            print(f"DBSA 落 ({flush_reason}) ...")
+            logger.info(f"DBSA 落 ({flush_reason}) ...")
             cls.flush()
 
     @classmethod
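The hunk above shows only the tail of the flush decision; the actual thresholds are outside the diff. A self-contained sketch of a size-or-time trigger of this shape (the threshold values are assumptions):

    import time

    def should_flush(buf_len: int, last_flush: float,
                     max_rows: int = 100, max_age: float = 30.0):
        """Return (need_flush, reason): size threshold first, then elapsed time."""
        if buf_len >= max_rows:
            return True, "SIZE"
        if time.time() - last_flush >= max_age:
            return True, "TIME"
        return False, ""

    # usage: need_flush, flush_reason = should_flush(len(buf), last_flush)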
@@ -577,20 +579,20 @@ class DBSA:
         try:
             cls._bulk_insert(op_rows)
             cls._bulk_upsert(vid_rows)
-            print(f"[DBSA] 成 op={len(op_rows)} video={len(vid_rows)} time={time.time() - start:.3f}s")
+            logger.info(f"[DBSA] 成 op={len(op_rows)} video={len(vid_rows)} time={time.time() - start:.3f}s")
 
         except Exception as e:
-            print(f"[DBSA] flush FAIL: {e} op={len(op_rows)} video={len(vid_rows)}")
+            logger.info(f"[DBSA] flush FAIL: {e} op={len(op_rows)} video={len(vid_rows)}")
             # 批量退回原始 payload,字段最全
             try:
                 cls.push_record_many(payloads)
             except Exception as re:
-                print("[Redis 回退失败]", re)
+                logger.info("[Redis 回退失败] %s", re)
 
     @classmethod
     def update_video_stats(cls, locator: dict, stats: dict) -> int:
         """
         立即更新 sh_dm_video_v2 表中的统计字段。
         :param locator: 用于定位行的字典,必须包含: v_xid, rn
         :param stats: 需要更新的统计字段,如 {"fans": 633, "videos": 10090, "view": 1678408}
         :return: 受影响的行数
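The diff truncates update_video_stats after its docstring. Based only on that contract (a locator with v_xid and rn, stats as a column-to-value map, affected row count returned), a hypothetical implementation could look like this; the table name comes from the docstring, everything else is assumed:

    def update_video_stats(conn, locator: dict, stats: dict) -> int:
        # stats keys are assumed to be trusted, whitelisted column names;
        # dict order keeps keys and values aligned (Python 3.7+)
        sets = ", ".join(f"{col} = %s" for col in stats)  # e.g. "fans = %s, videos = %s"
        sql = f"UPDATE sh_dm_video_v2 SET {sets} WHERE v_xid = %s AND rn = %s"
        with conn.cursor() as cur:  # PyMySQL-style DB-API connection
            cur.execute(sql, (*stats.values(), locator["v_xid"], locator["rn"]))
            affected = cur.rowcount
        conn.commit()
        return affected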
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,57 @@
+import logging
+import os
+from datetime import datetime
+from logging.handlers import TimedRotatingFileHandler
+
+
+class CustomHourlyHandler(TimedRotatingFileHandler):
+    def __init__(self, base_filename, level=logging.INFO, is_error=False):
+        # 初始化路径
+        self.is_error = is_error
+        self.base_filename = base_filename
+        self.log_dir = self._get_log_dir()
+        filename = self._build_log_path()
+
+        # 初始化 handler
+        super().__init__(
+            filename,
+            when='H',
+            interval=1,
+            backupCount=336,  # 14天 * 24小时 = 336 小时日志
+            encoding='utf-8',
+            utc=False
+        )
+        self.setLevel(level)
+        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+        self.setFormatter(formatter)
+
+    def _get_log_dir(self):
+        today = datetime.now().strftime("%Y-%m-%d")
+        log_dir = os.path.join("logs", today)
+        os.makedirs(log_dir, exist_ok=True)
+        return log_dir
+
+    def _build_log_path(self):
+        hour = datetime.now().strftime("%H")
+        suffix = "err.log" if self.is_error else "app.log"
+        return os.path.join(self._get_log_dir(), f"{hour}_{suffix}")  # 每次重新计算当天目录,跨天自动切换
+
+    def shouldRollover(self, record):
+        # 每小时轮换并重设路径
+        result = super().shouldRollover(record)
+        if result:
+            self.baseFilename = os.path.abspath(self._build_log_path())
+        return result
+
+
+logger = logging.getLogger("DailyMotion")
+logger.setLevel(logging.DEBUG)
+
+if not logger.handlers:
+    logger.addHandler(CustomHourlyHandler("app", level=logging.DEBUG, is_error=False))
+    logger.addHandler(CustomHourlyHandler("err", level=logging.ERROR, is_error=True))
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.DEBUG)
+    ch.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+    logger.addHandler(ch)
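With this configuration, app.log receives everything at DEBUG and above while err.log only receives ERROR and above, so failures reported via logger.info never reach the error file. A short usage sketch of the calling patterns that fit this setup:

    from logger import logger

    # lazy %-formatting: the message is rendered only when a handler emits it,
    # and extra arguments are safe (unlike print-style comma arguments)
    logger.info("[Redis pop error] %s", "connection reset by peer")

    try:
        1 / 0
    except ZeroDivisionError:
        # logs at ERROR with a traceback, so it also lands in err.log
        logger.exception("[数据库写入异常]")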
--- a/main.py
+++ b/main.py
@@ -14,6 +14,7 @@ from dateutil import parser as date_parser
 import copy
 from threading import Lock
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from logger import logger
 
 
 db = DBVidcon()
@@ -39,7 +40,7 @@ def clean_dash_to_zero(val):
     try:
         return int(val)
     except (ValueError, TypeError) as e:
-        print(f"[字段异常] val = {val} → {str(e)}")
+        logger.info(f"[字段异常] val = {val} → {str(e)}")
         return 0
 
 
@@ -48,7 +49,7 @@ def format_create_time(timestr):
         dt = date_parser.isoparse(timestr)
         return dt.strftime("%Y-%m-%d %H:%M:%S")
     except Exception as e:
-        print(f"[时间格式错误] {timestr} → {str(e)}")
+        logger.info(f"[时间格式错误] {timestr} → {str(e)}")
         return "1970-01-01 00:00:00"
 
 
@@ -114,8 +115,8 @@ def get_proxies(g):
     try:
         proxy_data = response.json()['data'][0]
     except Exception:
-        print(g)
-        print("数据返回解析错误!" + str(response.text))
+        logger.info(g)
+        logger.info("数据返回解析错误!" + str(response.text))
         time.sleep(5)
         return get_proxies(g)
     proxies_url = f"http://{proxy_data['username']}:{proxy_data['password']}@{proxy_data['ip']}:{proxy_data['port']}"
@@ -145,7 +146,7 @@ def post_with_retry(url, proxy_name, json_payload=None, data=None, headers=None,
             )
             if resp.status_code == 401 and not token_refreshed:
                 if verbose:
-                    print("[post_with_retry] 收到 401,刷新 token 后重试")
+                    logger.info("[post_with_retry] 收到 401,刷新 token 后重试")
                 gettoken()
                 token_refreshed = True
                 continue
@@ -155,22 +156,22 @@ def post_with_retry(url, proxy_name, json_payload=None, data=None, headers=None,
 
         except RequestException as e:
             if verbose:
-                print(f"[{attempt}/{retries}] 请求失败: {e}")
+                logger.info(f"[{attempt}/{retries}] 请求失败: {e}")
             # 如果还没刷新过 token,就刷新一次
             if not token_refreshed:
                 if verbose:
-                    print("[post_with_retry] 刷新 token 后再试")
+                    logger.info("[post_with_retry] 刷新 token 后再试")
                 gettoken(proxy_name)
                 token_refreshed = True
                 continue
             if attempt == retries:
                 if verbose:
-                    print(f"[post_with_retry] 最终失败:{url}")
+                    logger.info(f"[post_with_retry] 最终失败:{url}")
                 return None
 
             sleep_time = backoff_factor * (2 ** (attempt - 1))
             if verbose:
-                print(f"[post_with_retry] 等待 {sleep_time}s 后重试…")
+                logger.info(f"[post_with_retry] 等待 {sleep_time}s 后重试…")
             time.sleep(sleep_time)
 
 
@@ -217,14 +218,14 @@ def gettoken(proxy, r=2):
             _headers_cache = copy_headers
         return copy_headers
     except Exception as e:
-        print("[gettoken] 失败:", e)
+        logger.info("[gettoken] 失败:%s", e)
         if r > 0:
             time.sleep(5)
             return gettoken(proxy, r - 1)
         else:
             with _cache_lock:
                 if _headers_cache:
-                    print("[gettoken] 用缓存 headers 兜底")
+                    logger.info("[gettoken] 用缓存 headers 兜底")
                     return copy.deepcopy(_headers_cache)
             # 仍然没有 → 返回模板(没有 Auth)
             return copy.deepcopy(headers1)
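The hunk above shows gettoken's cache-fallback tail: on repeated failure it serves the last good headers under a lock, else a bare template. A self-contained sketch of that pattern, with the lock and cache names taken from the hunk and the surrounding scaffolding assumed:

    import copy
    import threading

    _cache_lock = threading.Lock()
    _headers_cache = None                # last successfully fetched headers (assumed global)
    headers1 = {"User-Agent": "demo"}    # template without Authorization (assumed shape)

    def fallback_headers():
        with _cache_lock:
            if _headers_cache:
                return copy.deepcopy(_headers_cache)  # stale but previously valid headers
        return copy.deepcopy(headers1)                # last resort: unauthenticated template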
@@ -232,7 +233,7 @@ def gettoken(proxy, r=2):
 
 def get_searchInfo(keyword, level, headers, proxy_name, r=2):
     if r == 2:
-        print(f"NET处理->{keyword},\trn->{proxy_name},\tlevel->{level}")
+        logger.info(f"NET处理->{keyword},\trn->{proxy_name},\tlevel->{level}")
     video_list = []
     max_page = 2
     limit = 30
@@ -600,16 +601,16 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
 
             if errors or stories is None:  # 有错误 或 stories 为 null
                 if r == 0:
-                    print("连续 3 次错误或空结果:", json.dumps(jsondata, ensure_ascii=False))
+                    logger.info("连续 3 次错误或空结果:%s", json.dumps(jsondata, ensure_ascii=False))
                     return None
                 time.sleep((3 - r) * 5)
                 return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
             resinfo = stories["edges"]
-            print("resinfo :", len(resinfo))
+            logger.info("resinfo : %d", len(resinfo))
         except Exception:
             if r < 0:
-                print("[搜索接口] 未知:未处理", response.text)
-                print("返回字段解析错误!")
+                logger.info("[搜索接口] 未知:未处理 %s", response.text)
+                logger.info("返回字段解析错误!")
                 return None
             else:
                 time.sleep((3 - r) * 5)
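get_searchInfo retries by counting r down and sleeping (3 - r) * 5 seconds between attempts. That countdown-recursion pattern, extracted into a self-contained sketch (the function name is hypothetical):

    import time

    def fetch_with_countdown(fetch, r: int = 2):
        """Retry fetch() with growing sleeps: 5s, 10s, 15s as r goes 2 -> 0."""
        try:
            return fetch()
        except Exception:
            if r < 0:
                return None
            time.sleep((3 - r) * 5)
            return fetch_with_countdown(fetch, r - 1)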
@@ -663,7 +664,7 @@ def search_worker(payload, kitem, flag):
         v_list = []
         return True, flag, payload, kitem, v_list  # 成功
     except Exception as e:
-        print(f"[线程异常] {kitem['keyword']} → {e}")
+        logger.info(f"[线程异常] {kitem['keyword']} → {e}")
         traceback.print_exc()
         return False, flag, payload, kitem, []  # 失败
 
@@ -762,7 +763,7 @@ def parse_args() -> argparse.Namespace:
 if __name__ == '__main__':
     parse_args()
     start_time = datetime.datetime.now()
-    print(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
+    logger.info(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
     integrate_data_parallel()
     end_time = datetime.datetime.now()
     duration = end_time - start_time