refactor: 清理无用代码并优化代理处理逻辑
This commit is contained in:
parent
db440b420a
commit
6d8dca5e24
203
main.py
203
main.py
@ -16,26 +16,11 @@ import copy
|
||||
from threading import Lock
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from logger import logger
|
||||
import os
|
||||
import urllib3
|
||||
|
||||
|
||||
db = DBVidcon()  # shared DB / proxy-queue client (project-local); used by every worker
MACHINE_ID = None  # machine identifier; populated later by parse_args()
MAX_WORKERS = 10  # default thread-pool size; parse_args() may override via CLI
# NOTE(review): this pool is built at import time, BEFORE parse_args() can
# change MAX_WORKERS; the __main__ block re-creates it after parsing so the
# CLI value takes effect. This instance exists only for module consumers.
executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
|
||||
# Pool of desktop-browser User-Agent strings; gettoken() picks one at random
# for each token refresh to vary the request fingerprint.
# BUG FIX: several entries previously began with the literal header name
# ("User-Agent,Mozilla/…" / "User-Agent, Mozilla/…"), i.e. the header key
# had been concatenated into the value. Assigning such an entry to the
# 'User-Agent' header produced an invalid UA value; the prefixes are removed.
UserAgent = [
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.2669.400 QQBrowser/9.6.10990.400',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.3.4000 Chrome/47.0.2526.73 Safari/537.36',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)',
]
|
||||
|
||||
|
||||
def get_part_ids(part_num: int, take: int, offset: int = 0):
|
||||
@ -77,178 +62,6 @@ def format_duration(seconds):
|
||||
return "00:00"
|
||||
|
||||
|
||||
# Template headers for Dailymotion GraphQL requests. gettoken() deep-copies
# this dict and overwrites 'authorization', 'x-dm-visit-id',
# 'x-dm-visitor-id', 'User-Agent' and 'X-DM-Preferred-Country' per call.
headers1 = {
    # NOTE(review): duplicated value ('*/*, */*') looks like an accidental
    # concatenation — confirm the API tolerates it before changing.
    'Accept': '*/*, */*',
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # 'Content-Length': '8512',
    # NOTE(review): same duplication here ('application/json, application/json').
    'Content-Type': 'application/json, application/json',
    'Host': 'graphql.api.dailymotion.com',
    'Origin': 'https://www.dailymotion.com',
    'Referer': 'https://www.dailymotion.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    'X-DM-AppInfo-Id': 'com.dailymotion.neon',
    'X-DM-AppInfo-Type': 'website',
    'X-DM-AppInfo-Version': 'v2025-05-26T13:45:05.666Z',
    'X-DM-Neon-SSR': '0',
    'X-DM-Preferred-Country': 'tw',
    'accept-language': 'zh-CN',
    # HACK: a captured short-lived bearer token is checked into source. It is
    # replaced at runtime by gettoken(), but committing credentials is a
    # security smell — consider loading a placeholder from config instead.
    'authorization': 'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhaWQiOiJmMWEzNjJkMjg4YzFiOTgwOTljNyIsInJvbCI6ImNhbi1tYW5hZ2UtcGFydG5lcnMtcmVwb3J0cyBjYW4tcmVhZC12aWRlby1zdHJlYW1zIGNhbi1zcG9vZi1jb3VudHJ5IGNhbi1hZG9wdC11c2VycyBjYW4tcmVhZC1jbGFpbS1ydWxlcyBjYW4tbWFuYWdlLWNsYWltLXJ1bGVzIGNhbi1tYW5hZ2UtdXNlci1hbmFseXRpY3MgY2FuLXJlYWQtbXktdmlkZW8tc3RyZWFtcyBjYW4tZG93bmxvYWQtbXktdmlkZW9zIGFjdC1hcyBhbGxzY29wZXMgYWNjb3VudC1jcmVhdG9yIGNhbi1yZWFkLWFwcGxpY2F0aW9ucyIsInNjbyI6InJlYWQgd3JpdGUgZGVsZXRlIGVtYWlsIHVzZXJpbmZvIGZlZWQgbWFuYWdlX3ZpZGVvcyBtYW5hZ2VfY29tbWVudHMgbWFuYWdlX3BsYXlsaXN0cyBtYW5hZ2VfdGlsZXMgbWFuYWdlX3N1YnNjcmlwdGlvbnMgbWFuYWdlX2ZyaWVuZHMgbWFuYWdlX2Zhdm9yaXRlcyBtYW5hZ2VfbGlrZXMgbWFuYWdlX2dyb3VwcyBtYW5hZ2VfcmVjb3JkcyBtYW5hZ2Vfc3VidGl0bGVzIG1hbmFnZV9mZWF0dXJlcyBtYW5hZ2VfaGlzdG9yeSBpZnR0dCByZWFkX2luc2lnaHRzIG1hbmFnZV9jbGFpbV9ydWxlcyBkZWxlZ2F0ZV9hY2NvdW50X21hbmFnZW1lbnQgbWFuYWdlX2FuYWx5dGljcyBtYW5hZ2VfcGxheWVyIG1hbmFnZV9wbGF5ZXJzIG1hbmFnZV91c2VyX3NldHRpbmdzIG1hbmFnZV9jb2xsZWN0aW9ucyBtYW5hZ2VfYXBwX2Nvbm5lY3Rpb25zIG1hbmFnZV9hcHBsaWNhdGlvbnMgbWFuYWdlX2RvbWFpbnMgbWFuYWdlX3BvZGNhc3RzIiwibHRvIjoiY0c1Z1RocGRBbFIwVEVZeVhEVWNBMnNDTDFrUFFncDNRUTBNS3ciLCJhaW4iOjEsImFkZyI6MSwiaWF0IjoxNzQ4NTI0MDU5LCJleHAiOjE3NDg1NjAwMDcsImRtdiI6IjEiLCJhdHAiOiJicm93c2VyIiwiYWRhIjoid3d3LmRhaWx5bW90aW9uLmNvbSIsInZpZCI6IjY0NjMzRDAzMDY1RjQxODZBRDBCMDI3Q0Y3OTVFRjBGIiwiZnRzIjo5MTE0MSwiY2FkIjoyLCJjeHAiOjIsImNhdSI6Miwia2lkIjoiQUY4NDlERDczQTU4NjNDRDdEOTdEMEJBQjA3MjI0M0IifQ.h27sfMMETgt0xKhQvFAGIpwInouNj2sFLOeb1Y74Orc',
    'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'x-dm-visit-id': '1748480937099',
    'x-dm-visitor-id': '1032a5f1-d07f-4bef-b96d-7783939abfc9',
}
_headers_cache = None  # most recent successfully built headers (used as fallback)
_cache_lock = Lock()  # guards _headers_cache across worker threads
Gproxies = None  # NOTE(review): appears unused in the visible code — confirm before removing
|
||||
|
||||
|
||||
def get_proxies(g):
    """Fetch one dynamic proxy for region *g* from the kookeey provider.

    Args:
        g: Region/group code understood by the provider API.

    Returns:
        dict: ``{"http": url, "https": url}`` where *url* is in the form
        ``http://user:pass@ip:port``, ready to pass to ``requests``.

    Keeps trying until a usable record is returned (same contract as
    before), but iteratively: the original recursed on every failure,
    which risked ``RecursionError`` during a provider outage, and called
    ``requests.get`` without a timeout, which could hang forever.
    """
    url = "https://www.kookeey.com/pickdynamicips"
    params = {
        "auth": "pwd",
        "format": "1",
        "n": "1",
        "p": "http",
        "gate": "sea",
        "g": g,
        "r": "0",
        "type": "json",
        "sign": "10099426b05c7119e9c4dbd6a7a0aa4e",
        "accessid": "2207189",
        "dl": ","
    }
    while True:
        try:
            # timeout added: the original call could block indefinitely
            response = requests.get(url, params=params, timeout=10)
        except RequestException:
            # brief pause instead of the original tight retry recursion
            time.sleep(1)
            continue
        try:
            proxy_data = response.json()['data'][0]
        except Exception:
            logger.exception(g)
            logger.exception("数据返回解析错误!" + str(response.text))
            time.sleep(5)
            continue
        proxies_url = f"http://{proxy_data['username']}:{proxy_data['password']}@{proxy_data['ip']}:{proxy_data['port']}"
        return {
            "http": proxies_url,
            "https": proxies_url,
        }
|
||||
|
||||
|
||||
def post_with_retry(url, proxy_name, json_payload=None, data=None, headers=None,
                    retries=5, timeout=10, backoff_factor=2, verbose=True):
    """POST with per-attempt proxy rotation, one-shot token refresh and
    exponential backoff.

    Args:
        url: Target URL.
        proxy_name: Key for ``db.get_proxy`` / ``gettoken``.
        json_payload: Optional JSON body.
        data: Optional form body.
        headers: Request headers; replaced in-place after a token refresh.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds.
        backoff_factor: Base for the exponential sleep between attempts.
        verbose: Emit progress logs when True.

    Returns:
        requests.Response on success, or None after all attempts fail.
    """
    token_refreshed = False
    for attempt in range(1, retries + 1):
        try:
            proxy_str = db.get_proxy(proxy_name)  # fresh proxy each attempt
            proxies = {"http": proxy_str, "https": proxy_str}

            resp = requests.post(
                url,
                json=json_payload,
                data=data,
                headers=headers,
                proxies=proxies,
                timeout=timeout,
            )
            if resp.status_code == 401 and not token_refreshed:
                if verbose:
                    logger.info("[post_with_retry] 收到 401,刷新 token 后重试")
                # BUG FIX: gettoken() was previously called with no argument
                # although it requires `proxy` (TypeError at runtime); also
                # adopt the refreshed headers so the retry actually carries
                # the new token instead of resending the stale one.
                headers = gettoken(proxy_name)
                token_refreshed = True
                continue

            resp.raise_for_status()
            return resp

        except RequestException as e:
            if verbose:
                logger.info(f"[{attempt}/{retries}] 请求失败: {e}")
            # Refresh the token once if we have not done so yet.
            if not token_refreshed:
                if verbose:
                    logger.info("[post_with_retry] 刷新 token 后再试")
                # BUG FIX (same as above): use the refreshed headers.
                headers = gettoken(proxy_name)
                token_refreshed = True
                continue
            if attempt == retries:
                if verbose:
                    logger.info(f"[post_with_retry] 最终失败:{url}")
                return None

            # Exponential backoff: factor * 2^(attempt-1) seconds.
            sleep_time = backoff_factor * (2 ** (attempt - 1))
            if verbose:
                logger.info(f"[post_with_retry] 等待 {sleep_time}s 后重试…")
            time.sleep(sleep_time)
|
||||
|
||||
|
||||
def gettoken(proxy, r=2):
    """Obtain a Dailymotion OAuth token and build full request headers.

    Args:
        proxy: Proxy pool name passed to ``db.get_proxy``; its lowercase
            form also becomes the ``X-DM-Preferred-Country`` header value.
        r: Remaining retry count on failure (bounds the recursion).

    Returns:
        dict: deep copy of the ``headers1`` template with a fresh bearer
        token, current visit id, random visitor id and a random
        ``User-Agent``. On total failure, falls back to the last cached
        headers, or to the bare template (no valid auth) as a last resort.
    """
    global _headers_cache
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.dailymotion.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.dailymotion.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    u = uuid.uuid4()
    uuid_with_dash = str(u)
    # Random six-digit traffic segment, mimicking the web client.
    traffic_segment = str(random.randint(100_000, 999_999))
    data = {
        'client_id': 'f1a362d288c1b98099c7',
        'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
        'grant_type': 'client_credentials',
        'traffic_segment': traffic_segment,
        'visitor_id': uuid_with_dash,
    }
    try:
        proxy_str = db.get_proxy(proxy)
        url = 'https://graphql.api.dailymotion.com/oauth/token'
        # timeout added: the original call could block indefinitely
        response = requests.post(url, headers=headers, data=data,
                                 proxies={"http": proxy_str, "https": proxy_str},
                                 timeout=10)
        token = response.json()['access_token']
        copy_headers = copy.deepcopy(headers1)
        copy_headers['authorization'] = "Bearer " + token
        copy_headers['x-dm-visit-id'] = str(int(time.time() * 1000))
        copy_headers['x-dm-visitor-id'] = uuid_with_dash
        # random.choice replaces the manual random-index idiom
        copy_headers['User-Agent'] = random.choice(UserAgent)
        copy_headers['X-DM-Preferred-Country'] = proxy.lower()
        with _cache_lock:
            _headers_cache = copy_headers
        return copy_headers
    except Exception as e:
        # BUG FIX: `logger.exception("[gettoken] 失败:", e)` passed `e` as a
        # stray positional argument with no format placeholder, which made
        # the logging call itself error; use lazy %-formatting instead.
        logger.exception("[gettoken] 失败: %s", e)
        if r > 0:
            time.sleep(5)
            return gettoken(proxy, r - 1)
        with _cache_lock:
            if _headers_cache:
                logger.info("[gettoken] 用缓存 headers 兜底")
                return copy.deepcopy(_headers_cache)
        # Still nothing cached -> return the bare template (no valid auth).
        return copy.deepcopy(headers1)
|
||||
|
||||
|
||||
def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
if r == 2:
|
||||
logger.info(f"NET处理->{keyword},\trn->{proxy_name},\tlevel->{level}")
|
||||
@ -270,13 +83,13 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
proxy_string = db.get_proxy(proxy_name)
|
||||
logger.info(f"代理: {proxy_string}")
|
||||
proxies = {
|
||||
'http':proxy_string,
|
||||
'https':proxy_string,
|
||||
'http': proxy_string,
|
||||
'https': proxy_string,
|
||||
}
|
||||
response = requests.get(endpoint, params=params, proxies=proxies)
|
||||
jsondata = response.json()
|
||||
try:
|
||||
resinfo=jsondata.get("list")
|
||||
resinfo = jsondata.get("list")
|
||||
except Exception:
|
||||
if r < 0:
|
||||
logger.exception("[搜索接口] 未知:未处理", response.text)
|
||||
@ -322,12 +135,11 @@ proxiesdict = db.get_proxy_agent_dict()
|
||||
def search_worker(payload, kitem, flag):
|
||||
try:
|
||||
gproxies = proxiesdict[kitem['rn']]
|
||||
header = gettoken(gproxies)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
|
||||
if not v_list:
|
||||
for i in range(2):
|
||||
time.sleep(i * 5)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
|
||||
if v_list:
|
||||
break
|
||||
time.sleep(2)
|
||||
@ -340,9 +152,6 @@ def search_worker(payload, kitem, flag):
|
||||
return False, flag, payload, kitem, [] # 失败
|
||||
|
||||
|
||||
executor = concurrent.futures.ThreadPoolExecutor(MAX_WORKERS)
|
||||
|
||||
|
||||
def integrate_data_parallel():
|
||||
while True:
|
||||
global proxiesdict
|
||||
@ -401,6 +210,7 @@ def integrate_data_parallel():
|
||||
db.rollback_l2(rollback[2])
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
global MACHINE_ID, MAX_WORKERS
|
||||
|
||||
@ -434,6 +244,7 @@ def parse_args() -> argparse.Namespace:
|
||||
|
||||
if __name__ == '__main__':
    parse_args()  # populates MACHINE_ID / MAX_WORKERS from the CLI
    # Re-create the pool so a CLI-provided MAX_WORKERS takes effect
    # (the module-level executor was built with the default value).
    executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
    start_time = datetime.datetime.now()
    logger.info(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
    integrate_data_parallel()
|
||||
|
Loading…
x
Reference in New Issue
Block a user