feat: 修改 get_searchInfo 函数,添加代理参数以支持代理请求

This commit is contained in:
晓丰 2025-05-20 23:41:26 +08:00
parent 91a1113d51
commit 82f134fe2f

72
main.py
View File

@ -1,3 +1,4 @@
import json
import random import random
import traceback import traceback
from urllib.parse import quote from urllib.parse import quote
@ -227,7 +228,7 @@ def gettoken(proxy, r=2):
return copy.deepcopy(headers1) return copy.deepcopy(headers1)
def get_searchInfo(keyword, level, headers, proxy_name): def get_searchInfo(keyword, level, headers, proxy_name, r=2):
video_list = [] video_list = []
max_page = 2 max_page = 2
limit = 30 limit = 30
@ -589,12 +590,25 @@ def get_searchInfo(keyword, level, headers, proxy_name):
jsondata = response.json() jsondata = response.json()
try: try:
resinfo = jsondata['data']['search']['stories']['edges'] errors = jsondata.get("errors") # GraphQL errors 数组
print('resinfo :', len(resinfo)) stories = jsondata.get("data", {}).get("search", {}).get("stories")
if errors or stories is None: # 有错误 或 stories 为 null
if r == 0:
print("连续 3 次错误或空结果:", json.dumps(jsondata, ensure_ascii=False))
return None
time.sleep((3 - r) * 5)
return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
resinfo = stories["edges"]
print("resinfo :", len(resinfo))
except Exception: except Exception:
resinfo = [] if r < 0:
print("[搜索接口]", response.text) print("[搜索接口] 未知:未处理", response.text)
print("返回字段解析错误!") print("返回字段解析错误!")
return None
else:
time.sleep((3 - r) * 5)
return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
for index, iteminfo in enumerate(resinfo): for index, iteminfo in enumerate(resinfo):
calculated_index = index + 1 + (j - 1) * 100 calculated_index = index + 1 + (j - 1) * 100
node = iteminfo['node'] node = iteminfo['node']
@ -631,24 +645,26 @@ proxiesdict = db.get_proxy_agent_dict()
def search_worker(payload, kitem, flag): def search_worker(payload, kitem, flag):
try: try:
gproxies = proxiesdict[kitem['rn']] gproxies = proxiesdict[kitem['rn']]
header = gettoken(gproxies) header = gettoken(gproxies)
v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies) v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies)
if not v_list: if not v_list:
for i in range(2): for i in range(2):
time.sleep(i * 5) time.sleep(i * 5)
v_list = get_searchInfo(kitem['keyword'], kitem['level'], header) v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies)
if v_list: if v_list:
break break
time.sleep(2) time.sleep(2)
return True, flag, payload, kitem, v_list # 成功 return True, flag, payload, kitem, v_list # 成功
except Exception as e: except Exception as e:
print(f"[线程异常] {kitem['keyword']}{e}") print(f"[线程异常] {kitem['keyword']}{e}")
traceback.print_exc() traceback.print_exc()
return False, flag, payload, kitem, [] # 失败 return False, flag, payload, kitem, [] # 失败
executor = concurrent.futures.ThreadPoolExecutor(MAX_WORKERS) executor = concurrent.futures.ThreadPoolExecutor(MAX_WORKERS)
def integrate_data_parallel(): def integrate_data_parallel():
while True: while True:
tasks, flag = db.item_keyword() tasks, flag = db.item_keyword()
@ -675,26 +691,26 @@ def integrate_data_parallel():
for item in v_list: for item in v_list:
record = { record = {
"keyword": kitem["keyword"], "keyword": kitem["keyword"],
"v_name": kitem["v_name"], "v_name": kitem["v_name"],
"v_id": item["v_id"], "v_id": item["v_id"],
"v_xid": item["v_xid"], "v_xid": item["v_xid"],
"link": item["link"], "link": item["link"],
"title": item["title"], "title": item["title"],
"duration": format_duration(item["duration"]), "duration": format_duration(item["duration"]),
"fans": clean_dash_to_zero(item["fans"]), "fans": clean_dash_to_zero(item["fans"]),
"videos": clean_dash_to_zero(item["videos"]), "videos": clean_dash_to_zero(item["videos"]),
"watch_number": clean_dash_to_zero(item["view"]), "watch_number": clean_dash_to_zero(item["view"]),
"create_time": format_create_time(item["createtime"]), "create_time": format_create_time(item["createtime"]),
"cover_pic": item["pic"], "cover_pic": item["pic"],
"index": item["index"], "index": item["index"],
"u_id": item["u_id"], "u_id": item["u_id"],
"u_xid": item["u_xid"], "u_xid": item["u_xid"],
"u_name": item["u_name"], "u_name": item["u_name"],
"u_pic": item["u_pic"], "u_pic": item["u_pic"],
"rn": kitem["rn"], "rn": kitem["rn"],
"batch": kitem["batch"], "batch": kitem["batch"],
"machine_id": MACHINE_ID, "machine_id": MACHINE_ID,
"level": kitem["level"], "level": kitem["level"],
} }
db.upsert_video(record) db.upsert_video(record)
db.flush() db.flush()