feat: update get_searchInfo with a proxy parameter to support proxied requests
parent 91a1113d51
commit 82f134fe2f

main.py · 72 lines changed
@@ -1,3 +1,4 @@
+import json
 import random
 import traceback
 from urllib.parse import quote
@@ -227,7 +228,7 @@ def gettoken(proxy, r=2):
     return copy.deepcopy(headers1)
 
 
-def get_searchInfo(keyword, level, headers, proxy_name):
+def get_searchInfo(keyword, level, headers, proxy_name, r=2):
     video_list = []
     max_page = 2
     limit = 30
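The new r=2 default gives get_searchInfo a retry budget: each failed attempt recurses with r - 1, so the original call plus two retries makes three attempts in total. A minimal sketch of that pattern in isolation (fetch_once and fetch_with_budget are illustrative names, not functions from this repository):

import time

def fetch_once():
    """Placeholder for one network attempt; returns parsed data or None."""
    return None  # pretend every attempt fails, to exercise the retry path

def fetch_with_budget(r=2):
    data = fetch_once()
    if data is not None:
        return data
    if r == 0:                     # budget exhausted: give up, as get_searchInfo does
        return None
    time.sleep((3 - r) * 5)        # back off longer on each retry: 5s, then 10s
    return fetch_with_budget(r - 1)

print(fetch_with_budget())  # prints None after the backoff delays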
@@ -589,12 +590,25 @@ def get_searchInfo(keyword, level, headers, proxy_name):
 
         jsondata = response.json()
         try:
-            resinfo = jsondata['data']['search']['stories']['edges']
-            print('resinfo :', len(resinfo))
+            errors = jsondata.get("errors")  # GraphQL errors 数组
+            stories = jsondata.get("data", {}).get("search", {}).get("stories")
+
+            if errors or stories is None:  # 有错误 或 stories 为 null
+                if r == 0:
+                    print("连续 3 次错误或空结果:", json.dumps(jsondata, ensure_ascii=False))
+                    return None
+                time.sleep((3 - r) * 5)
+                return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
+            resinfo = stories["edges"]
+            print("resinfo :", len(resinfo))
         except Exception:
-            resinfo = []
-            print("[搜索接口]", response.text)
-            print("返回字段解析错误!")
+            if r < 0:
+                print("[搜索接口] 未知:未处理", response.text)
+                print("返回字段解析错误!")
+                return None
+            else:
+                time.sleep((3 - r) * 5)
+                return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
         for index, iteminfo in enumerate(resinfo):
             calculated_index = index + 1 + (j - 1) * 100
             node = iteminfo['node']
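The rewritten parsing treats two failure shapes the same way: a top-level GraphQL errors array and a null data.search.stories. Chained .get() lookups mean neither shape raises a KeyError before the code can decide to retry. A small self-contained sketch of that check (the sample payloads are made up for illustration):

def extract_edges(jsondata):
    """Return the stories edges list, or None when the payload signals failure."""
    errors = jsondata.get("errors")  # GraphQL errors array, if present
    stories = jsondata.get("data", {}).get("search", {}).get("stories")
    if errors or stories is None:
        return None
    return stories["edges"]

ok = {"data": {"search": {"stories": {"edges": [{"node": {"id": 1}}]}}}}
rate_limited = {"errors": [{"message": "rate limited"}]}

print(extract_edges(ok))            # [{'node': {'id': 1}}]
print(extract_edges(rate_limited))  # None -> caller backs off and retries with r - 1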
@@ -631,24 +645,26 @@ proxiesdict = db.get_proxy_agent_dict()
 def search_worker(payload, kitem, flag):
     try:
         gproxies = proxiesdict[kitem['rn']]
         header = gettoken(gproxies)
         v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies)
         if not v_list:
             for i in range(2):
                 time.sleep(i * 5)
-                v_list = get_searchInfo(kitem['keyword'], kitem['level'], header)
+                v_list = get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies)
                 if v_list:
                     break
             time.sleep(2)
 
         return True, flag, payload, kitem, v_list  # 成功
     except Exception as e:
         print(f"[线程异常] {kitem['keyword']} → {e}")
         traceback.print_exc()
         return False, flag, payload, kitem, []  # 失败
 
 
 executor = concurrent.futures.ThreadPoolExecutor(MAX_WORKERS)
+
+
 def integrate_data_parallel():
     while True:
         tasks, flag = db.item_keyword()
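The one-character-visible fix in search_worker matters: the old retry call dropped the fourth argument, so with the previous four-parameter signature it raised a TypeError and the retry loop never actually retried through the proxy. One way to make that kind of drift harder, sketched here as a generic helper rather than anything from this codebase, is to capture the full call once and retry it as a unit:

import time

def retry(call, attempts=3, base_delay=5):
    """Invoke call() until it returns a truthy value or the attempts run out."""
    result = None
    for i in range(attempts):
        if i:
            time.sleep(i * base_delay)  # 0s, 5s, 10s between attempts
        result = call()
        if result:
            return result
    return result

# Usage sketch (names mirror search_worker; the lambda captures every argument once,
# so a retry can never silently lose gproxies the way the old inline call did):
# v_list = retry(lambda: get_searchInfo(kitem['keyword'], kitem['level'], header, gproxies))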
@@ -675,26 +691,26 @@ def integrate_data_parallel():
         for item in v_list:
             record = {
                 "keyword": kitem["keyword"],
                 "v_name": kitem["v_name"],
                 "v_id": item["v_id"],
                 "v_xid": item["v_xid"],
                 "link": item["link"],
                 "title": item["title"],
                 "duration": format_duration(item["duration"]),
                 "fans": clean_dash_to_zero(item["fans"]),
                 "videos": clean_dash_to_zero(item["videos"]),
                 "watch_number": clean_dash_to_zero(item["view"]),
                 "create_time": format_create_time(item["createtime"]),
                 "cover_pic": item["pic"],
                 "index": item["index"],
                 "u_id": item["u_id"],
                 "u_xid": item["u_xid"],
                 "u_name": item["u_name"],
                 "u_pic": item["u_pic"],
                 "rn": kitem["rn"],
                 "batch": kitem["batch"],
                 "machine_id": MACHINE_ID,
                 "level": kitem["level"],
             }
             db.upsert_video(record)
         db.flush()
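db.upsert_video and db.flush are not part of this diff, so their implementation is unknown here. As a rough illustration of the upsert-then-flush pairing visible in the hunk, keyed on v_xid (the class, its storage, and the field choice are assumptions, not the project's actual db module):

class VideoStore:
    """Toy stand-in for the db object: buffer upserts, write them out on flush()."""

    def __init__(self):
        self._buffer = {}  # v_xid -> latest record seen in this batch
        self.rows = {}     # pretend persistent storage

    def upsert_video(self, record):
        self._buffer[record["v_xid"]] = record  # later records overwrite earlier ones

    def flush(self):
        self.rows.update(self._buffer)          # one bulk write instead of many small ones
        self._buffer.clear()

db = VideoStore()
db.upsert_video({"v_xid": "x1", "title": "first pass", "watch_number": 10})
db.upsert_video({"v_xid": "x1", "title": "second pass", "watch_number": 25})
db.flush()
print(db.rows["x1"]["watch_number"])  # 25 -> the newer record won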