fix: 修改搜索信息获取逻辑,增加参数以支持不同级别的请求和视频时长过滤

This commit is contained in:
晓丰 2025-05-17 20:06:15 +08:00
parent 69137dd128
commit 84c6f3afd9

40
main.py
View File

@ -6,6 +6,8 @@ import uuid
import concurrent.futures
import requests
import datetime
from mpmath import limit
from requests import RequestException
from DB import DBVidcon
from dateutil import parser as date_parser
@ -109,7 +111,7 @@ def get_proxies(g):
proxy_data = response.json()['data'][0]
except Exception:
print(g)
print("数据返回解析错误!"+ str(response.text))
print("数据返回解析错误!" + str(response.text))
time.sleep(5)
return get_proxies(g)
proxies_url = f"http://{proxy_data['username']}:{proxy_data['password']}@{proxy_data['ip']}:{proxy_data['port']}"
@ -188,7 +190,7 @@ def gettoken():
u = uuid.uuid4()
uuid_with_dash = str(u)
uuid_no_dash = u.hex
traffic_segment = str(random.randint(10**8, 10**9 - 1))
traffic_segment = str(random.randint(10 ** 8, 10 ** 9 - 1))
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
@ -209,9 +211,14 @@ def gettoken():
pass
def get_searchInfo(keyword):
def get_searchInfo(keyword, level):
video_list = []
for j in range(1, 3):
max_page = 2
limit = 30
if level == 1:
max_page = 3
limit = 100
for j in range(1, max_page):
# 别展开 = = !
data = {
"operationName": "SEARCH_QUERY",
@ -224,7 +231,7 @@ def get_searchInfo(keyword):
"shouldIncludeVideos": False,
"shouldIncludeLives": False,
"page": j,
"limit": 100,
"limit": limit,
"recaptchaToken": None
},
"query": """
@ -580,12 +587,14 @@ def get_searchInfo(keyword):
if node['__typename'] != "Video":
continue
creator = node['creator']
video_tasks.append({
"index": calculated_index,
"xid": node.get('xid'),
"node": node,
"creator": creator,
})
duration = node.get('duration')
if duration > 300:
video_tasks.append({
"index": calculated_index,
"xid": node.get('xid'),
"node": node,
"creator": creator,
})
def safe_fetch(task, max_try=2):
attempt = 0
@ -704,16 +713,15 @@ def integrate_data():
if not v_list:
for i in range(3):
time.sleep(i * 5)
v_list = get_searchInfo(kitem["keyword"])
v_list = get_searchInfo(kitem["keyword"], kitem['level'])
if v_list:
break
time.sleep(2)
for item in v_list:
record = {
"keyword": kitem.get("keyword"),
"v_name" : kitem.get("v_name"),
"v_name": kitem.get("v_name"),
"v_id": item.get("v_id"),
"v_xid": item.get("v_xid"),
"link": item.get("link"),
@ -748,6 +756,7 @@ def integrate_data():
time.sleep(5)
break
def parse_args() -> argparse.Namespace:
global MACHINE_ID, MAX_WORKERS
@ -778,10 +787,11 @@ def parse_args() -> argparse.Namespace:
raise ValueError("请指定机器编号")
return args
if __name__ == '__main__':
parse_args()
start_time = datetime.datetime.now()
print(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
integrate_data()
end_time = datetime.datetime.now()
duration = end_time - start_time
duration = end_time - start_time