fix: 修改搜索信息获取逻辑，增加参数以支持不同级别的请求和视频时长过滤

This commit is contained in:
晓丰 2025-05-17 20:06:15 +08:00
parent 69137dd128
commit 84c6f3afd9

20
main.py
View File

@ -6,6 +6,8 @@ import uuid
import concurrent.futures import concurrent.futures
import requests import requests
import datetime import datetime
from mpmath import limit
from requests import RequestException from requests import RequestException
from DB import DBVidcon from DB import DBVidcon
from dateutil import parser as date_parser from dateutil import parser as date_parser
@ -209,9 +211,14 @@ def gettoken():
pass pass
def get_searchInfo(keyword): def get_searchInfo(keyword, level):
video_list = [] video_list = []
for j in range(1, 3): max_page = 2
limit = 30
if level == 1:
max_page = 3
limit = 100
for j in range(1, max_page):
# 别展开 = = ! # 别展开 = = !
data = { data = {
"operationName": "SEARCH_QUERY", "operationName": "SEARCH_QUERY",
@ -224,7 +231,7 @@ def get_searchInfo(keyword):
"shouldIncludeVideos": False, "shouldIncludeVideos": False,
"shouldIncludeLives": False, "shouldIncludeLives": False,
"page": j, "page": j,
"limit": 100, "limit": limit,
"recaptchaToken": None "recaptchaToken": None
}, },
"query": """ "query": """
@ -580,6 +587,8 @@ def get_searchInfo(keyword):
if node['__typename'] != "Video": if node['__typename'] != "Video":
continue continue
creator = node['creator'] creator = node['creator']
duration = node.get('duration')
if duration > 300:
video_tasks.append({ video_tasks.append({
"index": calculated_index, "index": calculated_index,
"xid": node.get('xid'), "xid": node.get('xid'),
@ -704,12 +713,11 @@ def integrate_data():
if not v_list: if not v_list:
for i in range(3): for i in range(3):
time.sleep(i * 5) time.sleep(i * 5)
v_list = get_searchInfo(kitem["keyword"]) v_list = get_searchInfo(kitem["keyword"], kitem['level'])
if v_list: if v_list:
break break
time.sleep(2) time.sleep(2)
for item in v_list: for item in v_list:
record = { record = {
"keyword": kitem.get("keyword"), "keyword": kitem.get("keyword"),
@ -748,6 +756,7 @@ def integrate_data():
time.sleep(5) time.sleep(5)
break break
def parse_args() -> argparse.Namespace: def parse_args() -> argparse.Namespace:
global MACHINE_ID, MAX_WORKERS global MACHINE_ID, MAX_WORKERS
@ -778,6 +787,7 @@ def parse_args() -> argparse.Namespace:
raise ValueError("请指定机器编号") raise ValueError("请指定机器编号")
return args return args
if __name__ == '__main__': if __name__ == '__main__':
parse_args() parse_args()
start_time = datetime.datetime.now() start_time = datetime.datetime.now()