fix: 修改搜索信息获取逻辑,增加参数以支持不同级别的请求和视频时长过滤
This commit is contained in:
parent
69137dd128
commit
84c6f3afd9
20
main.py
20
main.py
@ -6,6 +6,8 @@ import uuid
|
||||
import concurrent.futures
|
||||
import requests
|
||||
import datetime
|
||||
|
||||
from mpmath import limit
|
||||
from requests import RequestException
|
||||
from DB import DBVidcon
|
||||
from dateutil import parser as date_parser
|
||||
@ -209,9 +211,14 @@ def gettoken():
|
||||
pass
|
||||
|
||||
|
||||
def get_searchInfo(keyword):
|
||||
def get_searchInfo(keyword, level):
|
||||
video_list = []
|
||||
for j in range(1, 3):
|
||||
max_page = 2
|
||||
limit = 30
|
||||
if level == 1:
|
||||
max_page = 3
|
||||
limit = 100
|
||||
for j in range(1, max_page):
|
||||
# 别展开 = = !
|
||||
data = {
|
||||
"operationName": "SEARCH_QUERY",
|
||||
@ -224,7 +231,7 @@ def get_searchInfo(keyword):
|
||||
"shouldIncludeVideos": False,
|
||||
"shouldIncludeLives": False,
|
||||
"page": j,
|
||||
"limit": 100,
|
||||
"limit": limit,
|
||||
"recaptchaToken": None
|
||||
},
|
||||
"query": """
|
||||
@ -580,6 +587,8 @@ def get_searchInfo(keyword):
|
||||
if node['__typename'] != "Video":
|
||||
continue
|
||||
creator = node['creator']
|
||||
duration = node.get('duration')
|
||||
if duration > 300:
|
||||
video_tasks.append({
|
||||
"index": calculated_index,
|
||||
"xid": node.get('xid'),
|
||||
@ -704,12 +713,11 @@ def integrate_data():
|
||||
if not v_list:
|
||||
for i in range(3):
|
||||
time.sleep(i * 5)
|
||||
v_list = get_searchInfo(kitem["keyword"])
|
||||
v_list = get_searchInfo(kitem["keyword"], kitem['level'])
|
||||
if v_list:
|
||||
break
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
for item in v_list:
|
||||
record = {
|
||||
"keyword": kitem.get("keyword"),
|
||||
@ -748,6 +756,7 @@ def integrate_data():
|
||||
time.sleep(5)
|
||||
break
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
global MACHINE_ID, MAX_WORKERS
|
||||
|
||||
@ -778,6 +787,7 @@ def parse_args() -> argparse.Namespace:
|
||||
raise ValueError("请指定机器编号")
|
||||
return args
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parse_args()
|
||||
start_time = datetime.datetime.now()
|
||||
|
Loading…
x
Reference in New Issue
Block a user