fix: 修改搜索信息获取逻辑，增加参数以支持不同级别的请求和视频时长过滤
This commit is contained in:
parent
69137dd128
commit
84c6f3afd9
20
main.py
20
main.py
@ -6,6 +6,8 @@ import uuid
|
|||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import requests
|
import requests
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
from mpmath import limit
|
||||||
from requests import RequestException
|
from requests import RequestException
|
||||||
from DB import DBVidcon
|
from DB import DBVidcon
|
||||||
from dateutil import parser as date_parser
|
from dateutil import parser as date_parser
|
||||||
@ -209,9 +211,14 @@ def gettoken():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def get_searchInfo(keyword):
|
def get_searchInfo(keyword, level):
|
||||||
video_list = []
|
video_list = []
|
||||||
for j in range(1, 3):
|
max_page = 2
|
||||||
|
limit = 30
|
||||||
|
if level == 1:
|
||||||
|
max_page = 3
|
||||||
|
limit = 100
|
||||||
|
for j in range(1, max_page):
|
||||||
# 别展开 = = !
|
# 别展开 = = !
|
||||||
data = {
|
data = {
|
||||||
"operationName": "SEARCH_QUERY",
|
"operationName": "SEARCH_QUERY",
|
||||||
@ -224,7 +231,7 @@ def get_searchInfo(keyword):
|
|||||||
"shouldIncludeVideos": False,
|
"shouldIncludeVideos": False,
|
||||||
"shouldIncludeLives": False,
|
"shouldIncludeLives": False,
|
||||||
"page": j,
|
"page": j,
|
||||||
"limit": 100,
|
"limit": limit,
|
||||||
"recaptchaToken": None
|
"recaptchaToken": None
|
||||||
},
|
},
|
||||||
"query": """
|
"query": """
|
||||||
@ -580,6 +587,8 @@ def get_searchInfo(keyword):
|
|||||||
if node['__typename'] != "Video":
|
if node['__typename'] != "Video":
|
||||||
continue
|
continue
|
||||||
creator = node['creator']
|
creator = node['creator']
|
||||||
|
duration = node.get('duration')
|
||||||
|
if duration > 300:
|
||||||
video_tasks.append({
|
video_tasks.append({
|
||||||
"index": calculated_index,
|
"index": calculated_index,
|
||||||
"xid": node.get('xid'),
|
"xid": node.get('xid'),
|
||||||
@ -704,12 +713,11 @@ def integrate_data():
|
|||||||
if not v_list:
|
if not v_list:
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
time.sleep(i * 5)
|
time.sleep(i * 5)
|
||||||
v_list = get_searchInfo(kitem["keyword"])
|
v_list = get_searchInfo(kitem["keyword"], kitem['level'])
|
||||||
if v_list:
|
if v_list:
|
||||||
break
|
break
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
for item in v_list:
|
for item in v_list:
|
||||||
record = {
|
record = {
|
||||||
"keyword": kitem.get("keyword"),
|
"keyword": kitem.get("keyword"),
|
||||||
@ -748,6 +756,7 @@ def integrate_data():
|
|||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
def parse_args() -> argparse.Namespace:
|
||||||
global MACHINE_ID, MAX_WORKERS
|
global MACHINE_ID, MAX_WORKERS
|
||||||
|
|
||||||
@ -778,6 +787,7 @@ def parse_args() -> argparse.Namespace:
|
|||||||
raise ValueError("请指定机器编号")
|
raise ValueError("请指定机器编号")
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parse_args()
|
parse_args()
|
||||||
start_time = datetime.datetime.now()
|
start_time = datetime.datetime.now()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user