fix: 修改搜索信息获取逻辑,增加参数以支持不同级别的请求和视频时长过滤
This commit is contained in:
parent
69137dd128
commit
84c6f3afd9
40
main.py
40
main.py
@ -6,6 +6,8 @@ import uuid
|
||||
import concurrent.futures
|
||||
import requests
|
||||
import datetime
|
||||
|
||||
from mpmath import limit
|
||||
from requests import RequestException
|
||||
from DB import DBVidcon
|
||||
from dateutil import parser as date_parser
|
||||
@ -109,7 +111,7 @@ def get_proxies(g):
|
||||
proxy_data = response.json()['data'][0]
|
||||
except Exception:
|
||||
print(g)
|
||||
print("数据返回解析错误!"+ str(response.text))
|
||||
print("数据返回解析错误!" + str(response.text))
|
||||
time.sleep(5)
|
||||
return get_proxies(g)
|
||||
proxies_url = f"http://{proxy_data['username']}:{proxy_data['password']}@{proxy_data['ip']}:{proxy_data['port']}"
|
||||
@ -188,7 +190,7 @@ def gettoken():
|
||||
u = uuid.uuid4()
|
||||
uuid_with_dash = str(u)
|
||||
uuid_no_dash = u.hex
|
||||
traffic_segment = str(random.randint(10**8, 10**9 - 1))
|
||||
traffic_segment = str(random.randint(10 ** 8, 10 ** 9 - 1))
|
||||
data = {
|
||||
'client_id': 'f1a362d288c1b98099c7',
|
||||
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
|
||||
@ -209,9 +211,14 @@ def gettoken():
|
||||
pass
|
||||
|
||||
|
||||
def get_searchInfo(keyword):
|
||||
def get_searchInfo(keyword, level):
|
||||
video_list = []
|
||||
for j in range(1, 3):
|
||||
max_page = 2
|
||||
limit = 30
|
||||
if level == 1:
|
||||
max_page = 3
|
||||
limit = 100
|
||||
for j in range(1, max_page):
|
||||
# 别展开 = = !
|
||||
data = {
|
||||
"operationName": "SEARCH_QUERY",
|
||||
@ -224,7 +231,7 @@ def get_searchInfo(keyword):
|
||||
"shouldIncludeVideos": False,
|
||||
"shouldIncludeLives": False,
|
||||
"page": j,
|
||||
"limit": 100,
|
||||
"limit": limit,
|
||||
"recaptchaToken": None
|
||||
},
|
||||
"query": """
|
||||
@ -580,12 +587,14 @@ def get_searchInfo(keyword):
|
||||
if node['__typename'] != "Video":
|
||||
continue
|
||||
creator = node['creator']
|
||||
video_tasks.append({
|
||||
"index": calculated_index,
|
||||
"xid": node.get('xid'),
|
||||
"node": node,
|
||||
"creator": creator,
|
||||
})
|
||||
duration = node.get('duration')
|
||||
if duration > 300:
|
||||
video_tasks.append({
|
||||
"index": calculated_index,
|
||||
"xid": node.get('xid'),
|
||||
"node": node,
|
||||
"creator": creator,
|
||||
})
|
||||
|
||||
def safe_fetch(task, max_try=2):
|
||||
attempt = 0
|
||||
@ -704,16 +713,15 @@ def integrate_data():
|
||||
if not v_list:
|
||||
for i in range(3):
|
||||
time.sleep(i * 5)
|
||||
v_list = get_searchInfo(kitem["keyword"])
|
||||
v_list = get_searchInfo(kitem["keyword"], kitem['level'])
|
||||
if v_list:
|
||||
break
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
for item in v_list:
|
||||
record = {
|
||||
"keyword": kitem.get("keyword"),
|
||||
"v_name" : kitem.get("v_name"),
|
||||
"v_name": kitem.get("v_name"),
|
||||
"v_id": item.get("v_id"),
|
||||
"v_xid": item.get("v_xid"),
|
||||
"link": item.get("link"),
|
||||
@ -748,6 +756,7 @@ def integrate_data():
|
||||
time.sleep(5)
|
||||
break
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
global MACHINE_ID, MAX_WORKERS
|
||||
|
||||
@ -778,10 +787,11 @@ def parse_args() -> argparse.Namespace:
|
||||
raise ValueError("请指定机器编号")
|
||||
return args
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parse_args()
|
||||
start_time = datetime.datetime.now()
|
||||
print(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
integrate_data()
|
||||
end_time = datetime.datetime.now()
|
||||
duration = end_time - start_time
|
||||
duration = end_time - start_time
|
||||
|
Loading…
x
Reference in New Issue
Block a user