From 84c6f3afd9a4b3daceab0240faf926ae851df481 Mon Sep 17 00:00:00 2001 From: Franklin-F Date: Sat, 17 May 2025 20:06:15 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9=E6=90=9C=E7=B4=A2?= =?UTF-8?q?=E4=BF=A1=E6=81=AF=E8=8E=B7=E5=8F=96=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=8F=82=E6=95=B0=E4=BB=A5=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E4=B8=8D=E5=90=8C=E7=BA=A7=E5=88=AB=E7=9A=84=E8=AF=B7=E6=B1=82?= =?UTF-8?q?=E5=92=8C=E8=A7=86=E9=A2=91=E6=97=B6=E9=95=BF=E8=BF=87=E6=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/main.py b/main.py index cea6bc3..39a1ecb 100644 --- a/main.py +++ b/main.py @@ -6,6 +6,8 @@ import uuid import concurrent.futures import requests import datetime + +from mpmath import limit from requests import RequestException from DB import DBVidcon from dateutil import parser as date_parser @@ -109,7 +111,7 @@ def get_proxies(g): proxy_data = response.json()['data'][0] except Exception: print(g) - print("数据返回解析错误!"+ str(response.text)) + print("数据返回解析错误!" + str(response.text)) time.sleep(5) return get_proxies(g) proxies_url = f"http://{proxy_data['username']}:{proxy_data['password']}@{proxy_data['ip']}:{proxy_data['port']}" @@ -188,7 +190,7 @@ def gettoken(): u = uuid.uuid4() uuid_with_dash = str(u) uuid_no_dash = u.hex - traffic_segment = str(random.randint(10**8, 10**9 - 1)) + traffic_segment = str(random.randint(10 ** 8, 10 ** 9 - 1)) data = { 'client_id': 'f1a362d288c1b98099c7', 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5', @@ -209,9 +211,14 @@ def gettoken(): pass -def get_searchInfo(keyword): +def get_searchInfo(keyword, level): video_list = [] - for j in range(1, 3): + max_page = 2 + limit = 30 + if level == 1: + max_page = 3 + limit = 100 + for j in range(1, max_page): # 别展开 = = ! data = { "operationName": "SEARCH_QUERY", @@ -224,7 +231,7 @@ def get_searchInfo(keyword): "shouldIncludeVideos": False, "shouldIncludeLives": False, "page": j, - "limit": 100, + "limit": limit, "recaptchaToken": None }, "query": """ @@ -580,12 +587,14 @@ def get_searchInfo(keyword): if node['__typename'] != "Video": continue creator = node['creator'] - video_tasks.append({ - "index": calculated_index, - "xid": node.get('xid'), - "node": node, - "creator": creator, - }) + duration = node.get('duration') + if duration > 300: + video_tasks.append({ + "index": calculated_index, + "xid": node.get('xid'), + "node": node, + "creator": creator, + }) def safe_fetch(task, max_try=2): attempt = 0 @@ -704,16 +713,15 @@ def integrate_data(): if not v_list: for i in range(3): time.sleep(i * 5) - v_list = get_searchInfo(kitem["keyword"]) + v_list = get_searchInfo(kitem["keyword"], kitem['level']) if v_list: break time.sleep(2) - for item in v_list: record = { "keyword": kitem.get("keyword"), - "v_name" : kitem.get("v_name"), + "v_name": kitem.get("v_name"), "v_id": item.get("v_id"), "v_xid": item.get("v_xid"), "link": item.get("link"), @@ -748,6 +756,7 @@ def integrate_data(): time.sleep(5) break + def parse_args() -> argparse.Namespace: global MACHINE_ID, MAX_WORKERS @@ -778,10 +787,11 @@ def parse_args() -> argparse.Namespace: raise ValueError("请指定机器编号") return args + if __name__ == '__main__': parse_args() start_time = datetime.datetime.now() print(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}") integrate_data() end_time = datetime.datetime.now() - duration = end_time - start_time \ No newline at end of file + duration = end_time - start_time