From 84c6f3afd9a4b3daceab0240faf926ae851df481 Mon Sep 17 00:00:00 2001
From: Franklin-F <dewujie64@gmail.com>
Date: Sat, 17 May 2025 20:06:15 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E6=94=B9=E6=90=9C=E7=B4=A2?=
 =?UTF-8?q?=E4=BF=A1=E6=81=AF=E8=8E=B7=E5=8F=96=E9=80=BB=E8=BE=91=EF=BC=8C?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=8F=82=E6=95=B0=E4=BB=A5=E6=94=AF=E6=8C=81?=
 =?UTF-8?q?=E4=B8=8D=E5=90=8C=E7=BA=A7=E5=88=AB=E7=9A=84=E8=AF=B7=E6=B1=82?=
 =?UTF-8?q?=E5=92=8C=E8=A7=86=E9=A2=91=E6=97=B6=E9=95=BF=E8=BF=87=E6=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/main.py b/main.py
index cea6bc3..39a1ecb 100644
--- a/main.py
+++ b/main.py
@@ -6,6 +6,8 @@ import uuid
 import concurrent.futures
 import requests
 import datetime
+
+from mpmath import limit
 from requests import RequestException
 from DB import DBVidcon
 from dateutil import parser as date_parser
@@ -109,7 +111,7 @@ def get_proxies(g):
         proxy_data = response.json()['data'][0]
     except Exception:
         print(g)
-        print("数据返回解析错误!"+ str(response.text))
+        print("数据返回解析错误!" + str(response.text))
         time.sleep(5)
         return get_proxies(g)
     proxies_url = f"http://{proxy_data['username']}:{proxy_data['password']}@{proxy_data['ip']}:{proxy_data['port']}"
@@ -188,7 +190,7 @@ def gettoken():
     u = uuid.uuid4()
     uuid_with_dash = str(u)
     uuid_no_dash = u.hex
-    traffic_segment = str(random.randint(10**8, 10**9 - 1))
+    traffic_segment = str(random.randint(10 ** 8, 10 ** 9 - 1))
     data = {
         'client_id': 'f1a362d288c1b98099c7',
         'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
@@ -209,9 +211,14 @@ def gettoken():
         pass
 
 
-def get_searchInfo(keyword):
+def get_searchInfo(keyword, level):
     video_list = []
-    for j in range(1, 3):
+    max_page = 2
+    limit = 30
+    if level == 1:
+        max_page = 3
+        limit = 100
+    for j in range(1, max_page):
         # 别展开 = = !
         data = {
             "operationName": "SEARCH_QUERY",
@@ -224,7 +231,7 @@ def get_searchInfo(keyword):
                 "shouldIncludeVideos": False,
                 "shouldIncludeLives": False,
                 "page": j,
-                "limit": 100,
+                "limit": limit,
                 "recaptchaToken": None
             },
             "query": """
@@ -580,12 +587,14 @@ def get_searchInfo(keyword):
             if node['__typename'] != "Video":
                 continue
             creator = node['creator']
-            video_tasks.append({
-                "index": calculated_index,
-                "xid": node.get('xid'),
-                "node": node,
-                "creator": creator,
-            })
+            duration = node.get('duration') or 0  # None/missing -> 0 (skipped); avoids `None > 300` TypeError
+            if duration > 300:
+                video_tasks.append({
+                    "index": calculated_index,
+                    "xid": node.get('xid'),
+                    "node": node,
+                    "creator": creator,
+                })
 
         def safe_fetch(task, max_try=2):
             attempt = 0
@@ -704,16 +713,15 @@ def integrate_data():
                     if not v_list:
                         for i in range(3):
                             time.sleep(i * 5)
-                            v_list = get_searchInfo(kitem["keyword"])
+                            v_list = get_searchInfo(kitem["keyword"], kitem['level'])
                             if v_list:
                                 break
                             time.sleep(2)
 
-
                     for item in v_list:
                         record = {
                             "keyword": kitem.get("keyword"),
-                            "v_name" : kitem.get("v_name"),
+                            "v_name": kitem.get("v_name"),
                             "v_id": item.get("v_id"),
                             "v_xid": item.get("v_xid"),
                             "link": item.get("link"),
@@ -748,6 +756,7 @@ def integrate_data():
                     time.sleep(5)
                     break
 
+
 def parse_args() -> argparse.Namespace:
     global MACHINE_ID, MAX_WORKERS
 
@@ -778,10 +787,11 @@ def parse_args() -> argparse.Namespace:
         raise ValueError("请指定机器编号")
     return args
 
+
 if __name__ == '__main__':
     parse_args()
     start_time = datetime.datetime.now()
     print(f"开始时间：{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
     integrate_data()
     end_time = datetime.datetime.now()
-    duration = end_time - start_time
\ No newline at end of file
+    duration = end_time - start_time