feat: 添加视频信息处理和时间格式化功能

This commit is contained in:
晓丰 2025-07-17 14:21:15 +08:00
parent e9ef87fe62
commit fe96e23cc2

832
oneget.py
View File

@ -1,3 +1,6 @@
import base64
from datetime import datetime
import requests import requests
import uuid import uuid
import random import random
@ -7,11 +10,39 @@ from threading import Lock
import logging import logging
from DB import DBVidcon from DB import DBVidcon
import json import json
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
MACHINE_ID = 3
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
db = DBVidcon() db = DBVidcon()
proxiesdict = db.get_proxy_agent_dict() proxiesdict = db.get_proxy_agent_dict()
def clean_dash_to_zero(val):
    """Normalize a scraped counter value to an int.

    The site renders missing metrics as '-' or an empty string; those
    placeholders (and None) map to 0. Unparseable values are logged and
    also collapse to 0 so downstream arithmetic never sees bad data.
    """
    placeholders = ('-', '', None)
    if val in placeholders:
        return 0
    try:
        result = int(val)
    except (ValueError, TypeError) as e:
        logger.exception(f"[字段异常] val = {val}{str(e)}")
        return 0
    return result
def format_create_time(timestr):
    """Parse an ISO-8601 timestamp into a 'YYYY-MM-DD HH:MM:SS' string.

    Always returns a well-formed datetime string; any parse failure is
    logged and falls back to the Unix epoch string.
    """
    try:
        # BUG FIX: the original called date_parser.isoparse(), but
        # `date_parser` (dateutil) is never imported in this file, so every
        # call raised NameError, was swallowed by the broad except, and
        # returned the epoch fallback. Use the stdlib parser instead;
        # 'Z' is normalized for pre-3.11 fromisoformat compatibility.
        dt = datetime.fromisoformat(timestr.replace("Z", "+00:00"))
        return dt.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        logger.exception(f"[时间格式错误] {timestr}{str(e)}")
        return "1970-01-01 00:00:00"
def format_duration(seconds):
    """Render a duration in seconds as an 'MM:SS' string.

    Non-numeric input (None, '', garbage) falls back to "00:00".
    Durations of an hour or more keep accumulating in the minutes field
    (e.g. 3900 -> "65:00"), matching the original MM:SS convention.
    """
    try:
        seconds = int(seconds)
    except Exception:
        return "00:00"
    # BUG FIX: negative values previously produced nonsense like "-1:55"
    # (floor division); treat them as data errors and clamp to zero.
    if seconds < 0:
        return "00:00"
    return f"{seconds // 60:02}:{seconds % 60:02}"
class DMHeaderManager: class DMHeaderManager:
_headers_template = { _headers_template = {
@ -124,400 +155,471 @@ class DMHeaderManager:
return new_headers return new_headers
kwdata = db.get_web_items()
if not kwdata:
logger.error("没有获取到关键词数据")
exit(1)
class DMVideoInfo:
    """Fetch per-video metadata from the Dailymotion REST API.

    Wraps a requests.Session configured with retry/backoff and optional
    proxies, and enriches a crawl-item dict in place with the video
    fields the pipeline stores.
    """

    def __init__(self, proxies: dict = None, max_retries: int = 3, backoff_factor: float = 0.5):
        self.proxies = proxies
        self.max_retries = max_retries
        self.backoff_factor = backoff_factor
        self.session = self._create_session()

    def _create_session(self):
        """Build a session that retries GETs on connect/read errors and 5xx."""
        session = requests.Session()
        retry = Retry(
            total=self.max_retries,
            connect=self.max_retries,
            read=self.max_retries,
            backoff_factor=self.backoff_factor,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET"]
        )
        adapter = HTTPAdapter(max_retries=retry)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        if self.proxies:
            session.proxies.update(self.proxies)
        return session

    def get_video_info(self, data: dict) -> dict:
        """Populate *data* with metadata for the video data['v_xid'].

        Returns the enriched dict on success, or None when the request
        fails or the response is missing expected fields.
        """
        v_xid = data.get('v_xid')
        url = f'https://api.dailymotion.com/video/{v_xid}'
        params = {
            'fields': 'id,title,created_time,thumbnail_240_url,duration,'
                      'owner.id,owner.screenname,likes_total,views_total,'
                      'owner.avatar_60_url,owner.followers_total,owner.videos_total'
        }
        try:
            resp = self.session.get(url, params=params, timeout=10)
            resp.raise_for_status()
            r_data = resp.json()
            xid = r_data["id"]
            vid = base64.b64encode(f"Video:{xid}".encode('utf-8')).decode('utf-8')
            uxid = r_data["owner.id"]
            uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
            data["v_id"] = vid
            data["v_title"] = r_data["title"]
            # BUG FIX: trailing commas in the original made "link" and
            # "createdtime" 1-tuples instead of strings.
            data["link"] = "https://www.dailymotion.com/video/" + xid
            data["duration"] = r_data["duration"]
            data['createdtime'] = datetime.fromtimestamp(
                r_data.get("created_time")).strftime("%Y-%m-%d %H:%M:%S")
            # BUG FIX: the original had a stray `data['']` (a truncated
            # assignment) that raised KeyError on every success; store the
            # computed owner ids, which were previously built and dropped.
            # NOTE(review): key names assumed from the v_* convention —
            # confirm against the DB writer.
            data["u_id"] = uid
            data["u_xid"] = uxid
            # BUG FIX: the original never returned on success, so callers
            # always received None.
            return data
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            # KeyError/TypeError/ValueError cover malformed API payloads
            # (missing fields, created_time=None) that previously escaped
            # the RequestException-only handler and crashed the worker.
            print(f"[ERROR] 请求失败 vxid={v_xid} : {e}")
            return None
displayName
accountType
avatar(height: SQUARE_60) {
id def main():
url kwdata = db.get_web_items()
__typename if not kwdata:
logger.error("没有获取到关键词数据")
exit(1)
kwdata = kwdata[0][1]
rn = kwdata['rn']
proxy_name = proxiesdict.get(rn)
proxies_str = db.get_proxy(proxy_name, '-1')
proxies = {
'http': proxies_str,
'https': proxies_str
} }
__typename kw = kwdata['keyword']
}
__typename
}
fragment CHANNEL_BASE_FRAG on Channel { dmheader_manager = DMHeaderManager(proxies=proxies)
id
xid headers = dmheader_manager.get_headers()
name for i in range(1, 11):
displayName data = {
accountType "operationName": "SEARCH_QUERY",
isFollowed "variables": {
avatar(height: SQUARE_120) { "query": kw,
id "shouldIncludeTopResults": True, # 是否包含热门结果
url "shouldIncludeChannels": False, # 是否包含频道
__typename "shouldIncludePlaylists": False, # 是否包含播放列表
} "shouldIncludeHashtags": False, # 是否包含标签
followerEngagement { "shouldIncludeVideos": False, # 是否包含视频
id "shouldIncludeLives": False, # 是否包含直播
followDate "page": i,
__typename "limit": 20,
} "recaptchaToken": None
metrics { },
id "query": """
engagement { fragment VIDEO_BASE_FRAGMENT on Video {
id id
followers { xid
edges { title
node { createdAt
duration
aspectRatio
thumbnail(height: PORTRAIT_240) {
id id
total url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename __typename
} }
__typename __typename
} }
__typename
} fragment CHANNEL_BASE_FRAG on Channel {
__typename id
} xid
__typename name
} displayName
__typename accountType
} isFollowed
avatar(height: SQUARE_120) {
fragment PLAYLIST_BASE_FRAG on Collection {
id
xid
name
description
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
metrics {
id
engagement {
id
videos(filter: {visibility: {eq: PUBLIC}}) {
edges {
node {
id id
total url
__typename
}
followerEngagement {
id
followDate
__typename
}
metrics {
id
engagement {
id
followers {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename __typename
} }
__typename __typename
} }
__typename
} fragment PLAYLIST_BASE_FRAG on Collection {
__typename id
} xid
__typename name
} description
__typename thumbnail(height: PORTRAIT_240) {
}
fragment HASHTAG_BASE_FRAG on Hashtag {
id
xid
name
metrics {
id
engagement {
id
videos {
edges {
node {
id id
total url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
metrics {
id
engagement {
id
videos(filter: {visibility: {eq: PUBLIC}}) {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename __typename
} }
__typename __typename
} }
__typename
} fragment HASHTAG_BASE_FRAG on Hashtag {
__typename
}
__typename
}
__typename
}
fragment LIVE_BASE_FRAGMENT on Live {
id
xid
title
audienceCount
aspectRatio
isOnAir
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
__typename
}
query SEARCH_QUERY(
$query: String!,
$shouldIncludeTopResults: Boolean!,
$shouldIncludeVideos: Boolean!,
$shouldIncludeChannels: Boolean!,
$shouldIncludePlaylists: Boolean!,
$shouldIncludeHashtags: Boolean!,
$shouldIncludeLives: Boolean!,
$page: Int,
$limit: Int,
$sortByVideos: SearchVideoSort,
$durationMinVideos: Int,
$durationMaxVideos: Int,
$createdAfterVideos: DateTime,
$recaptchaToken: String
) {
search(token: $recaptchaToken) {
id
stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
...VIDEO_BASE_FRAGMENT
...CHANNEL_BASE_FRAG
...PLAYLIST_BASE_FRAG
...HASHTAG_BASE_FRAG
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
videos(
query: $query,
first: $limit,
page: $page,
sort: $sortByVideos,
durationMin: $durationMinVideos,
durationMax: $durationMaxVideos,
createdAfter: $createdAfterVideos
) @include(if: $shouldIncludeVideos) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id id
...VIDEO_BASE_FRAGMENT xid
name
metrics {
id
engagement {
id
videos {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename __typename
} }
__typename
} fragment LIVE_BASE_FRAGMENT on Live {
__typename
}
lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id id
...LIVE_BASE_FRAGMENT xid
title
audienceCount
aspectRatio
isOnAir
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
__typename __typename
} }
__typename
} query SEARCH_QUERY(
__typename $query: String!,
} $shouldIncludeTopResults: Boolean!,
$shouldIncludeVideos: Boolean!,
channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) { $shouldIncludeChannels: Boolean!,
metadata { $shouldIncludePlaylists: Boolean!,
id $shouldIncludeHashtags: Boolean!,
algorithm { $shouldIncludeLives: Boolean!,
uuid $page: Int,
__typename $limit: Int,
$sortByVideos: SearchVideoSort,
$durationMinVideos: Int,
$durationMaxVideos: Int,
$createdAfterVideos: DateTime,
$recaptchaToken: String
) {
search(token: $recaptchaToken) {
id
stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
...VIDEO_BASE_FRAGMENT
...CHANNEL_BASE_FRAG
...PLAYLIST_BASE_FRAG
...HASHTAG_BASE_FRAG
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
videos(
query: $query,
first: $limit,
page: $page,
sort: $sortByVideos,
durationMin: $durationMinVideos,
durationMax: $durationMaxVideos,
createdAfter: $createdAfterVideos
) @include(if: $shouldIncludeVideos) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...VIDEO_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...CHANNEL_BASE_FRAG
__typename
}
__typename
}
__typename
}
playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...PLAYLIST_BASE_FRAG
__typename
}
__typename
}
__typename
}
hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...HASHTAG_BASE_FRAG
__typename
}
__typename
}
__typename
}
__typename
}
} }
__typename """
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...CHANNEL_BASE_FRAG
__typename
} }
__typename
}
__typename
}
playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) { payload = json.dumps(data).encode()
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...PLAYLIST_BASE_FRAG
__typename
}
__typename
}
__typename
}
hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) { response = requests.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
metadata { proxies=proxies)
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...HASHTAG_BASE_FRAG
__typename
}
__typename
}
__typename
}
__typename data = response.json()
} edges = data['data']['search']['stories']['edges']
} edges_len = len(edges)
""" dm_video_info = DMVideoInfo(proxies=proxies)
} tancks = []
for j, edge in enumerate(edges):
node = edge.get("node", {})
tancks.append({
"keyword": kw,
"v_name": kwdata.get("v_name", ""),
"v_xid": node.get("xid"),
"batch": kwdata.get("batch"),
"rn": kwdata.get("rn"),
"machine_id": MACHINE_ID,
"index": (i - 1) * 20 + j + 1,
"level": 0,
})
payload = json.dumps(data).encode() if edges_len < 20:
break
response = requests.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
proxies=proxies)
data = response.json()
edges = data['data']['search']['stories']['edges']
for i, edge in enumerate(edges):
print(i, edge['node']['xid'])