# DailyMotion/oneget.py
# One-shot fetcher: pulls one keyword job from the DB queue, obtains
# authorized Dailymotion headers through a proxy, and runs a single
# GraphQL search, printing the resulting video xids.
import requests
import uuid
import random
import time
import copy
from threading import Lock
import logging
from DB import DBVidcon
import json
logger = logging.getLogger(__name__)
db = DBVidcon()
proxiesdict = db.get_proxy_agent_dict()
class DMHeaderManager:
_headers_template = {
'Accept': '*/*, */*',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Type': 'application/json, application/json',
'Host': 'graphql.api.dailymotion.com',
'Origin': 'https://www.dailymotion.com',
'Referer': 'https://www.dailymotion.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0',
'X-DM-AppInfo-Id': 'com.dailymotion.neon',
'X-DM-AppInfo-Type': 'website',
'X-DM-AppInfo-Version': 'v2025-05-26T13:45:05.666Z',
'X-DM-Neon-SSR': '0',
'X-DM-Preferred-Country': 'tw',
'accept-language': 'zh-CN',
'authorization': '',
'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'x-dm-visit-id': '',
'x-dm-visitor-id': '',
}
_user_agents = [
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
]
def __init__(self, proxies: dict = None):
self._headers_cache = None
self._cache_lock = Lock()
self._proxies = proxies
def get_headers(self, retry: int = 2) -> dict:
for attempt in range(retry + 1):
try:
return self._generate_headers()
except Exception as e:
logger.warning(f"[get_headers] 第 {attempt + 1} 次尝试失败: {e}")
time.sleep(2)
with self._cache_lock:
if self._headers_cache:
logger.info("[get_headers]")
return copy.deepcopy(self._headers_cache)
logger.warning("[get_headers] 基础 headers")
return copy.deepcopy(self._headers_template)
def _generate_headers(self) -> dict:
visitor_id = str(uuid.uuid4())
visit_id = str(int(time.time() * 1000))
traffic_segment = str(random.randint(100_000, 999_999))
ua = random.choice(self._user_agents)
token_headers = {
'Accept': '*/*',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Type': 'application/x-www-form-urlencoded',
'Origin': 'https://www.dailymotion.com',
'Pragma': 'no-cache',
'Referer': 'https://www.dailymotion.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': ua,
'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
'grant_type': 'client_credentials',
'traffic_segment': traffic_segment,
'visitor_id': visitor_id,
}
response = requests.post(
'https://graphql.api.dailymotion.com/oauth/token',
headers=token_headers,
data=data,
proxies=self._proxies,
timeout=10
)
response.raise_for_status()
token = response.json()['access_token']
new_headers = copy.deepcopy(self._headers_template)
new_headers['authorization'] = f'Bearer {token}'
new_headers['x-dm-visit-id'] = visit_id
new_headers['x-dm-visitor-id'] = visitor_id
new_headers['User-Agent'] = ua
with self._cache_lock:
self._headers_cache = copy.deepcopy(new_headers)
return new_headers
kwdata = db.get_web_items()
if not kwdata:
logger.error("没有获取到关键词数据")
exit(1)
kwdata = kwdata[0][1]
rn = kwdata['rn']
proxy_name = proxiesdict.get(rn)
proxies_str = db.get_proxy(proxy_name, '-1')
proxies = {
'http': proxies_str,
'https': proxies_str
}
kw = kwdata['keyword']
print(kw)
print("=" * 30)
dmheader_manager = DMHeaderManager(proxies=proxies)
headers = dmheader_manager.get_headers()
data = {
"operationName": "SEARCH_QUERY",
"variables": {
"query": kw,
"shouldIncludeTopResults": True, # 是否包含热门结果
"shouldIncludeChannels": False, # 是否包含频道
"shouldIncludePlaylists": False, # 是否包含播放列表
"shouldIncludeHashtags": False, # 是否包含标签
"shouldIncludeVideos": False, # 是否包含视频
"shouldIncludeLives": False, # 是否包含直播
"page": 1,
"limit": 20,
"recaptchaToken": None
},
"query": """
fragment VIDEO_BASE_FRAGMENT on Video {
id
xid
title
createdAt
duration
aspectRatio
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
__typename
}
fragment CHANNEL_BASE_FRAG on Channel {
id
xid
name
displayName
accountType
isFollowed
avatar(height: SQUARE_120) {
id
url
__typename
}
followerEngagement {
id
followDate
__typename
}
metrics {
id
engagement {
id
followers {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
fragment PLAYLIST_BASE_FRAG on Collection {
id
xid
name
description
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
metrics {
id
engagement {
id
videos(filter: {visibility: {eq: PUBLIC}}) {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
fragment HASHTAG_BASE_FRAG on Hashtag {
id
xid
name
metrics {
id
engagement {
id
videos {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
fragment LIVE_BASE_FRAGMENT on Live {
id
xid
title
audienceCount
aspectRatio
isOnAir
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
__typename
}
query SEARCH_QUERY(
$query: String!,
$shouldIncludeTopResults: Boolean!,
$shouldIncludeVideos: Boolean!,
$shouldIncludeChannels: Boolean!,
$shouldIncludePlaylists: Boolean!,
$shouldIncludeHashtags: Boolean!,
$shouldIncludeLives: Boolean!,
$page: Int,
$limit: Int,
$sortByVideos: SearchVideoSort,
$durationMinVideos: Int,
$durationMaxVideos: Int,
$createdAfterVideos: DateTime,
$recaptchaToken: String
) {
search(token: $recaptchaToken) {
id
stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
...VIDEO_BASE_FRAGMENT
...CHANNEL_BASE_FRAG
...PLAYLIST_BASE_FRAG
...HASHTAG_BASE_FRAG
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
videos(
query: $query,
first: $limit,
page: $page,
sort: $sortByVideos,
durationMin: $durationMinVideos,
durationMax: $durationMaxVideos,
createdAfter: $createdAfterVideos
) @include(if: $shouldIncludeVideos) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...VIDEO_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...CHANNEL_BASE_FRAG
__typename
}
__typename
}
__typename
}
playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...PLAYLIST_BASE_FRAG
__typename
}
__typename
}
__typename
}
hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...HASHTAG_BASE_FRAG
__typename
}
__typename
}
__typename
}
__typename
}
}
"""
}
payload = json.dumps(data).encode()
response = requests.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
proxies=proxies)
data = response.json()
edges = data['data']['search']['stories']['edges']
for i, edge in enumerate(edges):
print(i, edge['node']['xid'])