# DailyMotion/oneget.py
# One-shot fetcher: pulls one keyword job from the DB queue, obtains
# authorized Dailymotion headers through a proxy, and runs a single
# GraphQL search, printing the resulting video xids.
import requests
import uuid
import random
import time
import copy
from threading import Lock
import logging
from DB import DBVidcon
import json
logger = logging.getLogger(__name__)
db = DBVidcon()
proxiesdict = db.get_proxy_agent_dict()
class DMHeaderManager:
_headers_template = {
'Accept': '*/*, */*',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Type': 'application/json, application/json',
'Host': 'graphql.api.dailymotion.com',
'Origin': 'https://www.dailymotion.com',
'Referer': 'https://www.dailymotion.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': 'Mozilla/5.0',
'X-DM-AppInfo-Id': 'com.dailymotion.neon',
'X-DM-AppInfo-Type': 'website',
'X-DM-AppInfo-Version': 'v2025-05-26T13:45:05.666Z',
'X-DM-Neon-SSR': '0',
'X-DM-Preferred-Country': 'tw',
'accept-language': 'zh-CN',
'authorization': '',
'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'x-dm-visit-id': '',
'x-dm-visitor-id': '',
}
_user_agents = [
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
]
def __init__(self, proxies: dict = None):
self._headers_cache = None
self._cache_lock = Lock()
self._proxies = proxies
def get_headers(self, retry: int = 2) -> dict:
for attempt in range(retry + 1):
try:
return self._generate_headers()
except Exception as e:
logger.warning(f"[get_headers] 第 {attempt + 1} 次尝试失败: {e}")
time.sleep(2)
with self._cache_lock:
if self._headers_cache:
logger.info("[get_headers]")
return copy.deepcopy(self._headers_cache)
logger.warning("[get_headers] 基础 headers")
return copy.deepcopy(self._headers_template)
def _generate_headers(self) -> dict:
visitor_id = str(uuid.uuid4())
visit_id = str(int(time.time() * 1000))
traffic_segment = str(random.randint(100_000, 999_999))
ua = random.choice(self._user_agents)
token_headers = {
'Accept': '*/*',
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Content-Type': 'application/x-www-form-urlencoded',
'Origin': 'https://www.dailymotion.com',
'Pragma': 'no-cache',
'Referer': 'https://www.dailymotion.com/',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-site',
'User-Agent': ua,
'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
}
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
'grant_type': 'client_credentials',
'traffic_segment': traffic_segment,
'visitor_id': visitor_id,
}
response = requests.post(
'https://graphql.api.dailymotion.com/oauth/token',
headers=token_headers,
data=data,
proxies=self._proxies,
timeout=10
)
response.raise_for_status()
token = response.json()['access_token']
new_headers = copy.deepcopy(self._headers_template)
new_headers['authorization'] = f'Bearer {token}'
new_headers['x-dm-visit-id'] = visit_id
new_headers['x-dm-visitor-id'] = visitor_id
new_headers['User-Agent'] = ua
with self._cache_lock:
self._headers_cache = copy.deepcopy(new_headers)
return new_headers
kwdata = db.get_web_items()
if not kwdata:
logger.error("没有获取到关键词数据")
exit(1)
kwdata = kwdata[0][1]
rn = kwdata['rn']
proxy_name = proxiesdict.get(rn)
proxies_str = db.get_proxy(proxy_name, '-1')
proxies = {
'http': proxies_str,
'https': proxies_str
}
kw = kwdata['keyword']
print(kw)
print("=" * 30)
dmheader_manager = DMHeaderManager(proxies=proxies)
headers = dmheader_manager.get_headers()
data = {
"operationName": "SEARCH_QUERY",
"variables": {
"query": kw,
"shouldIncludeTopResults": True, # 是否包含热门结果
"shouldIncludeChannels": False, # 是否包含频道
"shouldIncludePlaylists": False, # 是否包含播放列表
"shouldIncludeHashtags": False, # 是否包含标签
"shouldIncludeVideos": False, # 是否包含视频
"shouldIncludeLives": False, # 是否包含直播
"page": 1,
"limit": 20,
"recaptchaToken": None
},
"query": """
fragment VIDEO_BASE_FRAGMENT on Video {
id
xid
title
createdAt
duration
aspectRatio
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
__typename
}
fragment CHANNEL_BASE_FRAG on Channel {
id
xid
name
displayName
accountType
isFollowed
avatar(height: SQUARE_120) {
id
url
__typename
}
followerEngagement {
id
followDate
__typename
}
metrics {
id
engagement {
id
followers {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
fragment PLAYLIST_BASE_FRAG on Collection {
id
xid
name
description
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
metrics {
id
engagement {
id
videos(filter: {visibility: {eq: PUBLIC}}) {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
fragment HASHTAG_BASE_FRAG on Hashtag {
id
xid
name
metrics {
id
engagement {
id
videos {
edges {
node {
id
total
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
__typename
}
fragment LIVE_BASE_FRAGMENT on Live {
id
xid
title
audienceCount
aspectRatio
isOnAir
thumbnail(height: PORTRAIT_240) {
id
url
__typename
}
creator {
id
xid
name
displayName
accountType
avatar(height: SQUARE_60) {
id
url
__typename
}
__typename
}
__typename
}
query SEARCH_QUERY(
$query: String!,
$shouldIncludeTopResults: Boolean!,
$shouldIncludeVideos: Boolean!,
$shouldIncludeChannels: Boolean!,
$shouldIncludePlaylists: Boolean!,
$shouldIncludeHashtags: Boolean!,
$shouldIncludeLives: Boolean!,
$page: Int,
$limit: Int,
$sortByVideos: SearchVideoSort,
$durationMinVideos: Int,
$durationMaxVideos: Int,
$createdAfterVideos: DateTime,
$recaptchaToken: String
) {
search(token: $recaptchaToken) {
id
stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
...VIDEO_BASE_FRAGMENT
...CHANNEL_BASE_FRAG
...PLAYLIST_BASE_FRAG
...HASHTAG_BASE_FRAG
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
videos(
query: $query,
first: $limit,
page: $page,
sort: $sortByVideos,
durationMin: $durationMinVideos,
durationMax: $durationMaxVideos,
createdAfter: $createdAfterVideos
) @include(if: $shouldIncludeVideos) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...VIDEO_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...LIVE_BASE_FRAGMENT
__typename
}
__typename
}
__typename
}
channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...CHANNEL_BASE_FRAG
__typename
}
__typename
}
__typename
}
playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...PLAYLIST_BASE_FRAG
__typename
}
__typename
}
__typename
}
hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
metadata {
id
algorithm {
uuid
__typename
}
__typename
}
pageInfo {
hasNextPage
nextPage
__typename
}
edges {
node {
id
...HASHTAG_BASE_FRAG
__typename
}
__typename
}
__typename
}
__typename
}
}
"""
}
payload = json.dumps(data).encode()
response = requests.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
proxies=proxies)
data = response.json()
edges = data['data']['search']['stories']['edges']
for i, edge in enumerate(edges):
print(i, edge['node']['xid'])