649 lines
26 KiB
Python
649 lines
26 KiB
Python
import json
|
|
import random
|
|
import time
|
|
import uuid
|
|
|
|
import pandas as pd
|
|
import requests
|
|
import os
|
|
|
|
from requests import RequestException
|
|
|
|
# Directory that contains this script; used as the anchor for bundled data files.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Keyword workbook loaded by red_keyword_info(); integrate_data() reads the
# '片名' and '搜索词' fields of each row.
kw_path = os.path.join(BASE_DIR, 'data', 'keyword1.xlsx')
# Headers sent with every GraphQL request (search and per-video lookup).
# gettoken() overwrites 'authorization', 'x-dm-visit-id' and 'x-dm-visitor-id'
# in place, so the literal values below for those three keys are only the
# initial contents of the dict.
headers1 = {
    'Accept': '*/*, */*',
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # 'Content-Length': '6237',
    'Content-Type': 'application/json, application/json',
    'Host': 'graphql.api.dailymotion.com',
    'Origin': 'https://www.dailymotion.com',
    'Referer': 'https://www.dailymotion.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    'X-DM-AppInfo-Id': 'com.dailymotion.neon',
    'X-DM-AppInfo-Type': 'website',
    'X-DM-AppInfo-Version': 'v2025-04-28T12:37:52.391Z',
    'X-DM-Neon-SSR': '0',
    'X-DM-Preferred-Country': 'us',
    'accept-language': 'zh-CN',
    # NOTE(review): a bearer token is committed here in source; it is replaced
    # by gettoken() before use — consider shipping an empty placeholder instead.
    'authorization': 'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhaWQiOiJmMWEzNjJkMjg4YzFiOTgwOTljNyIsInJvbCI6ImNhbi1tYW5hZ2UtcGFydG5lcnMtcmVwb3J0cyBjYW4tcmVhZC12aWRlby1zdHJlYW1zIGNhbi1zcG9vZi1jb3VudHJ5IGNhbi1hZG9wdC11c2VycyBjYW4tcmVhZC1jbGFpbS1ydWxlcyBjYW4tbWFuYWdlLWNsYWltLXJ1bGVzIGNhbi1tYW5hZ2UtdXNlci1hbmFseXRpY3MgY2FuLXJlYWQtbXktdmlkZW8tc3RyZWFtcyBjYW4tZG93bmxvYWQtbXktdmlkZW9zIGFjdC1hcyBhbGxzY29wZXMgYWNjb3VudC1jcmVhdG9yIGNhbi1yZWFkLWFwcGxpY2F0aW9ucyIsInNjbyI6InJlYWQgd3JpdGUgZGVsZXRlIGVtYWlsIHVzZXJpbmZvIGZlZWQgbWFuYWdlX3ZpZGVvcyBtYW5hZ2VfY29tbWVudHMgbWFuYWdlX3BsYXlsaXN0cyBtYW5hZ2VfdGlsZXMgbWFuYWdlX3N1YnNjcmlwdGlvbnMgbWFuYWdlX2ZyaWVuZHMgbWFuYWdlX2Zhdm9yaXRlcyBtYW5hZ2VfbGlrZXMgbWFuYWdlX2dyb3VwcyBtYW5hZ2VfcmVjb3JkcyBtYW5hZ2Vfc3VidGl0bGVzIG1hbmFnZV9mZWF0dXJlcyBtYW5hZ2VfaGlzdG9yeSBpZnR0dCByZWFkX2luc2lnaHRzIG1hbmFnZV9jbGFpbV9ydWxlcyBkZWxlZ2F0ZV9hY2NvdW50X21hbmFnZW1lbnQgbWFuYWdlX2FuYWx5dGljcyBtYW5hZ2VfcGxheWVyIG1hbmFnZV9wbGF5ZXJzIG1hbmFnZV91c2VyX3NldHRpbmdzIG1hbmFnZV9jb2xsZWN0aW9ucyBtYW5hZ2VfYXBwX2Nvbm5lY3Rpb25zIG1hbmFnZV9hcHBsaWNhdGlvbnMgbWFuYWdlX2RvbWFpbnMgbWFuYWdlX3BvZGNhc3RzIiwibHRvIjoiZVdGV1JTSkdXRVZjVGg0eEYyRWpWblFlTHdrdUhTVjVPMGdrWGciLCJhaW4iOjEsImFkZyI6MSwiaWF0IjoxNzQ2MjU3NzI1LCJleHAiOjE3NDYyOTM1NjgsImRtdiI6IjEiLCJhdHAiOiJicm93c2VyIiwiYWRhIjoid3d3LmRhaWx5bW90aW9uLmNvbSIsInZpZCI6IjY0NjMzRDAzMDY1RjQxODZBRDBCMDI3Q0Y3OTVFRjBGIiwiZnRzIjo5MTE0MSwiY2FkIjoyLCJjeHAiOjIsImNhdSI6Miwia2lkIjoiQUY4NDlERDczQTU4NjNDRDdEOTdEMEJBQjA3MjI0M0IifQ.bMzShOLIb6datC92qGPTRVCW9eINTYDFwLtqed2P1d4',
    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'x-dm-visit-id': '1745971699160',
    'x-dm-visitor-id': '64633D03065F4186AD0B027CF795EF0F',
}
# proxies = None

# Proxy mapping handed to every request made through post_with_retry().
# NOTE(review): both schemes point at 127.0.0.1:7890 — presumably a local
# development proxy; switch to the commented-out `proxies = None` above to
# connect directly (TODO confirm for the deployment environment).
proxies = {
    "http": 'http://127.0.0.1:7890',
    "https": 'http://127.0.0.1:7890',
}
|
|
|
|
def post_with_retry(url, json_payload=None, data=None, headers=None, proxies=None,
                    retries=3, timeout=10, backoff_factor=1, cooldown=100):
    """
    Send a POST request to ``url``, retrying when the request fails.

    A failure is any ``RequestException``, which includes the HTTP error
    raised by ``raise_for_status()`` for 4xx/5xx responses.

    :param url: request URL
    :param json_payload: object sent as the JSON body; when it is not None it
        takes precedence over ``data``
    :param data: body passed as ``data=`` when ``json_payload`` is None
    :param headers: optional request headers dict
    :param proxies: optional proxies dict
    :param retries: maximum number of attempts in total (must be >= 1)
    :param timeout: per-attempt timeout in seconds
    :param backoff_factor: base of the exponential delay; the n-th failed
        attempt adds ``backoff_factor * 2 ** (n - 1)`` seconds
    :param cooldown: fixed number of seconds added to every wait between
        attempts (the default of 100 matches the pause that used to be
        hard-coded)
    :return: the ``requests.Response`` of the first successful attempt
    :raises ValueError: if ``retries`` is smaller than 1
    :raises RequestException: the error of the last attempt when every
        attempt failed
    """
    # With retries < 1 no request would ever be sent and the caller would get
    # None back; fail loudly instead.
    if retries < 1:
        raise ValueError("retries must be at least 1")

    body_kwargs = {"json": json_payload} if json_payload is not None else {"data": data}

    for attempt in range(1, retries + 1):
        try:
            response = requests.post(
                url, headers=headers, proxies=proxies, timeout=timeout, **body_kwargs
            )
            response.raise_for_status()
            return response
        except RequestException as e:
            print(f"[{attempt}/{retries}] 请求失败: {e}")
            if attempt == retries:
                # Nothing left to wait for: re-raise immediately rather than
                # sleeping through a cooldown first.
                print("已达最大重试次数,抛出异常。")
                raise
            # Report the real total wait (cooldown + exponential backoff).
            sleep_time = cooldown + backoff_factor * (2 ** (attempt - 1))
            print(f"等待 {sleep_time} 秒后重试…")
            time.sleep(sleep_time)
|
|
|
|
|
|
def red_keyword_info():
    """Load the first sheet of the keyword workbook at ``kw_path``.

    Prints the number of rows that were read and returns them as a list of
    dicts, one per row, keyed by column header.
    """
    rows = pd.read_excel(kw_path, sheet_name=0).to_dict(orient='records')
    print(f"共 {len(rows)} 行数据:")
    return rows
|
|
|
|
|
|
def gettoken():
    """Request a client-credentials token and install it in ``headers1``.

    Posts the form to the OAuth token endpoint through post_with_retry(),
    then updates the shared ``headers1`` dict in place: the bearer token, a
    millisecond-timestamp visit id, and the dash-less form of the freshly
    generated visitor UUID. Returns None.
    """
    visitor = uuid.uuid4()

    # NOTE(review): client_id / client_secret are hard-coded in source.
    form = {
        'client_id': 'f1a362d288c1b98099c7',
        'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
        'grant_type': 'client_credentials',
        'traffic_segment': '567786',
        # The form carries the dashed UUID; the header below uses the hex form.
        'visitor_id': str(visitor),
    }
    request_headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.dailymotion.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.dailymotion.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    reply = post_with_retry(
        'https://graphql.api.dailymotion.com/oauth/token',
        headers=request_headers,
        data=form,
        proxies=proxies,
    )

    headers1.update({
        'authorization': "Bearer " + reply.json()['access_token'],
        'x-dm-visit-id': str(int(time.time() * 1000)),
        'x-dm-visitor-id': visitor.hex,
    })
|
|
|
|
|
|
# GraphQL document for the SEARCH_QUERY operation. Kept at module level so the
# request-building code in get_searchInfo() stays readable.
_SEARCH_QUERY = """
fragment VIDEO_BASE_FRAGMENT on Video {
  id
  xid
  title
  createdAt
  duration
  aspectRatio
  thumbnail(height: PORTRAIT_240) {
    id
    url
    __typename
  }
  creator {
    id
    xid
    name
    displayName
    accountType
    avatar(height: SQUARE_60) {
      id
      url
      __typename
    }
    __typename
  }
  __typename
}

fragment CHANNEL_BASE_FRAG on Channel {
  id
  xid
  name
  displayName
  accountType
  isFollowed
  avatar(height: SQUARE_120) {
    id
    url
    __typename
  }
  followerEngagement {
    id
    followDate
    __typename
  }
  metrics {
    id
    engagement {
      id
      followers {
        edges {
          node {
            id
            total
            __typename
          }
          __typename
        }
        __typename
      }
      __typename
    }
    __typename
  }
  __typename
}

fragment PLAYLIST_BASE_FRAG on Collection {
  id
  xid
  name
  description
  thumbnail(height: PORTRAIT_240) {
    id
    url
    __typename
  }
  creator {
    id
    xid
    name
    displayName
    accountType
    avatar(height: SQUARE_60) {
      id
      url
      __typename
    }
    __typename
  }
  metrics {
    id
    engagement {
      id
      videos(filter: {visibility: {eq: PUBLIC}}) {
        edges {
          node {
            id
            total
            __typename
          }
          __typename
        }
        __typename
      }
      __typename
    }
    __typename
  }
  __typename
}

fragment HASHTAG_BASE_FRAG on Hashtag {
  id
  xid
  name
  metrics {
    id
    engagement {
      id
      videos {
        edges {
          node {
            id
            total
            __typename
          }
          __typename
        }
        __typename
      }
      __typename
    }
    __typename
  }
  __typename
}

fragment LIVE_BASE_FRAGMENT on Live {
  id
  xid
  title
  audienceCount
  aspectRatio
  isOnAir
  thumbnail(height: PORTRAIT_240) {
    id
    url
    __typename
  }
  creator {
    id
    xid
    name
    displayName
    accountType
    avatar(height: SQUARE_60) {
      id
      url
      __typename
    }
    __typename
  }
  __typename
}

query SEARCH_QUERY($query: String!, $shouldIncludeTopResults: Boolean!, $shouldIncludeVideos: Boolean!, $shouldIncludeChannels: Boolean!, $shouldIncludePlaylists: Boolean!, $shouldIncludeHashtags: Boolean!, $shouldIncludeLives: Boolean!, $page: Int, $limit: Int, $sortByVideos: SearchVideoSort, $durationMinVideos: Int, $durationMaxVideos: Int, $createdAfterVideos: DateTime, $recaptchaToken: String) {
  search(token: $recaptchaToken) {
    id
    stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
      metadata {
        id
        algorithm {
          uuid
          __typename
        }
        __typename
      }
      pageInfo {
        hasNextPage
        nextPage
        __typename
      }
      edges {
        node {
          ...VIDEO_BASE_FRAGMENT
          ...CHANNEL_BASE_FRAG
          ...PLAYLIST_BASE_FRAG
          ...HASHTAG_BASE_FRAG
          ...LIVE_BASE_FRAGMENT
          __typename
        }
        __typename
      }
      __typename
    }
    videos(
      query: $query
      first: $limit
      page: $page
      sort: $sortByVideos
      durationMin: $durationMinVideos
      durationMax: $durationMaxVideos
      createdAfter: $createdAfterVideos
    ) @include(if: $shouldIncludeVideos) {
      metadata {
        id
        algorithm {
          uuid
          __typename
        }
        __typename
      }
      pageInfo {
        hasNextPage
        nextPage
        __typename
      }
      edges {
        node {
          id
          ...VIDEO_BASE_FRAGMENT
          __typename
        }
        __typename
      }
      __typename
    }
    lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
      metadata {
        id
        algorithm {
          uuid
          __typename
        }
        __typename
      }
      pageInfo {
        hasNextPage
        nextPage
        __typename
      }
      edges {
        node {
          id
          ...LIVE_BASE_FRAGMENT
          __typename
        }
        __typename
      }
      __typename
    }
    channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
      metadata {
        id
        algorithm {
          uuid
          __typename
        }
        __typename
      }
      pageInfo {
        hasNextPage
        nextPage
        __typename
      }
      edges {
        node {
          id
          ...CHANNEL_BASE_FRAG
          __typename
        }
        __typename
      }
      __typename
    }
    playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
      metadata {
        id
        algorithm {
          uuid
          __typename
        }
        __typename
      }
      pageInfo {
        hasNextPage
        nextPage
        __typename
      }
      edges {
        node {
          id
          ...PLAYLIST_BASE_FRAG
          __typename
        }
        __typename
      }
      __typename
    }
    hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
      metadata {
        id
        algorithm {
          uuid
          __typename
        }
        __typename
      }
      pageInfo {
        hasNextPage
        nextPage
        __typename
      }
      edges {
        node {
          id
          ...HASHTAG_BASE_FRAG
          __typename
        }
        __typename
      }
      __typename
    }
    __typename
  }
}
"""


def get_searchInfo(keyword):
    """Collect the top search results ("stories") for ``keyword``.

    Requests pages 1 and 2 of the search with 100 results per page. Only the
    top-results section is enabled in the query variables. Every ``Video``
    node is enriched with channel statistics from get_videoInfo(); every
    ``Channel`` node is recorded as a user entry; other node types are
    ignored.

    :param keyword: search phrase sent as the ``query`` variable
    :return: ``(video_list, user_list)`` — two lists of dicts. ``index`` in
        each dict is the 1-based rank of the result across both pages.
    :raises RequestException: propagated from post_with_retry() when a request
        keeps failing
    """
    video_list = []
    user_list = []
    page_size = 100  # single source for the "limit" variable and the rank offset

    for page in range(1, 3):
        payload = {
            "operationName": "SEARCH_QUERY",
            "variables": {
                "query": keyword,
                "shouldIncludeTopResults": True,
                "shouldIncludeChannels": False,
                "shouldIncludePlaylists": False,
                "shouldIncludeHashtags": False,
                "shouldIncludeVideos": False,
                "shouldIncludeLives": False,
                "page": page,
                "limit": page_size,
                "recaptchaToken": None,
            },
            "query": _SEARCH_QUERY,
        }

        # A fresh token is fetched before every page request.
        gettoken()
        response = post_with_retry(
            "https://graphql.api.dailymotion.com/",
            json_payload=payload,
            headers=headers1,
            proxies=proxies,
        )
        jsondata = response.json()

        try:
            resinfo = jsondata['data']['search']['stories']['edges']
            print('resinfo :', len(resinfo))
        except (KeyError, TypeError):
            # The response did not have the expected shape. Report it and
            # treat the page as empty so the caller's empty-result retry can
            # still run, instead of discarding the error silently.
            print("返回字段解析错误!")
            resinfo = []

        for index, iteminfo in enumerate(resinfo):
            calculated_index = index + 1 + (page - 1) * page_size
            print(calculated_index)

            node = (iteminfo or {}).get('node') or {}
            node_type = node.get('__typename')

            if node_type == "Video":
                xid = node.get('xid')
                if not xid:
                    # Without an xid neither the stats lookup nor the link can
                    # be built; skip the entry rather than crash.
                    continue
                v_info = get_videoInfo(xid)
                time.sleep(3)  # throttle the per-video lookups
                video_list.append({
                    "index": calculated_index,
                    "id": node.get('id'),
                    "xid": xid,
                    "link": "https://www.dailymotion.com/video/" + xid,
                    "title": node.get('title'),
                    "createtime": node.get('createdAt'),
                    "duration": node.get('duration'),
                    # thumbnail may be null in the response
                    "pic": (node.get('thumbnail') or {}).get('url'),
                    "view": v_info['view'],
                    "fans": v_info['fans'],
                    "videos": v_info['videos'],
                })
            elif node_type == "Channel":
                user_list.append({
                    'index': calculated_index,
                    'id': node.get('id'),
                    'xid': node.get('xid'),
                    'name': node.get('name'),
                    # avatar may be null in the response
                    'upic': (node.get('avatar') or {}).get('url'),
                })

        # Pause between page requests.
        time.sleep(15)

    return video_list, user_list
|
|
|
|
|
|
# GraphQL document for the WATCHING_VIDEO operation used by get_videoInfo().
_WATCHING_VIDEO_QUERY = (
    'fragment VIDEO_FRAGMENT on Video {\n id\n xid\n isPublished\n duration\n title\n description\n'
    ' thumbnailx60: thumbnailURL(size: "x60")\n'
    ' thumbnailx120: thumbnailURL(size: "x120")\n'
    ' thumbnailx240: thumbnailURL(size: "x240")\n'
    ' thumbnailx360: thumbnailURL(size: "x360")\n'
    ' thumbnailx480: thumbnailURL(size: "x480")\n'
    ' thumbnailx720: thumbnailURL(size: "x720")\n'
    ' thumbnailx1080: thumbnailURL(size: "x1080")\n'
    ' aspectRatio\n category\n'
    ' categories(filter: {category: {eq: CONTENT_CATEGORY}}) {\n'
    ' edges {\n node { id name slug __typename }\n __typename\n }\n __typename\n }\n'
    ' iab_categories: categories(\n'
    ' filter: {category: {eq: IAB_CATEGORY}, percentage: {gte: 70}}\n'
    ' ) {\n'
    ' edges {\n node { id slug __typename }\n __typename\n }\n __typename\n }\n'
    ' bestAvailableQuality\n createdAt\n'
    ' viewerEngagement {\n id\n liked\n favorited\n __typename\n }\n'
    ' isPrivate\n isWatched\n isCreatedForKids\n isExplicit\n canDisplayAds\n'
    ' videoWidth: width\n videoHeight: height\n status\n'
    ' hashtags {\n edges {\n node { id name __typename }\n __typename\n }\n __typename\n }\n'
    ' stats {\n id\n views { id total __typename }\n __typename\n }\n'
    ' channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n'
    ' logoURLx25: logoURL(size: "x25")\n'
    ' logoURL(size: "x60")\n'
    ' isFollowed\n accountType\n'
    ' coverURLx375: coverURL(size: "x375")\n'
    ' stats {\n id\n views { id total __typename }\n followers { id total __typename }\n'
    ' videos { id total __typename }\n __typename\n }\n'
    ' country { id codeAlpha2 __typename }\n'
    ' organization @skip(if: $isSEO) {\n id\n xid\n owner { id xid __typename }\n __typename\n }\n'
    ' }\n'
    ' language { id codeAlpha2 __typename }\n'
    ' tags {\n edges {\n node { id label __typename }\n __typename\n }\n __typename\n }\n'
    ' moderation { id reviewedAt __typename }\n'
    ' topics(whitelistedOnly: true, first: 3, page: 1) {\n'
    ' edges {\n node {\n id\n xid\n name\n names {\n edges {\n node {\n id\n name\n'
    ' language { id codeAlpha2 __typename }\n __typename\n }\n __typename\n }\n __typename\n }\n'
    ' __typename\n }\n __typename\n }\n __typename\n }\n'
    ' geoblockedCountries {\n id\n allowed\n denied\n __typename\n }\n'
    ' transcript {\n edges {\n node { id timecode text __typename }\n __typename\n }\n __typename\n }\n'
    ' __typename\n}\n\n'
    'fragment LIVE_FRAGMENT on Live {\n id\n xid\n startAt\n endAt\n isPublished\n title\n description\n'
    ' thumbnailx60: thumbnailURL(size: "x60")\n'
    ' thumbnailx120: thumbnailURL(size: "x120")\n'
    ' thumbnailx240: thumbnailURL(size: "x240")\n'
    ' thumbnailx360: thumbnailURL(size: "x360")\n'
    ' thumbnailx480: thumbnailURL(size: "x480")\n'
    ' thumbnailx720: thumbnailURL(size: "x720")\n'
    ' thumbnailx1080: thumbnailURL(size: "x1080")\n'
    ' aspectRatio\n category\n createdAt\n'
    ' viewerEngagement { id liked favorited __typename }\n'
    ' isPrivate\n isExplicit\n isCreatedForKids\n bestAvailableQuality\n canDisplayAds\n'
    ' videoWidth: width\n videoHeight: height\n'
    ' stats { id views { id total __typename } __typename }\n'
    ' channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n'
    ' logoURLx25: logoURL(size: "x25")\n'
    ' logoURL(size: "x60")\n'
    ' isFollowed\n accountType\n'
    ' coverURLx375: coverURL(size: "x375")\n'
    ' stats { id views { id total __typename } followers { id total __typename }'
    ' videos { id total __typename } __typename }\n'
    ' country { id codeAlpha2 __typename }\n'
    ' organization @skip(if: $isSEO) { id xid owner { id xid __typename } __typename }\n'
    ' }\n'
    ' language { id codeAlpha2 __typename }\n'
    ' tags { edges { node { id label __typename } __typename } __typename }\n'
    ' moderation { id reviewedAt __typename }\n'
    ' topics(whitelistedOnly: true, first: 3, page: 1) {\n'
    ' edges { node { id xid name names { edges { node { id name'
    ' language { id codeAlpha2 __typename } __typename } __typename } __typename }'
    ' __typename } __typename }\n'
    ' __typename\n }\n'
    ' geoblockedCountries { id allowed denied __typename }\n'
    ' __typename\n}\n\n'
    'query WATCHING_VIDEO($xid: String!, $isSEO: Boolean!) {\n'
    ' video: media(xid: $xid) {\n __typename\n'
    ' ... on Video { id ...VIDEO_FRAGMENT __typename }\n'
    ' ... on Live { id ...LIVE_FRAGMENT __typename }\n'
    ' }\n}'
)


def get_videoInfo(x_id, r=3):
    """Fetch the statistics of the channel that owns video ``x_id``.

    The values come from ``data.video.channel.stats`` in the response, so
    ``view`` is the channel's total view count, not the single video's.

    :param x_id: Dailymotion video xid
    :param r: number of additional attempts made when the response cannot be
        parsed (so at most ``r + 1`` requests are sent; a negative value is
        treated as 0)
    :return: dict with keys ``view``, ``fans`` and ``videos``; every value is
        ``'-'`` when no attempt produced a parsable response
    :raises RequestException: propagated from post_with_retry() when the
        request itself keeps failing
    """
    payload = {
        "operationName": "WATCHING_VIDEO",
        "variables": {
            "xid": x_id,
            "isSEO": False,
        },
        "query": _WATCHING_VIDEO_QUERY,
    }
    url = 'https://graphql.api.dailymotion.com/'

    for _ in range(max(r, 0) + 1):
        response = post_with_retry(
            url,
            json_payload=payload,
            headers=headers1,
            proxies=proxies,
        )
        jsondata = response.json()
        try:
            stats = jsondata['data']['video']['channel']['stats']
            # The totals are read inside the guarded section as well: a null
            # views/followers/videos object must trigger a retry, not a crash.
            return {
                "view": stats['views']['total'],
                "fans": stats['followers']['total'],
                "videos": stats['videos']['total'],
            }
        except (KeyError, TypeError):
            continue

    # Every attempt returned an unusable payload: hand back placeholders.
    return {
        "view": '-',
        "fans": '-',
        "videos": '-',
    }
|
|
|
|
|
|
def integrate_data():
    """Run the whole export: one Excel workbook per keyword row.

    For every row returned by red_keyword_info() the function refreshes the
    token, runs get_searchInfo() on the row's '搜索词' value (retrying up to
    three more times while both result lists are empty), and writes
    ``out_put_CNTW/<片名>_<搜索词>.xlsx`` with two sheets: "视频信息" for
    videos and "用户信息" for channels. The output directory is created when
    it does not exist. Returns None.
    """
    # (output column, key in the dicts produced by get_searchInfo)
    video_fields = (
        ("ID", "id"),
        ("xid", "xid"),
        ("连接地址", "link"),
        ("标题", "title"),
        ("时长 (秒)", "duration"),
        ("关注数", "fans"),
        ("视频数", "videos"),
        ("浏览数", "view"),
        ("添加时间", "createtime"),
        ("封面图片", "pic"),
        ("Index", "index"),
    )
    user_fields = (
        ("ID", "id"),
        ("xid", "xid"),
        ("名称", "name"),
        ("头像", "upic"),
        ("Index", "index"),
    )
    output_dir = "out_put_CNTW"

    def build_frame(items, fields, film_name, key_word):
        # One row per result, prefixed with the film name and search phrase.
        # Passing `columns` keeps the header row even when `items` is empty.
        columns = ["片名", "搜索词"] + [column for column, _ in fields]
        rows = [
            {
                "片名": film_name,
                "搜索词": key_word,
                **{column: item.get(key) for column, key in fields},
            }
            for item in items
        ]
        return pd.DataFrame(rows, columns=columns)

    for key_word_item in red_keyword_info():
        gettoken()
        film_name = key_word_item['片名']
        key_word = key_word_item['搜索词']
        print(key_word)

        v_list, u_list = get_searchInfo(key_word)
        if not v_list and not u_list:
            for retry in range(3):
                time.sleep(retry * 5)  # 0 s, 5 s, 10 s
                v_list, u_list = get_searchInfo(key_word)
                # Any non-empty result ends the retry; a single hit counts.
                if v_list or u_list:
                    print(len(v_list), len(u_list))
                    break
        time.sleep(2)

        df_video = build_frame(v_list, video_fields, film_name, key_word)
        df_user = build_frame(u_list, user_fields, film_name, key_word)

        # Create the target directory up front so the scraped data is not lost
        # to a missing-folder error at write time.
        os.makedirs(output_dir, exist_ok=True)
        output_path = "{}/{}_{}.xlsx".format(output_dir, film_name, key_word)

        with pd.ExcelWriter(output_path, engine="openpyxl") as w:
            df_video.to_excel(w, sheet_name="视频信息", index=False)
            df_user.to_excel(w, sheet_name="用户信息", index=False)
|
|
|
|
|
|
# Script entry point: run the full pipeline (keyword sheet -> search -> Excel
# export). The commented-out calls are leftovers for exercising single steps.
if __name__ == '__main__':
    # gettoken()
    integrate_data()
    # print(get_searchInfo('Running Man'))
|