feat: 优化 DB.py 和 dump_keyword_title.py,增强数据处理和日志记录功能
This commit is contained in:
parent
217d8c7ed7
commit
42b7b8ad79
203
flasl_app.py
Normal file
203
flasl_app.py
Normal file
@ -0,0 +1,203 @@
|
||||
import copy
|
||||
import time
|
||||
import uuid
|
||||
from random import random
|
||||
from threading import Lock
|
||||
|
||||
import requests
|
||||
from flask import Flask, request, jsonify
|
||||
from requests import RequestException
|
||||
from DB import DBVidcon
|
||||
|
||||
# Shared DBVidcon instance; the functions below call db.get_proxy(...) on it
# to look up the proxy string for each outgoing request.
db = DBVidcon()
# Flask application object that the /submit route is registered on.
app = Flask(__name__)
|
||||
# Template of request headers sent to the Dailymotion GraphQL endpoint.
# gettoken() deep-copies this dict and overwrites 'authorization',
# 'x-dm-visit-id' and 'x-dm-visitor-id' with fresh values, so the values for
# those three keys below are only used when the template is returned as-is.
# NOTE(review): the bearer token below is hard-coded in source; it is
# presumably expired and should probably not be committed -- confirm.
headers1 = {
    'Accept': '*/*, */*',
    # 'Accept-Encoding': 'gzip, deflate, br',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # 'Content-Length': '6237',
    'Content-Type': 'application/json, application/json',
    'Host': 'graphql.api.dailymotion.com',
    'Origin': 'https://www.dailymotion.com',
    'Referer': 'https://www.dailymotion.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    'X-DM-AppInfo-Id': 'com.dailymotion.neon',
    'X-DM-AppInfo-Type': 'website',
    'X-DM-AppInfo-Version': 'v2025-04-28T12:37:52.391Z',
    'X-DM-Neon-SSR': '0',
    'X-DM-Preferred-Country': 'us',
    'accept-language': 'zh-CN',
    'authorization': 'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhaWQiOiJmMWEzNjJkMjg4YzFiOTgwOTljNyIsInJvbCI6ImNhbi1tYW5hZ2UtcGFydG5lcnMtcmVwb3J0cyBjYW4tcmVhZC12aWRlby1zdHJlYW1zIGNhbi1zcG9vZi1jb3VudHJ5IGNhbi1hZG9wdC11c2VycyBjYW4tcmVhZC1jbGFpbS1ydWxlcyBjYW4tbWFuYWdlLWNsYWltLXJ1bGVzIGNhbi1tYW5hZ2UtdXNlci1hbmFseXRpY3MgY2FuLXJlYWQtbXktdmlkZW8tc3RyZWFtcyBjYW4tZG93bmxvYWQtbXktdmlkZW9zIGFjdC1hcyBhbGxzY29wZXMgYWNjb3VudC1jcmVhdG9yIGNhbi1yZWFkLWFwcGxpY2F0aW9ucyIsInNjbyI6InJlYWQgd3JpdGUgZGVsZXRlIGVtYWlsIHVzZXJpbmZvIGZlZWQgbWFuYWdlX3ZpZGVvcyBtYW5hZ2VfY29tbWVudHMgbWFuYWdlX3BsYXlsaXN0cyBtYW5hZ2VfdGlsZXMgbWFuYWdlX3N1YnNjcmlwdGlvbnMgbWFuYWdlX2ZyaWVuZHMgbWFuYWdlX2Zhdm9yaXRlcyBtYW5hZ2VfbGlrZXMgbWFuYWdlX2dyb3VwcyBtYW5hZ2VfcmVjb3JkcyBtYW5hZ2Vfc3VidGl0bGVzIG1hbmFnZV9mZWF0dXJlcyBtYW5hZ2VfaGlzdG9yeSBpZnR0dCByZWFkX2luc2lnaHRzIG1hbmFnZV9jbGFpbV9ydWxlcyBkZWxlZ2F0ZV9hY2NvdW50X21hbmFnZW1lbnQgbWFuYWdlX2FuYWx5dGljcyBtYW5hZ2VfcGxheWVyIG1hbmFnZV9wbGF5ZXJzIG1hbmFnZV91c2VyX3NldHRpbmdzIG1hbmFnZV9jb2xsZWN0aW9ucyBtYW5hZ2VfYXBwX2Nvbm5lY3Rpb25zIG1hbmFnZV9hcHBsaWNhdGlvbnMgbWFuYWdlX2RvbWFpbnMgbWFuYWdlX3BvZGNhc3RzIiwibHRvIjoiZVdGV1JTSkdXRVZjVGg0eEYyRWpWblFlTHdrdUhTVjVPMGdrWGciLCJhaW4iOjEsImFkZyI6MSwiaWF0IjoxNzQ2MjU3NzI1LCJleHAiOjE3NDYyOTM1NjgsImRtdiI6IjEiLCJhdHAiOiJicm93c2VyIiwiYWRhIjoid3d3LmRhaWx5bW90aW9uLmNvbSIsInZpZCI6IjY0NjMzRDAzMDY1RjQxODZBRDBCMDI3Q0Y3OTVFRjBGIiwiZnRzIjo5MTE0MSwiY2FkIjoyLCJjeHAiOjIsImNhdSI6Miwia2lkIjoiQUY4NDlERDczQTU4NjNDRDdEOTdEMEJBQjA3MjI0M0IifQ.bMzShOLIb6datC92qGPTRVCW9eINTYDFwLtqed2P1d4',
    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'x-dm-visit-id': '1745971699160',
    'x-dm-visitor-id': '64633D03065F4186AD0B027CF795EF0F',
}
|
||||
# Most recent headers successfully built by gettoken(); None until the first
# successful token fetch.
_headers_cache = None
# Held by gettoken() around every read and write of _headers_cache.
_cache_lock = Lock()
|
||||
|
||||
|
||||
def post_with_retry(url, proxy_name, json_payload=None, data=None, headers=None,
                    retries=5, timeout=20, backoff_factor=2, verbose=True):
    """POST to ``url`` through a proxy, retrying on failure.

    A proxy string is looked up via ``db.get_proxy(proxy_name)`` before every
    attempt. The token is refreshed at most once per call: either on the
    first HTTP 401 or on the first ``RequestException``. The headers returned
    by ``gettoken`` replace ``headers`` for the remaining attempts. Later
    failures wait ``backoff_factor * 2 ** (attempt - 1)`` seconds before the
    next attempt.

    Args:
        url: Target URL.
        proxy_name: Key passed to ``db.get_proxy`` and to ``gettoken``.
        json_payload: Forwarded to ``requests.post`` as ``json``.
        data: Forwarded to ``requests.post`` as ``data``.
        headers: Request headers for the first attempt.
        retries: Maximum number of attempts (a token refresh consumes one).
        timeout: Per-request timeout in seconds.
        backoff_factor: Base of the exponential wait between attempts.
        verbose: When true, progress is printed to stdout.

    Returns:
        The successful ``requests.Response``, or ``None`` when every attempt
        failed.
    """
    token_refreshed = False
    for attempt in range(1, retries + 1):
        try:
            proxy_str = db.get_proxy(proxy_name)
            proxies = {"http": proxy_str, "https": proxy_str}

            resp = requests.post(
                url,
                json=json_payload,
                data=data,
                headers=headers,
                proxies=proxies,
                timeout=timeout,
            )
            if resp.status_code == 401 and not token_refreshed:
                if verbose:
                    print("[post_with_retry] 收到 401,刷新 token 后重试")
                # Keep the refreshed headers; otherwise the retry would send
                # the same rejected token again.
                headers = gettoken(proxy_name)
                token_refreshed = True
                continue

            resp.raise_for_status()
            return resp

        except RequestException as e:
            if verbose:
                print(f"[{attempt}/{retries}] 请求失败: {e}")
            if not token_refreshed:
                if verbose:
                    print("[post_with_retry] 刷新 token 后再试")
                # Actually perform the refresh that the log line announces.
                headers = gettoken(proxy_name)
                token_refreshed = True
                continue
            if attempt == retries:
                if verbose:
                    print(f"[post_with_retry] 最终失败:{url}")
                return None

            sleep_time = backoff_factor * (2 ** (attempt - 1))
            if verbose:
                print(f"[post_with_retry] 等待 {sleep_time}s 后重试…")
            time.sleep(sleep_time)

    # Reached when the last attempt was consumed by a token refresh.
    if verbose:
        print(f"[post_with_retry] 最终失败:{url}")
    return None
|
||||
|
||||
|
||||
def gettoken(proxy, r=2):
    """Fetch an OAuth access token and build request headers around it.

    Posts a ``client_credentials`` grant to the Dailymotion token endpoint
    through the proxy returned by ``db.get_proxy(proxy)``. On success a deep
    copy of ``headers1`` is returned with ``authorization``,
    ``x-dm-visit-id`` and ``x-dm-visitor-id`` replaced, and the same dict is
    stored in ``_headers_cache``.

    Args:
        proxy: Key passed to ``db.get_proxy``.
        r: Number of extra attempts after the first one fails; 5 seconds are
            slept between attempts.

    Returns:
        A headers dict. When every attempt fails this is a copy of the last
        cached headers if there are any, otherwise a copy of the unmodified
        ``headers1`` template.
    """
    global _headers_cache
    # The module does ``from random import random``, which binds the
    # *function*; ``random.randint`` on it raises AttributeError. Import the
    # needed function locally instead.
    from random import randint

    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.dailymotion.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.dailymotion.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    url = 'https://graphql.api.dailymotion.com/oauth/token'

    attempts = max(r, 0) + 1
    for attempt in range(attempts):
        # A fresh visitor identity is generated for every attempt.
        u = uuid.uuid4()
        uuid_with_dash = str(u)
        uuid_no_dash = u.hex
        traffic_segment = str(randint(100_000, 999_999))
        # NOTE(review): client_id / client_secret are hard-coded in source.
        data = {
            'client_id': 'f1a362d288c1b98099c7',
            'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
            'grant_type': 'client_credentials',
            'traffic_segment': traffic_segment,
            'visitor_id': uuid_with_dash,
        }
        try:
            proxy_str = db.get_proxy(proxy)
            response = requests.post(
                url,
                headers=headers,
                data=data,
                proxies={"http": proxy_str, "https": proxy_str},
                timeout=20,  # never hang forever on a dead proxy
            )
            token = response.json()['access_token']
            copy_headers = copy.deepcopy(headers1)
            copy_headers['authorization'] = "Bearer " + token
            copy_headers['x-dm-visit-id'] = str(int(time.time() * 1000))
            copy_headers['x-dm-visitor-id'] = uuid_no_dash
            with _cache_lock:
                _headers_cache = copy_headers
            return copy_headers
        except Exception as e:
            # Best-effort boundary: any failure falls through to a retry or
            # to the cached/template fallback below.
            print("[gettoken] 失败:", e)
            if attempt < attempts - 1:
                time.sleep(5)

    with _cache_lock:
        if _headers_cache:
            print("[gettoken] 用缓存 headers 兜底")
            return copy.deepcopy(_headers_cache)
    # Nothing cached: return the unmodified template, i.e. without a freshly
    # issued token.
    return copy.deepcopy(headers1)
|
||||
|
||||
|
||||
def get_videoInfo(x_id, proxy_name, headers, r=3):
    """Fetch channel statistics for the video identified by ``x_id``.

    Sends the ``WATCHING_VIDEO`` GraphQL query via ``post_with_retry`` and
    reads ``data.video.channel.stats`` from the JSON response.

    Args:
        x_id: Video xid, sent as the ``xid`` query variable.
        proxy_name: Proxy key forwarded to ``post_with_retry``.
        headers: Request headers forwarded to ``post_with_retry``.
        r: Number of extra attempts after the first one fails.

    Returns:
        A dict with keys ``view``, ``fans`` and ``videos``. Each value is the
        corresponding ``total`` from the channel stats, or ``'-'`` when no
        attempt produced a usable response.
    """
    query = (
        "fragment VIDEO_FRAGMENT on Video {\n id\n xid\n isPublished\n duration\n title\n description\n thumbnailx60: thumbnailURL(size: \"x60\")\n thumbnailx120: thumbnailURL(size: \"x120\")\n thumbnailx240: thumbnailURL(size: \"x240\")\n thumbnailx360: thumbnailURL(size: \"x360\")\n thumbnailx480: thumbnailURL(size: \"x480\")\n thumbnailx720: thumbnailURL(size: \"x720\")\n thumbnailx1080: thumbnailURL(size: \"x1080\")\n aspectRatio\n category\n categories(filter: {category: {eq: CONTENT_CATEGORY}}) {\n edges {\n node { id name slug __typename }\n __typename\n }\n __typename\n }\n iab_categories: categories(\n filter: {category: {eq: IAB_CATEGORY}, percentage: {gte: 70}}\n ) {\n edges {\n node { id slug __typename }\n __typename\n }\n __typename\n }\n bestAvailableQuality\n createdAt\n viewerEngagement {\n id\n liked\n favorited\n __typename\n }\n isPrivate\n isWatched\n isCreatedForKids\n isExplicit\n canDisplayAds\n videoWidth: width\n videoHeight: height\n status\n hashtags {\n edges {\n node { id name __typename }\n __typename\n }\n __typename\n }\n stats {\n id\n views { id total __typename }\n __typename\n }\n channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n logoURLx25: logoURL(size: \"x25\")\n logoURL(size: \"x60\")\n isFollowed\n accountType\n coverURLx375: coverURL(size: \"x375\")\n stats {\n id\n views { id total __typename }\n followers { id total __typename }\n videos { id total __typename }\n __typename\n }\n country { id codeAlpha2 __typename }\n organization @skip(if: $isSEO) {\n id\n xid\n owner { id xid __typename }\n __typename\n }\n }\n language { id codeAlpha2 __typename }\n tags {\n edges {\n node { id label __typename }\n __typename\n }\n __typename\n }\n moderation { id reviewedAt __typename }\n topics(whitelistedOnly: true, first: 3, page: 1) {\n edges {\n node {\n id\n xid\n name\n names {\n edges {\n node {\n id\n name\n language { id codeAlpha2 __typename }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n geoblockedCountries {\n id\n allowed\n denied\n __typename\n }\n transcript {\n edges {\n node { id timecode text __typename }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
        "fragment LIVE_FRAGMENT on Live {\n id\n xid\n startAt\n endAt\n isPublished\n title\n description\n thumbnailx60: thumbnailURL(size: \"x60\")\n thumbnailx120: thumbnailURL(size: \"x120\")\n thumbnailx240: thumbnailURL(size: \"x240\")\n thumbnailx360: thumbnailURL(size: \"x360\")\n thumbnailx480: thumbnailURL(size: \"x480\")\n thumbnailx720: thumbnailURL(size: \"x720\")\n thumbnailx1080: thumbnailURL(size: \"x1080\")\n aspectRatio\n category\n createdAt\n viewerEngagement { id liked favorited __typename }\n isPrivate\n isExplicit\n isCreatedForKids\n bestAvailableQuality\n canDisplayAds\n videoWidth: width\n videoHeight: height\n stats { id views { id total __typename } __typename }\n channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n logoURLx25: logoURL(size: \"x25\")\n logoURL(size: \"x60\")\n isFollowed\n accountType\n coverURLx375: coverURL(size: \"x375\")\n stats { id views { id total __typename } followers { id total __typename } videos { id total __typename } __typename }\n country { id codeAlpha2 __typename }\n organization @skip(if: $isSEO) { id xid owner { id xid __typename } __typename }\n }\n language { id codeAlpha2 __typename }\n tags { edges { node { id label __typename } __typename } __typename }\n moderation { id reviewedAt __typename }\n topics(whitelistedOnly: true, first: 3, page: 1) {\n edges { node { id xid name names { edges { node { id name language { id codeAlpha2 __typename } __typename } __typename } __typename } __typename } __typename }\n __typename\n }\n geoblockedCountries { id allowed denied __typename }\n __typename\n}\n\n"
        "query WATCHING_VIDEO($xid: String!, $isSEO: Boolean!) {\n video: media(xid: $xid) {\n __typename\n ... on Video { id ...VIDEO_FRAGMENT __typename }\n ... on Live { id ...LIVE_FRAGMENT __typename }\n }\n}"
    )
    payload = {
        "operationName": "WATCHING_VIDEO",
        "variables": {
            "xid": x_id,
            "isSEO": False
        },
        "query": query
    }
    url = 'https://graphql.api.dailymotion.com/'

    # One initial attempt plus ``r`` retries. The original recursive retry
    # dropped ``proxy_name`` and ``headers`` and therefore raised TypeError.
    for _ in range(max(r, 0) + 1):
        response = post_with_retry(
            url,
            json_payload=payload,
            headers=headers,
            proxy_name=proxy_name,
        )
        if response is None:
            # post_with_retry exhausted its own retries.
            continue
        try:
            v_info = response.json()['data']['video']['channel']['stats']
            return {
                "view": v_info['views']['total'],
                "fans": v_info['followers']['total'],
                "videos": v_info['videos']['total'],
            }
        except (KeyError, TypeError, ValueError):
            # Malformed JSON, or a missing/null level in the response tree.
            continue

    return {
        "view": '-',
        "fans": '-',
        "videos": '-',
    }
|
||||
|
||||
|
||||
@app.route('/submit', methods=['POST'])
def submit():
    """Handle ``POST /submit``: look up channel stats for one video.

    Expects a JSON object with the keys ``v_xid`` (passed to
    ``get_videoInfo`` as the video xid) and ``rn`` (passed to ``gettoken``
    and ``get_videoInfo`` as the proxy key).

    Returns:
        ``{"results": <get_videoInfo result>}`` with status 200, or an
        ``{"error": ...}`` object with status 400 when the body is not a JSON
        object or a required key is missing.
    """
    data = request.get_json(force=True)
    # Reject malformed payloads explicitly instead of failing with a 500
    # from a KeyError/TypeError further down.
    if not isinstance(data, dict):
        return jsonify({"error": "Unsupported JSON format"}), 400
    missing = [key for key in ('v_xid', 'rn') if key not in data]
    if missing:
        return jsonify({
            "error": "Missing required field(s): " + ", ".join(missing)
        }), 400

    v_xid = data['v_xid']
    rn = data['rn']
    headers = gettoken(rn)
    results = get_videoInfo(v_xid, rn, headers)
    return jsonify({
        "results": results
    }), 200
|
||||
|
||||
|
||||
if __name__ == '__main__':
    import os

    # The server binds to every interface, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution from the
    # browser. Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
|
Loading…
x
Reference in New Issue
Block a user