fix: 添加重复视频标识并优化死锁重试机制

This commit is contained in:
晓丰 2025-06-03 11:12:05 +08:00
parent dac24f1400
commit cfb6151234

View File

@ -1,10 +1,63 @@
import time
import requests
import json
import redis
import requests
import urllib3
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from typing import Optional, Dict, Any, Union
class HttpClient:
    """Small wrapper around a ``requests.Session`` with a retrying adapter.

    The session is exposed as ``self.session`` so callers can read state such
    as the cookie jar after issuing requests.
    """

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Create the session and mount one retrying adapter for both schemes.

        Args:
            max_retries: Forwarded to ``Retry`` as ``total``.
            backoff_factor: Forwarded to ``Retry`` as ``backoff_factor``.
        """
        self.session = requests.Session()
        # Retry policy handed to the transport adapter; the status codes below
        # are passed to urllib3 as ``status_forcelist``.
        retry_policy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[500, 502, 503, 504, 429]
        )
        retrying_adapter = HTTPAdapter(max_retries=retry_policy)
        for scheme in ("http://", "https://"):
            self.session.mount(scheme, retrying_adapter)

    def request(self,
                method: str,
                url: str,
                headers: Optional[Dict] = None,
                params: Optional[Dict] = None,
                data: Optional[Union[Dict, str]] = None,
                cookies: Optional[Dict] = None,
                allow_redirects: bool = True,
                timeout: int = 30,
                **kwargs) -> requests.Response:
        """Send a request through the shared session and return the response.

        Args:
            method: HTTP method name.
            url: Target URL.
            headers: Optional request headers.
            params: Optional query-string parameters.
            data: Optional request body (form dict or raw string).
            cookies: Optional cookies sent with this request.
            allow_redirects: Whether redirects are followed.
            timeout: Timeout in seconds passed to ``requests``.
            **kwargs: Extra keyword arguments forwarded to
                ``Session.request``.

        Returns:
            The ``requests.Response`` for a successful (non-error) status.

        Raises:
            requests.exceptions.RequestException: On transport failures or
                when ``raise_for_status()`` rejects the response. The error is
                printed before being re-raised.
        """
        try:
            reply = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                data=data,
                cookies=cookies,
                allow_redirects=allow_redirects,
                timeout=timeout,
                **kwargs
            )
            # Turn 4xx/5xx responses into exceptions so callers never receive
            # an error response object.
            reply.raise_for_status()
        except requests.exceptions.RequestException as exc:
            print(f"请求失败: {url}, 错误: {str(exc)}")
            raise
        return reply

    def get(self, url: str, **kwargs) -> requests.Response:
        """Issue a GET request via :meth:`request`."""
        return self.request("GET", url, **kwargs)

    def post(self, url: str, **kwargs) -> requests.Response:
        """Issue a POST request via :meth:`request`."""
        return self.request("POST", url, **kwargs)
# Module-level HTTP client instance, built with HttpClient's default retry settings.
http_client = HttpClient()
# Separate plain requests.Session, created without any of HttpClient's configuration.
session = requests.Session()
_REDIS_CONF = {
"host": "192.144.230.75",
"port": 6379,
@ -13,7 +66,6 @@ _REDIS_CONF = {
"db": 1,
}
def save_report_token(key_name: str, json_data: dict):
r = redis.Redis(**_REDIS_CONF)
key = key_name
@ -32,151 +84,157 @@ def get_report_token(key_name: str):
def login():
    """Log in to Dailymotion with the OAuth password grant and cache the result.

    Posts the credentials to the GraphQL OAuth token endpoint through the
    retrying ``http_client``, wraps the parsed JSON response in a record with
    an ``update_time`` timestamp, and hands that record to
    ``save_report_token`` under the key ``'token'``.

    Returns:
        dict: ``{"update_time", "username", "password", "token"}`` where
        ``token`` is the parsed JSON body of the OAuth response.

    Raises:
        Exception: Any failure while requesting, decoding or saving is printed
            and re-raised unchanged.
    """
    # NOTE(review): the account credentials and OAuth client secret are
    # hard-coded here, and the plaintext password is also written into the
    # saved record below. Consider loading them from configuration instead.
    username = "copyright@qiyi.com"
    password = "ppsIQIYI2018@"

    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "https://www.dailymotion.com",
        "Pragma": "no-cache",
        "Referer": "https://www.dailymotion.com/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\""
    }
    url = "https://graphql.api.dailymotion.com/oauth/token"
    form = {
        "client_id": "f1a362d288c1b98099c7",
        "client_secret": "eea605b96e01c796ff369935357eca920c5da4c5",
        "grant_type": "password",
        "username": username,
        "password": password,
        "scope": "userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos",
        "version": "2",
        "traffic_segment": "962042",
        "visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
    }
    try:
        # http_client raises on HTTP error statuses, so reaching the next line
        # means the endpoint answered with a non-error status.
        response = http_client.post(url, headers=headers, data=form)
        record = {
            "update_time": int(time.time()),
            "username": username,
            "password": password,
            "token": response.json()
        }
        save_report_token('token', record)
        return record
    except Exception as e:
        print(f"登录失败: {str(e)}")
        raise
def get_cookies(access_token: str, refresh_token: str):
    """POST the OAuth tokens as cookies to Dailymotion's cookie refresh URL.

    The response itself is discarded.
    NOTE(review): presumably this is called for the cookies the server sets on
    the shared ``http_client`` session — confirm against the caller.

    Args:
        access_token: OAuth access token sent as the ``access_token`` cookie.
        refresh_token: OAuth refresh token sent as the ``refresh_token`` cookie.

    Raises:
        Exception: Any request failure is printed and re-raised unchanged.
    """
    cookies = {
        "access_token": access_token,
        "refresh_token": refresh_token,
    }
    url = "https://www.dailymotion.com/cookie/refresh_token"
    try:
        # Single request through the retrying client (the request is no longer
        # issued a second time through the bare module-level session).
        http_client.post(url, cookies=cookies, allow_redirects=True)
    except Exception as e:
        print(f"刷新 cookie 失败: {str(e)}")
        raise
def get_cookies1(access_token: str, refresh_token: str):
    """Follow the Zendesk redirect (HTTP 302) and cache the session cookies.

    Sends a GET to the Dailymotion ``/zendesk`` URL with the OAuth tokens as
    cookies, follows redirects, then snapshots the cookie jar of the shared
    ``http_client`` session and saves it via ``save_report_token`` under the
    key ``'cookies'``.

    Args:
        access_token: OAuth access token sent as the ``access_token`` cookie.
        refresh_token: OAuth refresh token sent as the ``refresh_token`` cookie.

    Returns:
        dict: ``{"update_time": <unix seconds>, "cookies": {name: value}}``.

    Raises:
        Exception: Any request or save failure is printed and re-raised
            unchanged.
    """
    cookies = {
        "access_token": access_token,
        "refresh_token": refresh_token,
    }
    url = "https://www.dailymotion.com/zendesk"
    params = {
        "return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
        "timestamp": str(int(time.time())),
    }
    try:
        http_client.get(url, cookies=cookies, params=params, allow_redirects=True)
        # get_dict() flattens the jar without raising when the same cookie
        # name exists for more than one domain/path; dict(jar) goes through
        # __getitem__ and raises CookieConflictError in that situation.
        cookies_dict = {
            "update_time": int(time.time()),
            "cookies": http_client.session.cookies.get_dict(),
        }
        save_report_token('cookies', cookies_dict)
        return cookies_dict
    except Exception as e:
        print(f"获取 cookies 失败: {str(e)}")
        raise
def get_csrftoken():
    """Fetch the help-center CSRF token JSON and cache it.

    Requests ``csrf_token.json`` from faq.dailymotion.com through the shared
    ``http_client`` (so cookies already held by its session are sent), wraps
    the parsed JSON with an ``update_time`` timestamp and saves it via
    ``save_report_token`` under the key ``'csrf_token'``.

    Returns:
        dict: ``{"update_time": <unix seconds>, "csrf_token": <parsed JSON>}``.

    Raises:
        Exception: Any request, decode or save failure is printed and
            re-raised unchanged.
    """
    url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
    try:
        response = http_client.get(url)
        data = {"update_time": int(time.time()), "csrf_token": response.json()}
        save_report_token('csrf_token', data)
        return data
    except Exception as e:
        print(f"获取 CSRF token 失败: {str(e)}")
        raise
def report(csrf_token: str, cookies: dict):
    """Submit the copyright complaint form to the Dailymotion help center.

    Args:
        csrf_token: Token sent in the ``X-CSRF-Token`` request header.
        cookies: Cookies sent with the request.

    Returns:
        bool: ``True`` when the final response status code is exactly 200,
        otherwise ``False``.

    Raises:
        Exception: Any request failure (including HTTP error statuses raised
            by ``http_client``) is printed and re-raised unchanged.
    """
    endpoint = 'https://faq.dailymotion.com/hc/en-us/requests'
    request_headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://faq.dailymotion.com",
        "Pragma": "no-cache",
        "Referer": "https://faq.dailymotion.com/hc/en-us/requests/new",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "X-CSRF-Token": csrf_token
    }
    form_fields = {
        "request[subject]": "版权投诉",
        "request[description]": "请删除侵权视频",
        "request[email]": "copyright@qiyi.com",
        "request[ticket_form_id]": "360000717219"
    }
    try:
        reply = http_client.post(
            endpoint,
            cookies=cookies,
            headers=request_headers,
            data=form_fields,
        )
        return reply.status_code == 200
    except Exception as err:
        print(f"提交报告失败: {str(err)}")
        raise
def prepare_data():
    """Make sure fresh credentials are cached, then submit the report.

    Loads the cached ``token``, ``cookies`` and ``csrf_token`` records via
    ``get_report_token``. If any record is missing, or the oldest
    ``update_time`` is more than 24 hours old, the whole chain is rebuilt:
    ``login`` -> ``get_cookies`` -> ``get_cookies1`` -> ``get_csrftoken``.
    Finally ``report`` is called once with the CSRF token and cookies.

    Raises:
        Exception: When login, the cookie/CSRF refresh, or the report
            submission yields a falsy result, or when any helper raises. The
            error is printed and re-raised unchanged.
    """
    max_age_seconds = 24 * 60 * 60
    try:
        token = get_report_token('token')
        cookies = get_report_token('cookies')
        csrf_token = get_report_token('csrf_token')

        cached = (token, cookies, csrf_token)
        if all(cached):
            # Everything is present: refresh only if the oldest record is
            # stale (a missing/zero update_time counts as stale).
            oldest = min(entry.get('update_time', 0) for entry in cached)
            needs_refresh = not oldest or oldest + max_age_seconds < time.time()
        else:
            # A missing record must trigger a refresh. Calling min() over the
            # filtered records would raise ValueError when all are missing and
            # would wrongly skip the refresh when only some are missing.
            needs_refresh = True

        if needs_refresh:
            token = login()
            if not token:
                raise Exception("登录失败")
            access_token = token['token']['access_token']
            refresh_token = token['token']['refresh_token']
            get_cookies(access_token, refresh_token)
            cookies = get_cookies1(access_token, refresh_token)
            csrf_token = get_csrftoken()
            if not all([cookies, csrf_token]):
                raise Exception("获取 cookies 或 csrf_token 失败")

        success = report(
            csrf_token['csrf_token']['current_session']['csrf_token'],
            cookies['cookies'],
        )
        if not success:
            raise Exception("提交投诉失败")
    except Exception as e:
        print(f"处理数据失败: {str(e)}")
        raise