feat: 更新报告功能以支持版权投诉并优化请求头

This commit is contained in:
晓丰 2025-06-03 23:48:16 +08:00
parent ada4bf468f
commit b3343e1034

View File

@ -3,10 +3,12 @@ import json
import redis import redis
import requests import requests
import urllib3 import urllib3
from matplotlib.artist import allow_rasterization
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
from typing import Optional, Dict, Any, Union from typing import Optional, Dict, Any, Union
class HttpClient: class HttpClient:
def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5): def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
self.session = requests.Session() self.session = requests.Session()
@ -55,6 +57,7 @@ class HttpClient:
def post(self, url: str, **kwargs) -> requests.Response: def post(self, url: str, **kwargs) -> requests.Response:
return self.request("POST", url, **kwargs) return self.request("POST", url, **kwargs)
# 创建全局的 HTTP 客户端实例 # 创建全局的 HTTP 客户端实例
http_client = HttpClient() http_client = HttpClient()
@ -66,6 +69,7 @@ _REDIS_CONF = {
"db": 1, "db": 1,
} }
def save_report_token(key_name: str, json_data: dict): def save_report_token(key_name: str, json_data: dict):
r = redis.Redis(**_REDIS_CONF) r = redis.Redis(**_REDIS_CONF)
key = key_name key = key_name
@ -127,42 +131,91 @@ def login():
print(f"登录失败: {str(e)}") print(f"登录失败: {str(e)}")
raise raise
def get_cookies(access_token: str, refresh_token: str):
    """POST the token pair, as cookies, to Dailymotion's cookie refresh endpoint.

    The response is discarded. Any exception is printed and then re-raised
    unchanged to the caller.

    Args:
        access_token: Value sent as the ``access_token`` cookie.
        refresh_token: Value sent as the ``refresh_token`` cookie.
    """
    endpoint = "https://www.dailymotion.com/cookie/refresh_token"
    token_cookies = dict(access_token=access_token, refresh_token=refresh_token)
    try:
        http_client.post(endpoint, cookies=token_cookies, allow_redirects=True)
    except Exception as exc:
        print(f"刷新 cookie 失败: {str(exc)}")
        raise
def get_cookies1(access_token: str, refresh_token: str): def refresh_token(access_token, refresh_token):
"""302 跳转""" headers = {
try: "Accept": "*/*",
cookies = { "Accept-Language": "zh-CN,zh;q=0.9",
"access_token": access_token, "Cache-Control": "no-cache",
"refresh_token": refresh_token, "Connection": "keep-alive",
} "Content-Length": "0",
url = "https://www.dailymotion.com/zendesk" "Origin": "https://www.dailymotion.com",
params = { "Pragma": "no-cache",
"return_to": "https://faq.dailymotion.com/hc/en-us/requests/new", "Referer": "https://www.dailymotion.com/signin?urlback=%2Fzendesk%3Ftimestamp%3D1748932650%26return_to%3Dhttps%253A%252F%252Ffaq.dailymotion.com%252Fhc%252Fen-us%252Frequests%252Fnew",
"timestamp": str(int(time.time())), "Sec-Fetch-Dest": "empty",
} "Sec-Fetch-Mode": "cors",
response = http_client.get(url, cookies=cookies, params=params, allow_redirects=True) "Sec-Fetch-Site": "same-origin",
cookies_dict = {"update_time": int(time.time()), "cookies": dict(http_client.session.cookies)} "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
save_report_token('cookies', cookies_dict) "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
return cookies_dict "sec-ch-ua-mobile": "?0",
except Exception as e: "sec-ch-ua-platform": "\"Windows\""
print(f"获取 cookies 失败: {str(e)}") }
raise cookies = {
"dmvk": "683e982c34e34",
"ts": "133696",
"v1st": "a847389a-6b91-4157-948f-457666f7172b",
"ff": "on",
"lang": "zh_CN",
"usprivacy": "1---",
"dmaid": "73ca37e4-6858-46c1-aac4-a4a5fc9a270e",
"cookie_policy_closed": "1",
"access_token": access_token,
"refresh_token": refresh_token,
}
url = "https://www.dailymotion.com/cookie/refresh_token"
response = http_client.post(url, headers=headers, cookies=cookies)
def zendesk():
    """Request Dailymotion's ``/zendesk`` URL and persist the session cookies.

    Sends a browser-like GET (redirects followed) with ``return_to`` pointing
    at the help-centre "new request" form, then saves the cookies currently
    held by the shared ``http_client`` session under the ``'cookies'`` key via
    ``save_report_token``.

    Returns:
        dict: ``{"update_time": <unix seconds>, "cookies": {name: value}}``,
        i.e. the payload that was saved.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://www.dailymotion.com/sg",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
    }
    url = "https://www.dailymotion.com/zendesk"
    params = {
        "return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
        # Whole seconds: str(time.time()) would send a fractional value such
        # as "1748932650.123", unlike the integer timestamp a browser sends.
        "timestamp": str(int(time.time())),
    }
    http_client.get(url, headers=headers, params=params, allow_redirects=True)

    # Keep the cookies under their own key: the script entry point reads
    # get_report_token('cookies')['cookies'], and a flat dict would also let a
    # cookie literally named "update_time" clobber the timestamp.
    data = {
        "update_time": int(time.time()),
        "cookies": http_client.session.cookies.get_dict(),
    }
    save_report_token('cookies', data)
    return data
def get_csrftoken(): def get_csrftoken():
try: try:
url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json" url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
response = http_client.get(url) headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Pragma": "no-cache",
"Referer": "https://www.dailymotion.com/sg",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\""
}
response = http_client.get(url, headers=headers)
data = {"update_time": int(time.time()), "csrf_token": response.json()} data = {"update_time": int(time.time()), "csrf_token": response.json()}
save_report_token('csrf_token', data) save_report_token('csrf_token', data)
return data return data
@ -170,71 +223,62 @@ def get_csrftoken():
print(f"获取 CSRF token 失败: {str(e)}") print(f"获取 CSRF token 失败: {str(e)}")
raise raise
def report(csrf_token:str, cookies:dict):
def report(csrf_token: str, v_url, title):
try: try:
headers = { headers = {
"Accept": "*/*", "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9", "accept-language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache", "cache-control": "no-cache",
"Connection": "keep-alive", "content-type": "application/x-www-form-urlencoded",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", "origin": "https://faq.dailymotion.com",
"Origin": "https://faq.dailymotion.com", "pragma": "no-cache",
"Pragma": "no-cache", "priority": "u=0, i",
"Referer": "https://faq.dailymotion.com/hc/en-us/requests/new", "referer": "https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"", "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
"sec-ch-ua-mobile": "?0", "sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"", "sec-ch-ua-platform": "\"Windows\"",
"X-CSRF-Token": csrf_token "sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "same-origin",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
} }
url = "https://faq.dailymotion.com/hc/en-us/requests"
data = { data = {
"request[subject]": "版权投诉", "utf8": "",
"request[description]": "请删除侵权视频", "authenticity_token": csrf_token,
"request[email]": "copyright@qiyi.com", "request%5Bticket_form_id%5D": "136048",
"request[ticket_form_id]": "360000717219" "request%5Bcollaborators%5D%5B%5D": "duke.chen@dailymotion.com",
"request%5Bcustom_fields%5D%5B360008684839%5D": "__dc.copyright_user_protection_-_copyright__",
"request%5Bcustom_fields%5D%5B30150188%5D": "copyrightform-notification",
"request%5Bcustom_fields%5D%5B25089567%5D": "legal_entity",
"request%5Bcustom_fields%5D%5B25159868%5D": "Beijing iQIYI Science & Technology Co.,Ltd",
"request%5Bcustom_fields%5D%5B4869133282962%5D": "Legal Department",
"request%5Bsubject%5D": "Copyright infringement Notification",
"request%5Bcustom_fields%5D%5B25613698%5D": v_url,
"request%5Bdescription%5D": f"We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are \"{title}\"\r\n",
"request%5Bdescription_mimetype%5D": "text/plain",
"request%5Bcustom_fields%5D%5B4769880845586%5D": "on",
"request%5Bcustom_fields%5D%5B25626417%5D": "on",
"request%5Bcustom_fields%5D%5B4769797363346%5D": "on",
"request%5Bcustom_fields%5D%5B25159848%5D": "on",
"request%5Bcustom_fields%5D%5B4769658191250%5D": "on"
} }
response = http_client.post('https://faq.dailymotion.com/hc/en-us/requests', response = requests.post(url, headers=headers, data=data)
cookies=cookies, print(response.status_code)
headers=headers, print(response.text)
data=data) print(response)
return response.status_code == 200 return response.status_code == 200
except Exception as e: except Exception as e:
print(f"提交报告失败: {str(e)}") print(f"提交报告失败: {str(e)}")
raise raise
def prepare_data():
    """Load cached credentials, refresh them if needed, and submit a report.

    Reads the ``'token'``, ``'cookies'`` and ``'csrf_token'`` entries through
    ``get_report_token``. When any entry is missing, or the oldest
    ``update_time`` among them is more than 24 hours old, it logs in again and
    re-fetches cookies and the CSRF token before calling ``report``.

    Raises:
        Exception: Re-raised after being printed, when login fails, a
            credential cannot be obtained, or ``report`` returns a falsy
            result.
    """
    try:
        token = get_report_token('token')
        cookies = get_report_token('cookies')
        csrf_token = get_report_token('csrf_token')
        cached = (token, cookies, csrf_token)

        # default=0 covers the cold-start case: with nothing cached the
        # generator is empty and a bare min() raises ValueError.
        min_update_time = min(
            (d.get('update_time', 0) for d in cached if d),
            default=0,
        )
        expired = not min_update_time or min_update_time + (24 * 60 * 60) < time.time()

        # A partially missing cache must also trigger a refresh; otherwise the
        # fresh entries hide the missing one and the check below fails.
        if expired or not all(cached):
            token = login()
            if not token:
                raise Exception("登录失败")
            access_token = token['token']['access_token']
            refresh_token = token['token']['refresh_token']
            get_cookies(access_token, refresh_token)
            cookies = get_cookies1(access_token, refresh_token)
            csrf_token = get_csrftoken()
            if not all([cookies, csrf_token]):
                raise Exception("获取 cookies 或 csrf_token 失败")

        if not all([token, cookies, csrf_token]):
            raise Exception("获取令牌失败")

        success = report(csrf_token['csrf_token']['current_session']['csrf_token'], cookies['cookies'])
        if not success:
            raise Exception("提交投诉失败")
    except Exception as e:
        print(f"处理数据失败: {str(e)}")
        raise
if __name__ == '__main__':
    # Manual run: load the cookie dict stored under the 'cookies' report
    # token into the shared session, fetch a CSRF token, and file one report.
    cookies = get_report_token('cookies')['cookies']
    http_client.session.cookies = requests.utils.cookiejar_from_dict(cookies)
    csrf_token = get_csrftoken()['csrf_token']['current_session']['csrf_token']
    # report() is declared as (csrf_token, v_url, title). Keyword arguments
    # are used because the title and the URL were previously passed in each
    # other's position.
    report(
        csrf_token,
        v_url='https://www.dailymotion.com/video/x8kjx7v',
        title='Hunter X Hunter',
    )