Compare commits

...

2 Commits

2 changed files with 378 additions and 140 deletions

180
report_video_cookies.py Normal file
View File

@ -0,0 +1,180 @@
import time
import functools
from queue import Queue, Empty
from typing import Callable, Any
from matplotlib.pyplot import title
from playwright.sync_api import (
sync_playwright,
TimeoutError as PlaywrightTimeoutError,
Page,
Browser,
)
def require_login(func):
    """Decorate a client method so that a login check precedes every call.

    The instance the method is bound to must expose ``ensure_login()``.
    That hook runs first; the decorated method is then invoked with the
    caller's arguments and its return value is handed back unchanged.
    """

    def _guarded(self, *args, **kwargs):
        self.ensure_login()
        outcome = func(self, *args, **kwargs)
        return outcome

    # Copy name/docstring/etc. from the wrapped method onto the guard.
    return functools.wraps(func)(_guarded)
class DailymotionClient:
    """Browser-automation client that files copyright tickets with Dailymotion.

    A Chromium instance is started through Playwright's sync API, pointed at
    the help-center "new request" form and kept open for the lifetime of the
    object. Call :meth:`close` (or use the instance as a context manager) to
    release the page, the browser and the Playwright driver.
    """

    url = "https://faq.dailymotion.com/hc/en-us/requests/new"
    # NOTE(review): credentials are committed in source. Prefer passing
    # ``email``/``password`` to the constructor from a secret store and
    # rotating this password.
    EMAIL = "copyright@qiyi.com"
    PASSWORD = "ppsIQIYI2018@"

    # Text typed into the "Description" field; ``{}`` receives the title.
    # NOTE(review): the wording contains run-together words ("actionto",
    # "notre-posted", "Pleaseinform"); left untouched because it is the text
    # actually submitted - confirm before correcting.
    _DESCRIPTION_TEMPLATE = (
        "We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\n"
        "1) please help remove these videos\n"
        "2) The drama series titles are {}\n"
    )

    def __init__(
        self,
        headless: bool = False,
        email: "str | None" = None,
        password: "str | None" = None,
        proxy_server: "str | None" = "http://127.0.0.1:7890",
    ):
        """Start Playwright, launch Chromium and open the request form.

        Args:
            headless: Run the browser without a visible window.
            email: Login e-mail; defaults to the class-level ``EMAIL``.
            password: Login password; defaults to the class-level ``PASSWORD``.
            proxy_server: Proxy URL handed to Chromium. Pass ``None`` or an
                empty string to launch without a proxy.

        Raises:
            Exception: Whatever Playwright raises while launching or
                navigating; already-acquired resources are released first.
        """
        self.email = self.EMAIL if email is None else email
        self.password = self.PASSWORD if password is None else password
        self.headless = headless
        # Minimum number of seconds between two real login probes.
        self.check_interval = 60 * 60
        self._last_check_ts = 0
        self._last_check_result = False
        self.browser = None
        self.page = None

        launch_options = {"headless": self.headless}
        if proxy_server:
            launch_options["proxy"] = {"server": proxy_server}

        self._pw = sync_playwright().start()
        try:
            self.browser = self._pw.chromium.launch(**launch_options)
            self.page = self.browser.new_page()
            self.page.goto(self.url)
        except Exception:
            # Do not leave a driver process or browser behind when start-up
            # fails half-way.
            self.close()
            raise

    def __enter__(self):
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Release browser resources when the ``with`` block ends."""
        self.close()
        return False

    def _do_login(self) -> None:
        """Drive the sign-in form and wait until the request form is back."""
        self.page.goto(self.url, timeout=30000)
        self.page.wait_for_load_state("networkidle", timeout=30000)

        logbtn = self.page.locator("//a[@class='login button']")
        if logbtn.count() > 0:
            logbtn.nth(0).click()
        self.page.wait_for_selector('//input[@data-testid="emailInput"]')

        # Dismiss the "我了解" pop-up when it is shown.
        i_now_btn = self.page.locator('button:has-text("我了解")')
        if i_now_btn.count() > 0:
            # ``.first`` avoids a strict-mode error if several buttons match.
            i_now_btn.first.click()

        # Enter account and password.
        email_edit = self.page.locator('//input[@data-testid="emailInput"]')
        password_edit = self.page.locator('//input[@data-testid="passwordInput"]')
        if email_edit.count():
            email_edit.fill(self.email)
        if password_edit.count():
            password_edit.fill(self.password)

        # Submit the sign-in form.
        login_btn = self.page.locator('button[form="signin-form"][type="submit"]')
        try:
            self.page.wait_for_selector(
                'button[form="signin-form"][type="submit"]:not([disabled])',
                timeout=20000,
            )
        except PlaywrightTimeoutError:
            # Best effort: click anyway and let the click itself fail if the
            # button never becomes usable.
            pass
        login_btn.click()

        # Wait for the redirect back to the request form.
        self.page.wait_for_url(self.url, timeout=30000)
        time.sleep(1)

    def _detect_login(self) -> bool:
        """Reload the form and report whether the "login" link is absent."""
        self.page.goto(self.url, timeout=30000)
        self.page.wait_for_load_state("networkidle", timeout=30000)
        return self.page.locator("//a[@class='login button']").count() == 0

    def is_logged_in(self) -> bool:
        """Return the login state, re-probing at most once per ``check_interval``.

        The result is cached locally so the page is not reloaded on every
        call. Any exception raised by the probe is treated as "not logged in".
        """
        now = time.time()
        if now - self._last_check_ts < self.check_interval:
            return self._last_check_result
        # Cache expired: probe again.
        try:
            ok = self._detect_login()
        except Exception:
            ok = False
        self._last_check_ts = now
        self._last_check_result = ok
        return ok

    def ensure_login(self) -> None:
        """Log in when the (cached) state says the session is not authenticated."""
        if self.is_logged_in():
            return
        self._do_login()
        # _do_login() returned without raising, so record the fresh state.
        # Without this the cached ``False`` would force a full re-login on
        # every decorated call until the cache expires.
        self._last_check_ts = time.time()
        self._last_check_result = True

    @staticmethod
    def _pause(seconds: float = 1) -> None:
        """Sleep between form interactions, as the original flow does."""
        time.sleep(seconds)

    @require_login
    def process_ticket(self, title: str, link: str):
        """Fill in and submit one copyright-infringement ticket.

        Args:
            title: Drama title(s) inserted into the ticket description.
            link: URL(s) of the infringing video(s), typed into the
                "URL of the video(s)" field.
        """
        description = self._DESCRIPTION_TEMPLATE.format(title)
        print("开始处理任务")
        page = self.page

        resports = page.locator('li.blocks-item:nth-child(8)')
        resports.click()
        self._pause(2)

        cc = page.locator("input#request_collaborators_")
        cc.scroll_into_view_if_needed()
        cc.click()
        cc.type("duke.chen@dailymotion.com")

        page.get_by_role("button", name="Copyright infringement").click()
        self._pause()
        page.get_by_role("button", name="Notification").nth(0).click()
        self._pause()
        page.get_by_role("button", name="A legal entity").click()
        self._pause()

        page.get_by_label("Corporate name").fill("Beijing iQIYI Science & Technology Co.,Ltd")
        self._pause()
        page.get_by_label("Legal status").fill("Legal Department")
        self._pause()
        page.get_by_label("Subject").fill("Copyright infringement Notification")
        self._pause()
        # The original called fill() with no value, which raises TypeError
        # and left the ``link`` argument unused.
        page.get_by_label(
            "Please indicate the URL of the video(s) you would like to report*"
        ).fill(link)
        self._pause()
        page.get_by_label("Description").fill(description)
        self._pause()

        # NOTE(review): the first label prefix is also a prefix of the second
        # one; if both checkboxes match it, the first check() will hit a
        # strict-mode error - verify against the live form.
        page.get_by_label("I state in good faith", exact=False).check()
        self._pause()
        page.get_by_label(
            "I state in good faith that the use of the Protected", exact=False
        ).check()
        self._pause()
        for checkbox_name in (
            "I certify that all information provided",
            "I acknowledge that my statements",
            "The data provided through this form",
            "By submitting the present form,",
        ):
            page.get_by_role("checkbox", name=checkbox_name, exact=False).check()
            self._pause()

        page.get_by_role("button", name="Submit").click()
        self._pause(2)
        # Return to the empty form so the next ticket starts from a known page.
        if page.url != self.url:
            page.goto(self.url, timeout=30000)

    def close(self):
        """Release page, browser and Playwright driver; never raises."""
        for attr, method in (("page", "close"), ("browser", "close"), ("_pw", "stop")):
            resource = getattr(self, attr, None)
            if resource is None:
                continue
            try:
                getattr(resource, method)()
            except Exception:
                # Deliberately best-effort: keep tearing down the remaining
                # resources even if one of them is already gone.
                pass
if __name__ == "__main__":
    import sys

    # process_ticket() needs the drama title and the infringing video URL;
    # calling it with no arguments raises TypeError, so take both from the
    # command line.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <title> <video-url>")

    dm = DailymotionClient()
    try:
        dm.process_ticket(sys.argv[1], sys.argv[2])
    finally:
        # Always shut the browser and the Playwright driver down.
        dm.close()

View File

@ -1,10 +1,63 @@
import time
import requests
import json
import redis
import requests
import urllib3
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from typing import Optional, Dict, Any, Union
class HttpClient:
    """Small wrapper around one ``requests.Session`` with a retry adapter.

    Every call goes through :meth:`request`, which raises for HTTP error
    statuses and prints a message before re-raising any
    ``requests`` exception.
    """

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Create the session and mount the retrying adapter on both schemes.

        Args:
            max_retries: Value passed as ``Retry(total=...)``.
            backoff_factor: Value passed as ``Retry(backoff_factor=...)``.
        """
        self.session = requests.Session()
        # Retry policy: the listed statuses are marked as retryable.
        retrying_adapter = HTTPAdapter(
            max_retries=Retry(
                total=max_retries,
                backoff_factor=backoff_factor,
                status_forcelist=[500, 502, 503, 504, 429],
            )
        )
        for scheme in ("http://", "https://"):
            self.session.mount(scheme, retrying_adapter)

    def request(self,
                method: str,
                url: str,
                headers: Optional[Dict] = None,
                params: Optional[Dict] = None,
                data: Optional[Union[Dict, str]] = None,
                cookies: Optional[Dict] = None,
                allow_redirects: bool = True,
                timeout: int = 30,
                **kwargs) -> requests.Response:
        """Send one request through the shared session.

        Returns:
            The response, after ``raise_for_status()`` has accepted it.

        Raises:
            requests.exceptions.RequestException: Re-raised after the
                failure has been printed.
        """
        options = dict(
            kwargs,
            headers=headers,
            params=params,
            data=data,
            cookies=cookies,
            allow_redirects=allow_redirects,
            timeout=timeout,
        )
        try:
            reply = self.session.request(method=method, url=url, **options)
            reply.raise_for_status()
        except requests.exceptions.RequestException as exc:
            print(f"请求失败: {url}, 错误: {exc}")
            raise
        return reply

    def get(self, url: str, **kwargs) -> requests.Response:
        """Shortcut for ``request("GET", url, **kwargs)``."""
        return self.request("GET", url, **kwargs)

    def post(self, url: str, **kwargs) -> requests.Response:
        """Shortcut for ``request("POST", url, **kwargs)``."""
        return self.request("POST", url, **kwargs)
# Create the module-wide HTTP client instance (retrying session wrapper).
http_client = HttpClient()
# Plain requests session without the retry adapter.
# NOTE(review): presumably kept only for call sites that predate
# ``http_client`` - confirm it is still needed.
session = requests.Session()
_REDIS_CONF = {
"host": "192.144.230.75",
"port": 6379,
@ -13,7 +66,6 @@ _REDIS_CONF = {
"db": 1,
}
def save_report_token(key_name: str, json_data: dict):
r = redis.Redis(**_REDIS_CONF)
key = key_name
@ -32,6 +84,7 @@ def get_report_token(key_name: str):
def login():
try:
headers = {
"Accept": "*/*",
"Accept-Language": "zh-CN,zh;q=0.9",
@ -61,7 +114,7 @@ def login():
"traffic_segment": "962042",
"visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
}
response = session.post(url, headers=headers, data=data)
response = http_client.post(url, headers=headers, data=data)
data = {
"update_time": int(time.time()),
"username": "copyright@qiyi.com",
@ -70,19 +123,25 @@ def login():
}
save_report_token('token', data)
return data
except Exception as e:
print(f"登录失败: {str(e)}")
raise
def get_cookies(access_token: str, refresh_token: str):
    """Post the login tokens to Dailymotion's cookie-refresh endpoint.

    Both tokens are sent as cookies to ``/cookie/refresh_token`` through the
    shared ``http_client``. The response is discarded and nothing is
    returned.

    The request is issued exactly once; the leftover call through the legacy
    module-level ``session`` duplicated it and has been dropped.

    Raises:
        Exception: Any failure is printed and re-raised unchanged.
    """
    cookies = {
        "access_token": access_token,
        "refresh_token": refresh_token,
    }
    url = "https://www.dailymotion.com/cookie/refresh_token"
    try:
        http_client.post(url, cookies=cookies, allow_redirects=True)
    except Exception as e:
        print(f"刷新 cookie 失败: {str(e)}")
        raise
def get_cookies1(access_token: str, refresh_token: str):
"""302 跳转"""
try:
cookies = {
"access_token": access_token,
"refresh_token": refresh_token,
@ -92,91 +151,90 @@ def get_cookies1(access_token: str, refresh_token: str):
"return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
"timestamp": str(int(time.time())),
}
session.get(url, cookies=cookies, params=params, allow_redirects=True)
cookies_dict = {"update_time": int(time.time()), "cookies": session.cookies.get_dict()}
response = http_client.get(url, cookies=cookies, params=params, allow_redirects=True)
cookies_dict = {"update_time": int(time.time()), "cookies": dict(http_client.session.cookies)}
save_report_token('cookies', cookies_dict)
return cookies_dict
except Exception as e:
print(f"获取 cookies 失败: {str(e)}")
raise
def get_csrftoken():
    """Fetch the help-center CSRF token and persist it.

    The token endpoint is queried once through the shared ``http_client``
    (the earlier duplicate request through the legacy ``session`` was
    immediately overwritten and has been removed). The decoded JSON body is
    stored under the ``'csrf_token'`` key via ``save_report_token`` together
    with the current Unix timestamp.

    Returns:
        dict: ``{"update_time": <int>, "csrf_token": <decoded JSON>}``.

    Raises:
        Exception: Any failure is printed and re-raised unchanged.
    """
    url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
    try:
        response = http_client.get(url)
        data = {"update_time": int(time.time()), "csrf_token": response.json()}
        save_report_token('csrf_token', data)
        return data
    except Exception as e:
        print(f"获取 CSRF token 失败: {str(e)}")
        raise
def report(csrf_token: str, cookies: dict):
    """Submit one copyright request to the Dailymotion help center.

    This is the resolved post-change version of the function: the superseded
    signature, header set, form payload and direct ``requests.post`` call
    that were interleaved with it have been removed, so the request is sent
    once, through the shared ``http_client``.

    Args:
        csrf_token: Token sent in the ``X-CSRF-Token`` header.
        cookies: Cookie mapping attached to the request.

    Returns:
        bool: ``True`` when the final response status is 200.

    Raises:
        Exception: Any failure is printed and re-raised unchanged.
    """
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://faq.dailymotion.com",
        "Pragma": "no-cache",
        "Referer": "https://faq.dailymotion.com/hc/en-us/requests/new",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "X-CSRF-Token": csrf_token,
    }
    # NOTE(review): this payload carries only subject, description, e-mail
    # and form id; the previous version also sent the form's custom fields
    # and consent checkboxes - confirm the endpoint accepts the shorter form.
    data = {
        "request[subject]": "版权投诉",
        "request[description]": "请删除侵权视频",
        "request[email]": "copyright@qiyi.com",
        "request[ticket_form_id]": "360000717219",
    }
    try:
        response = http_client.post(
            'https://faq.dailymotion.com/hc/en-us/requests',
            cookies=cookies,
            headers=headers,
            data=data,
        )
        return response.status_code == 200
    except Exception as e:
        print(f"提交报告失败: {str(e)}")
        raise
def prepare_data():
    """Load cached credentials, refresh them when needed, then submit a report.

    The token, cookies and CSRF token are read from the cache via
    ``get_report_token``. A full refresh (login, cookie exchange, CSRF
    fetch) runs when any of the three is missing, when the oldest entry has
    no usable ``update_time``, or when it is older than 24 hours.

    Compared with the interleaved original, the superseded max-based check
    and the duplicate ``get_cookies1``/``report`` calls are gone, and an
    empty or partially filled cache now triggers a refresh instead of
    failing (``min()`` of an empty sequence raised ``ValueError``).

    Raises:
        Exception: On login failure, missing credentials or a rejected
            report; every failure is printed and re-raised unchanged.
    """
    max_age_seconds = 24 * 60 * 60
    try:
        token = get_report_token('token')
        cookies = get_report_token('cookies')
        csrf_token = get_report_token('csrf_token')

        cached = (token, cookies, csrf_token)
        # ``default=0`` keeps min() from raising when nothing is cached yet.
        oldest_update = min(
            (entry.get('update_time', 0) for entry in cached if entry),
            default=0,
        )
        needs_refresh = (
            not all(cached)
            or not oldest_update
            or oldest_update + max_age_seconds < time.time()
        )

        if needs_refresh:
            token = login()
            if not token:
                raise Exception("登录失败")
            access_token = token['token']['access_token']
            refresh_token = token['token']['refresh_token']
            get_cookies(access_token, refresh_token)
            cookies = get_cookies1(access_token, refresh_token)
            csrf_token = get_csrftoken()
            if not all([cookies, csrf_token]):
                raise Exception("获取 cookies 或 csrf_token 失败")

        if not all([token, cookies, csrf_token]):
            raise Exception("获取令牌失败")

        success = report(
            csrf_token['csrf_token']['current_session']['csrf_token'],
            cookies['cookies'],
        )
        if not success:
            raise Exception("提交投诉失败")
    except Exception as e:
        print(f"处理数据失败: {str(e)}")
        raise