Compare commits

...

35 Commits

Author SHA1 Message Date
5f45c5d079 feat: 更新截图服务接口路径以指向新的截图目录 2025-06-06 23:05:06 +08:00
12c66a9eb4 feat: 添加自定义404和403错误响应,并优化图像服务接口逻辑 2025-06-06 23:00:23 +08:00
101a819663 fix: 修复图像服务接口中的路径越界访问和文件不存在错误处理 2025-06-06 22:58:51 +08:00
848f21b609 feat: 更改playwright 的配置去掉本地测试时的代理和有头选项 2025-06-06 22:45:06 +08:00
3aee8f4033 feat: 添加图像服务接口以提供截图访问功能并更新报告状态链接 2025-06-06 22:32:06 +08:00
11bd09e281 fix:单条举报词,举报时隔5分钟,成功写入截图路径 2025-06-06 22:23:07 +08:00
f74b5b6df6 fix: 调整报告处理逻辑中的睡眠时间以优化性能 2025-06-05 23:26:38 +08:00
c7c5b1b078 fix: 增加数据库查询限制并优化报告处理逻辑 2025-06-05 23:23:14 +08:00
6260f8b69a fix: 添加LIMIT限制以优化数据库查询并恢复异常处理 2025-06-04 22:49:38 +08:00
7ed3678811 refactor: 移除异常处理以简化报告处理逻辑 2025-06-04 22:35:37 +08:00
e7a6df8e3e refactor: 移除异常处理以简化报告处理逻辑 2025-06-04 22:33:46 +08:00
a51acc7ce0 feat: 添加功能以在视频处理时自动截图并保存 2025-06-04 22:31:50 +08:00
21d88cbcbf fix: 修改初始化参数以默认启用无头模式 2025-06-04 22:28:22 +08:00
2cd31e4a74 fix: 修改初始化参数以支持非无头模式并更新用户界面文本 2025-06-04 22:25:15 +08:00
52e2139ff4 feat: 添加 CapSolver 处理 Cloudflare Turnstile 的功能 2025-06-04 21:35:01 +08:00
e79be58ac0 feat: 添加 CapSolver 自动完成 Cloudflare Turnstile 的功能 2025-06-04 21:30:26 +08:00
a9b53e8c85 feat: 添加截图功能以记录视频处理任务 2025-06-04 21:14:09 +08:00
32a15d27ce fix: 添加错误日志记录以便于调试和问题追踪 2025-06-04 21:10:29 +08:00
f3b2102d62 fix: 修复报告视频列表检查逻辑以提高代码可读性 2025-06-04 20:58:50 +08:00
fc29bddab2 refactor: 移除代理设置以简化浏览器启动配置 2025-06-04 20:57:46 +08:00
a4682a9647 refactor: 移除未使用的导入以清理代码 2025-06-04 20:56:38 +08:00
44eb94f57b refactor: 移除未使用的导入以清理代码 2025-06-04 20:53:42 +08:00
5958782a54 feat: 添加日志记录功能以优化报告处理流程 2025-06-04 20:52:07 +08:00
da4a5ac828 feat: 添加视频报告功能并优化数据库操作 2025-06-04 20:49:07 +08:00
b3343e1034 feat: 更新报告功能以支持版权投诉并优化请求头 2025-06-03 23:48:16 +08:00
ada4bf468f feat: 添加DailymotionClient以实现版权侵权自动报告 2025-06-03 23:47:37 +08:00
cfb6151234 fix: 添加重复视频标识并优化死锁重试机制 2025-06-03 11:12:05 +08:00
dac24f1400 fix: 添加重复视频标识并优化死锁重试机制 2025-06-03 09:29:00 +08:00
ff612f09fd fix: 调整数据库刷新间隔以提高性能 2025-06-02 23:34:54 +08:00
ea2825bef9 fix: 调整数据库刷新间隔和请求延迟以优化性能 2025-06-02 23:31:22 +08:00
74466c7027 fix: 修改数据库刷新间隔以提高性能,减少视频列表生成时的请求延迟 2025-06-02 23:26:46 +08:00
ca1138164c fix: 修改数据库刷新间隔以提高性能 2025-06-02 23:22:42 +08:00
8d8b446766 fix: 减少视频列表生成时的请求间隔以提高效率 2025-06-02 22:35:55 +08:00
3b448a5ed3 fix: 优化任务处理逻辑,减少并发请求间隔以提高效率 2025-06-02 22:35:22 +08:00
93bb599ffb fix: 修改视频列表生成逻辑以限制最大视频数量 2025-06-02 22:29:48 +08:00
7 changed files with 670 additions and 136 deletions

83
DB.py
View File

@ -10,6 +10,8 @@ from sqlalchemy import (
BigInteger, Integer, String, Text, DateTime, tuple_
)
from sqlalchemy.dialects.mysql import insert as mysql_insert
from sqlalchemy.exc import OperationalError
from logger import logger
from datetime import datetime
@ -66,6 +68,7 @@ video = Table("sh_dm_video_v2", _meta,
Column("watch_number", Integer),
Column("follow_number", Integer),
Column("video_number", Integer),
Column("is_repeat", Integer),
)
video_author = Table(
"sh_dm_video_author",
@ -306,6 +309,43 @@ class DBVidcon:
self.redis.lpush(self.l2_list_key, *raws)
logger.info(f"[回滚l2] 已退回 {len(raws)}")
@mysql_retry()
def get_report_video(self):
    """Fetch up to ten rows of ``sh_dm_fight_records`` whose ``status`` is 1.

    Only the ``id``, ``name_title`` and ``link`` columns are selected.

    Returns:
        Whatever ``self.cursor.fetchall()`` yields for the query.
    """
    query = """
SELECT
id,
name_title,
link
FROM
sh_dm_fight_records
WHERE
status = 1
LIMIT 10
"""
    cursor = self.cursor
    cursor.execute(query)
    rows = cursor.fetchall()
    return rows
@mysql_retry()
def update_fight_record_status(self, record_id: int, new_status: int, errinfo: str = ""):
    """Set ``status``, ``errinfo`` and ``updata_time`` on one fight record.

    Args:
        record_id: ``id`` of the ``sh_dm_fight_records`` row to update.
        new_status: Value written to the ``status`` column.
        errinfo: Text written to the ``errinfo`` column.

    ``updata_time`` is written as the current Unix time in whole seconds.
    """
    statement = """
UPDATE
sh_dm_fight_records
SET
status = %s,
errinfo = %s,
updata_time = %s
WHERE
id = %s
"""
    # Parameter order follows the placeholders in the statement above.
    params = (new_status, errinfo, int(time.time()), record_id)
    self.cursor.execute(statement, params)
@mysql_retry()
def upsert_video(self, data: dict):
logger.info(fr"DB处理->{data.get('v_xid')},\tlevel->{data.get('level')}")
@ -503,7 +543,7 @@ class DBVidcon:
class DBSA:
FLUSH_EVERY_ROWS = 100 # 行阈值
FLUSH_INTERVAL = 3 # 秒阈值
FLUSH_INTERVAL = 30 # 秒阈值
_buf_op: List[Dict] = []
_buf_vid: List[Dict] = []
@ -604,7 +644,6 @@ class DBSA:
existing_keys = set()
if vid_rows:
# 收集 (v_xid, rn) 对,应与 video 表中的唯一索引匹配
all_keys = list({(row["v_xid"], row["rn"]) for row in vid_rows})
conn = _engine.connect()
try:
@ -624,12 +663,15 @@ class DBSA:
finally:
conn.close()
# 先给日志表的 op_rows 设置 0/11=重复0=不重复
for i, vid_row in enumerate(vid_rows):
key = (vid_row["v_xid"], vid_row["rn"])
if key in existing_keys:
op_rows[i]["is_repeat"] = 1
else:
op_rows[i]["is_repeat"] = 2
op_rows[i]["is_repeat"] = 1 if key in existing_keys else 0
# 再把同样的 is_repeat 值写到 vid_rows以便视频表也能存到 0/1
for i, vid_row in enumerate(vid_rows):
vid_row["is_repeat"] = op_rows[i]["is_repeat"]
vid_row.pop("level", None)
# 以下作者表、日志表和视频表写入逻辑保持不变...
authors_map = {}
@ -653,6 +695,7 @@ class DBSA:
author_rows = list(authors_map.values())
if author_rows:
author_rows.sort(key=lambda x: x["u_xid"])
stmt_author = mysql_insert(video_author).values(author_rows)
upd_author = {
"u_name": stmt_author.inserted.u_name,
@ -665,10 +708,9 @@ class DBSA:
}
ondup_author = stmt_author.on_duplicate_key_update(**upd_author)
try:
with _engine.begin() as conn2:
conn2.execute(ondup_author)
cls._execute_with_deadlock_retry(ondup_author)
except Exception as e:
logger.info(f"[DBSA] 写作者表失败: {e}")
logger.info(f"[DBSA] 写作者表失败(死锁重试后仍未成功): {e}")
try:
cls.push_record_many(payloads)
except Exception as re:
@ -686,10 +728,6 @@ class DBSA:
logger.info("[Redis 回退失败]", re)
return
for vid_row in vid_rows:
vid_row.pop("is_repeat", None)
vid_row.pop("level", None)
if vid_rows:
try:
cls._bulk_upsert(vid_rows)
@ -737,3 +775,22 @@ class DBSA:
daemon=True
)
thread.start()
@classmethod
def _execute_with_deadlock_retry(cls, statement, max_attempts: int = 3, delay: float = 1):
    """Execute ``statement`` in its own transaction, retrying on deadlock.

    Args:
        statement: Executable passed to ``conn.execute``.
        max_attempts: Total number of attempts, including the first one.
        delay: Seconds to sleep between a deadlocked attempt and the next.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        OperationalError: Re-raised when the error is not a deadlock, or
            when the last attempt also deadlocks.
    """
    if max_attempts < 1:
        # Without this guard the loop body would never run and the
        # statement would be skipped silently.
        raise ValueError("max_attempts must be at least 1")

    deadlock_code = 1213  # driver error code that marks a deadlock
    last_attempt = max_attempts - 1
    for attempt in range(max_attempts):
        try:
            with _engine.begin() as conn:
                conn.execute(statement)
            return
        except OperationalError as e:
            # The driver error code, when present, is the first element of
            # the wrapped exception's args.
            driver_args = getattr(e.orig, "args", None) or ()
            code = driver_args[0] if len(driver_args) >= 1 else None
            if code == deadlock_code and attempt < last_attempt:
                time.sleep(delay)
                continue
            # Not a deadlock, or the retry budget is used up: propagate.
            raise

13
main.py
View File

@ -125,7 +125,8 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
"u_pic": iteminfo.get('owner.avatar_60_url')
}
video_list.append(v_data)
time.sleep(3)
if len(video_list) < 100:
break
return video_list
@ -161,10 +162,11 @@ def integrate_data_parallel():
time.sleep(10)
continue
futures = [
executor.submit(search_worker, payload, kitem, flag)
for payload, kitem in tasks
]
futures = []
for payload, kitem in tasks:
futures.append(executor.submit(search_worker, payload, kitem, flag))
time.sleep(1)
rollback = {0: [], 1: [], 2: []}
for fut in concurrent.futures.as_completed(futures):
@ -207,7 +209,6 @@ def integrate_data_parallel():
db.rollback_l1(rollback[1])
if rollback[2]:
db.rollback_l2(rollback[2])
time.sleep(10)
def parse_args() -> argparse.Namespace:

25
report.py Normal file
View File

@ -0,0 +1,25 @@
import time
from DB import DBVidcon, DBSA
from report_video import DailymotionClient
import logger
db = DBVidcon()
d = DailymotionClient()

# Poll the database forever for records to report, one ticket per record.
while True:
    lis = db.get_report_video()
    if not lis:
        # Nothing to report: wait five minutes before polling again.
        time.sleep(60 * 5)
        continue

    for li in lis:
        logger.logger.info(f"name:{li['name_title']},link:{li['link']} ")
        try:
            # process_ticket returns the path of the screenshot taken after
            # submitting; it is stored as a link on the image service.
            info = d.process_ticket(li['name_title'], li['link'])
            db.update_fight_record_status(li['id'], 2, f"http://123.58.197.91:5000/image/{info}")
            db.flush()
            # Leave five minutes between two successful reports.
            time.sleep(5 * 60)
        except Exception as e:
            logger.logger.error(f"ID:{li['id']}, e:{e}")
            db.update_fight_record_status(li['id'], 3, str(e))
            # Flush the failure status the same way the success path does,
            # so the record is not picked up again with status 1.
            # NOTE(review): assumes db.flush() is what persists the update —
            # confirm against DBVidcon.
            db.flush()
            time.sleep(1 * 60)

268
report_video.py Normal file
View File

@ -0,0 +1,268 @@
import time
import functools
import os
from playwright.sync_api import (
sync_playwright,
TimeoutError as PlaywrightTimeoutError,
Page,
Browser,
)
def solve_turnstile_capsolver(page: Page,
                              timeout: int = 120) -> bool:
    """Solve the Cloudflare Turnstile widget on ``page`` through CapSolver.

    Creates a ``TurnstileTaskProxyLess`` task for the widget's site key,
    polls for the result every three seconds, then writes the returned token
    into the page's ``cf-turnstile-response`` textarea and calls
    ``window.turnstileCallback`` when the page defines it.

    Args:
        page: Page that currently shows the Turnstile widget.
        timeout: Upper bound, in seconds of accumulated polling sleep.

    Returns:
        True when a token was obtained and injected; False when no widget is
        found, CapSolver reports an error, or no token arrives in time.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    cap_key = "CAP-A76C932D4C6CCB3CA748F77FDC07D996"

    widget = page.query_selector("div.cf-turnstile[data-sitekey]")
    if not widget:
        return False

    # Imported here because this module's import block does not bring
    # ``requests`` into scope; without it the calls below raise NameError.
    import requests

    sitekey = widget.get_attribute("data-sitekey")
    page_url = page.url
    create_payload = {
        "clientKey": cap_key,
        "task": {
            "type": "TurnstileTaskProxyLess",
            "websiteURL": page_url,
            "websiteKey": sitekey
        }
    }
    create_resp = requests.post(
        "https://api.capsolver.com/createTask",
        json=create_payload, timeout=20
    ).json()
    if create_resp.get("errorId"):
        print("[CapSolver] createTask 失败:", create_resp)
        return False

    task_id = create_resp["taskId"]
    poll_payload = {"clientKey": cap_key, "taskId": task_id}
    token = None
    elapsed, step = 0, 3
    while elapsed < timeout:
        time.sleep(step)
        elapsed += step
        res = requests.post(
            "https://api.capsolver.com/getTaskResult",
            json=poll_payload, timeout=15
        ).json()
        status = res.get("status")
        if status == "ready":
            token = res["solution"]["token"]
            break
        if status != "processing":
            # Any status other than "ready"/"processing" ends the polling.
            print("[CapSolver] getTaskResult 异常:", res)
            return False

    if not token:
        print("[CapSolver] 超时未取到 token")
        return False

    # Hand the token to the page: fill the hidden response field and call
    # the page's callback if it defines one.
    page.evaluate(
        """(tk) => {
const ta = document.querySelector('textarea[name="cf-turnstile-response"]');
if (ta) ta.value = tk;
if (window.turnstileCallback)
try { window.turnstileCallback(tk); } catch(e){}
}""",
        token
    )
    # Short pause after injecting the token before the caller continues.
    page.wait_for_timeout(1500)
    return True
def require_login(func):
    """Decorate a method so ``self.ensure_login()`` runs before every call.

    The wrapped method receives the same positional and keyword arguments
    and its return value is passed through unchanged.
    """
    @functools.wraps(func)
    def _guarded(self, *args, **kwargs):
        self.ensure_login()
        outcome = func(self, *args, **kwargs)
        return outcome

    return _guarded
class DailymotionClient:
    """Browser-automation client for Dailymotion's help-center request form.

    Creating an instance starts Playwright, launches Chromium, opens a single
    page on ``url`` and creates the local ``screenshots`` directory.
    ``process_ticket`` fills in and submits one copyright-infringement
    notification and returns the path of the screenshot taken afterwards.
    """

    url = "https://faq.dailymotion.com/hc/en-us/requests/new"
    # NOTE(review): account credentials are committed in source; consider
    # loading them from configuration instead.
    EMAIL = "copyright@qiyi.com"
    PASSWORD = "ppsIQIYI2018@"

    # Path separators plus characters that common filesystems reject in names.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, headless: bool = True):
        """Start the browser and load the request form.

        Args:
            headless: Passed to ``chromium.launch``.
        """
        self.email = self.EMAIL
        self.password = self.PASSWORD
        self.headless = headless
        # Seconds for which a login-state check result is reused.
        self.check_interval = 60 * 60

        self._pw = sync_playwright().start()
        # For local debugging a proxy can be added to launch(), e.g.
        # proxy={'server': 'http://127.0.0.1:7890'}.
        self.browser: Browser = self._pw.chromium.launch(
            headless=self.headless,
        )
        self.context = self.browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/122.0.0.0 Safari/537.36",
            locale="en-US",
            viewport={"width": 1280, "height": 800},
            timezone_id="Asia/Shanghai",
            permissions=[],
        )
        # Make navigator.webdriver read as undefined on every page.
        self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        self.page: Page = self.context.new_page()

        # A timestamp of 0 forces a real check on the first is_logged_in().
        self._last_check_ts = 0
        self._last_check_result = False

        os.makedirs('screenshots', exist_ok=True)
        self.page.goto(self.url)

    @classmethod
    def _safe_component(cls, text) -> str:
        """Return ``text`` with separators, reserved and control characters replaced by ``_``."""
        return "".join(
            "_" if ch in cls._UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
            for ch in str(text)
        )

    def _screenshot_path(self, title, link) -> str:
        """Build a timestamped screenshot path for a report on ``link``.

        The title and the last ``/``-separated segment of the link are
        sanitised so neither can add directories to the path or produce a
        file name the filesystem rejects.
        """
        safe_title = self._safe_component(title)
        safe_id = self._safe_component(link.split("/")[-1])
        return f'screenshots/{str(int(time.time()))}_{safe_title}_{safe_id}.png'

    def _solve_turnstile_if_present(self) -> None:
        """Solve a Turnstile widget on the current page, if there is one.

        Raises:
            RuntimeError: If ``solve_turnstile_capsolver`` returns False.
        """
        if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
            ok = solve_turnstile_capsolver(self.page)
            if not ok:
                raise RuntimeError("CapSolver 处理 Turnstile 失败")

    def _do_login(self) -> None:
        """Open the form page and sign in when a login link is shown."""
        self.page.goto(self.url, timeout=30000)
        self.page.wait_for_load_state("networkidle", timeout=30000)
        file_path = f'screenshots/{str(int(time.time()))}.png'
        self.page.screenshot(path=file_path)

        self._solve_turnstile_if_present()

        logbtn = self.page.locator("//a[@class='login button']")
        if logbtn.count() > 0:
            logbtn.nth(0).click()
            self.page.wait_for_selector("//input[@data-testid=\"emailInput\"]")

            # Dismiss the "I understand" dialog when it is shown.
            i_now_btn = self.page.locator("button:has-text(\"I understand\")")
            if i_now_btn.count() > 0:
                i_now_btn.click()

            # Enter e-mail and password.
            email_edit = self.page.locator("//input[@data-testid=\"emailInput\"]")
            password_edit = self.page.locator("//input[@data-testid=\"passwordInput\"]")
            if email_edit.count():
                email_edit.fill(self.email)
            if password_edit.count():
                password_edit.fill(self.password)

            # Submit; wait up to 20 s for the button to become enabled and
            # click regardless if it does not.
            login_btn = self.page.locator('button[form="signin-form"][type="submit"]')
            try:
                self.page.wait_for_selector(
                    'button[form="signin-form"][type="submit"]:not([disabled])', timeout=20000
                )
            except PlaywrightTimeoutError:
                pass
            login_btn.click()

            # Wait for the redirect back to the request form.
            self.page.wait_for_url(self.url, timeout=30000)
            time.sleep(1)

        self._last_check_ts = time.time()
        self._last_check_result = True

    def _detect_login(self) -> bool:
        """Reload the form page and report whether the login link is absent."""
        self.page.goto(self.url, timeout=30000)
        self.page.wait_for_load_state("networkidle", timeout=30000)
        return self.page.locator("//a[@class='login button']").count() == 0

    def is_logged_in(self) -> bool:
        """Return the login state, reusing the last result within ``check_interval``.

        Any exception raised while detecting the state counts as "not logged in".
        """
        now = time.time()
        if now - self._last_check_ts < self.check_interval:
            return self._last_check_result

        try:
            ok = self._detect_login()
        except Exception:
            ok = False

        self._last_check_ts = now
        self._last_check_result = ok
        return ok

    def ensure_login(self) -> None:
        """Log in when ``is_logged_in`` reports False."""
        if not self.is_logged_in():
            self._do_login()

    @require_login
    def process_ticket(self, title, link):
        """Submit one copyright notification and return the final screenshot path.

        Args:
            title: Title inserted into the description text and the
                screenshot file name.
            link: URL of the reported video.

        Returns:
            Path of the screenshot taken after the form was submitted.

        Raises:
            RuntimeError: If a Turnstile widget is present and cannot be solved.
        """
        # NOTE(review): the notice text is missing spaces ("actionto",
        # "notre-posted", "Pleaseinform"); left unchanged because it is the
        # text sent to Dailymotion.
        description = (
            "We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\n"
            "1) please help remove these videos\n"
            "2) The drama series titles are {}\n"
        ).format(title)

        self._solve_turnstile_if_present()

        # Screenshot of the page before the form is filled in.
        file_path = self._screenshot_path(title, link)
        self.page.screenshot(path=file_path)

        # Open the form behind the eighth item of the category list.
        resports = self.page.locator('li.blocks-item:nth-child(8)')
        resports.click()
        time.sleep(2)

        cc = self.page.locator("input#request_collaborators_")
        cc.scroll_into_view_if_needed()
        cc.click()
        cc.type("duke.chen@dailymotion.com")

        self.page.get_by_role("button", name="Copyright infringement").click()
        time.sleep(1)
        self.page.get_by_role("button", name="Notification").nth(0).click()
        time.sleep(1)
        self.page.get_by_role("button", name="A legal entity").click()
        time.sleep(1)

        # Plain text fields, filled one second apart.
        text_fields = (
            ("Corporate name", "Beijing iQIYI Science & Technology Co.,Ltd"),
            ("Legal status", "Legal Department"),
            ("Subject", "Copyright infringement Notification"),
            ("Please indicate the URL of the video you would like to report.", link),
        )
        for label, value in text_fields:
            self.page.get_by_label(label).fill(value)
            time.sleep(1)

        self.page.get_by_label("Description").nth(1).fill(description)
        time.sleep(1)

        # Declarations located by label text.
        for label in (
            "I state in good faith",
            "I state in good faith that the use of the Protected",
        ):
            self.page.get_by_label(label, exact=False).check()
            time.sleep(1)

        # Declarations located by checkbox role and accessible name.
        for name in (
            "I certify that all information provided",
            "I acknowledge that my statements",
            "The data provided through this form",
            "By submitting the present form,",
        ):
            self.page.get_by_role("checkbox", name=name, exact=False).check()
            time.sleep(1)

        self.page.get_by_role("button", name="Submit").click()
        time.sleep(2)

        # Screenshot of the page after submitting; this path is returned.
        file_path = self._screenshot_path(title, link)
        self.page.screenshot(path=file_path)

        # Return to the blank form so the next ticket starts from it.
        if self.page.url != self.url:
            self.page.goto(self.url, timeout=30000)
        return file_path

    def close(self):
        """Close the page, the browser and Playwright, ignoring errors from each."""
        try:
            self.page.close()
        except Exception:
            pass
        try:
            self.browser.close()
        except Exception:
            pass
        try:
            self._pw.stop()
        except Exception:
            pass
if __name__ == "__main__":
    # Manual run: file one report for the title and link given on the
    # command line. process_ticket requires both arguments.
    import sys

    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <title> <video-link>")

    dm = DailymotionClient()
    try:
        # Print the path of the screenshot taken after submitting.
        print(dm.process_ticket(sys.argv[1], sys.argv[2]))
    finally:
        dm.close()

View File

@ -1,10 +1,66 @@
import time
import requests
import json
import redis
import requests
import urllib3
from matplotlib.artist import allow_rasterization
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from typing import Optional, Dict, Any, Union
class HttpClient:
    """Thin wrapper around a ``requests.Session`` with retrying adapters.

    Every request goes through ``request``, which raises for HTTP error
    statuses and prints a message before re-raising request failures.
    """

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Create the session and mount a retrying adapter for both schemes.

        Args:
            max_retries: Passed to ``Retry`` as ``total``.
            backoff_factor: Passed to ``Retry`` as ``backoff_factor``.
        """
        # Retry policy applied to the status codes listed below.
        retry_policy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[500, 502, 503, 504, 429]
        )
        retrying_adapter = HTTPAdapter(max_retries=retry_policy)

        self.session = requests.Session()
        for scheme in ("http://", "https://"):
            self.session.mount(scheme, retrying_adapter)

    def request(self,
                method: str,
                url: str,
                headers: Optional[Dict] = None,
                params: Optional[Dict] = None,
                data: Optional[Union[Dict, str]] = None,
                cookies: Optional[Dict] = None,
                allow_redirects: bool = True,
                timeout: int = 30,
                **kwargs) -> requests.Response:
        """Send a request through the session and return the response.

        Raises:
            requests.exceptions.RequestException: Re-raised after printing,
                including the error raised for an HTTP error status.
        """
        try:
            resp = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                data=data,
                cookies=cookies,
                allow_redirects=allow_redirects,
                timeout=timeout,
                **kwargs
            )
            resp.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {url}, 错误: {str(e)}")
            raise
        return resp

    def get(self, url: str, **kwargs) -> requests.Response:
        """Send a GET request via ``request``."""
        return self.request("GET", url, **kwargs)

    def post(self, url: str, **kwargs) -> requests.Response:
        """Send a POST request via ``request``."""
        return self.request("POST", url, **kwargs)
# 创建全局的 HTTP 客户端实例
http_client = HttpClient()
session = requests.Session()
_REDIS_CONF = {
"host": "192.144.230.75",
"port": 6379,
@ -32,151 +88,197 @@ def get_report_token(key_name: str):
def login():
    """Request an OAuth token with the password grant and store it.

    Posts the credentials to the Dailymotion token endpoint through
    ``http_client``, saves the record under the ``'token'`` key with
    ``save_report_token`` and returns that record.

    Returns:
        Dict with ``update_time``, ``username``, ``password`` and ``token``
        (the decoded JSON body of the token response).

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    try:
        # NOTE(review): credentials and client secret are hard-coded in source.
        username = "copyright@qiyi.com"
        password = "ppsIQIYI2018@"

        url = "https://graphql.api.dailymotion.com/oauth/token"
        request_headers = {
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "https://www.dailymotion.com",
            "Pragma": "no-cache",
            "Referer": "https://www.dailymotion.com/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
            "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\""
        }
        form = {
            "client_id": "f1a362d288c1b98099c7",
            "client_secret": "eea605b96e01c796ff369935357eca920c5da4c5",
            "grant_type": "password",
            "username": username,
            "password": password,
            "scope": "userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos",
            "version": "2",
            "traffic_segment": "962042",
            "visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
        }
        response = http_client.post(url, headers=request_headers, data=form)

        # Record stored for later use, stamped with the time it was obtained.
        record = {
            "update_time": int(time.time()),
            "username": username,
            "password": password,
            "token": response.json()
        }
        save_report_token('token', record)
        return record
    except Exception as e:
        print(f"登录失败: {str(e)}")
        raise
def refresh_token(access_token, refresh_token):
headers = {
"Accept": "*/*",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/x-www-form-urlencoded",
"Content-Length": "0",
"Origin": "https://www.dailymotion.com",
"Pragma": "no-cache",
"Referer": "https://www.dailymotion.com/",
"Referer": "https://www.dailymotion.com/signin?urlback=%2Fzendesk%3Ftimestamp%3D1748932650%26return_to%3Dhttps%253A%252F%252Ffaq.dailymotion.com%252Fhc%252Fen-us%252Frequests%252Fnew",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-site",
"Sec-Fetch-Site": "same-origin",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\""
}
url = "https://graphql.api.dailymotion.com/oauth/token"
data = {
"client_id": "f1a362d288c1b98099c7",
"client_secret": "eea605b96e01c796ff369935357eca920c5da4c5",
"grant_type": "password",
"username": "copyright@qiyi.com",
"password": "ppsIQIYI2018@",
"scope": "userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos",
"version": "2",
"traffic_segment": "962042",
"visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
}
response = session.post(url, headers=headers, data=data)
data = {
"update_time": int(time.time()),
"username": "copyright@qiyi.com",
"password": "ppsIQIYI2018@",
"token": response.json()
}
save_report_token('token', data)
return data
def get_cookies(access_token: str, refresh_token: str):
cookies = {
"dmvk": "683e982c34e34",
"ts": "133696",
"v1st": "a847389a-6b91-4157-948f-457666f7172b",
"ff": "on",
"lang": "zh_CN",
"usprivacy": "1---",
"dmaid": "73ca37e4-6858-46c1-aac4-a4a5fc9a270e",
"cookie_policy_closed": "1",
"access_token": access_token,
"refresh_token": refresh_token,
}
url = "https://www.dailymotion.com/cookie/refresh_token"
session.post(url, cookies=cookies, allow_redirects=True)
response = http_client.post(url, headers=headers, cookies=cookies)
def get_cookies1(access_token: str, refresh_token: str):
"""302 跳转"""
cookies = {
"access_token": access_token,
"refresh_token": refresh_token,
def zendesk():
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Pragma": "no-cache",
"Referer": "https://www.dailymotion.com/sg",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\""
}
url = "https://www.dailymotion.com/zendesk"
params = {
"return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
"timestamp": str(int(time.time())),
"timestamp": str(time.time()),
}
session.get(url, cookies=cookies, params=params, allow_redirects=True)
cookies_dict = {"update_time": int(time.time()), "cookies": session.cookies.get_dict()}
save_report_token('cookies', cookies_dict)
return cookies_dict
response = http_client.get(url, headers=headers, params=params, allow_redirects=True)
data = http_client.session.cookies.get_dict()
data['update_time'] = int(time.time())
save_report_token('cookies', data)
def get_csrftoken():
url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
response = session.get(url)
data = {"update_time": int(time.time()), "csrf_token": response.json()}
save_report_token('csrf_token', data)
return data
try:
url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Pragma": "no-cache",
"Referer": "https://www.dailymotion.com/sg",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\""
}
response = http_client.get(url, headers=headers)
data = {"update_time": int(time.time()), "csrf_token": response.json()}
save_report_token('csrf_token', data)
return data
except Exception as e:
print(f"获取 CSRF token 失败: {str(e)}")
raise
def report(csrf_token:str, cookies:dict, ):
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'zh-CN,zh;q=0.9',
'cache-control': 'no-cache',
'content-type': 'application/x-www-form-urlencoded',
'origin': 'https://faq.dailymotion.com',
'pragma': 'no-cache',
'priority': 'u=0, i',
'referer': 'https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048',
'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
}
data = {
'utf8': '',
'authenticity_token': csrf_token,
'request[ticket_form_id]': '136048',
'request[collaborators][]': 'duke.chen@dailymotion.com',
'request[custom_fields][360008684839]': '__dc.copyright_user_protection_-_copyright__',
'request[custom_fields][30150188]': 'copyrightform-notification',
'request[custom_fields][25089567]': 'legal_entity',
'request[custom_fields][25159868]': 'Beijing iQIYI Science & Technology Co.,Ltd',
'request[custom_fields][4869133282962]': 'Legal Department',
'request[subject]': 'Copyright infringement Notification',
'request[custom_fields][25613698]': 'url',
'request[description]': 'We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are 片名\r\n',
'request[description_mimetype]': 'text/plain',
'request[custom_fields][4769880845586]': [
'off',
'on',
],
'request[custom_fields][25626417]': [
'off',
'on',
],
'request[custom_fields][4769797363346]': [
'off',
'on',
],
'request[custom_fields][25159848]': [
'off',
'on',
],
'request[custom_fields][4769658191250]': [
'off',
'on',
],
}
response = requests.post('https://faq.dailymotion.com/hc/en-us/requests', cookies=cookies, headers=headers, data=data)
def report(csrf_token: str, v_url, title):
    """Submit a copyright-infringement request for one video.

    Args:
        csrf_token: Value sent as the form's ``authenticity_token``.
        v_url: URL of the reported video.
        title: Title quoted inside the request description.

    Returns:
        True when the response status code is 200, otherwise False.

    Raises:
        Exception: Any failure while sending is printed and re-raised.
    """
    try:
        headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "content-type": "application/x-www-form-urlencoded",
            "origin": "https://faq.dailymotion.com",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048",
            "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
        }
        url = "https://faq.dailymotion.com/hc/en-us/requests"
        # Field names are written unencoded: requests form-encodes the dict
        # itself, so pre-encoded names ("request%5B...") would be encoded a
        # second time and no longer match the form's fields.
        data = {
            "utf8": "",
            "authenticity_token": csrf_token,
            "request[ticket_form_id]": "136048",
            "request[collaborators][]": "duke.chen@dailymotion.com",
            "request[custom_fields][360008684839]": "__dc.copyright_user_protection_-_copyright__",
            "request[custom_fields][30150188]": "copyrightform-notification",
            "request[custom_fields][25089567]": "legal_entity",
            "request[custom_fields][25159868]": "Beijing iQIYI Science & Technology Co.,Ltd",
            "request[custom_fields][4869133282962]": "Legal Department",
            "request[subject]": "Copyright infringement Notification",
            "request[custom_fields][25613698]": v_url,
            "request[description]": f"We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are \"{title}\"\r\n",
            "request[description_mimetype]": "text/plain",
            "request[custom_fields][4769880845586]": "on",
            "request[custom_fields][25626417]": "on",
            "request[custom_fields][4769797363346]": "on",
            "request[custom_fields][25159848]": "on",
            "request[custom_fields][4769658191250]": "on"
        }
        # Send through the shared session so its cookies accompany the
        # request; a bare requests.post() would send none. The session is
        # used directly (not http_client.post) so a non-200 status still
        # yields False instead of raising.
        response = http_client.session.post(url, headers=headers, data=data)
        print(response.status_code)
        print(response.text)
        print(response)
        return response.status_code == 200
    except Exception as e:
        print(f"提交报告失败: {str(e)}")
        raise
def prepare_data():
token = get_report_token('token')
cookies = get_report_token('cookies')
csrf_token = get_report_token('csrf_token')
max_update_time = max(d.get('update_time', 0) for d in (token, cookies, csrf_token) if d)
if max_update_time + (24 * 60 * 60) < time.time():
token = get_report_token('token')
access_token = token['token']['access_token']
refresh_token = token['token']['refresh_token']
get_cookies(access_token, refresh_token)
get_cookies1(access_token, refresh_token)
csrf_token = get_csrftoken()
report(csrf_token['csrf_token']['current_session']['csrf_token'], cookies['cookies'])
if __name__ == '__main__':
    # Manual run: load the stored cookies into the shared session, fetch a
    # CSRF token and submit one sample report.
    cookies = get_report_token('cookies')['cookies']
    http_client.session.cookies = requests.utils.cookiejar_from_dict(cookies)
    csrf_token = get_csrftoken()['csrf_token']['current_session']['csrf_token']
    # Keyword arguments keep the URL and the title in the right parameters;
    # report() takes the URL before the title.
    report(
        csrf_token,
        v_url='https://www.dailymotion.com/video/x8kjx7v',
        title='Hunter X Hunter',
    )

35
screenshots_flask.py Normal file
View File

@ -0,0 +1,35 @@
from flask import Flask, send_file, abort, request, jsonify
from pathlib import Path
app = Flask(__name__)
PROJECT_ROOT = Path(__file__).parent.resolve()
SCREENSHOTS_DIR = Path("/opt/ql/daily_com/bin/screenshots").resolve()
@app.route('/image/screenshots/<path:filename>')
def serve_image(filename):
    """Serve one file from ``SCREENSHOTS_DIR``.

    Args:
        filename: Path below the screenshots directory, taken from the URL.

    Returns:
        The file, sent inline (not as an attachment).

    Aborts with 403 when the resolved path lies outside ``SCREENSHOTS_DIR``
    and with 404 when it is not an existing regular file.
    """
    # Resolve once so the containment check, the existence check and the
    # file that is sent all refer to the same path.
    resolved = (SCREENSHOTS_DIR / filename).resolve()

    # Refuse paths that escape the screenshots directory.
    try:
        resolved.relative_to(SCREENSHOTS_DIR.resolve())
    except ValueError:
        abort(403, description=f"禁止访问目录外文件: {resolved}")

    # is_file() rather than exists(): a directory exists but cannot be sent.
    if not resolved.is_file():
        abort(404, description=f"文件不存在: {resolved}")

    return send_file(resolved, as_attachment=False)
# Custom response for 404 errors
@app.errorhandler(404)
def handle_404(e):
    """Return the error's description as a plain-text 404 response."""
    body = f"404 错误:{e.description}"
    return body, 404
# Custom response for 403 errors
@app.errorhandler(403)
def handle_403(e):
    """Return the error's description as a plain-text 403 response."""
    body = f"403 错误:{e.description}"
    return body, 403
if __name__ == '__main__':
    # Serve on all interfaces, port 5000, with debug mode off.
    app.run(port=5000, host='0.0.0.0', debug=False)

46
test2.py Normal file
View File

@ -0,0 +1,46 @@
import json
import os

import requests
# Request headers sent with the form submission below. The origin/referer
# values point at the faq.dailymotion.com "new request" page and the
# user-agent / sec-ch-ua values identify as Microsoft Edge 136 on Windows.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "no-cache",
    "content-type": "application/x-www-form-urlencoded",
    "origin": "https://faq.dailymotion.com",
    "pragma": "no-cache",
    "priority": "u=0, i",
    "referer": "https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048",
    "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
}
# Session cookies are credentials (OAuth access/refresh tokens and help-center
# session ids), so they must not be committed to the repository. Provide them
# at run time as a JSON object in the DAILYMOTION_COOKIES environment
# variable, for example:
#   export DAILYMOTION_COOKIES='{"access_token": "...", "refresh_token": "..."}'
# When the variable is unset or empty the request is sent without cookies.
cookies = json.loads(os.environ.get("DAILYMOTION_COOKIES") or "{}")
# Endpoint that receives the submitted help-center request form.
url = "https://faq.dailymotion.com/hc/en-us/requests"
# Form fields of the copyright-notification request.
# Keys use literal brackets: `requests` form-encodes a dict passed as `data=`,
# so keys that are already percent-encoded ("request%5B...%5D") would be
# encoded a second time ("%255B") and would no longer match the field names.
# NOTE(review): authenticity_token is tied to one browser session and will
# presumably need to be refreshed before each run -- confirm.
data = {
    "utf8": "",
    "authenticity_token": "hc:requests:client:81RVgWn01YdQitELJBNZPesWKwaUQK0SXIaYjcdPMN9v9Y4im9p4kokDa693mVxFM_dwndUXVvlNw_jutz_iNQ",
    "request[ticket_form_id]": "136048",
    "request[collaborators][]": "duke.chen@dailymotion.com",
    "request[custom_fields][360008684839]": "__dc.copyright_user_protection_-_copyright__",
    "request[custom_fields][30150188]": "copyrightform-notification",
    "request[custom_fields][25089567]": "legal_entity",
    "request[custom_fields][25159868]": "Beijing iQIYI Science & Technology Co.,Ltd",
    "request[custom_fields][4869133282962]": "Legal Department",
    "request[subject]": "Copyright infringement Notification",
    "request[custom_fields][25613698]": "https://www.dailymotion.com/video/x925b9i",
    "request[description]": "We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are \"UK street food showdown: Which UK city has the best street cuisine?\"\r\n",
    "request[description_mimetype]": "text/plain",
    "request[custom_fields][4769880845586]": "on",
    "request[custom_fields][25626417]": "on",
    "request[custom_fields][4769797363346]": "off",
    "request[custom_fields][25159848]": "on",
    "request[custom_fields][4769658191250]": "on",
}
# Submit the form. The explicit timeout (seconds) keeps the script from
# hanging forever if the server never answers; requests applies no timeout
# unless one is given.
response = requests.post(url, headers=headers, cookies=cookies, data=data, timeout=30)
# Dump the response body, then the response object itself (shows the status).
print(response.text)
print(response)