Compare commits

1 commit: 12915c13a5
@@ -1,38 +0,0 @@
-from DB import DBVidcon
-import requests
-from logger import logger
-
-db = DBVidcon()
-
-
-def check_video_removed(video_id):
-    url = f"https://api.dailymotion.com/video/{video_id}"
-    params = {"fields": "published,private,status"}
-    resp = requests.get(url, params=params, timeout=10)
-
-    # 404 -> does not exist or has already been deleted
-    if resp.status_code == 404:
-        return 1
-
-    data = resp.json()
-    # published=False or private=True both count as "taken down"
-    if not data.get("published", False) or data.get("private", False):
-        return 1
-
-    return 0
-
-
-def main():
-    lis = db.getreport_video()
-    for li in lis:
-        video_id = li['v_xid']
-        status = check_video_removed(video_id)
-        if status == 1:
-            db.mark_video_removed(li['id'], status)
-            logger.info(f"Video id {video_id} has been taken down")
-        else:
-            db.mark_video_removed(li['id'], status)
-            logger.info(f"Video id {video_id} is still live")
-
-
-if __name__ == '__main__':
-    main()
@@ -1,6 +1,6 @@
 import json, time
 import argparse
-from DB import DBVidcon
+from DB import DBVidcon, DBSA
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -14,8 +14,10 @@ def main():
     args = parse_args()
     batch = int(time.time())
     db = DBVidcon()
-    push = None
-    empty = None
+    for chunk in DBSA.stream_video_keys(chunk_size=10_000):
+        db.cache_video_keys_bulk(chunk)
+        print(f"Synced to Redis == {len(chunk)}")
 
     if args.level == 0:
         push = db.push_l0
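Note: stream_video_keys and cache_video_keys_bulk are only named in the hunk above. A minimal sketch of the intent (stream keys from the database in fixed-size chunks, load each chunk into Redis in one round trip), assuming SQLAlchemy and redis-py; the DSN, table name, set name, and key format are illustrative assumptions:

from sqlalchemy import create_engine, text
import redis

engine = create_engine("mysql+pymysql://user:pass@host/db")  # hypothetical DSN
r = redis.Redis()


def stream_video_keys(chunk_size=10_000):
    # Stream (v_xid, rn) pairs with a server-side cursor instead of loading the whole table.
    with engine.connect() as conn:
        result = conn.execution_options(stream_results=True).execute(
            text("SELECT v_xid, rn FROM video"))  # hypothetical table name
        while True:
            rows = result.fetchmany(chunk_size)
            if not rows:
                break
            yield [f"{xid}:{rn}" for xid, rn in rows]


def cache_video_keys_bulk(chunk):
    # One pipelined SADD per chunk keeps Redis round trips to one per 10k keys.
    with r.pipeline() as pipe:
        pipe.sadd("video_keys", *chunk)  # the "video_keys" set name is an assumption
        pipe.execute()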
@@ -1,40 +0,0 @@
-import json, time
-import argparse
-from DB import DBVidcon
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description="Dump keyword/title rows into Redis list."
-    )
-    parser.add_argument("-l", "--level", type=int, default=99,
-                        help="value for t.level (default: 99)")
-    return parser.parse_args()
-
-
-def main():
-    batch = int(time.time())
-    db = DBVidcon()
-    push = db.push_web
-    empty = db.web_empty
-
-    if empty():
-        rows = db.fetch_keyword_title(level=0)
-        payload_list = []
-        for row in rows:
-            payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False))
-            if len(payload_list) >= 10000:
-                push(payload_list)
-                payload_list.clear()
-        if payload_list:  # flush the remainder
-            push(payload_list)
-
-        data = {
-            "level": 0,
-            "batch": batch,
-            "count": len(rows),
-        }
-        db.log_batch_start(data)
-        print(f"✔ Pushed {len(rows)} rows (batch={batch}) to the {push.__name__} queue")
-    db.close()
-
-
-if __name__ == "__main__":
-    main()
@@ -1,57 +0,0 @@
-import requests
-from flask import Flask, request, jsonify
-from DB import DBVidcon
-
-app = Flask(__name__)
-
-endpoint = "https://api.dailymotion.com/videos"
-DEFAULT_PAGE = 1
-FIXED_LIMIT = 100
-VALID_SORTS = {
-    'recent', 'relevance', 'alpha', 'alphaaz',
-    'alphaza', 'most', 'least', 'changed'
-}
-
-db = DBVidcon()
-
-@app.route("/get", methods=["GET"])
-def get_videos():
-    keyword = request.args.get("keyword", "").strip()
-    if not keyword:
-        return jsonify({"status": "error", "msg": "the keyword parameter must not be empty"}), 400
-
-    # page and country parameters
-    i = request.args.get("page", DEFAULT_PAGE, type=int)
-    rn = request.args.get("rn", "US").upper()
-
-    # the sort parameter must be one of the allowed values
-    sort = request.args.get("sort", "relevance").strip().lower()
-    if sort not in VALID_SORTS:
-        return jsonify({
-            "status": "error",
-            "msg": f"invalid sort value; allowed values: {sorted(VALID_SORTS)}"
-        }), 400
-
-    proxy_string = db.get_proxy(rn)
-    proxies = {"http": proxy_string, "https": proxy_string} if proxy_string else None
-
-    params = {
-        "search": keyword,
-        "fields": "id,title,created_time,thumbnail_240_url,duration,"
-                  "owner.id,owner.screenname,likes_total,views_total",
-        "limit": FIXED_LIMIT,
-        "page": i,
-        "sort": sort
-    }
-
-    try:
-        resp = requests.get(endpoint, params=params, proxies=proxies, timeout=10)
-        resp.raise_for_status()
-        jd = resp.json()
-        return jsonify(jd), 200
-    except requests.exceptions.RequestException as e:
-        return jsonify({"status": "error", "msg": str(e)}), 502
-
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=8000, debug=False)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-# app.py
-import requests
-from flask import Flask, jsonify, abort
-
-app = Flask(__name__)
-
-
-def check_video_removed(video_id):
-    """
-    Call the Dailymotion API to decide whether a video has been taken down or deleted.
-    Returns:
-        1 -> deleted / missing / taken down / set to private
-        0 -> publicly available
-    """
-    url = f"https://api.dailymotion.com/video/{video_id}"
-    params = {"fields": "published,private,status"}
-    try:
-        resp = requests.get(url, params=params, timeout=10)
-    except requests.RequestException as exc:
-        # On network errors return 503 so the caller knows to retry
-        abort(503, description=f"Upstream request failed: {exc}")
-
-    # 404 -> does not exist or has been deleted
-    if resp.status_code == 404:
-        return 1
-
-    # Any other non-2xx status code is passed straight through to the client
-    if resp.status_code // 100 != 2:
-        abort(resp.status_code, description=resp.text)
-
-    data = resp.json()
-    # published=False or private=True both count as "taken down"
-    if not data.get("published", False) or data.get("private", False):
-        return 1
-
-    return 0
-
-
-@app.route("/video/<video_id>", methods=["GET"])
-def video_status(video_id):
-    removed = check_video_removed(video_id)
-    return jsonify({"video_id": video_id, "removed": removed})
-
-
-if __name__ == "__main__":
-    # The listen address can be overridden via an environment variable
-    import os
-    host = os.getenv("HOST", "0.0.0.0")
-    port = 5100
-    app.run(host=host, port=port, debug=False)
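Note: once this service is running, checking a video reduces to one GET against the route defined above (the route and port 5100 come from the file; the video id below is a placeholder):

import requests

resp = requests.get("http://127.0.0.1:5100/video/x8example", timeout=15)  # x8example is a placeholder id
print(resp.status_code, resp.json())  # e.g. 200 {"removed": 0, "video_id": "x8example"}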
kill_main.sh
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-
-# -------- adjust as needed --------
-TARGET="/opt/ql/DailyMotion/main.py"  # match string: the exact script path is enough
-SIG="9"                               # signal; defaults to -9, change to 15 for a gentler stop
-# --------------------------------
-
-pids=$(pgrep -f "$TARGET")
-
-if [ -z "$pids" ]; then
-    echo "No running $TARGET process found"
-    exit 0
-fi
-
-echo "About to send SIG${SIG:-15} to processes: $pids"
-kill "-${SIG:-15}" $pids
-
-echo "Done"
main.py
@@ -1,4 +1,3 @@
-#!/opt/ql/daily_com/bin/python3
 import base64
 import traceback
 import argparse
@@ -56,15 +55,15 @@ def format_duration(seconds):
         return "00:00"
 
 
-def get_searchInfo(keyword, level, headers, proxy_name, r=2):
+def get_searchInfo(keyword, level, rn, proxy_name, r=2):
     if r == 2:
         logger.info(f"NET processing->{keyword},\trn->{proxy_name},\tlevel->{level}")
     video_list = []
-    max_page = 3
-    limit = 100
+    max_page = 2
+    limit = 30
     endpoint = 'https://api.dailymotion.com/videos'
     if level == 0 or level == 1:
-        max_page = 4
+        max_page = 3
         limit = 100
     for j in range(1, max_page):
         params = {
@@ -88,7 +87,7 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
             logger.exception(f"[Requested] unknown: {e}, keyword: {keyword}, l: {level}")
         else:
             time.sleep((3 - r) * 5)
-            return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
+            return get_searchInfo(keyword, level, rn, proxy_name, r - 1)
         try:
             resinfo = jsondata.get("list")
         except Exception:
@@ -98,7 +97,7 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
                 return None
             else:
                 time.sleep((3 - r) * 5)
-                return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
+                return get_searchInfo(keyword, level, rn, proxy_name, r - 1)
         for index, iteminfo in enumerate(resinfo):
             calculated_index = index + 1 + (j - 1) * limit
             xid = iteminfo["id"]
@@ -106,6 +105,10 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
             uxid = iteminfo["owner.id"]
             uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
             duration = iteminfo.get('duration')
+            is_repeat = 0
+            if db.video_key_exists(vid.strip(), rn):
+                is_repeat = 1
+
             if duration <= 300:
                 continue
             v_data = {
@@ -123,9 +126,11 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
                 "u_id": uid,
                 "u_xid": uxid,
                 "u_name": iteminfo.get('owner.screenname'),
-                "u_pic": iteminfo.get('owner.avatar_60_url')
+                "u_pic": iteminfo.get('owner.avatar_60_url'),
+                "is_repeat": is_repeat,
             }
             video_list.append(v_data)
+        time.sleep(3)
         if len(video_list) < 100:
             break
     return video_list
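Note: the new is_repeat flag depends on db.video_key_exists, which is not shown in this diff. A minimal sketch of what that lookup could be, assuming the keys were loaded into a Redis set as in the earlier sync loop (the set name and "xid:rn" member format are assumptions):

import redis

r = redis.Redis()

def video_key_exists(v_xid: str, rn: str) -> bool:
    # Membership test against the set loaded by cache_video_keys_bulk.
    return bool(r.sismember("video_keys", f"{v_xid}:{rn}"))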
@@ -137,11 +142,11 @@ proxiesdict = db.get_proxy_agent_dict()
 def search_worker(payload, kitem, flag):
     try:
         gproxies = proxiesdict[kitem['rn']]
-        v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
+        v_list = get_searchInfo(kitem['keyword'], kitem['level'], kitem['rn'], gproxies)
         if not v_list:
             for i in range(2):
                 time.sleep(i * 5)
-                v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
+                v_list = get_searchInfo(kitem['keyword'], kitem['level'], kitem['rn'], gproxies)
                 if v_list:
                     break
                 time.sleep(2)
@@ -163,11 +168,10 @@ def integrate_data_parallel():
             time.sleep(10)
             continue
 
-        futures = []
-        for payload, kitem in tasks:
-            futures.append(executor.submit(search_worker, payload, kitem, flag))
-            time.sleep(1)
+        futures = [
+            executor.submit(search_worker, payload, kitem, flag)
+            for payload, kitem in tasks
+        ]
 
         rollback = {0: [], 1: [], 2: []}
 
         for fut in concurrent.futures.as_completed(futures):
@@ -178,8 +182,6 @@ def integrate_data_parallel():
                 continue
 
             for item in v_list:
-                if not v_list:
-                    continue
                 DBSA.upsert_video({
                     "keyword": kitem["keyword"],
                     "v_name": kitem["v_name"],
@@ -202,6 +204,7 @@ def integrate_data_parallel():
                     "batch": kitem["batch"],
                     "machine_id": MACHINE_ID,
                     "level": kitem["level"],
+                    "is_repeat": item['is_repeat']
                 })
             DBSA.flush()
         if rollback[0]:
@@ -210,6 +213,7 @@ def integrate_data_parallel():
             db.rollback_l1(rollback[1])
         if rollback[2]:
             db.rollback_l2(rollback[2])
+        time.sleep(10)
 
 
 def parse_args() -> argparse.Namespace:
@@ -98,9 +98,9 @@ def fetch_all_data_for_rn(rn: str, batches: list[int]) -> pd.DataFrame:
 
 def export_all():
     # batches to process
-    batches = [1748965168, 1749049335]
+    batches = [1747324254, 1747323990]
     # refresh is_repeat first
-    # update_is_repeat(batches)
+    update_is_repeat(batches)
 
     rn_list = get_rn_list()
     timestamp = datetime.now().strftime("%Y%m%d")
oneget.py
@@ -1,684 +0,0 @@
-import argparse
-import base64
-from datetime import datetime
-import concurrent.futures
-import requests
-import uuid
-import random
-import time
-import copy
-from threading import Lock
-from DB import DBVidcon, DBSA
-import json
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-from dateutil import parser as date_parser
-
-MACHINE_ID = 0
-db = DBVidcon()
-proxiesdict = db.get_proxy_agent_dict()
-
-
-class RetryRequests:
-    def __init__(
-            self,
-            proxies: dict = None,
-            timeout: int = 10,
-            total: int = 3,
-            backoff_factor: float = 1.0,
-            status_forcelist: tuple = (500, 502, 503, 504),
-            allowed_methods: tuple = ("GET", "POST"),
-    ):
-        self.session = requests.Session()
-        self.timeout = timeout
-        self.proxies = proxies
-
-        retry = Retry(
-            total=total,
-            backoff_factor=backoff_factor,
-            status_forcelist=status_forcelist,
-            allowed_methods=allowed_methods,
-            raise_on_status=False
-        )
-        adapter = HTTPAdapter(max_retries=retry)
-        self.session.mount("http://", adapter)
-        self.session.mount("https://", adapter)
-
-    def get(self, url, **kwargs):
-        kwargs.setdefault("timeout", self.timeout)
-        if self.proxies:
-            kwargs.setdefault("proxies", self.proxies)
-        return self.session.get(url, **kwargs)
-
-    def post(self, url, **kwargs):
-        kwargs.setdefault("timeout", self.timeout)
-        if self.proxies:
-            kwargs.setdefault("proxies", self.proxies)
-        return self.session.post(url, **kwargs)
-
-
-req = RetryRequests()
-
-
-def clean_dash_to_zero(val):
-    if val in ('-', '', None):
-        return 0
-    try:
-        return int(val)
-    except (ValueError, TypeError) as e:
-        print(f"[field error] val = {val} → {str(e)}")
-        return 0
-
-
-def format_create_time(timestr):
-    try:
-        dt = date_parser.isoparse(timestr)
-        return dt.strftime("%Y-%m-%d %H:%M:%S")
-    except Exception as e:
-        print(f"[time format error] {timestr} → {str(e)}")
-        return "1970-01-01 00:00:00"
-
-
-def format_duration(seconds):
-    try:
-        seconds = int(seconds)
-        return f"{seconds // 60:02}:{seconds % 60:02}"
-    except Exception:
-        return "00:00"
-
-
-class DMHeaderManager:
-    _headers_template = {
-        'Accept': '*/*, */*',
-        'Cache-Control': 'no-cache',
-        'Connection': 'keep-alive',
-        'Content-Type': 'application/json, application/json',
-        'Host': 'graphql.api.dailymotion.com',
-        'Origin': 'https://www.dailymotion.com',
-        'Referer': 'https://www.dailymotion.com/',
-        'Sec-Fetch-Dest': 'empty',
-        'Sec-Fetch-Mode': 'cors',
-        'Sec-Fetch-Site': 'same-site',
-        'User-Agent': 'Mozilla/5.0',
-        'X-DM-AppInfo-Id': 'com.dailymotion.neon',
-        'X-DM-AppInfo-Type': 'website',
-        'X-DM-AppInfo-Version': 'v2025-05-26T13:45:05.666Z',
-        'X-DM-Neon-SSR': '0',
-        'X-DM-Preferred-Country': 'tw',
-        'accept-language': 'zh-CN',
-        'authorization': '',
-        'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"Windows"',
-        'x-dm-visit-id': '',
-        'x-dm-visitor-id': '',
-    }
-
-    _user_agents = [
-        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
-        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
-        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
-        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
-        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
-        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
-    ]
-
-    def __init__(self, proxies: dict = None):
-        self._headers_cache = None
-        self._cache_lock = Lock()
-        self._proxies = proxies
-
-    def get_headers(self, retry: int = 2) -> dict:
-        visitor_id = str(uuid.uuid4())
-        visit_id = str(int(time.time() * 1000))
-        traffic_segment = str(random.randint(100_000, 999_999))
-        ua = random.choice(self._user_agents)
-
-        token_headers = {
-            'Accept': '*/*',
-            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Content-Type': 'application/x-www-form-urlencoded',
-            'Origin': 'https://www.dailymotion.com',
-            'Pragma': 'no-cache',
-            'Referer': 'https://www.dailymotion.com/',
-            'Sec-Fetch-Dest': 'empty',
-            'Sec-Fetch-Mode': 'cors',
-            'Sec-Fetch-Site': 'same-site',
-            'User-Agent': ua,
-            'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
-            'sec-ch-ua-mobile': '?0',
-            'sec-ch-ua-platform': '"Windows"',
-        }
-
-        data = {
-            'client_id': 'f1a362d288c1b98099c7',
-            'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
-            'grant_type': 'client_credentials',
-            'traffic_segment': traffic_segment,
-            'visitor_id': visitor_id,
-        }
-
-        response = req.post(
-            'https://graphql.api.dailymotion.com/oauth/token',
-            headers=token_headers,
-            data=data,
-            proxies=self._proxies,
-            timeout=10
-        )
-        response.raise_for_status()
-        token = response.json()['access_token']
-
-        new_headers = copy.deepcopy(self._headers_template)
-        new_headers['authorization'] = f'Bearer {token}'
-        new_headers['x-dm-visit-id'] = visit_id
-        new_headers['x-dm-visitor-id'] = visitor_id
-        new_headers['User-Agent'] = ua
-
-        with self._cache_lock:
-            self._headers_cache = copy.deepcopy(new_headers)
-
-        return new_headers
-
-
-class DMVideoInfo:
-    def __init__(self, proxies: dict = None, max_retries: int = 3, backoff_factor: float = 0.5):
-        self.proxies = proxies
-        self.max_retries = max_retries
-        self.backoff_factor = backoff_factor
-
-    def get_video_info(self, data: dict) -> dict:
-        v_xid = data.get('v_xid')
-        url = f'https://api.dailymotion.com/video/{v_xid}'
-        params = {
-            'fields': 'id,title,created_time,thumbnail_240_url,duration,'
-                      'owner.id,owner.screenname,likes_total,views_total,'
-                      'owner.avatar_60_url,owner.followers_total,owner.videos_total'
-        }
-
-        try:
-            resp = req.get(url, params=params, timeout=10)
-            resp.raise_for_status()
-            r_data = resp.json()
-            xid = r_data["id"]
-            vid = base64.b64encode(f"Video:{xid}".encode('utf-8')).decode('utf-8')
-            uxid = r_data["owner.id"]
-            uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
-            duration = r_data.get("duration", 0)
-            if duration < 30:
-                return None
-            data["v_id"] = vid
-            data["title"] = r_data.get("title", "")
-            data["link"] = "https://www.dailymotion.com/video/" + xid
-            data["duration"] = format_duration(r_data.get("duration", 0))
-            data['create_time'] = format(
-                datetime.fromtimestamp(r_data.get("created_time")).strftime("%Y-%m-%d %H:%M:%S"))
-            data['fans'] = clean_dash_to_zero(r_data.get("owner.followers_total", 0))
-            data['videos'] = clean_dash_to_zero(r_data.get("owner.videos_total", 0))
-            data['watch_number'] = clean_dash_to_zero(r_data.get("views_total", 0))
-            data['cover_pic'] = r_data.get('thumbnail_240_url')
-            data['u_id'] = uid
-            data['u_xid'] = uxid
-            data['u_name'] = r_data.get("owner.screenname", "")
-            data['u_pic'] = r_data.get("owner.avatar_60_url", "")
-            DBSA.upsert_video(data)
-            DBSA.flush()
-        except requests.RequestException as e:
-            print(f"[ERROR] request failed vxid={v_xid} : {e}")
-            return None
-
-
-def parse_args() -> argparse.Namespace:
-    global MACHINE_ID
-    parser = argparse.ArgumentParser(
-        description="Configure worker settings."
-    )
-    parser.add_argument(
-        "-m", "--machine-id",
-        type=int,
-        help=f"Machine identifier (default: {MACHINE_ID})"
-    )
-
-    args = parser.parse_args()
-
-    if args.machine_id is not None:
-        MACHINE_ID = args.machine_id
-
-    if MACHINE_ID is None or MACHINE_ID == 0:
-        raise ValueError("A machine id must be specified")
-    return args
-
-
-def main():
-    while True:
-        kwdata = db.get_web_items()
-        if not kwdata:
-            print("No keyword data retrieved")
-            time.sleep(30)
-            continue
-        print(f"Keyword data for search: {kwdata}")
-        kwdata = kwdata[0][1]
-        rn = kwdata['rn']
-        proxy_name = proxiesdict.get(rn)
-        # proxies_str = "http://127.0.0.1:10808"
-        proxies_str = db.get_proxy(proxy_name, '-1')
-        proxies = {
-            'http': proxies_str,
-            'https': proxies_str
-        }
-        kw = kwdata['keyword']
-        dmheader_manager = DMHeaderManager(proxies=proxies)
-        dmvideo_info = DMVideoInfo(proxies=proxies)
-        headers = dmheader_manager.get_headers()
-        for i in range(1, 11):
-            data = {
-                "operationName": "SEARCH_QUERY",
-                "variables": {
-                    "query": kw,
-                    "shouldIncludeTopResults": True,  # include top results
-                    "shouldIncludeChannels": False,   # include channels
-                    "shouldIncludePlaylists": False,  # include playlists
-                    "shouldIncludeHashtags": False,   # include hashtags
-                    "shouldIncludeVideos": False,     # include plain video results
-                    "shouldIncludeLives": False,      # include live streams
-                    "page": i,
-                    "limit": 20,
-                    "recaptchaToken": None
-                },
-                "query": """
-fragment VIDEO_BASE_FRAGMENT on Video {
-  id
-  xid
-  title
-  createdAt
-  duration
-  aspectRatio
-  thumbnail(height: PORTRAIT_240) {
-    id
-    url
-    __typename
-  }
-  creator {
-    id
-    xid
-    name
-    displayName
-    accountType
-    avatar(height: SQUARE_60) {
-      id
-      url
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment CHANNEL_BASE_FRAG on Channel {
-  id
-  xid
-  name
-  displayName
-  accountType
-  isFollowed
-  avatar(height: SQUARE_120) {
-    id
-    url
-    __typename
-  }
-  followerEngagement {
-    id
-    followDate
-    __typename
-  }
-  metrics {
-    id
-    engagement {
-      id
-      followers {
-        edges {
-          node {
-            id
-            total
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment PLAYLIST_BASE_FRAG on Collection {
-  id
-  xid
-  name
-  description
-  thumbnail(height: PORTRAIT_240) {
-    id
-    url
-    __typename
-  }
-  creator {
-    id
-    xid
-    name
-    displayName
-    accountType
-    avatar(height: SQUARE_60) {
-      id
-      url
-      __typename
-    }
-    __typename
-  }
-  metrics {
-    id
-    engagement {
-      id
-      videos(filter: {visibility: {eq: PUBLIC}}) {
-        edges {
-          node {
-            id
-            total
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment HASHTAG_BASE_FRAG on Hashtag {
-  id
-  xid
-  name
-  metrics {
-    id
-    engagement {
-      id
-      videos {
-        edges {
-          node {
-            id
-            total
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment LIVE_BASE_FRAGMENT on Live {
-  id
-  xid
-  title
-  audienceCount
-  aspectRatio
-  isOnAir
-  thumbnail(height: PORTRAIT_240) {
-    id
-    url
-    __typename
-  }
-  creator {
-    id
-    xid
-    name
-    displayName
-    accountType
-    avatar(height: SQUARE_60) {
-      id
-      url
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-query SEARCH_QUERY(
-  $query: String!,
-  $shouldIncludeTopResults: Boolean!,
-  $shouldIncludeVideos: Boolean!,
-  $shouldIncludeChannels: Boolean!,
-  $shouldIncludePlaylists: Boolean!,
-  $shouldIncludeHashtags: Boolean!,
-  $shouldIncludeLives: Boolean!,
-  $page: Int,
-  $limit: Int,
-  $sortByVideos: SearchVideoSort,
-  $durationMinVideos: Int,
-  $durationMaxVideos: Int,
-  $createdAfterVideos: DateTime,
-  $recaptchaToken: String
-) {
-  search(token: $recaptchaToken) {
-    id
-
-    stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          ...VIDEO_BASE_FRAGMENT
-          ...CHANNEL_BASE_FRAG
-          ...PLAYLIST_BASE_FRAG
-          ...HASHTAG_BASE_FRAG
-          ...LIVE_BASE_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    videos(
-      query: $query,
-      first: $limit,
-      page: $page,
-      sort: $sortByVideos,
-      durationMin: $durationMinVideos,
-      durationMax: $durationMaxVideos,
-      createdAfter: $createdAfterVideos
-    ) @include(if: $shouldIncludeVideos) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...VIDEO_BASE_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...LIVE_BASE_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...CHANNEL_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...PLAYLIST_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...HASHTAG_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    __typename
-  }
-}
-"""
-            }
-
-            payload = json.dumps(data).encode()
-
-            response = req.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
-                                proxies=proxies)
-
-            data = response.json()
-            try:
-                edges = data['data']['search']['stories']['edges']
-            except (TypeError, KeyError):
-                print("stories is None or has an unexpected structure; skipping")
-                break
-            edges_len = len(edges)
-            print(f"Page {i}, keyword: {kw}, got {edges_len} items")
-            tancks = []
-            for j, edge in enumerate(edges):
-                node = edge.get("node", {})
-                s_data = {
-                    "keyword": kw,
-                    "v_name": kwdata.get("v_name", ""),
-                    "v_xid": node.get("xid"),
-                    "batch": kwdata.get("batch"),
-                    "rn": kwdata.get("rn"),
-                    "machine_id": MACHINE_ID,
-                    "index": (i - 1) * 20 + j + 1,
-                    "level": 0,
-                }
-                tancks.append(s_data)
-            with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
-                executor.map(dmvideo_info.get_video_info, tancks)
-            if edges_len < 20:
-                break
-            time.sleep(10)
-
-        time.sleep(20)
-
-
-if __name__ == '__main__':
-    parse_args()
-    start_time = datetime.now()
-    print(f"Start time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
-    main()
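Note: SEARCH_QUERY declares more variables than main() actually sends. As a sketch, a variables payload that would exercise the videos branch instead of stories looks like this (the variable names come from the query above; the sort value is an assumption about the SearchVideoSort enum):

variables = {
    "query": "example keyword",   # illustrative search term
    "shouldIncludeTopResults": False,
    "shouldIncludeVideos": True,  # flips the @include on the videos block
    "shouldIncludeChannels": False,
    "shouldIncludePlaylists": False,
    "shouldIncludeHashtags": False,
    "shouldIncludeLives": False,
    "page": 1,
    "limit": 20,
    "sortByVideos": "RECENT",     # assumed member of SearchVideoSort
    "recaptchaToken": None,
}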
onoe.py
@@ -34,7 +34,19 @@ UserAgent = [
     'User-Agent, Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.3.4000 Chrome/47.0.2526.73 Safari/537.36',
     'User-Agent, Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)']
+sec_ch_ua_list = [
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="136", "Not;A=Brand";v="24", "Google Chrome";v="136"',
+    '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
+    '"Chromium";v="53", "Not;A=Brand";v="24", "Google Chrome";v="53"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="47", "Not;A=Brand";v="24", "Google Chrome";v="47"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"'
+]
+
 def get_part_ids(part_num: int, take: int, offset: int = 0):
     part_ids = list(range(offset, offset + take))
@@ -221,14 +233,17 @@ def gettoken(proxy, r=2):
     }
     try:
         proxy_str = db.get_proxy(proxy)
+        logger.info(f"[proxy] => {proxy_str}")
         url = 'https://graphql.api.dailymotion.com/oauth/token'
         response = requests.post(url, headers=headers, data=data, proxies={"http": proxy_str, "https": proxy_str})
         token = response.json()['access_token']
         copy_headers = copy.deepcopy(headers1)
+        uaidx = random.randint(0, len(UserAgent) - 1)
        copy_headers['authorization'] = "Bearer " + token
         copy_headers['x-dm-visit-id'] = str(int(time.time() * 1000))
         copy_headers['x-dm-visitor-id'] = uuid_with_dash
-        copy_headers['User-Agent'] = UserAgent[random.randint(0, len(UserAgent) - 1)]
+        copy_headers['User-Agent'] = UserAgent[uaidx]
+        copy_headers['sec-ch-ua'] = sec_ch_ua_list[uaidx]
         copy_headers['X-DM-Preferred-Country'] = proxy.lower()
         with _cache_lock:
             _headers_cache = copy_headers
@@ -267,18 +282,18 @@ def solve_recaptcha_v3_with_proxy(
     payload = {
         "clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996",
         "task": {
-            "type": "ReCaptchaV3Task",
+            "type": "ReCaptchaV3TaskProxyLess",
             "websiteURL": f"https://www.dailymotion.com/search/{encoded_query}/top-results",
             "websiteKey": "6LeOJBIrAAAAAPMIjyYvo-eN_9W1HDOkrEqHR8tM",
-            "pageAction": "___grecaptcha_cfg.clients['100000']['L']['L']['promise-callback'](gRecaptchaResponse)",
+            "pageAction": "search",
             "minScore": 0.5
         }
     }
-    resp = requests.post(create_url, json=payload, headers=headers, timeout=30)
+    resp = requests.post(create_url, data=json.dumps(payload), headers=headers, timeout=30)
     logger.info(f"[token] sending payload: {payload}")
     resp.raise_for_status()
     task_id = resp.json()["taskId"]
-    logger.info(f"task_id: {task_id}")
+    logger.info(f"task_id: {resp.text}")
     # poll for the result
     check_payload = {"clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996", "taskId": task_id}
     for i in range(max_poll_attempts):
@@ -287,7 +302,7 @@ def solve_recaptcha_v3_with_proxy(
         result = r.json()
         logger.info(f"attempt {i}, task_id: {task_id}, result: {result}")
         if result.get("status") == "ready":
-            return result["solution"]["token"]
+            return result["solution"]["gRecaptchaResponse"]
         time.sleep(polling_interval)
 
     raise TimeoutError(f"Task {task_id} did not complete after {max_poll_attempts} polls")
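Note: the two CapSolver fixes above (task type ReCaptchaV3TaskProxyLess, token read from solution.gRecaptchaResponse) combine into a create-then-poll round trip. The endpoints and field names below are the ones used elsewhere in this diff; the helper itself is a sketch:

import time
import requests

def solve_recaptcha_v3(client_key: str, website_url: str, website_key: str,
                       attempts: int = 60, interval: float = 2.0) -> str:
    task = {"type": "ReCaptchaV3TaskProxyLess", "websiteURL": website_url,
            "websiteKey": website_key, "pageAction": "search", "minScore": 0.5}
    resp = requests.post("https://api.capsolver.com/createTask",
                         json={"clientKey": client_key, "task": task}, timeout=30)
    resp.raise_for_status()
    task_id = resp.json()["taskId"]
    for _ in range(attempts):
        r = requests.post("https://api.capsolver.com/getTaskResult",
                          json={"clientKey": client_key, "taskId": task_id}, timeout=15)
        result = r.json()
        if result.get("status") == "ready":
            # v3 tasks return the token under "gRecaptchaResponse", not "token"
            return result["solution"]["gRecaptchaResponse"]
        time.sleep(interval)
    raise TimeoutError(f"task {task_id} not ready after {attempts} polls")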
BIN oss/BAZTSJT.pdf (binary file not shown)
BIN oss/LOA.pdf (binary file not shown)
@@ -1,56 +0,0 @@
-import requests
-
-url = "https://api.siliconflow.cn/v1/chat/completions"
-kw = "朝雪录"
-rn = "US"
-payload = {
-    "model": "Qwen/Qwen3-14B",
-    "max_tokens": 512,
-    "enable_thinking": True,
-    "thinking_budget": 4096,
-    "min_p": 0.05,
-    "temperature": 0.7,
-    "top_p": 0.7,
-    "top_k": 50,
-    "frequency_penalty": 0.5,
-    "n": 1,
-    "stream": False,
-    "stop": [],
-    "messages": [
-        {
-            "role": "user",
-            "content": """You are a video-search optimization assistant. Given a Chinese video title or keyword, translate it and come up with 10 keywords suited to searching English video sites (such as Dailymotion). Output the keywords separated by English commas; return only the keyword list, with no explanation.
-
-Example input: 朝雪录
-Example output: Coroner's Diary,Coroners Diary, Coroners Diary episode,Coroners Diary season 1,Coroners Diary full episode,coroners diary
-"""
-        },
-        {
-            "role": "user",
-            "content": f"Reason about {kw} and output 10 keywords for region code {rn} suited to video-site search; the region code itself must not appear in the keywords."
-        }
-    ]
-}
-
-headers = {
-    "Authorization": "Bearer sk-isvydeloxqhoiwoiojleghdsuhagryjbxzphfhxneevxeoeh",
-    "Content-Type": "application/json"
-}
-
-response = requests.post(url, json=payload, headers=headers, timeout=30)
-
-
-def parse_keywords_from_response(resp_json):
-    try:
-        # pull out the text content
-        content = resp_json["choices"][0]["message"]["content"]
-        # split on English commas
-        keywords = [kw.strip() for kw in content.split(",") if kw.strip()]
-        return keywords
-    except Exception as e:
-        print("Parse failed:", e)
-        return []
-
-
-kws = parse_keywords_from_response(response.json())
-
-print(kws)
-print(len(kws))
@@ -1,19 +0,0 @@
-import json
-from DB import DBVidcon
-
-payload_list = []
-db = DBVidcon()
-rows = db.get_report_video()
-push = db.push_report
-
-# =======================
-
-for row in rows:
-    payload_list.append(json.dumps({**row}, ensure_ascii=False))
-    if len(payload_list) >= 10000:
-        push(payload_list)
-        payload_list.clear()
-if payload_list:  # flush the remainder
-    push(payload_list)
-
-db.close()
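Note: push_report is only referenced in the script above. If the queues are Redis lists, which the push/clear batching pattern suggests, the method reduces to one RPUSH per batch. A sketch, with the method body and queue name assumed:

import redis

r = redis.Redis()

def push_report(payload_list):
    # One RPUSH with many members per 10k-row batch keeps round trips low;
    # "report_queue" is an assumed list name.
    r.rpush("report_queue", *payload_list)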
report.py
@@ -1,124 +0,0 @@
-import argparse
-import json
-import time
-from DB import DBVidcon, DBSA
-from report_video import DailymotionClient
-from logger import logger
-import requests
-
-MACHINE_ID = None
-IsSubsequent = False
-
-
-def parse_args() -> argparse.Namespace:
-    global MACHINE_ID, IsSubsequent
-
-    parser = argparse.ArgumentParser(
-        description="Configure worker settings."
-    )
-    parser.add_argument(
-        "-m", "--machine-id",
-        type=int,
-        help=f"Machine identifier (default: {MACHINE_ID})"
-    )
-    parser.add_argument(
-        "-s", "--IsSubsequent",
-        type=int,
-        help=f"Maximum concurrent workers (default: {IsSubsequent})"
-    )
-
-    args = parser.parse_args()
-
-    if args.machine_id is not None:
-        MACHINE_ID = args.machine_id
-
-    if args.IsSubsequent is not None:
-        if args.IsSubsequent <= 0:
-            IsSubsequent = False
-        else:
-            IsSubsequent = True
-    if MACHINE_ID is None:
-        raise ValueError("A machine id must be specified")
-    return args
-
-
-parse_args()
-
-
-def get_public_ip():
-    try:
-        response = requests.get("https://api.ipify.org?format=json", timeout=5)
-        return response.json().get("ip")
-    except requests.RequestException as e:
-        print("Failed to fetch public IP:", e)
-        return None
-
-
-ip = get_public_ip()
-logger.info(f"Current machine IP: {ip}, machine id: {MACHINE_ID}, follow-up processing: {IsSubsequent}")
-db = DBVidcon()
-
-account = db.get_account_info(MACHINE_ID)
-
-d = DailymotionClient(email=account['account'], password=account['password'])
-
-k = {
-    "open": 1,
-    "solved": 2,
-    "awaiting your reply": 3,
-}
-
-last_main_run = 0
-last_subsequent_run = 0
-
-MAIN_INTERVAL = 60 * 60  # run the main flow hourly
-SUBSEQUENT_INTERVAL = 30 * 60  # run follow-ups every 30 minutes
-
-# d.test()
-
-while True:
-    now = int(time.time())
-
-    # main flow
-    if now - last_main_run >= MAIN_INTERVAL:
-        last_main_run = now
-        re_list = []
-        idss = []
-        lis = db.item_report(100)
-        if len(lis) > 0:
-            for li in lis:
-                item = json.loads(li[0])
-                re_list.append(item)
-                idss.append(item['id'])
-                logger.info(f"name:{item['name_title']},link:{item['link']} ")
-            try:
-                ids, info, report_id, status, report_ts = d.process_ticket(re_list)
-                subsequent_status = k.get(status, 1)
-                db.update_fight_record_status(
-                    ids, report_id, 2, f"http://{ip}:5000/image/{info}",
-                    report_ts, subsequent_status, MACHINE_ID
-                )
-                db.flush()
-            except Exception as e:
-                logger.error(f"ID:{re_list[0]['id']}, end id {re_list[-1]['id']}, e:{e}")
-                db.update_fight_record_status(idss, 0, 3, str(e), mid=MACHINE_ID)
-                time.sleep(60)  # back off after an error
-
-    if now - last_subsequent_run >= SUBSEQUENT_INTERVAL and IsSubsequent:
-        last_subsequent_run = now
-        subsequent_list = db.get_subsequent_report_video(MACHINE_ID)
-        if len(subsequent_list) > 0:
-            for li in subsequent_list:
-                subsequent_status = 0
-                r_id = li['report_id']
-                logger.info(f"subsequent report_id:{r_id} ")
-                # try:
-                subsequent_status, info = d.report_follow_up(r_id)
-                db.update_subsequent_status_by_report_id(
-                    r_id, subsequent_status, f"http://{ip}:5000/image/{info}"
-                )
-                # except Exception as e:
-                #     logger.logger.error(f"ID:{rs_id}, e:{e}")
-                #     db.update_subsequent_status_by_id(rs_id, 1, str(e))
-                time.sleep(5)  # avoid hammering the service
-    time.sleep(5)
417
report_video.py
417
report_video.py
@ -1,417 +0,0 @@
|
|||||||
import time
|
|
||||||
import functools
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from datetime import datetime
|
|
||||||
from sys import platform
|
|
||||||
import requests
|
|
||||||
from logger import logger
|
|
||||||
from playwright.sync_api import (
|
|
||||||
sync_playwright,
|
|
||||||
TimeoutError as PlaywrightTimeoutError,
|
|
||||||
Page,
|
|
||||||
Browser,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def solve_turnstile_capsolver(page: Page,
|
|
||||||
timeout: int = 120) -> bool:
|
|
||||||
"""
|
|
||||||
使用 CapSolver 自动完成当前 Page 上的 Cloudflare Turnstile。
|
|
||||||
成功返回 True,失败/超时返回 False。
|
|
||||||
"""
|
|
||||||
cap_key = "CAP-A76C932D4C6CCB3CA748F77FDC07D996"
|
|
||||||
widget = page.query_selector("div.cf-turnstile[data-sitekey]")
|
|
||||||
if not widget:
|
|
||||||
return False
|
|
||||||
sitekey = widget.get_attribute("data-sitekey")
|
|
||||||
page_url = page.url
|
|
||||||
|
|
||||||
create_payload = {
|
|
||||||
"clientKey": cap_key,
|
|
||||||
"task": {
|
|
||||||
"type": "TurnstileTaskProxyLess",
|
|
||||||
"websiteURL": page_url,
|
|
||||||
"websiteKey": sitekey
|
|
||||||
}
|
|
||||||
}
|
|
||||||
create_resp = requests.post(
|
|
||||||
"https://api.capsolver.com/createTask",
|
|
||||||
json=create_payload, timeout=20
|
|
||||||
).json()
|
|
||||||
if create_resp.get("errorId"):
|
|
||||||
print("[CapSolver] createTask 失败:", create_resp)
|
|
||||||
return False
|
|
||||||
task_id = create_resp["taskId"]
|
|
||||||
|
|
||||||
poll_payload = {"clientKey": cap_key, "taskId": task_id}
|
|
||||||
token = None
|
|
||||||
elapsed, step = 0, 3
|
|
||||||
while elapsed < timeout:
|
|
||||||
time.sleep(step)
|
|
||||||
elapsed += step
|
|
||||||
res = requests.post(
|
|
||||||
"https://api.capsolver.com/getTaskResult",
|
|
||||||
json=poll_payload, timeout=15
|
|
||||||
).json()
|
|
||||||
if res.get("status") == "ready":
|
|
||||||
token = res["solution"]["token"]
|
|
||||||
break
|
|
||||||
if res.get("status") != "processing":
|
|
||||||
print("[CapSolver] getTaskResult 异常:", res)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if not token:
|
|
||||||
print("[CapSolver] 超时未取到 token")
|
|
||||||
return False
|
|
||||||
|
|
||||||
page.evaluate(
|
|
||||||
"""(tk) => {
|
|
||||||
const ta = document.querySelector('textarea[name="cf-turnstile-response"]');
|
|
||||||
if (ta) ta.value = tk;
|
|
||||||
if (window.turnstileCallback)
|
|
||||||
try { window.turnstileCallback(tk); } catch(e){}
|
|
||||||
}""",
|
|
||||||
token
|
|
||||||
)
|
|
||||||
page.wait_for_timeout(1500)
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def require_login(func):
|
|
||||||
@functools.wraps(func)
|
|
||||||
def wrapper(self, *args, **kwargs):
|
|
||||||
self.ensure_login()
|
|
||||||
return func(self, *args, **kwargs)
|
|
||||||
|
|
||||||
return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
class DailymotionClient:
|
|
||||||
url = "https://faq.dailymotion.com/hc/en-us/requests/new"
|
|
||||||
EMAIL = "copyright@qiyi.com"
|
|
||||||
PASSWORD = "ppsIQIYI2018@"
|
|
||||||
|
|
||||||
def __init__(self,email, password, headless: bool = None):
|
|
||||||
self.email = email
|
|
||||||
self.password = password
|
|
||||||
self.headless = headless
|
|
||||||
self.check_interval = 60 * 60
|
|
||||||
if self.headless is None:
|
|
||||||
self.headless = platform == "linux" or platform == "linux2"
|
|
||||||
|
|
||||||
if self.headless:
|
|
||||||
proxy = None
|
|
||||||
self.file_path = "/opt/ql/DailyMotion/oss/LOA.pdf"
|
|
||||||
self.file_path2 = "/opt/ql/DailyMotion/oss/BAZTSJT.pdf"
|
|
||||||
else:
|
|
||||||
proxy={'server': 'http://127.0.0.1:7890'}
|
|
||||||
self.file_path = "./oss/LOA.pdf"
|
|
||||||
self.file_path2 = "./oss/BAZTSJT.pdf"
|
|
||||||
logger.info(f"Launching DailymotionClient with headless={self.headless}, proxy={proxy}")
|
|
||||||
self._pw = sync_playwright().start()
|
|
||||||
self.browser: Browser = self._pw.chromium.launch(
|
|
||||||
headless=self.headless,
|
|
||||||
proxy=proxy,
|
|
||||||
)
|
|
||||||
self.context = self.browser.new_context(
|
|
||||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
||||||
"Chrome/122.0.0.0 Safari/537.36",
|
|
||||||
locale="en-US",
|
|
||||||
viewport={"width": 1280, "height": 800},
|
|
||||||
timezone_id="Asia/Shanghai",
|
|
||||||
permissions=[],
|
|
||||||
)
|
|
||||||
self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
|
||||||
self.page: Page = self.context.new_page()
|
|
||||||
|
|
||||||
self._last_check_ts = 0
|
|
||||||
self._last_check_result = False
|
|
||||||
os.makedirs('screenshots', exist_ok=True)
|
|
||||||
self.page.goto(self.url)
|
|
||||||
|
|
||||||
def _do_login(self) -> None:
|
|
||||||
self.page.goto(self.url, timeout=30000)
|
|
||||||
# self.page.wait_for_load_state("networkidle", timeout=30000)
|
|
||||||
self.page.wait_for_timeout(3000)
|
|
||||||
|
|
||||||
file_path = f'screenshots/{str(int(time.time()))}.png'
|
|
||||||
self.page.screenshot(path=file_path)
|
|
||||||
|
|
||||||
if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
|
|
||||||
ok = solve_turnstile_capsolver(self.page)
|
|
||||||
if not ok:
|
|
||||||
raise RuntimeError("CapSolver 处理 Turnstile 失败")
|
|
||||||
|
|
||||||
logbtn = self.page.locator("//a[@class='login button']")
|
|
||||||
if logbtn.count() > 0:
|
|
||||||
logbtn.nth(0).click()
|
|
||||||
|
|
||||||
self.page.wait_for_selector("//input[@data-testid=\"emailInput\"]")
|
|
||||||
|
|
||||||
# “我了解”弹窗
|
|
||||||
i_now_btn = self.page.locator("button:has-text(\"I understand\")")
|
|
||||||
if i_now_btn.count() > 0:
|
|
||||||
i_now_btn.click()
|
|
||||||
|
|
||||||
# 输入账号密码
|
|
||||||
email_edit = self.page.locator("//input[@data-testid=\"emailInput\"]")
|
|
||||||
password_edit = self.page.locator("//input[@data-testid=\"passwordInput\"]")
|
|
||||||
if email_edit.count():
|
|
||||||
email_edit.fill(self.email)
|
|
||||||
if password_edit.count():
|
|
||||||
password_edit.fill(self.password)
|
|
||||||
|
|
||||||
# 登录
|
|
||||||
login_btn = self.page.locator('button[form="signin-form"][type="submit"]')
|
|
||||||
try:
|
|
||||||
self.page.wait_for_selector(
|
|
||||||
'button[form="signin-form"][type="submit"]:not([disabled])', timeout=20000
|
|
||||||
)
|
|
||||||
except PlaywrightTimeoutError:
|
|
||||||
pass
|
|
||||||
login_btn.click()
|
|
||||||
|
|
||||||
# 等待跳回
|
|
||||||
self.page.wait_for_url(self.url, timeout=30000)
|
|
||||||
time.sleep(1)
|
|
||||||
self._last_check_ts = time.time()
|
|
||||||
self._last_check_result = True
|
|
||||||
|
|
||||||
def _detect_login(self) -> bool:
|
|
||||||
self.page.goto(self.url, timeout=30000)
|
|
||||||
self.page.wait_for_timeout(3000)
|
|
||||||
return self.page.locator("//a[@class='login button']").count() == 0
|
|
||||||
|
|
||||||
def is_logged_in(self) -> bool:
|
|
||||||
now = time.time()
|
|
||||||
if now - self._last_check_ts < self.check_interval:
|
|
||||||
return self._last_check_result
|
|
||||||
|
|
||||||
try:
|
|
||||||
ok = self._detect_login()
|
|
||||||
except Exception:
|
|
||||||
ok = False
|
|
||||||
|
|
||||||
self._last_check_ts = now
|
|
||||||
self._last_check_result = ok
|
|
||||||
return ok
|
|
||||||
|
|
||||||
def ensure_login(self) -> None:
|
|
||||||
if not self.is_logged_in():
|
|
||||||
self._do_login()
|
|
||||||
|
|
||||||
@require_login
|
|
||||||
def process_ticket(self, lis: list):
|
|
||||||
|
|
||||||
titles = "\r\n"
|
|
||||||
links = ""
|
|
||||||
ids= []
|
|
||||||
title = ""
|
|
||||||
link = ""
|
|
||||||
assignment = True
|
|
||||||
for li in lis:
|
|
||||||
if assignment:
|
|
||||||
title = li['name_title']
|
|
||||||
link = li['link']
|
|
||||||
assignment = False
|
|
||||||
ids.append(li['id'])
|
|
||||||
titles += li['name_title'] + ",\r\n"
|
|
||||||
links += li['link'] + ",\r\n"
|
|
||||||
logger.info(f"Processing ticket for title: {titles}, link: {links}")
|
|
||||||
self.page.goto(self.url, timeout=3000)
|
|
||||||
titles_list = [title.strip() for title in titles.split(',')]
|
|
||||||
unique_titles = list(set(titles_list))
|
|
||||||
unique_titles.sort()
|
|
||||||
titles =",".join(unique_titles) # 去重
|
|
||||||
description = """We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.
|
|
||||||
1) please help remove these videos
|
|
||||||
2) The drama series titles are {}
|
|
||||||
""".format(titles)
|
|
||||||
# likls = ["\"" + l + "\"" for l in link]
|
|
||||||
# links = ','.join(likls)
|
|
||||||
if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
|
|
||||||
ok = solve_turnstile_capsolver(self.page)
|
|
||||||
if not ok:
|
|
||||||
raise RuntimeError("CapSolver 处理 Turnstile 失败")
|
|
||||||
# file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
|
||||||
# self.page.screenshot(path=file_path)
|
|
||||||
resports = self.page.locator('li.blocks-item:nth-child(8)')
|
|
||||||
resports.click()
|
|
||||||
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
cc = self.page.locator("input#request_collaborators_")
|
|
||||||
cc.scroll_into_view_if_needed()
|
|
||||||
cc.click()
|
|
||||||
cc.type("duke.chen@dailymotion.com")
|
|
||||||
|
|
||||||
self.page.get_by_role("button", name="Copyright infringement").click()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("button", name="Notification").nth(0).click()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("button", name="A legal entity").click()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Corporate name").fill("Beijing iQIYI Science & Technology Co.,Ltd")
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Legal status").fill("Legal Department")
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Subject").fill("Copyright infringement Notification")
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Please indicate the URL of the video(s) you would like to report*").fill(links)
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Description").nth(1).fill(description)
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("I state in good faith", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("I state in good faith that the use of the Protected", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="I certify that all information provided", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="I acknowledge that my statements", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="The data provided through this form", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="By submitting the present form,", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("textbox", name="electronic signature", exact=False).fill("柴达") # 占位
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.set_input_files('input#request-attachments', [
|
|
||||||
self.file_path,
|
|
||||||
self.file_path2
|
|
||||||
])
|
|
||||||
self.page.wait_for_timeout(8000)
|
|
||||||
self.page.get_by_role("button", name="Submit").click()
|
|
||||||
time.sleep(2)
|
|
||||||
file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
|
||||||
locator = self.page.locator("//dt[normalize-space(.)='Id']/following-sibling::dd[1]")
|
|
||||||
raw_text = locator.text_content()
|
|
||||||
match = re.search(r'\d+', raw_text or '')
|
|
||||||
report_id = match.group() if match else None
|
|
||||||
status_raw = self.page.locator("span.status-label").text_content()
|
|
||||||
subsequent_status = status_raw.strip().lower() if status_raw else None
|
|
||||||
time_elem = self.page.locator("dt", has_text="Created").locator("xpath=following-sibling::dd[1]/time")
|
|
||||||
|
|
||||||
datetime_str = time_elem.get_attribute("datetime") # e.g. 2025-06-12T06:15:33+00:00
|
|
||||||
if datetime_str:
|
|
||||||
dt = datetime.fromisoformat(datetime_str.replace("Z", "+00:00")) # 安全处理 ISO 时间
|
|
||||||
timestamp = int(dt.timestamp())
|
|
||||||
else:
|
|
||||||
timestamp = None
|
|
||||||
self.page.screenshot(path=file_path)
|
|
||||||
if self.page.url != self.url:
|
|
||||||
self.page.goto(self.url, timeout=30000)
|
|
||||||
|
|
||||||
return ids, file_path, report_id, subsequent_status, timestamp
|
|
||||||
|
|
||||||
@require_login
|
|
||||||
def report_follow_up(self, report_id: str):
|
|
||||||
max_retries = 3
|
|
||||||
retry_delay = 2
|
|
||||||
loaded = False
|
|
||||||
subsequent_status = ""
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
self.page.goto(f"https://faq.dailymotion.com/hc/en-us/requests/{report_id}", timeout=30000)
|
|
||||||
# self.page.wait_for_load_state("networkidle") # 保证页面加载稳定
|
|
||||||
self.page.wait_for_selector("span.status-label", timeout=30000)
|
|
||||||
try:
|
|
||||||
status_raw = self.page.locator("span.status-label").text_content()
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[警告] 获取状态标签失败: {e}")
|
|
||||||
status_raw = None
|
|
||||||
|
|
||||||
subsequent_status = status_raw.strip().lower() if status_raw else None
|
|
||||||
loaded = True
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[ERROR] 尝试 {attempt + 1}/{max_retries} 失败: {e}")
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
time.sleep(retry_delay)
|
|
||||||
|
|
||||||
if not loaded:
|
|
||||||
return 1, "页面加载失败"
|
|
||||||
|
|
||||||
txt = (
|
|
||||||
"I am the authorized agent of Beijing iQIYI Technology Co., Ltd., responsible for dealing with "
|
|
||||||
"unauthorized overseas distribution of pirated videos of our works. "
|
|
||||||
"We have confirmed that the above links contain infringing content and we insist on requesting to takedown. Thank you!"
|
|
||||||
)
|
|
||||||
|
|
||||||
if "awaiting your reply" in subsequent_status:
|
|
||||||
span_show = self.page.locator('span.comment-show-container-content')
|
|
||||||
if span_show.count() > 0:
|
|
||||||
span_show.nth(0).click()
|
|
||||||
self.page.wait_for_timeout(1000)
|
|
||||||
|
|
||||||
textarea = self.page.locator('#request_comment_body')
|
|
||||||
textarea.type(txt, delay=30)
|
|
||||||
self.page.wait_for_timeout(1000)
|
|
||||||
self.page.get_by_role("button", name="Submit").click()
|
|
||||||
|
|
||||||
success = self.wait_for_selector_safe("span.status-label", timeout=30000, retries=3)
|
|
||||||
if not success:
|
|
||||||
return 1, "提交后未检测到状态更新"
|
|
||||||
|
|
||||||
span_show = self.page.locator('span.comment-show-container-content')
|
|
||||||
if span_show.count() > 0:
|
|
||||||
span_show.nth(0).click()
|
|
||||||
pic_path = f'screenshots/{str(int(time.time()))}_{report_id}.png'
|
|
||||||
self.page.screenshot(path=pic_path)
|
|
||||||
return 0, pic_path
|
|
||||||
|
|
||||||
elif "open" in subsequent_status:
|
|
||||||
return 1, ""
|
|
||||||
|
|
||||||
elif "solved" in subsequent_status:
|
|
||||||
return 2, ""
|
|
||||||
|
|
||||||
return 0, "未知状态"
|
|
||||||
|
|
||||||
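    # Return-code convention, as read from report_follow_up above (the
    # consuming code is not part of this diff):
    #   0 -> handled: follow-up posted and screenshot saved, or unknown status
    #   1 -> retry later: page failed to load, no status update, or still "open"
    #   2 -> ticket already "solved"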
    def wait_for_selector_safe(self, selector: str, timeout=30000, retries=3, retry_delay=2):
        for i in range(retries):
            try:
                self.page.wait_for_selector(selector, timeout=timeout)
                return True
            except Exception as e:
                print(f"[RETRY] Wait {i + 1}/{retries} for {selector} failed: {e}")
                if i < retries - 1:
                    time.sleep(retry_delay)
        return False

    @require_login
    def test(self):
        logger.info(f"Testing DailymotionClient with email: {self.email}")
        self.page.goto(self.url, timeout=30000)
        file_path = f'screenshots/{str(int(time.time()))}_test.png'
        self.page.screenshot(path=file_path)
        self.page.wait_for_timeout(1000)
        file_path = f"screenshots/{str(int(time.time()))}_test2.png"
        self.page.screenshot(path=file_path)
        logger.info(f"Test screenshot saved to {file_path}")
        self.page.wait_for_timeout(1000)
        file_path = f"screenshots/{str(int(time.time()))}_test3.png"
        self.page.screenshot(path=file_path)
        logger.info(f"Test screenshot saved to {file_path}")

    def close(self):
        try:
            self.page.close()
        except Exception:
            pass
        try:
            self.browser.close()
        except Exception:
            pass
        try:
            self._pw.stop()
        except Exception:
            pass


if __name__ == "__main__":
    dm = DailymotionClient("copyright@qiyi.com", "ppsIQIYI2018@")
    # dm.process_ticket("恋爱学园", "https://www.dailymotion.com/video/x9lfr24")
    dm.report_follow_up("13566")
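The class above leans on a `require_login` decorator defined outside this excerpt. A minimal sketch of what such a decorator presumably does (hypothetical; the real one is not shown in this diff) is to re-check the session before every guarded action:

    import functools

    def require_login(method):
        @functools.wraps(method)
        def wrapper(self, *args, **kwargs):
            self.ensure_login()  # re-validate, and re-login if the session expired
            return method(self, *args, **kwargs)
        return wrapper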
@@ -1,66 +1,10 @@
import time
+import requests
import json
import redis
-import requests
-import urllib3
-from matplotlib.artist import allow_rasterization
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-from typing import Optional, Dict, Any, Union
-
-
-class HttpClient:
-    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
-        self.session = requests.Session()
-        # Configure the retry strategy
-        retry_strategy = Retry(
-            total=max_retries,
-            backoff_factor=backoff_factor,
-            status_forcelist=[500, 502, 503, 504, 429]
-        )
-
-        adapter = HTTPAdapter(max_retries=retry_strategy)
-        self.session.mount("http://", adapter)
-        self.session.mount("https://", adapter)
-
-    def request(self,
-                method: str,
-                url: str,
-                headers: Optional[Dict] = None,
-                params: Optional[Dict] = None,
-                data: Optional[Union[Dict, str]] = None,
-                cookies: Optional[Dict] = None,
-                allow_redirects: bool = True,
-                timeout: int = 30,
-                **kwargs) -> requests.Response:
-        try:
-            response = self.session.request(
-                method=method,
-                url=url,
-                headers=headers,
-                params=params,
-                data=data,
-                cookies=cookies,
-                allow_redirects=allow_redirects,
-                timeout=timeout,
-                **kwargs
-            )
-            response.raise_for_status()
-            return response
-        except requests.exceptions.RequestException as e:
-            print(f"Request failed: {url}, error: {str(e)}")
-            raise
-
-    def get(self, url: str, **kwargs) -> requests.Response:
-        return self.request("GET", url, **kwargs)
-
-    def post(self, url: str, **kwargs) -> requests.Response:
-        return self.request("POST", url, **kwargs)
-
-
-# Create a global HTTP client instance
-http_client = HttpClient()
+session = requests.Session()

_REDIS_CONF = {
    "host": "192.144.230.75",
    "port": 6379,
@@ -88,7 +32,6 @@ def get_report_token(key_name: str):


def login():
-    try:
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
@@ -118,7 +61,7 @@ def login():
        "traffic_segment": "962042",
        "visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
    }
-    response = http_client.post(url, headers=headers, data=data)
+    response = session.post(url, headers=headers, data=data)
    data = {
        "update_time": int(time.time()),
        "username": "copyright@qiyi.com",
@@ -127,158 +70,113 @@ def login():
    }
    save_report_token('token', data)
    return data
-    except Exception as e:
-        print(f"Login failed: {str(e)}")
-        raise


-def refresh_token(access_token, refresh_token):
+def get_cookies(access_token: str, refresh_token: str):
-    headers = {
-        "Accept": "*/*",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Content-Length": "0",
-        "Origin": "https://www.dailymotion.com",
-        "Pragma": "no-cache",
-        "Referer": "https://www.dailymotion.com/signin?urlback=%2Fzendesk%3Ftimestamp%3D1748932650%26return_to%3Dhttps%253A%252F%252Ffaq.dailymotion.com%252Fhc%252Fen-us%252Frequests%252Fnew",
-        "Sec-Fetch-Dest": "empty",
-        "Sec-Fetch-Mode": "cors",
-        "Sec-Fetch-Site": "same-origin",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\""
-    }
    cookies = {
-        "dmvk": "683e982c34e34",
-        "ts": "133696",
-        "v1st": "a847389a-6b91-4157-948f-457666f7172b",
-        "ff": "on",
-        "lang": "zh_CN",
-        "usprivacy": "1---",
-        "dmaid": "73ca37e4-6858-46c1-aac4-a4a5fc9a270e",
-        "cookie_policy_closed": "1",
        "access_token": access_token,
        "refresh_token": refresh_token,
    }
    url = "https://www.dailymotion.com/cookie/refresh_token"
-    response = http_client.post(url, headers=headers, cookies=cookies)
+    session.post(url, cookies=cookies, allow_redirects=True)


-def zendesk():
+def get_cookies1(access_token: str, refresh_token: str):
+    """Follow the 302 redirect."""
-    headers = {
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Pragma": "no-cache",
-        "Referer": "https://www.dailymotion.com/sg",
-        "Sec-Fetch-Dest": "document",
-        "Sec-Fetch-Mode": "navigate",
-        "Sec-Fetch-Site": "same-origin",
-        "Sec-Fetch-User": "?1",
-        "Upgrade-Insecure-Requests": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\""
-    }
+    cookies = {
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+    }
    url = "https://www.dailymotion.com/zendesk"
    params = {
        "return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
-        "timestamp": str(time.time()),
+        "timestamp": str(int(time.time())),
    }
-    response = http_client.get(url, headers=headers, params=params, allow_redirects=True)
+    session.get(url, cookies=cookies, params=params, allow_redirects=True)
-    data = http_client.session.cookies.get_dict()
+    cookies_dict = {"update_time": int(time.time()), "cookies": session.cookies.get_dict()}
-    data['update_time'] = int(time.time())
+    save_report_token('cookies', cookies_dict)
-    save_report_token('cookies', data)
+    return cookies_dict


def get_csrftoken():
-    try:
    url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
-    headers = {
+    response = session.get(url)
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Pragma": "no-cache",
-        "Referer": "https://www.dailymotion.com/sg",
-        "Sec-Fetch-Dest": "document",
-        "Sec-Fetch-Mode": "navigate",
-        "Sec-Fetch-Site": "same-origin",
-        "Sec-Fetch-User": "?1",
-        "Upgrade-Insecure-Requests": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\""
-    }
-    response = http_client.get(url, headers=headers)
    data = {"update_time": int(time.time()), "csrf_token": response.json()}
    save_report_token('csrf_token', data)
    return data
-    except Exception as e:
-        print(f"Failed to fetch CSRF token: {str(e)}")
-        raise


-def report(csrf_token: str, v_url, title):
+def report(csrf_token: str, cookies: dict):
-    try:
    headers = {
-        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "accept-language": "zh-CN,zh;q=0.9",
-        "cache-control": "no-cache",
-        "content-type": "application/x-www-form-urlencoded",
-        "origin": "https://faq.dailymotion.com",
-        "pragma": "no-cache",
-        "priority": "u=0, i",
-        "referer": "https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\"",
-        "sec-fetch-dest": "document",
-        "sec-fetch-mode": "navigate",
-        "sec-fetch-site": "same-origin",
-        "sec-fetch-user": "?1",
-        "upgrade-insecure-requests": "1",
-        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
+        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+        'accept-language': 'zh-CN,zh;q=0.9',
+        'cache-control': 'no-cache',
+        'content-type': 'application/x-www-form-urlencoded',
+        'origin': 'https://faq.dailymotion.com',
+        'pragma': 'no-cache',
+        'priority': 'u=0, i',
+        'referer': 'https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048',
+        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"Windows"',
+        'sec-fetch-dest': 'document',
+        'sec-fetch-mode': 'navigate',
+        'sec-fetch-site': 'same-origin',
+        'sec-fetch-user': '?1',
+        'upgrade-insecure-requests': '1',
+        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    }
-    url = "https://faq.dailymotion.com/hc/en-us/requests"
    data = {
-        "utf8": "✓",
-        "authenticity_token": csrf_token,
-        "request%5Bticket_form_id%5D": "136048",
-        "request%5Bcollaborators%5D%5B%5D": "duke.chen@dailymotion.com",
-        "request%5Bcustom_fields%5D%5B360008684839%5D": "__dc.copyright_user_protection_-_copyright__",
-        "request%5Bcustom_fields%5D%5B30150188%5D": "copyrightform-notification",
-        "request%5Bcustom_fields%5D%5B25089567%5D": "legal_entity",
-        "request%5Bcustom_fields%5D%5B25159868%5D": "Beijing iQIYI Science & Technology Co.,Ltd",
-        "request%5Bcustom_fields%5D%5B4869133282962%5D": "Legal Department",
-        "request%5Bsubject%5D": "Copyright infringement Notification",
-        "request%5Bcustom_fields%5D%5B25613698%5D": v_url,
-        "request%5Bdescription%5D": f"We request that you take immediate action to stop the infringing activity, take steps to ensure that iQIYI Content is not re-posted on, re-linked to, or otherwise available through your site. Please inform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are \"{title}\"\r\n",
-        "request%5Bdescription_mimetype%5D": "text/plain",
-        "request%5Bcustom_fields%5D%5B4769880845586%5D": "on",
-        "request%5Bcustom_fields%5D%5B25626417%5D": "on",
-        "request%5Bcustom_fields%5D%5B4769797363346%5D": "on",
-        "request%5Bcustom_fields%5D%5B25159848%5D": "on",
-        "request%5Bcustom_fields%5D%5B4769658191250%5D": "on"
+        'utf8': '✓',
+        'authenticity_token': csrf_token,
+        'request[ticket_form_id]': '136048',
+        'request[collaborators][]': 'duke.chen@dailymotion.com',
+        'request[custom_fields][360008684839]': '__dc.copyright_user_protection_-_copyright__',
+        'request[custom_fields][30150188]': 'copyrightform-notification',
+        'request[custom_fields][25089567]': 'legal_entity',
+        'request[custom_fields][25159868]': 'Beijing iQIYI Science & Technology Co.,Ltd',
+        'request[custom_fields][4869133282962]': 'Legal Department',
+        'request[subject]': 'Copyright infringement Notification',
+        'request[custom_fields][25613698]': 'url',
+        'request[description]': 'We request that you take immediate action to stop the infringing activity, take steps to ensure that iQIYI Content is not re-posted on, re-linked to, or otherwise available through your site. Please inform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are <drama title>\r\n',
+        'request[description_mimetype]': 'text/plain',
+        'request[custom_fields][4769880845586]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][25626417]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][4769797363346]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][25159848]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][4769658191250]': [
+            'off',
+            'on',
+        ],
    }
-    response = requests.post(url, headers=headers, data=data)
-    print(response.status_code)
+    response = requests.post('https://faq.dailymotion.com/hc/en-us/requests', cookies=cookies, headers=headers, data=data)
-    print(response.text)
-    print(response)
-    return response.status_code == 200
-    except Exception as e:
-        print(f"Failed to submit the report: {str(e)}")
-        raise


+def prepare_data():
+    token = get_report_token('token')
+    cookies = get_report_token('cookies')
+    csrf_token = get_report_token('csrf_token')
+    max_update_time = max(d.get('update_time', 0) for d in (token, cookies, csrf_token) if d)
+    if max_update_time + (24 * 60 * 60) < time.time():
+        token = get_report_token('token')
+        access_token = token['token']['access_token']
+        refresh_token = token['token']['refresh_token']
+        get_cookies(access_token, refresh_token)
+        get_cookies1(access_token, refresh_token)
+        csrf_token = get_csrftoken()
+
-if __name__ == '__main__':
+    report(csrf_token['csrf_token']['current_session']['csrf_token'], cookies['cookies'])
-    cookies = get_report_token('cookies')['cookies']
-    http_client.session.cookies = requests.utils.cookiejar_from_dict(cookies)
-    csrf_token = get_csrftoken()['csrf_token']['current_session']['csrf_token']
-    report(csrf_token, 'Hunter X Hunter', 'https://www.dailymotion.com/video/x8kjx7v')
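For context on what the deleted HttpClient wrapped: mounting a urllib3 Retry adapter on a requests.Session makes the session re-issue failed requests on its own. A minimal standalone sketch of the same pattern (the URL is illustrative only); note that by default urllib3's Retry only replays idempotent methods such as GET, so the POST calls in this file were likely never retried unless allowed_methods was widened:

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retry = Retry(
        total=3,                                     # at most 3 retries per request
        backoff_factor=0.5,                          # exponential backoff between attempts
        status_forcelist=[500, 502, 503, 504, 429],  # transient statuses worth retrying
    )
    session.mount("https://", HTTPAdapter(max_retries=retry))
    session.mount("http://", HTTPAdapter(max_retries=retry))

    # Retries happen transparently; once they are exhausted the call raises
    # requests.exceptions.RetryError (wrapping urllib3's MaxRetryError).
    resp = session.get("https://example.com/health", timeout=10)

The new code's checkbox fields posting both 'off' and 'on' mirror how a browser serializes the Zendesk form's hidden inputs; that is an observation from the captured request, not documented API behavior.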
@@ -4,7 +4,8 @@ charset-normalizer==3.4.2
et-xmlfile==1.1.0
idna==3.10
importlib-metadata==6.7.0
-numpy==2.3.0
+lxml==5.4.0
+numpy==1.21.6
openpyxl==3.1.3
pandas==1.3.5
pkg_resources==0.0.0
@@ -1,35 +0,0 @@
from flask import Flask, send_file, abort, request, jsonify
from pathlib import Path

app = Flask(__name__)

PROJECT_ROOT = Path(__file__).parent.resolve()
SCREENSHOTS_DIR = Path("/opt/ql/daily_com/bin/screenshots").resolve()


@app.route('/image/screenshots/<path:filename>')
def serve_image(filename):
    file_path = SCREENSHOTS_DIR / filename

    # Guard against path traversal outside the screenshots directory
    try:
        file_path.resolve().relative_to(SCREENSHOTS_DIR.resolve())
    except ValueError:
        abort(403, description=f"Access to files outside the directory is forbidden: {file_path.resolve()}")

    if not file_path.exists():
        abort(404, description=f"File does not exist: {file_path.resolve()}")

    return send_file(file_path, as_attachment=False)


# Custom 404 error response
@app.errorhandler(404)
def handle_404(e):
    return f"404 error: {e.description}", 404


# Custom 403 error response
@app.errorhandler(403)
def handle_403(e):
    return f"403 error: {e.description}", 403


if __name__ == '__main__':
    app.run(host='0.0.0.0', debug=False, port=5000)
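The `relative_to` check above is what blocks path traversal: `Path.relative_to` raises `ValueError` whenever the resolved candidate path is not inside the base directory. A standalone sketch of the same guard (paths are illustrative):

    from pathlib import Path

    BASE = Path("/opt/ql/daily_com/bin/screenshots").resolve()

    def is_safe(filename: str) -> bool:
        candidate = (BASE / filename).resolve()
        try:
            candidate.relative_to(BASE)  # raises ValueError if outside BASE
            return True
        except ValueError:
            return False

    # is_safe("shot.png")         -> True
    # is_safe("../../etc/passwd") -> False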
test2.py
@@ -1,6 +0,0 @@
from DB import DBVidcon

db = DBVidcon()

account = db.get_account_info('4')
print(account)
@@ -1,10 +0,0 @@
from DB import DBVidcon
from logger import logger


db = DBVidcon()

logger.info("Starting to update video report status")
db.update_video_ts_status()
db.close()
logger.info("Finished updating video report status")