Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
12915c13a5 |
@ -1,38 +0,0 @@
|
||||
from DB import DBVidcon
|
||||
import requests
|
||||
from logger import logger
|
||||
db = DBVidcon()
|
||||
|
||||
|
||||
def check_video_removed(video_id):
|
||||
url = f"https://api.dailymotion.com/video/{video_id}"
|
||||
params = {"fields": "published,private,status"}
|
||||
resp = requests.get(url, params=params, timeout=10)
|
||||
|
||||
# 404 -> 不存在或已被删除
|
||||
if resp.status_code == 404:
|
||||
return 1
|
||||
|
||||
data = resp.json()
|
||||
# published=False 或 private=True 都视作“已下架”
|
||||
if not data.get("published", False) or data.get("private", False):
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def main():
|
||||
lis = db.getreport_video()
|
||||
for li in lis:
|
||||
video_id = li['v_xid']
|
||||
status = check_video_removed(video_id)
|
||||
if status == 1:
|
||||
db.mark_video_removed(li['id'], status)
|
||||
logger.info(f"视频id {video_id} 下架")
|
||||
else:
|
||||
db.mark_video_removed(li['id'], status)
|
||||
logger.info(f"视频id {video_id} 仍然存在")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -1,6 +1,6 @@
|
||||
import json, time
|
||||
import argparse
|
||||
from DB import DBVidcon
|
||||
from DB import DBVidcon, DBSA
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
@ -14,8 +14,10 @@ def main():
|
||||
args = parse_args()
|
||||
batch = int(time.time())
|
||||
db = DBVidcon()
|
||||
push = None
|
||||
empty = None
|
||||
|
||||
for chunk in DBSA.stream_video_keys(chunk_size=10_000):
|
||||
db.cache_video_keys_bulk(chunk)
|
||||
print(f"同步Redis=={len(chunk)}")
|
||||
|
||||
if args.level == 0:
|
||||
push = db.push_l0
|
||||
|
@ -1,40 +0,0 @@
|
||||
import json, time
|
||||
import argparse
|
||||
from DB import DBVidcon
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Dump keyword/title rows into Redis list."
|
||||
)
|
||||
parser.add_argument("-l", "--level", type=int, default=99,
|
||||
help="value for t.level (default: 99)")
|
||||
return parser.parse_args()
|
||||
|
||||
def main():
|
||||
batch = int(time.time())
|
||||
db = DBVidcon()
|
||||
push = db.push_web
|
||||
empty = db.web_empty
|
||||
|
||||
if empty():
|
||||
rows = db.fetch_keyword_title(level=0)
|
||||
payload_list = []
|
||||
for row in rows:
|
||||
payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False))
|
||||
if len(payload_list) >= 10000:
|
||||
push(payload_list)
|
||||
payload_list.clear()
|
||||
if payload_list: # 收尾
|
||||
push(payload_list)
|
||||
|
||||
data = {
|
||||
"level": 0,
|
||||
"batch": batch,
|
||||
"count": len(rows),
|
||||
}
|
||||
db.log_batch_start(data)
|
||||
print(f"✔ 推送 {len(rows)} 行(batch={batch})到 {push.__name__}队列完毕")
|
||||
db.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -1,57 +0,0 @@
|
||||
import requests
|
||||
from flask import Flask, request, jsonify
|
||||
from DB import DBVidcon
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
endpoint = "https://api.dailymotion.com/videos"
|
||||
DEFAULT_PAGE = 1
|
||||
FIXED_LIMIT = 100
|
||||
VALID_SORTS = {
|
||||
'recent', 'relevance', 'alpha', 'alphaaz',
|
||||
'alphaza', 'most', 'least', 'changed'
|
||||
}
|
||||
|
||||
db = DBVidcon()
|
||||
|
||||
@app.route("/get", methods=["GET"])
|
||||
def get_videos():
|
||||
keyword = request.args.get("keyword", "").strip()
|
||||
if not keyword:
|
||||
return jsonify({"status": "error", "msg": "keyword 参数不能为空"}), 400
|
||||
|
||||
# 页码和国家参数
|
||||
i = request.args.get("page", DEFAULT_PAGE, type=int)
|
||||
rn = request.args.get("rn", "US").upper()
|
||||
|
||||
# 排序参数,必须合法
|
||||
sort = request.args.get("sort", "relevance").strip().lower()
|
||||
if sort not in VALID_SORTS:
|
||||
return jsonify({
|
||||
"status": "error",
|
||||
"msg": f"sort 参数非法,可选值: {sorted(VALID_SORTS)}"
|
||||
}), 400
|
||||
|
||||
proxy_string = db.get_proxy(rn)
|
||||
proxies = {"http": proxy_string, "https": proxy_string} if proxy_string else None
|
||||
|
||||
params = {
|
||||
"search": keyword,
|
||||
"fields": "id,title,created_time,thumbnail_240_url,duration,"
|
||||
"owner.id,owner.screenname,likes_total,views_total",
|
||||
"limit": FIXED_LIMIT,
|
||||
"page": i,
|
||||
"sort": sort
|
||||
}
|
||||
|
||||
try:
|
||||
resp = requests.get(endpoint, params=params, proxies=proxies, timeout=10)
|
||||
resp.raise_for_status()
|
||||
jd = resp.json()
|
||||
return jsonify(jd), 200
|
||||
except requests.exceptions.RequestException as e:
|
||||
return jsonify({"status": "error", "msg": str(e)}), 502
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=8000, debug=False)
|
@ -1,51 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# app.py
|
||||
import requests
|
||||
from flask import Flask, jsonify, abort
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
def check_video_removed(video_id):
|
||||
"""
|
||||
调用 Dailymotion API 判断视频是否已下架/删除
|
||||
返回:
|
||||
1 → 已被删除 / 不存在 / 已下架 / 设为私有
|
||||
0 → 正常公开中
|
||||
"""
|
||||
url = f"https://api.dailymotion.com/video/{video_id}"
|
||||
params = {"fields": "published,private,status"}
|
||||
try:
|
||||
resp = requests.get(url, params=params, timeout=10)
|
||||
except requests.RequestException as exc:
|
||||
# 网络错误时返回 503,让上游知道需要重试
|
||||
abort(503, description=f"Upstream request failed: {exc}")
|
||||
|
||||
# 404 → 不存在或已被删除
|
||||
if resp.status_code == 404:
|
||||
return 1
|
||||
|
||||
# 其他非 2xx 状态码 → 直接透传给客户端
|
||||
if resp.status_code // 100 != 2:
|
||||
abort(resp.status_code, description=resp.text)
|
||||
|
||||
data = resp.json()
|
||||
# published=False 或 private=True 都视作“已下架”
|
||||
if not data.get("published", False) or data.get("private", False):
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
@app.route("/video/<video_id>", methods=["GET"])
|
||||
def video_status(video_id):
|
||||
removed = check_video_removed(video_id)
|
||||
return jsonify({"video_id": video_id, "removed": removed})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 支持通过环境变量覆盖监听地址和端口
|
||||
import os
|
||||
host = os.getenv("HOST", "0.0.0.0")
|
||||
port = 5100
|
||||
app.run(host=host, port=port, debug=False)
|
18
kill_main.sh
18
kill_main.sh
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# -------- 可按需修改 --------
|
||||
TARGET="/opt/ql/DailyMotion/main.py" # 关键字:精确到脚本路径即可
|
||||
SIG="9" # 信号;默认 -9,想温和一点改成 15
|
||||
# --------------------------------
|
||||
|
||||
pids=$(pgrep -f "$TARGET")
|
||||
|
||||
if [ -z "$pids" ]; then
|
||||
echo "没有发现正在运行的 $TARGET"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "即将发送 SIG${SIG:-15} 到进程: $pids"
|
||||
kill "-${SIG:-15}" $pids
|
||||
|
||||
echo "完成"
|
38
main.py
38
main.py
@ -1,4 +1,3 @@
|
||||
#!/opt/ql/daily_com/bin/python3
|
||||
import base64
|
||||
import traceback
|
||||
import argparse
|
||||
@ -56,15 +55,15 @@ def format_duration(seconds):
|
||||
return "00:00"
|
||||
|
||||
|
||||
def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
def get_searchInfo(keyword, level, rn, proxy_name, r=2):
|
||||
if r == 2:
|
||||
logger.info(f"NET处理->{keyword},\trn->{proxy_name},\tlevel->{level}")
|
||||
video_list = []
|
||||
max_page = 3
|
||||
limit = 100
|
||||
max_page = 2
|
||||
limit = 30
|
||||
endpoint = 'https://api.dailymotion.com/videos'
|
||||
if level == 0 or level == 1:
|
||||
max_page = 4
|
||||
max_page = 3
|
||||
limit = 100
|
||||
for j in range(1, max_page):
|
||||
params = {
|
||||
@ -88,7 +87,7 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
logger.exception(f"[Requested] 未知:{e}, keyword: {keyword}, l: {level}")
|
||||
else:
|
||||
time.sleep((3 - r) * 5)
|
||||
return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
|
||||
return get_searchInfo(keyword, level, rn, proxy_name, r - 1)
|
||||
try:
|
||||
resinfo = jsondata.get("list")
|
||||
except Exception:
|
||||
@ -98,7 +97,7 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
return None
|
||||
else:
|
||||
time.sleep((3 - r) * 5)
|
||||
return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
|
||||
return get_searchInfo(keyword, level, rn, proxy_name, r - 1)
|
||||
for index, iteminfo in enumerate(resinfo):
|
||||
calculated_index = index + 1 + (j - 1) * limit
|
||||
xid = iteminfo["id"]
|
||||
@ -106,6 +105,10 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
uxid = iteminfo["owner.id"]
|
||||
uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
|
||||
duration = iteminfo.get('duration')
|
||||
is_repeat = 0
|
||||
if db.video_key_exists(vid.strip(), rn):
|
||||
is_repeat = 1
|
||||
|
||||
if duration <= 300:
|
||||
continue
|
||||
v_data = {
|
||||
@ -123,9 +126,11 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
|
||||
"u_id": uid,
|
||||
"u_xid": uxid,
|
||||
"u_name": iteminfo.get('owner.screenname'),
|
||||
"u_pic": iteminfo.get('owner.avatar_60_url')
|
||||
"u_pic": iteminfo.get('owner.avatar_60_url'),
|
||||
"is_repeat": is_repeat,
|
||||
}
|
||||
video_list.append(v_data)
|
||||
time.sleep(3)
|
||||
if len(video_list) < 100:
|
||||
break
|
||||
return video_list
|
||||
@ -137,11 +142,11 @@ proxiesdict = db.get_proxy_agent_dict()
|
||||
def search_worker(payload, kitem, flag):
|
||||
try:
|
||||
gproxies = proxiesdict[kitem['rn']]
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], kitem['rn'], gproxies)
|
||||
if not v_list:
|
||||
for i in range(2):
|
||||
time.sleep(i * 5)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
|
||||
v_list = get_searchInfo(kitem['keyword'], kitem['level'], kitem['rn'], gproxies)
|
||||
if v_list:
|
||||
break
|
||||
time.sleep(2)
|
||||
@ -163,11 +168,10 @@ def integrate_data_parallel():
|
||||
time.sleep(10)
|
||||
continue
|
||||
|
||||
futures = []
|
||||
for payload, kitem in tasks:
|
||||
futures.append(executor.submit(search_worker, payload, kitem, flag))
|
||||
time.sleep(1)
|
||||
|
||||
futures = [
|
||||
executor.submit(search_worker, payload, kitem, flag)
|
||||
for payload, kitem in tasks
|
||||
]
|
||||
rollback = {0: [], 1: [], 2: []}
|
||||
|
||||
for fut in concurrent.futures.as_completed(futures):
|
||||
@ -178,8 +182,6 @@ def integrate_data_parallel():
|
||||
continue
|
||||
|
||||
for item in v_list:
|
||||
if not v_list:
|
||||
continue
|
||||
DBSA.upsert_video({
|
||||
"keyword": kitem["keyword"],
|
||||
"v_name": kitem["v_name"],
|
||||
@ -202,6 +204,7 @@ def integrate_data_parallel():
|
||||
"batch": kitem["batch"],
|
||||
"machine_id": MACHINE_ID,
|
||||
"level": kitem["level"],
|
||||
"is_repeat": item['is_repeat']
|
||||
})
|
||||
DBSA.flush()
|
||||
if rollback[0]:
|
||||
@ -210,6 +213,7 @@ def integrate_data_parallel():
|
||||
db.rollback_l1(rollback[1])
|
||||
if rollback[2]:
|
||||
db.rollback_l2(rollback[2])
|
||||
time.sleep(10)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
|
@ -98,9 +98,9 @@ def fetch_all_data_for_rn(rn: str, batches: list[int]) -> pd.DataFrame:
|
||||
|
||||
def export_all():
|
||||
# 指定要处理的批次
|
||||
batches = [1748965168, 1749049335]
|
||||
batches = [1747324254, 1747323990]
|
||||
# 先更新 is_repeat
|
||||
# update_is_repeat(batches)
|
||||
update_is_repeat(batches)
|
||||
|
||||
rn_list = get_rn_list()
|
||||
timestamp = datetime.now().strftime("%Y%m%d")
|
||||
|
684
oneget.py
684
oneget.py
@ -1,684 +0,0 @@
|
||||
import argparse
|
||||
import base64
|
||||
from datetime import datetime
|
||||
import concurrent.futures
|
||||
import requests
|
||||
import uuid
|
||||
import random
|
||||
import time
|
||||
import copy
|
||||
from threading import Lock
|
||||
from DB import DBVidcon, DBSA
|
||||
import json
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
from dateutil import parser as date_parser
|
||||
|
||||
MACHINE_ID = 0
|
||||
db = DBVidcon()
|
||||
proxiesdict = db.get_proxy_agent_dict()
|
||||
|
||||
|
||||
class RetryRequests:
|
||||
def __init__(
|
||||
self,
|
||||
proxies: dict = None,
|
||||
timeout: int = 10,
|
||||
total: int = 3,
|
||||
backoff_factor: float = 1.0,
|
||||
status_forcelist: tuple = (500, 502, 503, 504),
|
||||
allowed_methods: tuple = ("GET", "POST"),
|
||||
):
|
||||
self.session = requests.Session()
|
||||
self.timeout = timeout
|
||||
self.proxies = proxies
|
||||
|
||||
retry = Retry(
|
||||
total=total,
|
||||
backoff_factor=backoff_factor,
|
||||
status_forcelist=status_forcelist,
|
||||
allowed_methods=allowed_methods,
|
||||
raise_on_status=False
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry)
|
||||
self.session.mount("http://", adapter)
|
||||
self.session.mount("https://", adapter)
|
||||
|
||||
def get(self, url, **kwargs):
|
||||
kwargs.setdefault("timeout", self.timeout)
|
||||
if self.proxies:
|
||||
kwargs.setdefault("proxies", self.proxies)
|
||||
return self.session.get(url, **kwargs)
|
||||
|
||||
def post(self, url, **kwargs):
|
||||
kwargs.setdefault("timeout", self.timeout)
|
||||
if self.proxies:
|
||||
kwargs.setdefault("proxies", self.proxies)
|
||||
return self.session.post(url, **kwargs)
|
||||
|
||||
|
||||
req = RetryRequests()
|
||||
|
||||
|
||||
def clean_dash_to_zero(val):
|
||||
if val in ('-', '', None):
|
||||
return 0
|
||||
try:
|
||||
return int(val)
|
||||
except (ValueError, TypeError) as e:
|
||||
print(f"[字段异常] val = {val} → {str(e)}")
|
||||
return 0
|
||||
|
||||
|
||||
def format_create_time(timestr):
|
||||
try:
|
||||
dt = date_parser.isoparse(timestr)
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
except Exception as e:
|
||||
print(f"[时间格式错误] {timestr} → {str(e)}")
|
||||
return "1970-01-01 00:00:00"
|
||||
|
||||
|
||||
def format_duration(seconds):
|
||||
try:
|
||||
seconds = int(seconds)
|
||||
return f"{seconds // 60:02}:{seconds % 60:02}"
|
||||
except Exception:
|
||||
return "00:00"
|
||||
|
||||
|
||||
class DMHeaderManager:
|
||||
_headers_template = {
|
||||
'Accept': '*/*, */*',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'Content-Type': 'application/json, application/json',
|
||||
'Host': 'graphql.api.dailymotion.com',
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
'Referer': 'https://www.dailymotion.com/',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-site',
|
||||
'User-Agent': 'Mozilla/5.0',
|
||||
'X-DM-AppInfo-Id': 'com.dailymotion.neon',
|
||||
'X-DM-AppInfo-Type': 'website',
|
||||
'X-DM-AppInfo-Version': 'v2025-05-26T13:45:05.666Z',
|
||||
'X-DM-Neon-SSR': '0',
|
||||
'X-DM-Preferred-Country': 'tw',
|
||||
'accept-language': 'zh-CN',
|
||||
'authorization': '',
|
||||
'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'x-dm-visit-id': '',
|
||||
'x-dm-visitor-id': '',
|
||||
}
|
||||
|
||||
_user_agents = [
|
||||
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
|
||||
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
|
||||
'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
|
||||
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
||||
]
|
||||
|
||||
def __init__(self, proxies: dict = None):
|
||||
self._headers_cache = None
|
||||
self._cache_lock = Lock()
|
||||
self._proxies = proxies
|
||||
|
||||
def get_headers(self, retry: int = 2) -> dict:
|
||||
visitor_id = str(uuid.uuid4())
|
||||
visit_id = str(int(time.time() * 1000))
|
||||
traffic_segment = str(random.randint(100_000, 999_999))
|
||||
ua = random.choice(self._user_agents)
|
||||
|
||||
token_headers = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
'Pragma': 'no-cache',
|
||||
'Referer': 'https://www.dailymotion.com/',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-site',
|
||||
'User-Agent': ua,
|
||||
'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
}
|
||||
|
||||
data = {
|
||||
'client_id': 'f1a362d288c1b98099c7',
|
||||
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
|
||||
'grant_type': 'client_credentials',
|
||||
'traffic_segment': traffic_segment,
|
||||
'visitor_id': visitor_id,
|
||||
}
|
||||
|
||||
response = req.post(
|
||||
'https://graphql.api.dailymotion.com/oauth/token',
|
||||
headers=token_headers,
|
||||
data=data,
|
||||
proxies=self._proxies,
|
||||
timeout=10
|
||||
)
|
||||
response.raise_for_status()
|
||||
token = response.json()['access_token']
|
||||
|
||||
new_headers = copy.deepcopy(self._headers_template)
|
||||
new_headers['authorization'] = f'Bearer {token}'
|
||||
new_headers['x-dm-visit-id'] = visit_id
|
||||
new_headers['x-dm-visitor-id'] = visitor_id
|
||||
new_headers['User-Agent'] = ua
|
||||
|
||||
with self._cache_lock:
|
||||
self._headers_cache = copy.deepcopy(new_headers)
|
||||
|
||||
return new_headers
|
||||
|
||||
|
||||
class DMVideoInfo:
|
||||
def __init__(self, proxies: dict = None, max_retries: int = 3, backoff_factor: float = 0.5):
|
||||
self.proxies = proxies
|
||||
self.max_retries = max_retries
|
||||
self.backoff_factor = backoff_factor
|
||||
|
||||
def get_video_info(self, data: dict) -> dict:
|
||||
v_xid = data.get('v_xid')
|
||||
url = f'https://api.dailymotion.com/video/{v_xid}'
|
||||
params = {
|
||||
'fields': 'id,title,created_time,thumbnail_240_url,duration,'
|
||||
'owner.id,owner.screenname,likes_total,views_total,'
|
||||
'owner.avatar_60_url,owner.followers_total,owner.videos_total'
|
||||
}
|
||||
|
||||
try:
|
||||
resp = req.get(url, params=params, timeout=10)
|
||||
resp.raise_for_status()
|
||||
r_data = resp.json()
|
||||
xid = r_data["id"]
|
||||
vid = base64.b64encode(f"Video:{xid}".encode('utf-8')).decode('utf-8')
|
||||
uxid = r_data["owner.id"]
|
||||
uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
|
||||
duration = r_data.get("duration", 0)
|
||||
if duration < 30:
|
||||
return None
|
||||
data["v_id"] = vid
|
||||
data["title"] = r_data.get("title", "")
|
||||
data["link"] = "https://www.dailymotion.com/video/" + xid
|
||||
data["duration"] = format_duration(r_data.get("duration", 0))
|
||||
data['create_time'] = format(
|
||||
datetime.fromtimestamp(r_data.get("created_time")).strftime("%Y-%m-%d %H:%M:%S"))
|
||||
data['fans'] = clean_dash_to_zero(r_data.get("owner.followers_total", 0))
|
||||
data['videos'] = clean_dash_to_zero(r_data.get("owner.videos_total", 0))
|
||||
data['watch_number'] = clean_dash_to_zero(r_data.get("views_total", 0))
|
||||
data['cover_pic'] = r_data.get('thumbnail_240_url')
|
||||
data['u_id'] = uid
|
||||
data['u_xid'] = uxid
|
||||
data['u_name'] = r_data.get("owner.screenname", "")
|
||||
data['u_pic'] = r_data.get("owner.avatar_60_url", "")
|
||||
DBSA.upsert_video(data)
|
||||
DBSA.flush()
|
||||
except requests.RequestException as e:
|
||||
print(f"[ERROR] 请求失败 vxid={v_xid} : {e}")
|
||||
return None
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
global MACHINE_ID
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Configure worker settings."
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m", "--machine-id",
|
||||
type=int,
|
||||
help=f"Machine identifier (default: {MACHINE_ID})"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.machine_id is not None:
|
||||
MACHINE_ID = args.machine_id
|
||||
|
||||
if MACHINE_ID is None or MACHINE_ID == 0:
|
||||
raise ValueError("请指定机器编号")
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
while True:
|
||||
kwdata = db.get_web_items()
|
||||
if not kwdata:
|
||||
print("没有获取到关键词数据")
|
||||
time.sleep(30)
|
||||
continue
|
||||
print(f"搜索关键词数据: {kwdata}")
|
||||
kwdata = kwdata[0][1]
|
||||
rn = kwdata['rn']
|
||||
proxy_name = proxiesdict.get(rn)
|
||||
# proxies_str = "http://127.0.0.1:10808"
|
||||
proxies_str = db.get_proxy(proxy_name, '-1')
|
||||
proxies = {
|
||||
'http': proxies_str,
|
||||
'https': proxies_str
|
||||
}
|
||||
kw = kwdata['keyword']
|
||||
dmheader_manager = DMHeaderManager(proxies=proxies)
|
||||
dmvideo_info = DMVideoInfo(proxies=proxies)
|
||||
headers = dmheader_manager.get_headers()
|
||||
for i in range(1, 11):
|
||||
data = {
|
||||
"operationName": "SEARCH_QUERY",
|
||||
"variables": {
|
||||
"query": kw,
|
||||
"shouldIncludeTopResults": True, # 是否包含热门结果
|
||||
"shouldIncludeChannels": False, # 是否包含频道
|
||||
"shouldIncludePlaylists": False, # 是否包含播放列表
|
||||
"shouldIncludeHashtags": False, # 是否包含标签
|
||||
"shouldIncludeVideos": False, # 是否包含视频
|
||||
"shouldIncludeLives": False, # 是否包含直播
|
||||
"page": i,
|
||||
"limit": 20,
|
||||
"recaptchaToken": None
|
||||
},
|
||||
"query": """
|
||||
fragment VIDEO_BASE_FRAGMENT on Video {
|
||||
id
|
||||
xid
|
||||
title
|
||||
createdAt
|
||||
duration
|
||||
aspectRatio
|
||||
thumbnail(height: PORTRAIT_240) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
creator {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
avatar(height: SQUARE_60) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment CHANNEL_BASE_FRAG on Channel {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
isFollowed
|
||||
avatar(height: SQUARE_120) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
followerEngagement {
|
||||
id
|
||||
followDate
|
||||
__typename
|
||||
}
|
||||
metrics {
|
||||
id
|
||||
engagement {
|
||||
id
|
||||
followers {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
total
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment PLAYLIST_BASE_FRAG on Collection {
|
||||
id
|
||||
xid
|
||||
name
|
||||
description
|
||||
thumbnail(height: PORTRAIT_240) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
creator {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
avatar(height: SQUARE_60) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
metrics {
|
||||
id
|
||||
engagement {
|
||||
id
|
||||
videos(filter: {visibility: {eq: PUBLIC}}) {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
total
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment HASHTAG_BASE_FRAG on Hashtag {
|
||||
id
|
||||
xid
|
||||
name
|
||||
metrics {
|
||||
id
|
||||
engagement {
|
||||
id
|
||||
videos {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
total
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment LIVE_BASE_FRAGMENT on Live {
|
||||
id
|
||||
xid
|
||||
title
|
||||
audienceCount
|
||||
aspectRatio
|
||||
isOnAir
|
||||
thumbnail(height: PORTRAIT_240) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
creator {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
avatar(height: SQUARE_60) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
query SEARCH_QUERY(
|
||||
$query: String!,
|
||||
$shouldIncludeTopResults: Boolean!,
|
||||
$shouldIncludeVideos: Boolean!,
|
||||
$shouldIncludeChannels: Boolean!,
|
||||
$shouldIncludePlaylists: Boolean!,
|
||||
$shouldIncludeHashtags: Boolean!,
|
||||
$shouldIncludeLives: Boolean!,
|
||||
$page: Int,
|
||||
$limit: Int,
|
||||
$sortByVideos: SearchVideoSort,
|
||||
$durationMinVideos: Int,
|
||||
$durationMaxVideos: Int,
|
||||
$createdAfterVideos: DateTime,
|
||||
$recaptchaToken: String
|
||||
) {
|
||||
search(token: $recaptchaToken) {
|
||||
id
|
||||
|
||||
stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
...VIDEO_BASE_FRAGMENT
|
||||
...CHANNEL_BASE_FRAG
|
||||
...PLAYLIST_BASE_FRAG
|
||||
...HASHTAG_BASE_FRAG
|
||||
...LIVE_BASE_FRAGMENT
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
videos(
|
||||
query: $query,
|
||||
first: $limit,
|
||||
page: $page,
|
||||
sort: $sortByVideos,
|
||||
durationMin: $durationMinVideos,
|
||||
durationMax: $durationMaxVideos,
|
||||
createdAfter: $createdAfterVideos
|
||||
) @include(if: $shouldIncludeVideos) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...VIDEO_BASE_FRAGMENT
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...LIVE_BASE_FRAGMENT
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...CHANNEL_BASE_FRAG
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...PLAYLIST_BASE_FRAG
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...HASHTAG_BASE_FRAG
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
__typename
|
||||
}
|
||||
}
|
||||
"""
|
||||
}
|
||||
|
||||
payload = json.dumps(data).encode()
|
||||
|
||||
response = req.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
|
||||
proxies=proxies)
|
||||
|
||||
data = response.json()
|
||||
try:
|
||||
edges = data['data']['search']['stories']['edges']
|
||||
except (TypeError,KeyError):
|
||||
print("stories 为 None 或结构异常,跳过")
|
||||
break
|
||||
edges_len = len(edges)
|
||||
print(f"第 {i} 页,关键词: {kw},获取到 {edges_len} 条数据")
|
||||
tancks = []
|
||||
for j, edge in enumerate(edges):
|
||||
node = edge.get("node", {})
|
||||
s_data = {
|
||||
"keyword": kw,
|
||||
"v_name": kwdata.get("v_name", ""),
|
||||
"v_xid": node.get("xid"),
|
||||
"batch": kwdata.get("batch"),
|
||||
"rn": kwdata.get("rn"),
|
||||
"machine_id": MACHINE_ID,
|
||||
"index": (i - 1) * 20 + j + 1,
|
||||
"level": 0,
|
||||
}
|
||||
tancks.append(s_data)
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
|
||||
executor.map(dmvideo_info.get_video_info, tancks)
|
||||
if edges_len < 20:
|
||||
break
|
||||
time.sleep(10)
|
||||
|
||||
time.sleep(20)
|
||||
|
||||
if __name__ == '__main__':
|
||||
parse_args()
|
||||
start_time = datetime.now()
|
||||
print(f"开始时间:{start_time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
main()
|
||||
|
||||
|
||||
|
29
onoe.py
29
onoe.py
@ -34,7 +34,19 @@ UserAgent = [
|
||||
'User-Agent, Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.3.4000 Chrome/47.0.2526.73 Safari/537.36',
|
||||
'User-Agent, Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)']
|
||||
|
||||
sec_ch_ua_list = [
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
|
||||
'"Chromium";v="136", "Not;A=Brand";v="24", "Google Chrome";v="136"',
|
||||
'"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
|
||||
'"Chromium";v="53", "Not;A=Brand";v="24", "Google Chrome";v="53"',
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
|
||||
'"Chromium";v="47", "Not;A=Brand";v="24", "Google Chrome";v="47"',
|
||||
'"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"'
|
||||
]
|
||||
|
||||
def get_part_ids(part_num: int, take: int, offset: int = 0):
|
||||
part_ids = list(range(offset, offset + take))
|
||||
@ -221,14 +233,17 @@ def gettoken(proxy, r=2):
|
||||
}
|
||||
try:
|
||||
proxy_str = db.get_proxy(proxy)
|
||||
logger.info(f"[代理] => {proxy_str}")
|
||||
url = 'https://graphql.api.dailymotion.com/oauth/token'
|
||||
response = requests.post(url, headers=headers, data=data, proxies={"http": proxy_str, "https": proxy_str})
|
||||
token = response.json()['access_token']
|
||||
copy_headers = copy.deepcopy(headers1)
|
||||
uaidx = random.randint(0, len(UserAgent) - 1)
|
||||
copy_headers['authorization'] = "Bearer " + token
|
||||
copy_headers['x-dm-visit-id'] = str(int(time.time() * 1000))
|
||||
copy_headers['x-dm-visitor-id'] = uuid_with_dash
|
||||
copy_headers['User-Agent'] = UserAgent[random.randint(0, len(UserAgent) - 1)]
|
||||
copy_headers['User-Agent'] = UserAgent[uaidx]
|
||||
copy_headers['sec-ch-ua'] = sec_ch_ua_list[uaidx]
|
||||
copy_headers['X-DM-Preferred-Country'] = proxy.lower()
|
||||
with _cache_lock:
|
||||
_headers_cache = copy_headers
|
||||
@ -267,18 +282,18 @@ def solve_recaptcha_v3_with_proxy(
|
||||
payload = {
|
||||
"clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996",
|
||||
"task": {
|
||||
"type": "ReCaptchaV3Task",
|
||||
"type": "ReCaptchaV3TaskProxyLess",
|
||||
"websiteURL": f"https://www.dailymotion.com/search/{encoded_query}/top-results",
|
||||
"websiteKey": "6LeOJBIrAAAAAPMIjyYvo-eN_9W1HDOkrEqHR8tM",
|
||||
"pageAction": "___grecaptcha_cfg.clients['100000']['L']['L']['promise-callback'](gRecaptchaResponse)",
|
||||
"pageAction": "search",
|
||||
"minScore": 0.5
|
||||
}
|
||||
}
|
||||
resp = requests.post(create_url, json=payload, headers=headers, timeout=30)
|
||||
resp = requests.post(create_url, data=json.dumps(payload), headers=headers, timeout=30)
|
||||
logger.info(f"[token] 发送 payload:{payload}")
|
||||
resp.raise_for_status()
|
||||
task_id = resp.json()["taskId"]
|
||||
logger.info(f"task_id: {task_id}")
|
||||
logger.info(f"task_id: {resp.text}")
|
||||
# 轮询获取结果
|
||||
check_payload = {"clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996", "taskId": task_id}
|
||||
for i in range(max_poll_attempts):
|
||||
@ -287,7 +302,7 @@ def solve_recaptcha_v3_with_proxy(
|
||||
result = r.json()
|
||||
logger.info(f"第{i}次,task_id:{task_id},结果:{result}")
|
||||
if result.get("status") == "ready":
|
||||
return result["solution"]["token"]
|
||||
return result["solution"]["gRecaptchaResponse"]
|
||||
time.sleep(polling_interval)
|
||||
|
||||
raise TimeoutError(f"任务 {task_id} 在轮询 {max_poll_attempts} 次后未完成")
|
||||
|
BIN
oss/BAZTSJT.pdf
BIN
oss/BAZTSJT.pdf
Binary file not shown.
BIN
oss/LOA.pdf
BIN
oss/LOA.pdf
Binary file not shown.
@ -1,56 +0,0 @@
|
||||
import requests
|
||||
|
||||
url = "https://api.siliconflow.cn/v1/chat/completions"
|
||||
kw = "朝雪录"
|
||||
rn = "US"
|
||||
payload = {
|
||||
"model": "Qwen/Qwen3-14B",
|
||||
"max_tokens": 512,
|
||||
"enable_thinking": True,
|
||||
"thinking_budget": 4096,
|
||||
"min_p": 0.05,
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.7,
|
||||
"top_k": 50,
|
||||
"frequency_penalty": 0.5,
|
||||
"n": 1,
|
||||
"stream": False,
|
||||
"stop": [],
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": """你是一个视频搜索优化助手。用户给你一个中文视频标题或关键词,请你翻译并联想出 10 个适合用于英文视频网站(如 Dailymotion)搜索的关键词,结果用英文逗号分隔输出,仅返回关键词列表,不加说明。
|
||||
|
||||
示例输入:朝雪录
|
||||
示例输出:Coroner's Diary,Coroners Diary, Coroners Diary episode,Coroners Diary season 1,Coroners Diary full episode,coroners diary
|
||||
"""
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"请推理:{kw} 并输出 10 个地区缩写为{rn}的适合用于视频网站搜索的关键词,地区缩写不在关键词内,。"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
headers = {
|
||||
"Authorization": "Bearer sk-isvydeloxqhoiwoiojleghdsuhagryjbxzphfhxneevxeoeh",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
response = requests.post(url, json=payload, headers=headers, timeout=30)
|
||||
|
||||
def parse_keywords_from_response(resp_json):
|
||||
try:
|
||||
# 取出文本内容
|
||||
content = resp_json["choices"][0]["message"]["content"]
|
||||
# 按英文逗号分隔
|
||||
keywords = [kw.strip() for kw in content.split(",") if kw.strip()]
|
||||
return keywords
|
||||
except Exception as e:
|
||||
print("解析失败:", e)
|
||||
return []
|
||||
|
||||
kws = parse_keywords_from_response(response.json())
|
||||
|
||||
print(kws)
|
||||
print(len(kws))
|
@ -1,19 +0,0 @@
|
||||
import json
|
||||
from DB import DBVidcon
|
||||
payload_list = []
|
||||
db = DBVidcon()
|
||||
rows = db.get_report_video()
|
||||
push = db.push_report
|
||||
|
||||
# =======================
|
||||
|
||||
for row in rows:
|
||||
payload_list.append(json.dumps({**row}, ensure_ascii=False))
|
||||
if len(payload_list) >= 10000:
|
||||
push(payload_list)
|
||||
payload_list.clear()
|
||||
if payload_list: # 收尾
|
||||
push(payload_list)
|
||||
|
||||
db.close()
|
||||
|
124
report.py
124
report.py
@ -1,124 +0,0 @@
|
||||
import argparse
|
||||
import json
|
||||
import time
|
||||
from DB import DBVidcon, DBSA
|
||||
from report_video import DailymotionClient
|
||||
from logger import logger
|
||||
import requests
|
||||
|
||||
MACHINE_ID = None
|
||||
IsSubsequent = False
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
global MACHINE_ID, IsSubsequent
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Configure worker settings."
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m", "--machine-id",
|
||||
type=int,
|
||||
help=f"Machine identifier (default: {MACHINE_ID})"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-s", "--IsSubsequent",
|
||||
type=int,
|
||||
help=f"Maximum concurrent workers (default: {IsSubsequent})"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.machine_id is not None:
|
||||
MACHINE_ID = args.machine_id
|
||||
|
||||
if args.IsSubsequent is not None:
|
||||
if args.IsSubsequent <= 0:
|
||||
IsSubsequent = False
|
||||
else:
|
||||
IsSubsequent = True
|
||||
if MACHINE_ID is None:
|
||||
raise ValueError("请指定机器编号")
|
||||
return args
|
||||
|
||||
|
||||
parse_args()
|
||||
|
||||
|
||||
def get_public_ip():
|
||||
try:
|
||||
response = requests.get("https://api.ipify.org?format=json", timeout=5)
|
||||
return response.json().get("ip")
|
||||
except requests.RequestException as e:
|
||||
print("获取失败:", e)
|
||||
return None
|
||||
|
||||
|
||||
ip = get_public_ip()
|
||||
logger.info(f"当前机器IP: {ip}, 机器编号: {MACHINE_ID}, 是否后续处理: {IsSubsequent}")
|
||||
db = DBVidcon()
|
||||
|
||||
account = db.get_account_info(MACHINE_ID)
|
||||
|
||||
d = DailymotionClient(email=account['account'], password=account['password'])
|
||||
|
||||
k = {
|
||||
"open": 1,
|
||||
"solved": 2,
|
||||
"awaiting your reply": 3,
|
||||
}
|
||||
|
||||
last_main_run = 0
|
||||
last_subsequent_run = 0
|
||||
|
||||
MAIN_INTERVAL = 60 * 60 # 每 5 分钟执行一次
|
||||
SUBSEQUENT_INTERVAL = 30 * 60 # 每 60 分钟执行一次
|
||||
|
||||
# d.test()
|
||||
|
||||
while True:
|
||||
now = int(time.time())
|
||||
|
||||
# 处理主流程
|
||||
if now - last_main_run >= MAIN_INTERVAL:
|
||||
last_main_run = now
|
||||
re_list = []
|
||||
idss = []
|
||||
lis = db.item_report(100)
|
||||
if len(lis) > 0:
|
||||
for li in lis:
|
||||
item = json.loads(li[0])
|
||||
re_list.append(item)
|
||||
idss.append(item['id'])
|
||||
logger.info(f"name:{item['name_title']},link:{item['link']} ")
|
||||
try:
|
||||
ids, info, report_id, status, report_ts = d.process_ticket(re_list)
|
||||
subsequent_status = k.get(status, 1)
|
||||
db.update_fight_record_status(
|
||||
ids, report_id, 2, f"http://{ip}:5000/image/{info}",
|
||||
report_ts, subsequent_status, MACHINE_ID
|
||||
)
|
||||
db.flush()
|
||||
except Exception as e:
|
||||
logger.error(f"ID:{re_list[0]['id']}, end id{re_list[-1]['id']}, e:{e}")
|
||||
db.update_fight_record_status(idss, 0, 3, str(e), mid=MACHINE_ID)
|
||||
time.sleep(60) # 出错延迟
|
||||
|
||||
if now - last_subsequent_run >= SUBSEQUENT_INTERVAL and IsSubsequent:
|
||||
last_subsequent_run = now
|
||||
subsequent_list = db.get_subsequent_report_video(MACHINE_ID)
|
||||
if len(subsequent_list) > 0:
|
||||
for li in subsequent_list:
|
||||
subsequent_status = 0
|
||||
r_id = li['report_id']
|
||||
logger.info(f"subsequent report_id:{r_id} ")
|
||||
# try:
|
||||
subsequent_status, info = d.report_follow_up(r_id)
|
||||
db.update_subsequent_status_by_report_id(
|
||||
r_id, subsequent_status, f"http://{ip}:5000/image/{info}"
|
||||
)
|
||||
# except Exception as e:
|
||||
# logger.logger.error(f"ID:{rs_id}, e:{e}")
|
||||
# db.update_subsequent_status_by_id(rs_id, 1, str(e))
|
||||
time.sleep(5) # 避免频繁请求
|
||||
time.sleep(5)
|
417
report_video.py
417
report_video.py
@ -1,417 +0,0 @@
|
||||
import time
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from sys import platform
|
||||
import requests
|
||||
from logger import logger
|
||||
from playwright.sync_api import (
|
||||
sync_playwright,
|
||||
TimeoutError as PlaywrightTimeoutError,
|
||||
Page,
|
||||
Browser,
|
||||
)
|
||||
|
||||
|
||||
def solve_turnstile_capsolver(page: Page,
|
||||
timeout: int = 120) -> bool:
|
||||
"""
|
||||
使用 CapSolver 自动完成当前 Page 上的 Cloudflare Turnstile。
|
||||
成功返回 True,失败/超时返回 False。
|
||||
"""
|
||||
cap_key = "CAP-A76C932D4C6CCB3CA748F77FDC07D996"
|
||||
widget = page.query_selector("div.cf-turnstile[data-sitekey]")
|
||||
if not widget:
|
||||
return False
|
||||
sitekey = widget.get_attribute("data-sitekey")
|
||||
page_url = page.url
|
||||
|
||||
create_payload = {
|
||||
"clientKey": cap_key,
|
||||
"task": {
|
||||
"type": "TurnstileTaskProxyLess",
|
||||
"websiteURL": page_url,
|
||||
"websiteKey": sitekey
|
||||
}
|
||||
}
|
||||
create_resp = requests.post(
|
||||
"https://api.capsolver.com/createTask",
|
||||
json=create_payload, timeout=20
|
||||
).json()
|
||||
if create_resp.get("errorId"):
|
||||
print("[CapSolver] createTask 失败:", create_resp)
|
||||
return False
|
||||
task_id = create_resp["taskId"]
|
||||
|
||||
poll_payload = {"clientKey": cap_key, "taskId": task_id}
|
||||
token = None
|
||||
elapsed, step = 0, 3
|
||||
while elapsed < timeout:
|
||||
time.sleep(step)
|
||||
elapsed += step
|
||||
res = requests.post(
|
||||
"https://api.capsolver.com/getTaskResult",
|
||||
json=poll_payload, timeout=15
|
||||
).json()
|
||||
if res.get("status") == "ready":
|
||||
token = res["solution"]["token"]
|
||||
break
|
||||
if res.get("status") != "processing":
|
||||
print("[CapSolver] getTaskResult 异常:", res)
|
||||
return False
|
||||
|
||||
if not token:
|
||||
print("[CapSolver] 超时未取到 token")
|
||||
return False
|
||||
|
||||
page.evaluate(
|
||||
"""(tk) => {
|
||||
const ta = document.querySelector('textarea[name="cf-turnstile-response"]');
|
||||
if (ta) ta.value = tk;
|
||||
if (window.turnstileCallback)
|
||||
try { window.turnstileCallback(tk); } catch(e){}
|
||||
}""",
|
||||
token
|
||||
)
|
||||
page.wait_for_timeout(1500)
|
||||
return True
|
||||
|
||||
|
||||
def require_login(func):
|
||||
@functools.wraps(func)
|
||||
def wrapper(self, *args, **kwargs):
|
||||
self.ensure_login()
|
||||
return func(self, *args, **kwargs)
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class DailymotionClient:
|
||||
url = "https://faq.dailymotion.com/hc/en-us/requests/new"
|
||||
EMAIL = "copyright@qiyi.com"
|
||||
PASSWORD = "ppsIQIYI2018@"
|
||||
|
||||
def __init__(self,email, password, headless: bool = None):
|
||||
self.email = email
|
||||
self.password = password
|
||||
self.headless = headless
|
||||
self.check_interval = 60 * 60
|
||||
if self.headless is None:
|
||||
self.headless = platform == "linux" or platform == "linux2"
|
||||
|
||||
if self.headless:
|
||||
proxy = None
|
||||
self.file_path = "/opt/ql/DailyMotion/oss/LOA.pdf"
|
||||
self.file_path2 = "/opt/ql/DailyMotion/oss/BAZTSJT.pdf"
|
||||
else:
|
||||
proxy={'server': 'http://127.0.0.1:7890'}
|
||||
self.file_path = "./oss/LOA.pdf"
|
||||
self.file_path2 = "./oss/BAZTSJT.pdf"
|
||||
logger.info(f"Launching DailymotionClient with headless={self.headless}, proxy={proxy}")
|
||||
self._pw = sync_playwright().start()
|
||||
self.browser: Browser = self._pw.chromium.launch(
|
||||
headless=self.headless,
|
||||
proxy=proxy,
|
||||
)
|
||||
self.context = self.browser.new_context(
|
||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/122.0.0.0 Safari/537.36",
|
||||
locale="en-US",
|
||||
viewport={"width": 1280, "height": 800},
|
||||
timezone_id="Asia/Shanghai",
|
||||
permissions=[],
|
||||
)
|
||||
self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
||||
self.page: Page = self.context.new_page()
|
||||
|
||||
self._last_check_ts = 0
|
||||
self._last_check_result = False
|
||||
os.makedirs('screenshots', exist_ok=True)
|
||||
self.page.goto(self.url)
|
||||
|
||||
def _do_login(self) -> None:
|
||||
self.page.goto(self.url, timeout=30000)
|
||||
# self.page.wait_for_load_state("networkidle", timeout=30000)
|
||||
self.page.wait_for_timeout(3000)
|
||||
|
||||
file_path = f'screenshots/{str(int(time.time()))}.png'
|
||||
self.page.screenshot(path=file_path)
|
||||
|
||||
if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
|
||||
ok = solve_turnstile_capsolver(self.page)
|
||||
if not ok:
|
||||
raise RuntimeError("CapSolver 处理 Turnstile 失败")
|
||||
|
||||
logbtn = self.page.locator("//a[@class='login button']")
|
||||
if logbtn.count() > 0:
|
||||
logbtn.nth(0).click()
|
||||
|
||||
self.page.wait_for_selector("//input[@data-testid=\"emailInput\"]")
|
||||
|
||||
# “我了解”弹窗
|
||||
i_now_btn = self.page.locator("button:has-text(\"I understand\")")
|
||||
if i_now_btn.count() > 0:
|
||||
i_now_btn.click()
|
||||
|
||||
# 输入账号密码
|
||||
email_edit = self.page.locator("//input[@data-testid=\"emailInput\"]")
|
||||
password_edit = self.page.locator("//input[@data-testid=\"passwordInput\"]")
|
||||
if email_edit.count():
|
||||
email_edit.fill(self.email)
|
||||
if password_edit.count():
|
||||
password_edit.fill(self.password)
|
||||
|
||||
# 登录
|
||||
login_btn = self.page.locator('button[form="signin-form"][type="submit"]')
|
||||
try:
|
||||
self.page.wait_for_selector(
|
||||
'button[form="signin-form"][type="submit"]:not([disabled])', timeout=20000
|
||||
)
|
||||
except PlaywrightTimeoutError:
|
||||
pass
|
||||
login_btn.click()
|
||||
|
||||
# 等待跳回
|
||||
self.page.wait_for_url(self.url, timeout=30000)
|
||||
time.sleep(1)
|
||||
self._last_check_ts = time.time()
|
||||
self._last_check_result = True
|
||||
|
||||
def _detect_login(self) -> bool:
|
||||
self.page.goto(self.url, timeout=30000)
|
||||
self.page.wait_for_timeout(3000)
|
||||
return self.page.locator("//a[@class='login button']").count() == 0
|
||||
|
||||
def is_logged_in(self) -> bool:
|
||||
now = time.time()
|
||||
if now - self._last_check_ts < self.check_interval:
|
||||
return self._last_check_result
|
||||
|
||||
try:
|
||||
ok = self._detect_login()
|
||||
except Exception:
|
||||
ok = False
|
||||
|
||||
self._last_check_ts = now
|
||||
self._last_check_result = ok
|
||||
return ok
|
||||
|
||||
def ensure_login(self) -> None:
|
||||
if not self.is_logged_in():
|
||||
self._do_login()
|
||||
|
||||
@require_login
|
||||
def process_ticket(self, lis: list):
|
||||
|
||||
titles = "\r\n"
|
||||
links = ""
|
||||
ids= []
|
||||
title = ""
|
||||
link = ""
|
||||
assignment = True
|
||||
for li in lis:
|
||||
if assignment:
|
||||
title = li['name_title']
|
||||
link = li['link']
|
||||
assignment = False
|
||||
ids.append(li['id'])
|
||||
titles += li['name_title'] + ",\r\n"
|
||||
links += li['link'] + ",\r\n"
|
||||
logger.info(f"Processing ticket for title: {titles}, link: {links}")
|
||||
self.page.goto(self.url, timeout=3000)
|
||||
titles_list = [title.strip() for title in titles.split(',')]
|
||||
unique_titles = list(set(titles_list))
|
||||
unique_titles.sort()
|
||||
titles =",".join(unique_titles) # 去重
|
||||
description = """We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.
|
||||
1) please help remove these videos
|
||||
2) The drama series titles are {}
|
||||
""".format(titles)
|
||||
# likls = ["\"" + l + "\"" for l in link]
|
||||
# links = ','.join(likls)
|
||||
if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
|
||||
ok = solve_turnstile_capsolver(self.page)
|
||||
if not ok:
|
||||
raise RuntimeError("CapSolver 处理 Turnstile 失败")
|
||||
# file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
||||
# self.page.screenshot(path=file_path)
|
||||
resports = self.page.locator('li.blocks-item:nth-child(8)')
|
||||
resports.click()
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
cc = self.page.locator("input#request_collaborators_")
|
||||
cc.scroll_into_view_if_needed()
|
||||
cc.click()
|
||||
cc.type("duke.chen@dailymotion.com")
|
||||
|
||||
self.page.get_by_role("button", name="Copyright infringement").click()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("button", name="Notification").nth(0).click()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("button", name="A legal entity").click()
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("Corporate name").fill("Beijing iQIYI Science & Technology Co.,Ltd")
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("Legal status").fill("Legal Department")
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("Subject").fill("Copyright infringement Notification")
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("Please indicate the URL of the video(s) you would like to report*").fill(links)
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("Description").nth(1).fill(description)
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("I state in good faith", exact=False).check()
|
||||
time.sleep(1)
|
||||
self.page.get_by_label("I state in good faith that the use of the Protected", exact=False).check()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("checkbox", name="I certify that all information provided", exact=False).check()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("checkbox", name="I acknowledge that my statements", exact=False).check()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("checkbox", name="The data provided through this form", exact=False).check()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("checkbox", name="By submitting the present form,", exact=False).check()
|
||||
time.sleep(1)
|
||||
self.page.get_by_role("textbox", name="electronic signature", exact=False).fill("柴达") # 占位
|
||||
time.sleep(1)
|
||||
self.page.set_input_files('input#request-attachments', [
|
||||
self.file_path,
|
||||
self.file_path2
|
||||
])
|
||||
self.page.wait_for_timeout(8000)
|
||||
self.page.get_by_role("button", name="Submit").click()
|
||||
time.sleep(2)
|
||||
file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
||||
locator = self.page.locator("//dt[normalize-space(.)='Id']/following-sibling::dd[1]")
|
||||
raw_text = locator.text_content()
|
||||
match = re.search(r'\d+', raw_text or '')
|
||||
report_id = match.group() if match else None
|
||||
status_raw = self.page.locator("span.status-label").text_content()
|
||||
subsequent_status = status_raw.strip().lower() if status_raw else None
|
||||
time_elem = self.page.locator("dt", has_text="Created").locator("xpath=following-sibling::dd[1]/time")
|
||||
|
||||
datetime_str = time_elem.get_attribute("datetime") # e.g. 2025-06-12T06:15:33+00:00
|
||||
if datetime_str:
|
||||
dt = datetime.fromisoformat(datetime_str.replace("Z", "+00:00")) # 安全处理 ISO 时间
|
||||
timestamp = int(dt.timestamp())
|
||||
else:
|
||||
timestamp = None
|
||||
self.page.screenshot(path=file_path)
|
||||
if self.page.url != self.url:
|
||||
self.page.goto(self.url, timeout=30000)
|
||||
|
||||
return ids, file_path, report_id, subsequent_status, timestamp
|
||||
|
||||
@require_login
|
||||
def report_follow_up(self, report_id: str):
|
||||
max_retries = 3
|
||||
retry_delay = 2
|
||||
loaded = False
|
||||
subsequent_status = ""
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
self.page.goto(f"https://faq.dailymotion.com/hc/en-us/requests/{report_id}", timeout=30000)
|
||||
# self.page.wait_for_load_state("networkidle") # 保证页面加载稳定
|
||||
self.page.wait_for_selector("span.status-label", timeout=30000)
|
||||
try:
|
||||
status_raw = self.page.locator("span.status-label").text_content()
|
||||
except Exception as e:
|
||||
print(f"[警告] 获取状态标签失败: {e}")
|
||||
status_raw = None
|
||||
|
||||
subsequent_status = status_raw.strip().lower() if status_raw else None
|
||||
loaded = True
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"[ERROR] 尝试 {attempt + 1}/{max_retries} 失败: {e}")
|
||||
if attempt < max_retries - 1:
|
||||
time.sleep(retry_delay)
|
||||
|
||||
if not loaded:
|
||||
return 1, "页面加载失败"
|
||||
|
||||
txt = (
|
||||
"I am the authorized agent of Beijing iQIYI Technology Co., Ltd., responsible for dealing with "
|
||||
"unauthorized overseas distribution of pirated videos of our works. "
|
||||
"We have confirmed that the above links contain infringing content and we insist on requesting to takedown. Thank you!"
|
||||
)
|
||||
|
||||
if "awaiting your reply" in subsequent_status:
|
||||
span_show = self.page.locator('span.comment-show-container-content')
|
||||
if span_show.count() > 0:
|
||||
span_show.nth(0).click()
|
||||
self.page.wait_for_timeout(1000)
|
||||
|
||||
textarea = self.page.locator('#request_comment_body')
|
||||
textarea.type(txt, delay=30)
|
||||
self.page.wait_for_timeout(1000)
|
||||
self.page.get_by_role("button", name="Submit").click()
|
||||
|
||||
success = self.wait_for_selector_safe("span.status-label", timeout=30000, retries=3)
|
||||
if not success:
|
||||
return 1, "提交后未检测到状态更新"
|
||||
|
||||
span_show = self.page.locator('span.comment-show-container-content')
|
||||
if span_show.count() > 0:
|
||||
span_show.nth(0).click()
|
||||
pic_path = f'screenshots/{str(int(time.time()))}_{report_id}.png'
|
||||
self.page.screenshot(path=pic_path)
|
||||
return 0, pic_path
|
||||
|
||||
elif "open" in subsequent_status:
|
||||
return 1, ""
|
||||
|
||||
elif "solved" in subsequent_status:
|
||||
return 2, ""
|
||||
|
||||
return 0, "未知状态"
|
||||
|
||||
def wait_for_selector_safe(self, selector: str, timeout=30000, retries=3, retry_delay=2):
|
||||
for i in range(retries):
|
||||
try:
|
||||
self.page.wait_for_selector(selector, timeout=timeout)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"[重试] 第 {i + 1}/{retries} 次等待 {selector} 失败: {e}")
|
||||
if i < retries - 1:
|
||||
time.sleep(retry_delay)
|
||||
return False
|
||||
|
||||
@require_login
|
||||
def test(self):
|
||||
logger.info(f"Testing DailymotionClient with email: {self.email}")
|
||||
self.page.goto(self.url, timeout=30000)
|
||||
file_path = f'screenshots/{str(int(time.time()))}_test.png'
|
||||
self.page.screenshot(path=file_path)
|
||||
self.page.wait_for_timeout(1000)
|
||||
file_path = f"screenshots/{str(int(time.time()))}_test2.png"
|
||||
self.page.screenshot(path=file_path)
|
||||
logger.info(f"Test screenshot saved to {file_path}")
|
||||
self.page.wait_for_timeout(1000)
|
||||
file_path = f"screenshots/{str(int(time.time()))}_test3.png"
|
||||
self.page.screenshot(path=file_path)
|
||||
logger.info(f"Test screenshot saved to {file_path}")
|
||||
|
||||
def close(self):
|
||||
try:
|
||||
self.page.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self.browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._pw.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dm = DailymotionClient("copyright@qiyi.com", "ppsIQIYI2018@")
|
||||
# dm.process_ticket("恋爱学园","https://www.dailymotion.com/video/x9lfr24")
|
||||
dm.report_follow_up("13566")
|
@ -1,66 +1,10 @@
|
||||
import time
|
||||
|
||||
import requests
|
||||
import json
|
||||
import redis
|
||||
import requests
|
||||
import urllib3
|
||||
from matplotlib.artist import allow_rasterization
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
from typing import Optional, Dict, Any, Union
|
||||
|
||||
|
||||
class HttpClient:
|
||||
def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
|
||||
self.session = requests.Session()
|
||||
# 配置重试策略
|
||||
retry_strategy = Retry(
|
||||
total=max_retries,
|
||||
backoff_factor=backoff_factor,
|
||||
status_forcelist=[500, 502, 503, 504, 429]
|
||||
)
|
||||
|
||||
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||
self.session.mount("http://", adapter)
|
||||
self.session.mount("https://", adapter)
|
||||
|
||||
def request(self,
|
||||
method: str,
|
||||
url: str,
|
||||
headers: Optional[Dict] = None,
|
||||
params: Optional[Dict] = None,
|
||||
data: Optional[Union[Dict, str]] = None,
|
||||
cookies: Optional[Dict] = None,
|
||||
allow_redirects: bool = True,
|
||||
timeout: int = 30,
|
||||
**kwargs) -> requests.Response:
|
||||
try:
|
||||
response = self.session.request(
|
||||
method=method,
|
||||
url=url,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=data,
|
||||
cookies=cookies,
|
||||
allow_redirects=allow_redirects,
|
||||
timeout=timeout,
|
||||
**kwargs
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except requests.exceptions.RequestException as e:
|
||||
print(f"请求失败: {url}, 错误: {str(e)}")
|
||||
raise
|
||||
|
||||
def get(self, url: str, **kwargs) -> requests.Response:
|
||||
return self.request("GET", url, **kwargs)
|
||||
|
||||
def post(self, url: str, **kwargs) -> requests.Response:
|
||||
return self.request("POST", url, **kwargs)
|
||||
|
||||
|
||||
# 创建全局的 HTTP 客户端实例
|
||||
http_client = HttpClient()
|
||||
|
||||
session = requests.Session()
|
||||
_REDIS_CONF = {
|
||||
"host": "192.144.230.75",
|
||||
"port": 6379,
|
||||
@ -88,197 +32,151 @@ def get_report_token(key_name: str):
|
||||
|
||||
|
||||
def login():
|
||||
try:
|
||||
headers = {
|
||||
"Accept": "*/*",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Origin": "https://www.dailymotion.com",
|
||||
"Pragma": "no-cache",
|
||||
"Referer": "https://www.dailymotion.com/",
|
||||
"Sec-Fetch-Dest": "empty",
|
||||
"Sec-Fetch-Mode": "cors",
|
||||
"Sec-Fetch-Site": "same-site",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
|
||||
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": "\"Windows\""
|
||||
}
|
||||
url = "https://graphql.api.dailymotion.com/oauth/token"
|
||||
data = {
|
||||
"client_id": "f1a362d288c1b98099c7",
|
||||
"client_secret": "eea605b96e01c796ff369935357eca920c5da4c5",
|
||||
"grant_type": "password",
|
||||
"username": "copyright@qiyi.com",
|
||||
"password": "ppsIQIYI2018@",
|
||||
"scope": "userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos",
|
||||
"version": "2",
|
||||
"traffic_segment": "962042",
|
||||
"visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
|
||||
}
|
||||
response = http_client.post(url, headers=headers, data=data)
|
||||
data = {
|
||||
"update_time": int(time.time()),
|
||||
"username": "copyright@qiyi.com",
|
||||
"password": "ppsIQIYI2018@",
|
||||
"token": response.json()
|
||||
}
|
||||
save_report_token('token', data)
|
||||
return data
|
||||
except Exception as e:
|
||||
print(f"登录失败: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
def refresh_token(access_token, refresh_token):
|
||||
headers = {
|
||||
"Accept": "*/*",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"Content-Length": "0",
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Origin": "https://www.dailymotion.com",
|
||||
"Pragma": "no-cache",
|
||||
"Referer": "https://www.dailymotion.com/signin?urlback=%2Fzendesk%3Ftimestamp%3D1748932650%26return_to%3Dhttps%253A%252F%252Ffaq.dailymotion.com%252Fhc%252Fen-us%252Frequests%252Fnew",
|
||||
"Referer": "https://www.dailymotion.com/",
|
||||
"Sec-Fetch-Dest": "empty",
|
||||
"Sec-Fetch-Mode": "cors",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"Sec-Fetch-Site": "same-site",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
|
||||
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": "\"Windows\""
|
||||
}
|
||||
url = "https://graphql.api.dailymotion.com/oauth/token"
|
||||
data = {
|
||||
"client_id": "f1a362d288c1b98099c7",
|
||||
"client_secret": "eea605b96e01c796ff369935357eca920c5da4c5",
|
||||
"grant_type": "password",
|
||||
"username": "copyright@qiyi.com",
|
||||
"password": "ppsIQIYI2018@",
|
||||
"scope": "userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos",
|
||||
"version": "2",
|
||||
"traffic_segment": "962042",
|
||||
"visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
|
||||
}
|
||||
response = session.post(url, headers=headers, data=data)
|
||||
data = {
|
||||
"update_time": int(time.time()),
|
||||
"username": "copyright@qiyi.com",
|
||||
"password": "ppsIQIYI2018@",
|
||||
"token": response.json()
|
||||
}
|
||||
save_report_token('token', data)
|
||||
return data
|
||||
|
||||
|
||||
def get_cookies(access_token: str, refresh_token: str):
|
||||
cookies = {
|
||||
"dmvk": "683e982c34e34",
|
||||
"ts": "133696",
|
||||
"v1st": "a847389a-6b91-4157-948f-457666f7172b",
|
||||
"ff": "on",
|
||||
"lang": "zh_CN",
|
||||
"usprivacy": "1---",
|
||||
"dmaid": "73ca37e4-6858-46c1-aac4-a4a5fc9a270e",
|
||||
"cookie_policy_closed": "1",
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
}
|
||||
url = "https://www.dailymotion.com/cookie/refresh_token"
|
||||
response = http_client.post(url, headers=headers, cookies=cookies)
|
||||
session.post(url, cookies=cookies, allow_redirects=True)
|
||||
|
||||
|
||||
def zendesk():
|
||||
headers = {
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"Pragma": "no-cache",
|
||||
"Referer": "https://www.dailymotion.com/sg",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"Sec-Fetch-User": "?1",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
|
||||
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": "\"Windows\""
|
||||
def get_cookies1(access_token: str, refresh_token: str):
|
||||
"""302 跳转"""
|
||||
cookies = {
|
||||
"access_token": access_token,
|
||||
"refresh_token": refresh_token,
|
||||
}
|
||||
url = "https://www.dailymotion.com/zendesk"
|
||||
params = {
|
||||
"return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
|
||||
"timestamp": str(time.time()),
|
||||
"timestamp": str(int(time.time())),
|
||||
}
|
||||
response = http_client.get(url, headers=headers, params=params, allow_redirects=True)
|
||||
data = http_client.session.cookies.get_dict()
|
||||
data['update_time'] = int(time.time())
|
||||
save_report_token('cookies', data)
|
||||
session.get(url, cookies=cookies, params=params, allow_redirects=True)
|
||||
cookies_dict = {"update_time": int(time.time()), "cookies": session.cookies.get_dict()}
|
||||
save_report_token('cookies', cookies_dict)
|
||||
return cookies_dict
|
||||
|
||||
|
||||
def get_csrftoken():
|
||||
try:
|
||||
url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
|
||||
headers = {
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"Accept-Language": "zh-CN,zh;q=0.9",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"Pragma": "no-cache",
|
||||
"Referer": "https://www.dailymotion.com/sg",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
"Sec-Fetch-User": "?1",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
|
||||
"sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": "\"Windows\""
|
||||
}
|
||||
response = http_client.get(url, headers=headers)
|
||||
data = {"update_time": int(time.time()), "csrf_token": response.json()}
|
||||
save_report_token('csrf_token', data)
|
||||
return data
|
||||
except Exception as e:
|
||||
print(f"获取 CSRF token 失败: {str(e)}")
|
||||
raise
|
||||
url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
|
||||
response = session.get(url)
|
||||
data = {"update_time": int(time.time()), "csrf_token": response.json()}
|
||||
save_report_token('csrf_token', data)
|
||||
return data
|
||||
|
||||
|
||||
def report(csrf_token: str, v_url, title):
    """Submit a copyright-infringement ticket for one video to the Dailymotion FAQ.

    Args:
        csrf_token: Zendesk ``authenticity_token`` obtained via get_csrftoken().
        v_url: URL of the infringing video (goes into custom field 25613698).
        title: Drama/series title, interpolated into the request description.

    Returns:
        bool: True when the POST answered HTTP 200.

    Raises:
        Exception: re-raised after logging on any request failure.
    """
    try:
        headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "content-type": "application/x-www-form-urlencoded",
            "origin": "https://faq.dailymotion.com",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048",
            "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
        }
        url = "https://faq.dailymotion.com/hc/en-us/requests"
        # BUG FIX: the form keys were previously pre-percent-encoded
        # ("request%5Bticket_form_id%5D"). requests URL-encodes dict keys
        # itself, so the server received double-encoded names ("request%255B...")
        # and never saw the real fields. Keys must be the literal bracket form.
        data = {
            "utf8": "✓",
            "authenticity_token": csrf_token,
            "request[ticket_form_id]": "136048",
            "request[collaborators][]": "duke.chen@dailymotion.com",
            "request[custom_fields][360008684839]": "__dc.copyright_user_protection_-_copyright__",
            "request[custom_fields][30150188]": "copyrightform-notification",
            "request[custom_fields][25089567]": "legal_entity",
            "request[custom_fields][25159868]": "Beijing iQIYI Science & Technology Co.,Ltd",
            "request[custom_fields][4869133282962]": "Legal Department",
            "request[subject]": "Copyright infringement Notification",
            "request[custom_fields][25613698]": v_url,
            "request[description]": f"We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are \"{title}\"\r\n",
            "request[description_mimetype]": "text/plain",
            "request[custom_fields][4769880845586]": "on",
            "request[custom_fields][25626417]": "on",
            "request[custom_fields][4769797363346]": "on",
            "request[custom_fields][25159848]": "on",
            "request[custom_fields][4769658191250]": "on"
        }
        # NOTE(review): this posts via a bare requests.post, so the cookies
        # loaded into http_client.session by the caller are NOT sent — confirm
        # whether the ticket endpoint accepts anonymous submissions.
        response = requests.post(url, headers=headers, data=data)
        print(response.status_code)
        print(response.text)
        print(response)
        return response.status_code == 200
    except Exception as e:
        print(f"提交报告失败: {str(e)}")
        raise
|
||||
def report(csrf_token: str, cookies: dict, ):
    """Legacy ticket submitter: POSTs a hard-coded copyright form to the
    Dailymotion FAQ using an explicit cookie jar.

    Args:
        csrf_token: Zendesk ``authenticity_token`` value.
        cookies: cookie dict forwarded verbatim to requests.

    The video URL ('url') and title ('片名') are placeholders in this
    version; the response is discarded (returns None).
    """
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'content-type': 'application/x-www-form-urlencoded',
        'origin': 'https://faq.dailymotion.com',
        'pragma': 'no-cache',
        'priority': 'u=0, i',
        'referer': 'https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    }

    form = {
        'utf8': '✓',
        'authenticity_token': csrf_token,
        'request[ticket_form_id]': '136048',
        'request[collaborators][]': 'duke.chen@dailymotion.com',
        'request[custom_fields][360008684839]': '__dc.copyright_user_protection_-_copyright__',
        'request[custom_fields][30150188]': 'copyrightform-notification',
        'request[custom_fields][25089567]': 'legal_entity',
        'request[custom_fields][25159868]': 'Beijing iQIYI Science & Technology Co.,Ltd',
        'request[custom_fields][4869133282962]': 'Legal Department',
        'request[subject]': 'Copyright infringement Notification',
        'request[custom_fields][25613698]': 'url',
        'request[description]': 'We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are 片名\r\n',
        'request[description_mimetype]': 'text/plain',
    }
    # Checkbox fields: the browser form sends both the hidden 'off' and the
    # checked 'on' value, so each field is a two-element list that requests
    # encodes as a repeated parameter.
    for field_id in ('4769880845586', '25626417', '4769797363346',
                     '25159848', '4769658191250'):
        form[f'request[custom_fields][{field_id}]'] = ['off', 'on']

    requests.post('https://faq.dailymotion.com/hc/en-us/requests',
                  cookies=cookies, headers=headers, data=form)
|
||||
|
||||
|
||||
def prepare_data():
    """Ensure the cached report credentials (token / cookies / csrf_token)
    are fresh, refreshing them when the newest is older than 24 hours.

    Side effects only: refreshed values are persisted by the called helpers
    (get_cookies / get_cookies1 / get_csrftoken); nothing is returned.
    """
    token = get_report_token('token')
    cookies = get_report_token('cookies')
    csrf_token = get_report_token('csrf_token')
    # default=0 forces a refresh when all three caches are empty; previously
    # max() raised ValueError on an empty generator.
    max_update_time = max(
        (d.get('update_time', 0) for d in (token, cookies, csrf_token) if d),
        default=0,
    )
    if max_update_time + (24 * 60 * 60) < time.time():
        # NOTE(review): assumes the 'token' cache exists even when stale —
        # if it is missing entirely this raises TypeError; confirm whether a
        # full re-login path is needed here.
        access_token = token['token']['access_token']
        refresh_token = token['token']['refresh_token']
        get_cookies(access_token, refresh_token)
        get_cookies1(access_token, refresh_token)
        csrf_token = get_csrftoken()
|
||||
|
||||
if __name__ == '__main__':
    # Load the persisted cookie jar into the shared HTTP session, then fetch
    # a fresh CSRF token for the ticket form.
    cookies = get_report_token('cookies')['cookies']
    http_client.session.cookies = requests.utils.cookiejar_from_dict(cookies)
    csrf_token = get_csrftoken()['csrf_token']['current_session']['csrf_token']
    # BUG FIX: report's signature is (csrf_token, v_url, title) — the old call
    # passed the title where the URL belongs. Also removed a leftover second
    # call that subscripted the plain csrf_token string (TypeError) against the
    # legacy two-argument signature.
    report(csrf_token, 'https://www.dailymotion.com/video/x8kjx7v', 'Hunter X Hunter')
|
@ -4,7 +4,8 @@ charset-normalizer==3.4.2
|
||||
et-xmlfile==1.1.0
|
||||
idna==3.10
|
||||
importlib-metadata==6.7.0
|
||||
lxml==5.4.0
numpy==1.21.6
|
||||
openpyxl==3.1.3
|
||||
pandas==1.3.5
|
||||
pkg_resources==0.0.0
|
||||
|
@ -1,35 +0,0 @@
|
||||
from flask import Flask, send_file, abort, request, jsonify
from pathlib import Path

app = Flask(__name__)

PROJECT_ROOT = Path(__file__).parent.resolve()
SCREENSHOTS_DIR = Path("/opt/ql/daily_com/bin/screenshots").resolve()


@app.route('/image/screenshots/<path:filename>')
def serve_image(filename):
    """Serve one screenshot file, rejecting any path that escapes SCREENSHOTS_DIR."""
    target = SCREENSHOTS_DIR / filename

    # Path-traversal guard: the resolved path must stay inside the
    # screenshots directory, otherwise answer 403.
    try:
        target.resolve().relative_to(SCREENSHOTS_DIR.resolve())
    except ValueError:
        abort(403, description=f"禁止访问目录外文件: {target.resolve()}")

    if not target.exists():
        abort(404, description=f"文件不存在: {target.resolve()}")

    return send_file(target, as_attachment=False)


# Custom 404 body (Chinese message preserved for existing clients).
@app.errorhandler(404)
def handle_404(e):
    return f"404 错误:{e.description}", 404


# Custom 403 body.
@app.errorhandler(403)
def handle_403(e):
    return f"403 错误:{e.description}", 403


if __name__ == '__main__':
    app.run(host='0.0.0.0', debug=False, port=5000)
|
6
test2.py
6
test2.py
@ -1,6 +0,0 @@
|
||||
from DB import DBVidcon

# Ad-hoc smoke test: print whatever the DB stores for account id '4'.
client = DBVidcon()
print(client.get_account_info('4'))
|
@ -1,10 +0,0 @@
|
||||
from DB import DBVidcon
from logger import logger

# One-shot maintenance script: updates the report status of videos in the DB.
db = DBVidcon()

logger.info("开始更新视频举报状态")
# Presumably flips a ts/status flag on reported videos — confirm in DBVidcon.
db.update_video_ts_status()
db.close()
logger.info("更改视频举报状态完成")
|
Loading…
x
Reference in New Issue
Block a user