Compare commits

1 commit: 12915c13a5
@@ -1,38 +0,0 @@
-from DB import DBVidcon
-import requests
-from logger import logger
-
-db = DBVidcon()
-
-
-def check_video_removed(video_id):
-    url = f"https://api.dailymotion.com/video/{video_id}"
-    params = {"fields": "published,private,status"}
-    resp = requests.get(url, params=params, timeout=10)
-
-    # 404 -> does not exist or has already been deleted
-    if resp.status_code == 404:
-        return 1
-
-    data = resp.json()
-    # published=False or private=True both count as "taken down"
-    if not data.get("published", False) or data.get("private", False):
-        return 1
-
-    return 0
-
-
-def main():
-    lis = db.getreport_video()
-    for li in lis:
-        video_id = li['v_xid']
-        status = check_video_removed(video_id)
-        if status == 1:
-            db.mark_video_removed(li['id'], status)
-            logger.info(f"Video id {video_id} has been taken down")
-        else:
-            db.mark_video_removed(li['id'], status)
-            logger.info(f"Video id {video_id} is still live")
-
-
-if __name__ == '__main__':
-    main()
@@ -1,6 +1,6 @@
 import json, time
 import argparse
-from DB import DBVidcon
+from DB import DBVidcon, DBSA
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -14,8 +14,10 @@ def main():
     args = parse_args()
     batch = int(time.time())
     db = DBVidcon()
-    push = None
-    empty = None
+    for chunk in DBSA.stream_video_keys(chunk_size=10_000):
+        db.cache_video_keys_bulk(chunk)
+        print(f"Synced to Redis == {len(chunk)}")
 
     if args.level == 0:
         push = db.push_l0
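Note: stream_video_keys and cache_video_keys_bulk are only named in the hunk above. A minimal sketch of the intent (stream keys from the database in fixed-size chunks, load each chunk into Redis in one round trip), assuming SQLAlchemy and redis-py; the DSN, table name, set name, and key format are illustrative assumptions:

from sqlalchemy import create_engine, text
import redis

engine = create_engine("mysql+pymysql://user:pass@host/db")  # hypothetical DSN
r = redis.Redis()


def stream_video_keys(chunk_size=10_000):
    # Stream (v_xid, rn) pairs with a server-side cursor instead of loading the whole table.
    with engine.connect() as conn:
        result = conn.execution_options(stream_results=True).execute(
            text("SELECT v_xid, rn FROM video"))  # hypothetical table name
        while True:
            rows = result.fetchmany(chunk_size)
            if not rows:
                break
            yield [f"{xid}:{rn}" for xid, rn in rows]


def cache_video_keys_bulk(chunk):
    # One pipelined SADD per chunk keeps Redis round trips to one per 10k keys.
    with r.pipeline() as pipe:
        pipe.sadd("video_keys", *chunk)  # the "video_keys" set name is an assumption
        pipe.execute()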
@@ -1,40 +0,0 @@
-import json, time
-import argparse
-from DB import DBVidcon
-
-
-def parse_args():
-    parser = argparse.ArgumentParser(
-        description="Dump keyword/title rows into Redis list."
-    )
-    parser.add_argument("-l", "--level", type=int, default=99,
-                        help="value for t.level (default: 99)")
-    return parser.parse_args()
-
-
-def main():
-    batch = int(time.time())
-    db = DBVidcon()
-    push = db.push_web
-    empty = db.web_empty
-
-    if empty():
-        rows = db.fetch_keyword_title(level=0)
-        payload_list = []
-        for row in rows:
-            payload_list.append(json.dumps({**row, "batch": batch}, ensure_ascii=False))
-            if len(payload_list) >= 10000:
-                push(payload_list)
-                payload_list.clear()
-        if payload_list:  # flush the remainder
-            push(payload_list)
-
-        data = {
-            "level": 0,
-            "batch": batch,
-            "count": len(rows),
-        }
-        db.log_batch_start(data)
-        print(f"✔ Pushed {len(rows)} rows (batch={batch}) to the {push.__name__} queue")
-    db.close()
-
-
-if __name__ == "__main__":
-    main()
@@ -1,57 +0,0 @@
-import requests
-from flask import Flask, request, jsonify
-from DB import DBVidcon
-
-app = Flask(__name__)
-
-endpoint = "https://api.dailymotion.com/videos"
-DEFAULT_PAGE = 1
-FIXED_LIMIT = 100
-VALID_SORTS = {
-    'recent', 'relevance', 'alpha', 'alphaaz',
-    'alphaza', 'most', 'least', 'changed'
-}
-
-db = DBVidcon()
-
-@app.route("/get", methods=["GET"])
-def get_videos():
-    keyword = request.args.get("keyword", "").strip()
-    if not keyword:
-        return jsonify({"status": "error", "msg": "the keyword parameter must not be empty"}), 400
-
-    # page and country parameters
-    i = request.args.get("page", DEFAULT_PAGE, type=int)
-    rn = request.args.get("rn", "US").upper()
-
-    # the sort parameter must be one of the allowed values
-    sort = request.args.get("sort", "relevance").strip().lower()
-    if sort not in VALID_SORTS:
-        return jsonify({
-            "status": "error",
-            "msg": f"invalid sort value; allowed values: {sorted(VALID_SORTS)}"
-        }), 400
-
-    proxy_string = db.get_proxy(rn)
-    proxies = {"http": proxy_string, "https": proxy_string} if proxy_string else None
-
-    params = {
-        "search": keyword,
-        "fields": "id,title,created_time,thumbnail_240_url,duration,"
-                  "owner.id,owner.screenname,likes_total,views_total",
-        "limit": FIXED_LIMIT,
-        "page": i,
-        "sort": sort
-    }
-
-    try:
-        resp = requests.get(endpoint, params=params, proxies=proxies, timeout=10)
-        resp.raise_for_status()
-        jd = resp.json()
-        return jsonify(jd), 200
-    except requests.exceptions.RequestException as e:
-        return jsonify({"status": "error", "msg": str(e)}), 502
-
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=8000, debug=False)
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-# app.py
-import requests
-from flask import Flask, jsonify, abort
-
-app = Flask(__name__)
-
-
-def check_video_removed(video_id):
-    """
-    Call the Dailymotion API to decide whether a video has been taken down or deleted.
-    Returns:
-        1 -> deleted / missing / taken down / set to private
-        0 -> publicly available
-    """
-    url = f"https://api.dailymotion.com/video/{video_id}"
-    params = {"fields": "published,private,status"}
-    try:
-        resp = requests.get(url, params=params, timeout=10)
-    except requests.RequestException as exc:
-        # On network errors return 503 so the caller knows to retry
-        abort(503, description=f"Upstream request failed: {exc}")
-
-    # 404 -> does not exist or has been deleted
-    if resp.status_code == 404:
-        return 1
-
-    # Any other non-2xx status code is passed straight through to the client
-    if resp.status_code // 100 != 2:
-        abort(resp.status_code, description=resp.text)
-
-    data = resp.json()
-    # published=False or private=True both count as "taken down"
-    if not data.get("published", False) or data.get("private", False):
-        return 1
-
-    return 0
-
-
-@app.route("/video/<video_id>", methods=["GET"])
-def video_status(video_id):
-    removed = check_video_removed(video_id)
-    return jsonify({"video_id": video_id, "removed": removed})
-
-
-if __name__ == "__main__":
-    # The listen address can be overridden via an environment variable
-    import os
-    host = os.getenv("HOST", "0.0.0.0")
-    port = 5100
-    app.run(host=host, port=port, debug=False)
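Note: once this service is running, checking a video reduces to one GET against the route defined above (the route and port 5100 come from the file; the video id below is a placeholder):

import requests

resp = requests.get("http://127.0.0.1:5100/video/x8example", timeout=15)  # x8example is a placeholder id
print(resp.status_code, resp.json())  # e.g. 200 {"removed": 0, "video_id": "x8example"}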
kill_main.sh
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-
-# -------- adjust as needed --------
-TARGET="/opt/ql/DailyMotion/main.py"  # match string: the exact script path is enough
-SIG="9"                               # signal; defaults to -9, change to 15 for a gentler stop
-# --------------------------------
-
-pids=$(pgrep -f "$TARGET")
-
-if [ -z "$pids" ]; then
-    echo "No running $TARGET process found"
-    exit 0
-fi
-
-echo "About to send SIG${SIG:-15} to processes: $pids"
-kill "-${SIG:-15}" $pids
-
-echo "Done"
main.py
@@ -1,4 +1,3 @@
-#!/opt/ql/daily_com/bin/python3
 import base64
 import traceback
 import argparse
@@ -56,15 +55,15 @@ def format_duration(seconds):
         return "00:00"
 
 
-def get_searchInfo(keyword, level, headers, proxy_name, r=2):
+def get_searchInfo(keyword, level, rn, proxy_name, r=2):
     if r == 2:
         logger.info(f"NET processing->{keyword},\trn->{proxy_name},\tlevel->{level}")
     video_list = []
-    max_page = 3
-    limit = 100
+    max_page = 2
+    limit = 30
     endpoint = 'https://api.dailymotion.com/videos'
     if level == 0 or level == 1:
-        max_page = 4
+        max_page = 3
         limit = 100
     for j in range(1, max_page):
         params = {
@@ -88,7 +87,7 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
             logger.exception(f"[Requested] unknown: {e}, keyword: {keyword}, l: {level}")
         else:
             time.sleep((3 - r) * 5)
-            return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
+            return get_searchInfo(keyword, level, rn, proxy_name, r - 1)
         try:
             resinfo = jsondata.get("list")
         except Exception:
@@ -98,7 +97,7 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
                 return None
             else:
                 time.sleep((3 - r) * 5)
-                return get_searchInfo(keyword, level, headers, proxy_name, r - 1)
+                return get_searchInfo(keyword, level, rn, proxy_name, r - 1)
         for index, iteminfo in enumerate(resinfo):
             calculated_index = index + 1 + (j - 1) * limit
             xid = iteminfo["id"]
@@ -106,6 +105,10 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
             uxid = iteminfo["owner.id"]
             uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
             duration = iteminfo.get('duration')
+            is_repeat = 0
+            if db.video_key_exists(vid.strip(), rn):
+                is_repeat = 1
+
             if duration <= 300:
                 continue
             v_data = {
@@ -123,9 +126,11 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2):
                 "u_id": uid,
                 "u_xid": uxid,
                 "u_name": iteminfo.get('owner.screenname'),
-                "u_pic": iteminfo.get('owner.avatar_60_url')
+                "u_pic": iteminfo.get('owner.avatar_60_url'),
+                "is_repeat": is_repeat,
             }
             video_list.append(v_data)
+        time.sleep(3)
         if len(video_list) < 100:
             break
     return video_list
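Note: the new is_repeat flag depends on db.video_key_exists, which is not shown in this diff. A minimal sketch of what that lookup could be, assuming the keys were loaded into a Redis set as in the earlier sync loop (the set name and "xid:rn" member format are assumptions):

import redis

r = redis.Redis()

def video_key_exists(v_xid: str, rn: str) -> bool:
    # Membership test against the set loaded by cache_video_keys_bulk.
    return bool(r.sismember("video_keys", f"{v_xid}:{rn}"))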
@@ -137,11 +142,11 @@ proxiesdict = db.get_proxy_agent_dict()
 def search_worker(payload, kitem, flag):
     try:
         gproxies = proxiesdict[kitem['rn']]
-        v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
+        v_list = get_searchInfo(kitem['keyword'], kitem['level'], kitem['rn'], gproxies)
         if not v_list:
             for i in range(2):
                 time.sleep(i * 5)
-                v_list = get_searchInfo(kitem['keyword'], kitem['level'], None, gproxies)
+                v_list = get_searchInfo(kitem['keyword'], kitem['level'], kitem['rn'], gproxies)
                 if v_list:
                     break
                 time.sleep(2)
@@ -163,11 +168,10 @@ def integrate_data_parallel():
             time.sleep(10)
             continue
 
-        futures = []
-        for payload, kitem in tasks:
-            futures.append(executor.submit(search_worker, payload, kitem, flag))
-            time.sleep(1)
+        futures = [
+            executor.submit(search_worker, payload, kitem, flag)
+            for payload, kitem in tasks
+        ]
 
         rollback = {0: [], 1: [], 2: []}
 
         for fut in concurrent.futures.as_completed(futures):
@@ -178,8 +182,6 @@ def integrate_data_parallel():
                 continue
 
             for item in v_list:
-                if not v_list:
-                    continue
                 DBSA.upsert_video({
                     "keyword": kitem["keyword"],
                     "v_name": kitem["v_name"],
@@ -202,6 +204,7 @@ def integrate_data_parallel():
                     "batch": kitem["batch"],
                     "machine_id": MACHINE_ID,
                     "level": kitem["level"],
+                    "is_repeat": item['is_repeat']
                 })
             DBSA.flush()
         if rollback[0]:
@@ -210,6 +213,7 @@ def integrate_data_parallel():
             db.rollback_l1(rollback[1])
         if rollback[2]:
             db.rollback_l2(rollback[2])
+        time.sleep(10)
 
 
 def parse_args() -> argparse.Namespace:
@@ -98,9 +98,9 @@ def fetch_all_data_for_rn(rn: str, batches: list[int]) -> pd.DataFrame:
 
 def export_all():
     # batches to process
-    batches = [1748965168, 1749049335]
+    batches = [1747324254, 1747323990]
     # refresh is_repeat first
-    # update_is_repeat(batches)
+    update_is_repeat(batches)
 
     rn_list = get_rn_list()
     timestamp = datetime.now().strftime("%Y%m%d")
oneget.py
@@ -1,684 +0,0 @@
-import argparse
-import base64
-from datetime import datetime
-import concurrent.futures
-import requests
-import uuid
-import random
-import time
-import copy
-from threading import Lock
-from DB import DBVidcon, DBSA
-import json
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-from dateutil import parser as date_parser
-
-MACHINE_ID = 0
-db = DBVidcon()
-proxiesdict = db.get_proxy_agent_dict()
-
-
-class RetryRequests:
-    def __init__(
-            self,
-            proxies: dict = None,
-            timeout: int = 10,
-            total: int = 3,
-            backoff_factor: float = 1.0,
-            status_forcelist: tuple = (500, 502, 503, 504),
-            allowed_methods: tuple = ("GET", "POST"),
-    ):
-        self.session = requests.Session()
-        self.timeout = timeout
-        self.proxies = proxies
-
-        retry = Retry(
-            total=total,
-            backoff_factor=backoff_factor,
-            status_forcelist=status_forcelist,
-            allowed_methods=allowed_methods,
-            raise_on_status=False
-        )
-        adapter = HTTPAdapter(max_retries=retry)
-        self.session.mount("http://", adapter)
-        self.session.mount("https://", adapter)
-
-    def get(self, url, **kwargs):
-        kwargs.setdefault("timeout", self.timeout)
-        if self.proxies:
-            kwargs.setdefault("proxies", self.proxies)
-        return self.session.get(url, **kwargs)
-
-    def post(self, url, **kwargs):
-        kwargs.setdefault("timeout", self.timeout)
-        if self.proxies:
-            kwargs.setdefault("proxies", self.proxies)
-        return self.session.post(url, **kwargs)
-
-
-req = RetryRequests()
-
-
-def clean_dash_to_zero(val):
-    if val in ('-', '', None):
-        return 0
-    try:
-        return int(val)
-    except (ValueError, TypeError) as e:
-        print(f"[field error] val = {val} → {str(e)}")
-        return 0
-
-
-def format_create_time(timestr):
-    try:
-        dt = date_parser.isoparse(timestr)
-        return dt.strftime("%Y-%m-%d %H:%M:%S")
-    except Exception as e:
-        print(f"[time format error] {timestr} → {str(e)}")
-        return "1970-01-01 00:00:00"
-
-
-def format_duration(seconds):
-    try:
-        seconds = int(seconds)
-        return f"{seconds // 60:02}:{seconds % 60:02}"
-    except Exception:
-        return "00:00"
-
-
-class DMHeaderManager:
-    _headers_template = {
-        'Accept': '*/*, */*',
-        'Cache-Control': 'no-cache',
-        'Connection': 'keep-alive',
-        'Content-Type': 'application/json, application/json',
-        'Host': 'graphql.api.dailymotion.com',
-        'Origin': 'https://www.dailymotion.com',
-        'Referer': 'https://www.dailymotion.com/',
-        'Sec-Fetch-Dest': 'empty',
-        'Sec-Fetch-Mode': 'cors',
-        'Sec-Fetch-Site': 'same-site',
-        'User-Agent': 'Mozilla/5.0',
-        'X-DM-AppInfo-Id': 'com.dailymotion.neon',
-        'X-DM-AppInfo-Type': 'website',
-        'X-DM-AppInfo-Version': 'v2025-05-26T13:45:05.666Z',
-        'X-DM-Neon-SSR': '0',
-        'X-DM-Preferred-Country': 'tw',
-        'accept-language': 'zh-CN',
-        'authorization': '',
-        'sec-ch-ua': '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
-        'sec-ch-ua-mobile': '?0',
-        'sec-ch-ua-platform': '"Windows"',
-        'x-dm-visit-id': '',
-        'x-dm-visitor-id': '',
-    }
-
-    _user_agents = [
-        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
-        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
-        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
-        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
-        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
-        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
-    ]
-
-    def __init__(self, proxies: dict = None):
-        self._headers_cache = None
-        self._cache_lock = Lock()
-        self._proxies = proxies
-
-    def get_headers(self, retry: int = 2) -> dict:
-        visitor_id = str(uuid.uuid4())
-        visit_id = str(int(time.time() * 1000))
-        traffic_segment = str(random.randint(100_000, 999_999))
-        ua = random.choice(self._user_agents)
-
-        token_headers = {
-            'Accept': '*/*',
-            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Content-Type': 'application/x-www-form-urlencoded',
-            'Origin': 'https://www.dailymotion.com',
-            'Pragma': 'no-cache',
-            'Referer': 'https://www.dailymotion.com/',
-            'Sec-Fetch-Dest': 'empty',
-            'Sec-Fetch-Mode': 'cors',
-            'Sec-Fetch-Site': 'same-site',
-            'User-Agent': ua,
-            'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
-            'sec-ch-ua-mobile': '?0',
-            'sec-ch-ua-platform': '"Windows"',
-        }
-
-        data = {
-            'client_id': 'f1a362d288c1b98099c7',
-            'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
-            'grant_type': 'client_credentials',
-            'traffic_segment': traffic_segment,
-            'visitor_id': visitor_id,
-        }
-
-        response = req.post(
-            'https://graphql.api.dailymotion.com/oauth/token',
-            headers=token_headers,
-            data=data,
-            proxies=self._proxies,
-            timeout=10
-        )
-        response.raise_for_status()
-        token = response.json()['access_token']
-
-        new_headers = copy.deepcopy(self._headers_template)
-        new_headers['authorization'] = f'Bearer {token}'
-        new_headers['x-dm-visit-id'] = visit_id
-        new_headers['x-dm-visitor-id'] = visitor_id
-        new_headers['User-Agent'] = ua
-
-        with self._cache_lock:
-            self._headers_cache = copy.deepcopy(new_headers)
-
-        return new_headers
-
-
-class DMVideoInfo:
-    def __init__(self, proxies: dict = None, max_retries: int = 3, backoff_factor: float = 0.5):
-        self.proxies = proxies
-        self.max_retries = max_retries
-        self.backoff_factor = backoff_factor
-
-    def get_video_info(self, data: dict) -> dict:
-        v_xid = data.get('v_xid')
-        url = f'https://api.dailymotion.com/video/{v_xid}'
-        params = {
-            'fields': 'id,title,created_time,thumbnail_240_url,duration,'
-                      'owner.id,owner.screenname,likes_total,views_total,'
-                      'owner.avatar_60_url,owner.followers_total,owner.videos_total'
-        }
-
-        try:
-            resp = req.get(url, params=params, timeout=10)
-            resp.raise_for_status()
-            r_data = resp.json()
-            xid = r_data["id"]
-            vid = base64.b64encode(f"Video:{xid}".encode('utf-8')).decode('utf-8')
-            uxid = r_data["owner.id"]
-            uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
-            duration = r_data.get("duration", 0)
-            if duration < 30:
-                return None
-            data["v_id"] = vid
-            data["title"] = r_data.get("title", "")
-            data["link"] = "https://www.dailymotion.com/video/" + xid
-            data["duration"] = format_duration(r_data.get("duration", 0))
-            data['create_time'] = format(
-                datetime.fromtimestamp(r_data.get("created_time")).strftime("%Y-%m-%d %H:%M:%S"))
-            data['fans'] = clean_dash_to_zero(r_data.get("owner.followers_total", 0))
-            data['videos'] = clean_dash_to_zero(r_data.get("owner.videos_total", 0))
-            data['watch_number'] = clean_dash_to_zero(r_data.get("views_total", 0))
-            data['cover_pic'] = r_data.get('thumbnail_240_url')
-            data['u_id'] = uid
-            data['u_xid'] = uxid
-            data['u_name'] = r_data.get("owner.screenname", "")
-            data['u_pic'] = r_data.get("owner.avatar_60_url", "")
-            DBSA.upsert_video(data)
-            DBSA.flush()
-        except requests.RequestException as e:
-            print(f"[ERROR] request failed vxid={v_xid} : {e}")
-            return None
-
-
-def parse_args() -> argparse.Namespace:
-    global MACHINE_ID
-    parser = argparse.ArgumentParser(
-        description="Configure worker settings."
-    )
-    parser.add_argument(
-        "-m", "--machine-id",
-        type=int,
-        help=f"Machine identifier (default: {MACHINE_ID})"
-    )
-
-    args = parser.parse_args()
-
-    if args.machine_id is not None:
-        MACHINE_ID = args.machine_id
-
-    if MACHINE_ID is None or MACHINE_ID == 0:
-        raise ValueError("A machine id must be specified")
-    return args
-
-
-def main():
-    while True:
-        kwdata = db.get_web_items()
-        if not kwdata:
-            print("No keyword data retrieved")
-            time.sleep(30)
-            continue
-        print(f"Keyword data for search: {kwdata}")
-        kwdata = kwdata[0][1]
-        rn = kwdata['rn']
-        proxy_name = proxiesdict.get(rn)
-        # proxies_str = "http://127.0.0.1:10808"
-        proxies_str = db.get_proxy(proxy_name, '-1')
-        proxies = {
-            'http': proxies_str,
-            'https': proxies_str
-        }
-        kw = kwdata['keyword']
-        dmheader_manager = DMHeaderManager(proxies=proxies)
-        dmvideo_info = DMVideoInfo(proxies=proxies)
-        headers = dmheader_manager.get_headers()
-        for i in range(1, 11):
-            data = {
-                "operationName": "SEARCH_QUERY",
-                "variables": {
-                    "query": kw,
-                    "shouldIncludeTopResults": True,  # include top results
-                    "shouldIncludeChannels": False,   # include channels
-                    "shouldIncludePlaylists": False,  # include playlists
-                    "shouldIncludeHashtags": False,   # include hashtags
-                    "shouldIncludeVideos": False,     # include plain video results
-                    "shouldIncludeLives": False,      # include live streams
-                    "page": i,
-                    "limit": 20,
-                    "recaptchaToken": None
-                },
-                "query": """
-fragment VIDEO_BASE_FRAGMENT on Video {
-  id
-  xid
-  title
-  createdAt
-  duration
-  aspectRatio
-  thumbnail(height: PORTRAIT_240) {
-    id
-    url
-    __typename
-  }
-  creator {
-    id
-    xid
-    name
-    displayName
-    accountType
-    avatar(height: SQUARE_60) {
-      id
-      url
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment CHANNEL_BASE_FRAG on Channel {
-  id
-  xid
-  name
-  displayName
-  accountType
-  isFollowed
-  avatar(height: SQUARE_120) {
-    id
-    url
-    __typename
-  }
-  followerEngagement {
-    id
-    followDate
-    __typename
-  }
-  metrics {
-    id
-    engagement {
-      id
-      followers {
-        edges {
-          node {
-            id
-            total
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment PLAYLIST_BASE_FRAG on Collection {
-  id
-  xid
-  name
-  description
-  thumbnail(height: PORTRAIT_240) {
-    id
-    url
-    __typename
-  }
-  creator {
-    id
-    xid
-    name
-    displayName
-    accountType
-    avatar(height: SQUARE_60) {
-      id
-      url
-      __typename
-    }
-    __typename
-  }
-  metrics {
-    id
-    engagement {
-      id
-      videos(filter: {visibility: {eq: PUBLIC}}) {
-        edges {
-          node {
-            id
-            total
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment HASHTAG_BASE_FRAG on Hashtag {
-  id
-  xid
-  name
-  metrics {
-    id
-    engagement {
-      id
-      videos {
-        edges {
-          node {
-            id
-            total
-            __typename
-          }
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-fragment LIVE_BASE_FRAGMENT on Live {
-  id
-  xid
-  title
-  audienceCount
-  aspectRatio
-  isOnAir
-  thumbnail(height: PORTRAIT_240) {
-    id
-    url
-    __typename
-  }
-  creator {
-    id
-    xid
-    name
-    displayName
-    accountType
-    avatar(height: SQUARE_60) {
-      id
-      url
-      __typename
-    }
-    __typename
-  }
-  __typename
-}
-
-query SEARCH_QUERY(
-  $query: String!,
-  $shouldIncludeTopResults: Boolean!,
-  $shouldIncludeVideos: Boolean!,
-  $shouldIncludeChannels: Boolean!,
-  $shouldIncludePlaylists: Boolean!,
-  $shouldIncludeHashtags: Boolean!,
-  $shouldIncludeLives: Boolean!,
-  $page: Int,
-  $limit: Int,
-  $sortByVideos: SearchVideoSort,
-  $durationMinVideos: Int,
-  $durationMaxVideos: Int,
-  $createdAfterVideos: DateTime,
-  $recaptchaToken: String
-) {
-  search(token: $recaptchaToken) {
-    id
-
-    stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          ...VIDEO_BASE_FRAGMENT
-          ...CHANNEL_BASE_FRAG
-          ...PLAYLIST_BASE_FRAG
-          ...HASHTAG_BASE_FRAG
-          ...LIVE_BASE_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    videos(
-      query: $query,
-      first: $limit,
-      page: $page,
-      sort: $sortByVideos,
-      durationMin: $durationMinVideos,
-      durationMax: $durationMaxVideos,
-      createdAfter: $createdAfterVideos
-    ) @include(if: $shouldIncludeVideos) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...VIDEO_BASE_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...LIVE_BASE_FRAGMENT
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...CHANNEL_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...PLAYLIST_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
-      metadata {
-        id
-        algorithm {
-          uuid
-          __typename
-        }
-        __typename
-      }
-      pageInfo {
-        hasNextPage
-        nextPage
-        __typename
-      }
-      edges {
-        node {
-          id
-          ...HASHTAG_BASE_FRAG
-          __typename
-        }
-        __typename
-      }
-      __typename
-    }
-
-    __typename
-  }
-}
-"""
-            }
-
-            payload = json.dumps(data).encode()
-
-            response = req.post('https://graphql.api.dailymotion.com/', headers=headers, data=payload,
-                                proxies=proxies)
-
-            data = response.json()
-            try:
-                edges = data['data']['search']['stories']['edges']
-            except (TypeError, KeyError):
-                print("stories is None or has an unexpected structure; skipping")
-                break
-            edges_len = len(edges)
-            print(f"Page {i}, keyword: {kw}, got {edges_len} items")
-            tancks = []
-            for j, edge in enumerate(edges):
-                node = edge.get("node", {})
-                s_data = {
-                    "keyword": kw,
-                    "v_name": kwdata.get("v_name", ""),
-                    "v_xid": node.get("xid"),
-                    "batch": kwdata.get("batch"),
-                    "rn": kwdata.get("rn"),
-                    "machine_id": MACHINE_ID,
-                    "index": (i - 1) * 20 + j + 1,
-                    "level": 0,
-                }
-                tancks.append(s_data)
-            with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
-                executor.map(dmvideo_info.get_video_info, tancks)
-            if edges_len < 20:
-                break
-            time.sleep(10)
-
-        time.sleep(20)
-
-
-if __name__ == '__main__':
-    parse_args()
-    start_time = datetime.now()
-    print(f"Start time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
-    main()
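Note: SEARCH_QUERY declares more variables than main() actually sends. As a sketch, a variables payload that would exercise the videos branch instead of stories looks like this (the variable names come from the query above; the sort value is an assumption about the SearchVideoSort enum):

variables = {
    "query": "example keyword",   # illustrative search term
    "shouldIncludeTopResults": False,
    "shouldIncludeVideos": True,  # flips the @include on the videos block
    "shouldIncludeChannels": False,
    "shouldIncludePlaylists": False,
    "shouldIncludeHashtags": False,
    "shouldIncludeLives": False,
    "page": 1,
    "limit": 20,
    "sortByVideos": "RECENT",     # assumed member of SearchVideoSort
    "recaptchaToken": None,
}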
onoe.py
@@ -34,7 +34,19 @@ UserAgent = [
     'User-Agent, Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.3.4000 Chrome/47.0.2526.73 Safari/537.36',
     'User-Agent, Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)']
+sec_ch_ua_list = [
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="136", "Not;A=Brand";v="24", "Google Chrome";v="136"',
+    '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
+    '"Chromium";v="53", "Not;A=Brand";v="24", "Google Chrome";v="53"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"',
+    '"Chromium";v="47", "Not;A=Brand";v="24", "Google Chrome";v="47"',
+    '"Chromium";v="0", "Not;A=Brand";v="24", "Google Chrome";v="0"'
+]
+
 def get_part_ids(part_num: int, take: int, offset: int = 0):
     part_ids = list(range(offset, offset + take))
@@ -221,14 +233,17 @@ def gettoken(proxy, r=2):
     }
     try:
         proxy_str = db.get_proxy(proxy)
+        logger.info(f"[proxy] => {proxy_str}")
         url = 'https://graphql.api.dailymotion.com/oauth/token'
         response = requests.post(url, headers=headers, data=data, proxies={"http": proxy_str, "https": proxy_str})
         token = response.json()['access_token']
         copy_headers = copy.deepcopy(headers1)
+        uaidx = random.randint(0, len(UserAgent) - 1)
        copy_headers['authorization'] = "Bearer " + token
         copy_headers['x-dm-visit-id'] = str(int(time.time() * 1000))
         copy_headers['x-dm-visitor-id'] = uuid_with_dash
-        copy_headers['User-Agent'] = UserAgent[random.randint(0, len(UserAgent) - 1)]
+        copy_headers['User-Agent'] = UserAgent[uaidx]
+        copy_headers['sec-ch-ua'] = sec_ch_ua_list[uaidx]
         copy_headers['X-DM-Preferred-Country'] = proxy.lower()
         with _cache_lock:
             _headers_cache = copy_headers
@@ -267,18 +282,18 @@ def solve_recaptcha_v3_with_proxy(
     payload = {
         "clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996",
         "task": {
-            "type": "ReCaptchaV3Task",
+            "type": "ReCaptchaV3TaskProxyLess",
             "websiteURL": f"https://www.dailymotion.com/search/{encoded_query}/top-results",
             "websiteKey": "6LeOJBIrAAAAAPMIjyYvo-eN_9W1HDOkrEqHR8tM",
-            "pageAction": "___grecaptcha_cfg.clients['100000']['L']['L']['promise-callback'](gRecaptchaResponse)",
+            "pageAction": "search",
             "minScore": 0.5
         }
     }
-    resp = requests.post(create_url, json=payload, headers=headers, timeout=30)
+    resp = requests.post(create_url, data=json.dumps(payload), headers=headers, timeout=30)
     logger.info(f"[token] sending payload: {payload}")
     resp.raise_for_status()
     task_id = resp.json()["taskId"]
-    logger.info(f"task_id: {task_id}")
+    logger.info(f"task_id: {resp.text}")
     # poll for the result
     check_payload = {"clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996", "taskId": task_id}
     for i in range(max_poll_attempts):
@@ -287,7 +302,7 @@ def solve_recaptcha_v3_with_proxy(
         result = r.json()
         logger.info(f"attempt {i}, task_id: {task_id}, result: {result}")
         if result.get("status") == "ready":
-            return result["solution"]["token"]
+            return result["solution"]["gRecaptchaResponse"]
         time.sleep(polling_interval)
 
     raise TimeoutError(f"Task {task_id} did not complete after {max_poll_attempts} polls")
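Note: the two CapSolver fixes above (task type ReCaptchaV3TaskProxyLess, token read from solution.gRecaptchaResponse) combine into a create-then-poll round trip. The endpoints and field names below are the ones used elsewhere in this diff; the helper itself is a sketch:

import time
import requests

def solve_recaptcha_v3(client_key: str, website_url: str, website_key: str,
                       attempts: int = 60, interval: float = 2.0) -> str:
    task = {"type": "ReCaptchaV3TaskProxyLess", "websiteURL": website_url,
            "websiteKey": website_key, "pageAction": "search", "minScore": 0.5}
    resp = requests.post("https://api.capsolver.com/createTask",
                         json={"clientKey": client_key, "task": task}, timeout=30)
    resp.raise_for_status()
    task_id = resp.json()["taskId"]
    for _ in range(attempts):
        r = requests.post("https://api.capsolver.com/getTaskResult",
                          json={"clientKey": client_key, "taskId": task_id}, timeout=15)
        result = r.json()
        if result.get("status") == "ready":
            # v3 tasks return the token under "gRecaptchaResponse", not "token"
            return result["solution"]["gRecaptchaResponse"]
        time.sleep(interval)
    raise TimeoutError(f"task {task_id} not ready after {attempts} polls")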
BIN oss/BAZTSJT.pdf (binary file not shown)
BIN oss/LOA.pdf (binary file not shown)
@@ -1,56 +0,0 @@
-import requests
-
-url = "https://api.siliconflow.cn/v1/chat/completions"
-kw = "朝雪录"
-rn = "US"
-payload = {
-    "model": "Qwen/Qwen3-14B",
-    "max_tokens": 512,
-    "enable_thinking": True,
-    "thinking_budget": 4096,
-    "min_p": 0.05,
-    "temperature": 0.7,
-    "top_p": 0.7,
-    "top_k": 50,
-    "frequency_penalty": 0.5,
-    "n": 1,
-    "stream": False,
-    "stop": [],
-    "messages": [
-        {
-            "role": "user",
-            "content": """You are a video-search optimization assistant. Given a Chinese video title or keyword, translate it and come up with 10 keywords suited to searching English video sites (such as Dailymotion). Output the keywords separated by English commas; return only the keyword list, with no explanation.
-
-Example input: 朝雪录
-Example output: Coroner's Diary,Coroners Diary, Coroners Diary episode,Coroners Diary season 1,Coroners Diary full episode,coroners diary
-"""
-        },
-        {
-            "role": "user",
-            "content": f"Reason about {kw} and output 10 keywords for region code {rn} suited to video-site search; the region code itself must not appear in the keywords."
-        }
-    ]
-}
-
-headers = {
-    "Authorization": "Bearer sk-isvydeloxqhoiwoiojleghdsuhagryjbxzphfhxneevxeoeh",
-    "Content-Type": "application/json"
-}
-
-response = requests.post(url, json=payload, headers=headers, timeout=30)
-
-
-def parse_keywords_from_response(resp_json):
-    try:
-        # pull out the text content
-        content = resp_json["choices"][0]["message"]["content"]
-        # split on English commas
-        keywords = [kw.strip() for kw in content.split(",") if kw.strip()]
-        return keywords
-    except Exception as e:
-        print("Parse failed:", e)
-        return []
-
-
-kws = parse_keywords_from_response(response.json())
-
-print(kws)
-print(len(kws))
@@ -1,19 +0,0 @@
-import json
-from DB import DBVidcon
-
-payload_list = []
-db = DBVidcon()
-rows = db.get_report_video()
-push = db.push_report
-
-# =======================
-
-for row in rows:
-    payload_list.append(json.dumps({**row}, ensure_ascii=False))
-    if len(payload_list) >= 10000:
-        push(payload_list)
-        payload_list.clear()
-if payload_list:  # flush the remainder
-    push(payload_list)
-
-db.close()
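Note: push_report is only referenced in the script above. If the queues are Redis lists, which the push/clear batching pattern suggests, the method reduces to one RPUSH per batch. A sketch, with the method body and queue name assumed:

import redis

r = redis.Redis()

def push_report(payload_list):
    # One RPUSH with many members per 10k-row batch keeps round trips low;
    # "report_queue" is an assumed list name.
    r.rpush("report_queue", *payload_list)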
report.py
@@ -1,124 +0,0 @@
-import argparse
-import json
-import time
-from DB import DBVidcon, DBSA
-from report_video import DailymotionClient
-from logger import logger
-import requests
-
-MACHINE_ID = None
-IsSubsequent = False
-
-
-def parse_args() -> argparse.Namespace:
-    global MACHINE_ID, IsSubsequent
-
-    parser = argparse.ArgumentParser(
-        description="Configure worker settings."
-    )
-    parser.add_argument(
-        "-m", "--machine-id",
-        type=int,
-        help=f"Machine identifier (default: {MACHINE_ID})"
-    )
-    parser.add_argument(
-        "-s", "--IsSubsequent",
-        type=int,
-        help=f"Maximum concurrent workers (default: {IsSubsequent})"
-    )
-
-    args = parser.parse_args()
-
-    if args.machine_id is not None:
-        MACHINE_ID = args.machine_id
-
-    if args.IsSubsequent is not None:
-        if args.IsSubsequent <= 0:
-            IsSubsequent = False
-        else:
-            IsSubsequent = True
-    if MACHINE_ID is None:
-        raise ValueError("A machine id must be specified")
-    return args
-
-
-parse_args()
-
-
-def get_public_ip():
-    try:
-        response = requests.get("https://api.ipify.org?format=json", timeout=5)
-        return response.json().get("ip")
-    except requests.RequestException as e:
-        print("Failed to fetch public IP:", e)
-        return None
-
-
-ip = get_public_ip()
-logger.info(f"Current machine IP: {ip}, machine id: {MACHINE_ID}, follow-up processing: {IsSubsequent}")
-db = DBVidcon()
-
-account = db.get_account_info(MACHINE_ID)
-
-d = DailymotionClient(email=account['account'], password=account['password'])
-
-k = {
-    "open": 1,
-    "solved": 2,
-    "awaiting your reply": 3,
-}
-
-last_main_run = 0
-last_subsequent_run = 0
-
-MAIN_INTERVAL = 60 * 60  # run the main flow hourly
-SUBSEQUENT_INTERVAL = 30 * 60  # run follow-ups every 30 minutes
-
-# d.test()
-
-while True:
-    now = int(time.time())
-
-    # main flow
-    if now - last_main_run >= MAIN_INTERVAL:
-        last_main_run = now
-        re_list = []
-        idss = []
-        lis = db.item_report(100)
-        if len(lis) > 0:
-            for li in lis:
-                item = json.loads(li[0])
-                re_list.append(item)
-                idss.append(item['id'])
-                logger.info(f"name:{item['name_title']},link:{item['link']} ")
-            try:
-                ids, info, report_id, status, report_ts = d.process_ticket(re_list)
-                subsequent_status = k.get(status, 1)
-                db.update_fight_record_status(
-                    ids, report_id, 2, f"http://{ip}:5000/image/{info}",
-                    report_ts, subsequent_status, MACHINE_ID
-                )
-                db.flush()
-            except Exception as e:
-                logger.error(f"ID:{re_list[0]['id']}, end id {re_list[-1]['id']}, e:{e}")
-                db.update_fight_record_status(idss, 0, 3, str(e), mid=MACHINE_ID)
-                time.sleep(60)  # back off after an error
-
-    if now - last_subsequent_run >= SUBSEQUENT_INTERVAL and IsSubsequent:
-        last_subsequent_run = now
-        subsequent_list = db.get_subsequent_report_video(MACHINE_ID)
-        if len(subsequent_list) > 0:
-            for li in subsequent_list:
-                subsequent_status = 0
-                r_id = li['report_id']
-                logger.info(f"subsequent report_id:{r_id} ")
-                # try:
-                subsequent_status, info = d.report_follow_up(r_id)
-                db.update_subsequent_status_by_report_id(
-                    r_id, subsequent_status, f"http://{ip}:5000/image/{info}"
-                )
-                # except Exception as e:
-                #     logger.logger.error(f"ID:{rs_id}, e:{e}")
-                #     db.update_subsequent_status_by_id(rs_id, 1, str(e))
-                time.sleep(5)  # avoid hammering the service
-    time.sleep(5)
417
report_video.py
417
report_video.py
@ -1,417 +0,0 @@
|
|||||||
import time
|
|
||||||
import functools
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from datetime import datetime
|
|
||||||
from sys import platform
|
|
||||||
import requests
|
|
||||||
from logger import logger
|
|
||||||
from playwright.sync_api import (
|
|
||||||
sync_playwright,
|
|
||||||
TimeoutError as PlaywrightTimeoutError,
|
|
||||||
Page,
|
|
||||||
Browser,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def solve_turnstile_capsolver(page: Page,
|
|
||||||
timeout: int = 120) -> bool:
|
|
||||||
"""
|
|
||||||
使用 CapSolver 自动完成当前 Page 上的 Cloudflare Turnstile。
|
|
||||||
成功返回 True,失败/超时返回 False。
|
|
||||||
"""
|
|
||||||
cap_key = "CAP-A76C932D4C6CCB3CA748F77FDC07D996"
|
|
||||||
widget = page.query_selector("div.cf-turnstile[data-sitekey]")
|
|
||||||
if not widget:
|
|
||||||
return False
|
|
||||||
sitekey = widget.get_attribute("data-sitekey")
|
|
||||||
page_url = page.url
|
|
||||||
|
|
||||||
create_payload = {
|
|
||||||
"clientKey": cap_key,
|
|
||||||
"task": {
|
|
||||||
"type": "TurnstileTaskProxyLess",
|
|
||||||
"websiteURL": page_url,
|
|
||||||
"websiteKey": sitekey
|
|
||||||
}
|
|
||||||
}
|
|
||||||
create_resp = requests.post(
|
|
||||||
"https://api.capsolver.com/createTask",
|
|
||||||
json=create_payload, timeout=20
|
|
||||||
).json()
|
|
||||||
if create_resp.get("errorId"):
|
|
||||||
print("[CapSolver] createTask 失败:", create_resp)
|
|
||||||
return False
|
|
||||||
task_id = create_resp["taskId"]
|
|
||||||
|
|
||||||
poll_payload = {"clientKey": cap_key, "taskId": task_id}
|
|
||||||
token = None
|
|
||||||
elapsed, step = 0, 3
|
|
||||||
while elapsed < timeout:
|
|
||||||
time.sleep(step)
|
|
||||||
elapsed += step
|
|
||||||
res = requests.post(
|
|
||||||
"https://api.capsolver.com/getTaskResult",
|
|
||||||
json=poll_payload, timeout=15
|
|
||||||
).json()
|
|
||||||
if res.get("status") == "ready":
|
|
||||||
token = res["solution"]["token"]
|
|
||||||
break
|
|
||||||
if res.get("status") != "processing":
|
|
||||||
print("[CapSolver] getTaskResult 异常:", res)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if not token:
|
|
||||||
print("[CapSolver] 超时未取到 token")
|
|
||||||
return False
|
|
||||||
|
|
||||||
page.evaluate(
|
|
||||||
"""(tk) => {
|
|
||||||
const ta = document.querySelector('textarea[name="cf-turnstile-response"]');
|
|
||||||
if (ta) ta.value = tk;
|
|
||||||
if (window.turnstileCallback)
|
|
||||||
try { window.turnstileCallback(tk); } catch(e){}
|
|
||||||
}""",
|
|
||||||
token
|
|
||||||
)
|
|
||||||
page.wait_for_timeout(1500)
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def require_login(func):
|
|
||||||
@functools.wraps(func)
|
|
||||||
def wrapper(self, *args, **kwargs):
|
|
||||||
self.ensure_login()
|
|
||||||
return func(self, *args, **kwargs)
|
|
||||||
|
|
||||||
return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
class DailymotionClient:
|
|
||||||
url = "https://faq.dailymotion.com/hc/en-us/requests/new"
|
|
||||||
EMAIL = "copyright@qiyi.com"
|
|
||||||
PASSWORD = "ppsIQIYI2018@"
|
|
||||||
|
|
||||||
def __init__(self,email, password, headless: bool = None):
|
|
||||||
self.email = email
|
|
||||||
self.password = password
|
|
||||||
self.headless = headless
|
|
||||||
self.check_interval = 60 * 60
|
|
||||||
if self.headless is None:
|
|
||||||
self.headless = platform == "linux" or platform == "linux2"
|
|
||||||
|
|
||||||
if self.headless:
|
|
||||||
proxy = None
|
|
||||||
self.file_path = "/opt/ql/DailyMotion/oss/LOA.pdf"
|
|
||||||
self.file_path2 = "/opt/ql/DailyMotion/oss/BAZTSJT.pdf"
|
|
||||||
else:
|
|
||||||
proxy={'server': 'http://127.0.0.1:7890'}
|
|
||||||
self.file_path = "./oss/LOA.pdf"
|
|
||||||
self.file_path2 = "./oss/BAZTSJT.pdf"
|
|
||||||
logger.info(f"Launching DailymotionClient with headless={self.headless}, proxy={proxy}")
|
|
||||||
self._pw = sync_playwright().start()
|
|
||||||
self.browser: Browser = self._pw.chromium.launch(
|
|
||||||
headless=self.headless,
|
|
||||||
proxy=proxy,
|
|
||||||
)
|
|
||||||
self.context = self.browser.new_context(
|
|
||||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
||||||
"Chrome/122.0.0.0 Safari/537.36",
|
|
||||||
locale="en-US",
|
|
||||||
viewport={"width": 1280, "height": 800},
|
|
||||||
timezone_id="Asia/Shanghai",
|
|
||||||
permissions=[],
|
|
||||||
)
|
|
||||||
self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
|
||||||
self.page: Page = self.context.new_page()
|
|
||||||
|
|
||||||
self._last_check_ts = 0
|
|
||||||
self._last_check_result = False
|
|
||||||
os.makedirs('screenshots', exist_ok=True)
|
|
||||||
self.page.goto(self.url)
|
|
||||||
|
|
||||||
def _do_login(self) -> None:
|
|
||||||
self.page.goto(self.url, timeout=30000)
|
|
||||||
# self.page.wait_for_load_state("networkidle", timeout=30000)
|
|
||||||
self.page.wait_for_timeout(3000)
|
|
||||||
|
|
||||||
file_path = f'screenshots/{str(int(time.time()))}.png'
|
|
||||||
self.page.screenshot(path=file_path)
|
|
||||||
|
|
||||||
if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
|
|
||||||
ok = solve_turnstile_capsolver(self.page)
|
|
||||||
if not ok:
|
|
||||||
raise RuntimeError("CapSolver 处理 Turnstile 失败")
|
|
||||||
|
|
||||||
logbtn = self.page.locator("//a[@class='login button']")
|
|
||||||
if logbtn.count() > 0:
|
|
||||||
logbtn.nth(0).click()
|
|
||||||
|
|
||||||
self.page.wait_for_selector("//input[@data-testid=\"emailInput\"]")
|
|
||||||
|
|
||||||
# “我了解”弹窗
|
|
||||||
i_now_btn = self.page.locator("button:has-text(\"I understand\")")
|
|
||||||
if i_now_btn.count() > 0:
|
|
||||||
i_now_btn.click()
|
|
||||||
|
|
||||||
# 输入账号密码
|
|
||||||
email_edit = self.page.locator("//input[@data-testid=\"emailInput\"]")
|
|
||||||
password_edit = self.page.locator("//input[@data-testid=\"passwordInput\"]")
|
|
||||||
if email_edit.count():
|
|
||||||
email_edit.fill(self.email)
|
|
||||||
if password_edit.count():
|
|
||||||
password_edit.fill(self.password)
|
|
||||||
|
|
||||||
# 登录
|
|
||||||
login_btn = self.page.locator('button[form="signin-form"][type="submit"]')
|
|
||||||
try:
|
|
||||||
self.page.wait_for_selector(
|
|
||||||
'button[form="signin-form"][type="submit"]:not([disabled])', timeout=20000
|
|
||||||
)
|
|
||||||
except PlaywrightTimeoutError:
|
|
||||||
pass
|
|
||||||
login_btn.click()
|
|
||||||
|
|
||||||
# 等待跳回
|
|
||||||
self.page.wait_for_url(self.url, timeout=30000)
|
|
||||||
time.sleep(1)
|
|
||||||
self._last_check_ts = time.time()
|
|
||||||
self._last_check_result = True
|
|
||||||
|
|
||||||
def _detect_login(self) -> bool:
|
|
||||||
self.page.goto(self.url, timeout=30000)
|
|
||||||
self.page.wait_for_timeout(3000)
|
|
||||||
return self.page.locator("//a[@class='login button']").count() == 0
|
|
||||||
|
|
||||||
def is_logged_in(self) -> bool:
|
|
||||||
now = time.time()
|
|
||||||
if now - self._last_check_ts < self.check_interval:
|
|
||||||
return self._last_check_result
|
|
||||||
|
|
||||||
try:
|
|
||||||
ok = self._detect_login()
|
|
||||||
except Exception:
|
|
||||||
ok = False
|
|
||||||
|
|
||||||
self._last_check_ts = now
|
|
||||||
self._last_check_result = ok
|
|
||||||
return ok
|
|
||||||
|
|
||||||
def ensure_login(self) -> None:
|
|
||||||
if not self.is_logged_in():
|
|
||||||
self._do_login()
|
|
||||||
|
|
||||||
@require_login
|
|
||||||
def process_ticket(self, lis: list):
|
|
||||||
|
|
||||||
titles = "\r\n"
|
|
||||||
links = ""
|
|
||||||
ids= []
|
|
||||||
title = ""
|
|
||||||
link = ""
|
|
||||||
assignment = True
|
|
||||||
for li in lis:
|
|
||||||
if assignment:
|
|
||||||
title = li['name_title']
|
|
||||||
link = li['link']
|
|
||||||
assignment = False
|
|
||||||
ids.append(li['id'])
|
|
||||||
titles += li['name_title'] + ",\r\n"
|
|
||||||
links += li['link'] + ",\r\n"
|
|
||||||
logger.info(f"Processing ticket for title: {titles}, link: {links}")
|
|
||||||
self.page.goto(self.url, timeout=3000)
|
|
||||||
titles_list = [title.strip() for title in titles.split(',')]
|
|
||||||
unique_titles = list(set(titles_list))
|
|
||||||
unique_titles.sort()
|
|
||||||
titles =",".join(unique_titles) # 去重
|
|
||||||
description = """We request that you take immediate actionto stop the infringing activity, take steps to ensure that iQIYI Content is notre-posted on, re-linked to, or otherwise available through your site. Pleaseinform us of the actions you have taken and their results.
|
|
||||||
1) please help remove these videos
|
|
||||||
2) The drama series titles are {}
|
|
||||||
""".format(titles)
|
|
||||||
# likls = ["\"" + l + "\"" for l in link]
|
|
||||||
# links = ','.join(likls)
|
|
||||||
if self.page.query_selector("div.cf-turnstile[data-sitekey]"):
|
|
||||||
ok = solve_turnstile_capsolver(self.page)
|
|
||||||
if not ok:
|
|
||||||
raise RuntimeError("CapSolver 处理 Turnstile 失败")
|
|
||||||
# file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
|
||||||
# self.page.screenshot(path=file_path)
|
|
||||||
resports = self.page.locator('li.blocks-item:nth-child(8)')
|
|
||||||
resports.click()
|
|
||||||
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
cc = self.page.locator("input#request_collaborators_")
|
|
||||||
cc.scroll_into_view_if_needed()
|
|
||||||
cc.click()
|
|
||||||
cc.type("duke.chen@dailymotion.com")
|
|
||||||
|
|
||||||
self.page.get_by_role("button", name="Copyright infringement").click()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("button", name="Notification").nth(0).click()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("button", name="A legal entity").click()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Corporate name").fill("Beijing iQIYI Science & Technology Co.,Ltd")
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Legal status").fill("Legal Department")
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Subject").fill("Copyright infringement Notification")
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Please indicate the URL of the video(s) you would like to report*").fill(links)
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("Description").nth(1).fill(description)
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("I state in good faith", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_label("I state in good faith that the use of the Protected", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="I certify that all information provided", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="I acknowledge that my statements", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="The data provided through this form", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("checkbox", name="By submitting the present form,", exact=False).check()
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.get_by_role("textbox", name="electronic signature", exact=False).fill("柴达") # 占位
|
|
||||||
time.sleep(1)
|
|
||||||
self.page.set_input_files('input#request-attachments', [
|
|
||||||
self.file_path,
|
|
||||||
self.file_path2
|
|
||||||
])
|
|
||||||
self.page.wait_for_timeout(8000)
|
|
||||||
self.page.get_by_role("button", name="Submit").click()
|
|
||||||
time.sleep(2)
|
|
||||||
file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
|
||||||
locator = self.page.locator("//dt[normalize-space(.)='Id']/following-sibling::dd[1]")
|
|
||||||
raw_text = locator.text_content()
|
|
||||||
match = re.search(r'\d+', raw_text or '')
|
|
||||||
report_id = match.group() if match else None
|
|
||||||
status_raw = self.page.locator("span.status-label").text_content()
|
|
||||||
subsequent_status = status_raw.strip().lower() if status_raw else None
|
|
||||||
time_elem = self.page.locator("dt", has_text="Created").locator("xpath=following-sibling::dd[1]/time")
|
|
||||||
|
|
||||||
datetime_str = time_elem.get_attribute("datetime") # e.g. 2025-06-12T06:15:33+00:00
|
|
||||||
if datetime_str:
|
|
||||||
dt = datetime.fromisoformat(datetime_str.replace("Z", "+00:00")) # 安全处理 ISO 时间
|
|
||||||
timestamp = int(dt.timestamp())
|
|
||||||
else:
|
|
||||||
timestamp = None
|
|
||||||
self.page.screenshot(path=file_path)
|
|
||||||
if self.page.url != self.url:
|
|
||||||
self.page.goto(self.url, timeout=30000)
|
|
||||||
|
|
||||||
return ids, file_path, report_id, subsequent_status, timestamp
|
|
||||||
|
|
||||||
@require_login
|
|
||||||
def report_follow_up(self, report_id: str):
|
|
||||||
max_retries = 3
|
|
||||||
retry_delay = 2
|
|
||||||
loaded = False
|
|
||||||
subsequent_status = ""
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
self.page.goto(f"https://faq.dailymotion.com/hc/en-us/requests/{report_id}", timeout=30000)
|
|
||||||
# self.page.wait_for_load_state("networkidle") # 保证页面加载稳定
|
|
||||||
self.page.wait_for_selector("span.status-label", timeout=30000)
|
|
||||||
try:
|
|
||||||
status_raw = self.page.locator("span.status-label").text_content()
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[警告] 获取状态标签失败: {e}")
|
|
||||||
status_raw = None
|
|
||||||
|
|
||||||
subsequent_status = status_raw.strip().lower() if status_raw else None
|
|
||||||
loaded = True
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[ERROR] 尝试 {attempt + 1}/{max_retries} 失败: {e}")
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
time.sleep(retry_delay)
|
|
||||||
|
|
||||||
if not loaded:
|
|
||||||
return 1, "页面加载失败"
|
|
||||||
|
|
||||||
txt = (
|
|
||||||
"I am the authorized agent of Beijing iQIYI Technology Co., Ltd., responsible for dealing with "
|
|
||||||
"unauthorized overseas distribution of pirated videos of our works. "
|
|
||||||
"We have confirmed that the above links contain infringing content and we insist on requesting to takedown. Thank you!"
|
|
||||||
)
|
|
||||||
|
|
||||||
if "awaiting your reply" in subsequent_status:
|
|
||||||
span_show = self.page.locator('span.comment-show-container-content')
|
|
||||||
if span_show.count() > 0:
|
|
||||||
span_show.nth(0).click()
|
|
||||||
self.page.wait_for_timeout(1000)
|
|
||||||
|
|
||||||
textarea = self.page.locator('#request_comment_body')
|
|
||||||
textarea.type(txt, delay=30)
|
|
||||||
self.page.wait_for_timeout(1000)
|
|
||||||
self.page.get_by_role("button", name="Submit").click()
|
|
||||||
|
|
||||||
success = self.wait_for_selector_safe("span.status-label", timeout=30000, retries=3)
|
|
||||||
if not success:
|
|
||||||
return 1, "提交后未检测到状态更新"
|
|
||||||
|
|
||||||
span_show = self.page.locator('span.comment-show-container-content')
|
|
||||||
if span_show.count() > 0:
|
|
||||||
span_show.nth(0).click()
|
|
||||||
pic_path = f'screenshots/{str(int(time.time()))}_{report_id}.png'
|
|
||||||
self.page.screenshot(path=pic_path)
|
|
||||||
return 0, pic_path
|
|
||||||
|
|
||||||
elif "open" in subsequent_status:
|
|
||||||
return 1, ""
|
|
||||||
|
|
||||||
elif "solved" in subsequent_status:
|
|
||||||
return 2, ""
|
|
||||||
|
|
||||||
return 0, "未知状态"
|
|
||||||
|
|
||||||
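    # Return-code convention, as read from report_follow_up above (the
    # consuming code is not part of this diff):
    #   0 -> handled: follow-up posted and screenshot saved, or unknown status
    #   1 -> retry later: page failed to load, no status update, or still "open"
    #   2 -> ticket already "solved"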
    def wait_for_selector_safe(self, selector: str, timeout=30000, retries=3, retry_delay=2):
        for i in range(retries):
            try:
                self.page.wait_for_selector(selector, timeout=timeout)
                return True
            except Exception as e:
                print(f"[RETRY] Wait {i + 1}/{retries} for {selector} failed: {e}")
                if i < retries - 1:
                    time.sleep(retry_delay)
        return False

    @require_login
    def test(self):
        logger.info(f"Testing DailymotionClient with email: {self.email}")
        self.page.goto(self.url, timeout=30000)
        file_path = f'screenshots/{str(int(time.time()))}_test.png'
        self.page.screenshot(path=file_path)
        self.page.wait_for_timeout(1000)
        file_path = f"screenshots/{str(int(time.time()))}_test2.png"
        self.page.screenshot(path=file_path)
        logger.info(f"Test screenshot saved to {file_path}")
        self.page.wait_for_timeout(1000)
        file_path = f"screenshots/{str(int(time.time()))}_test3.png"
        self.page.screenshot(path=file_path)
        logger.info(f"Test screenshot saved to {file_path}")

    def close(self):
        try:
            self.page.close()
        except Exception:
            pass
        try:
            self.browser.close()
        except Exception:
            pass
        try:
            self._pw.stop()
        except Exception:
            pass


if __name__ == "__main__":
    dm = DailymotionClient("copyright@qiyi.com", "ppsIQIYI2018@")
    # dm.process_ticket("恋爱学园", "https://www.dailymotion.com/video/x9lfr24")
    dm.report_follow_up("13566")
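The class above leans on a `require_login` decorator defined outside this excerpt. A minimal sketch of what such a decorator presumably does (hypothetical; the real one is not shown in this diff) is to re-check the session before every guarded action:

    import functools

    def require_login(method):
        @functools.wraps(method)
        def wrapper(self, *args, **kwargs):
            self.ensure_login()  # re-validate, and re-login if the session expired
            return method(self, *args, **kwargs)
        return wrapper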
@@ -1,66 +1,10 @@
import time
+import requests
import json
import redis
-import requests
-import urllib3
-from matplotlib.artist import allow_rasterization
-from requests.adapters import HTTPAdapter
-from urllib3.util.retry import Retry
-from typing import Optional, Dict, Any, Union
-
-
-class HttpClient:
-    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
-        self.session = requests.Session()
-        # Configure the retry strategy
-        retry_strategy = Retry(
-            total=max_retries,
-            backoff_factor=backoff_factor,
-            status_forcelist=[500, 502, 503, 504, 429]
-        )
-
-        adapter = HTTPAdapter(max_retries=retry_strategy)
-        self.session.mount("http://", adapter)
-        self.session.mount("https://", adapter)
-
-    def request(self,
-                method: str,
-                url: str,
-                headers: Optional[Dict] = None,
-                params: Optional[Dict] = None,
-                data: Optional[Union[Dict, str]] = None,
-                cookies: Optional[Dict] = None,
-                allow_redirects: bool = True,
-                timeout: int = 30,
-                **kwargs) -> requests.Response:
-        try:
-            response = self.session.request(
-                method=method,
-                url=url,
-                headers=headers,
-                params=params,
-                data=data,
-                cookies=cookies,
-                allow_redirects=allow_redirects,
-                timeout=timeout,
-                **kwargs
-            )
-            response.raise_for_status()
-            return response
-        except requests.exceptions.RequestException as e:
-            print(f"Request failed: {url}, error: {str(e)}")
-            raise
-
-    def get(self, url: str, **kwargs) -> requests.Response:
-        return self.request("GET", url, **kwargs)
-
-    def post(self, url: str, **kwargs) -> requests.Response:
-        return self.request("POST", url, **kwargs)
-
-
-# Create a global HTTP client instance
-http_client = HttpClient()
+session = requests.Session()

_REDIS_CONF = {
    "host": "192.144.230.75",
    "port": 6379,
@@ -88,7 +32,6 @@ def get_report_token(key_name: str):


def login():
-    try:
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
@@ -118,7 +61,7 @@ def login():
        "traffic_segment": "962042",
        "visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
    }
-    response = http_client.post(url, headers=headers, data=data)
+    response = session.post(url, headers=headers, data=data)
    data = {
        "update_time": int(time.time()),
        "username": "copyright@qiyi.com",
@@ -127,158 +70,113 @@ def login():
    }
    save_report_token('token', data)
    return data
-    except Exception as e:
-        print(f"Login failed: {str(e)}")
-        raise


-def refresh_token(access_token, refresh_token):
+def get_cookies(access_token: str, refresh_token: str):
-    headers = {
-        "Accept": "*/*",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Content-Length": "0",
-        "Origin": "https://www.dailymotion.com",
-        "Pragma": "no-cache",
-        "Referer": "https://www.dailymotion.com/signin?urlback=%2Fzendesk%3Ftimestamp%3D1748932650%26return_to%3Dhttps%253A%252F%252Ffaq.dailymotion.com%252Fhc%252Fen-us%252Frequests%252Fnew",
-        "Sec-Fetch-Dest": "empty",
-        "Sec-Fetch-Mode": "cors",
-        "Sec-Fetch-Site": "same-origin",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\""
-    }
    cookies = {
-        "dmvk": "683e982c34e34",
-        "ts": "133696",
-        "v1st": "a847389a-6b91-4157-948f-457666f7172b",
-        "ff": "on",
-        "lang": "zh_CN",
-        "usprivacy": "1---",
-        "dmaid": "73ca37e4-6858-46c1-aac4-a4a5fc9a270e",
-        "cookie_policy_closed": "1",
        "access_token": access_token,
        "refresh_token": refresh_token,
    }
    url = "https://www.dailymotion.com/cookie/refresh_token"
-    response = http_client.post(url, headers=headers, cookies=cookies)
+    session.post(url, cookies=cookies, allow_redirects=True)


-def zendesk():
+def get_cookies1(access_token: str, refresh_token: str):
+    """Follow the 302 redirect."""
-    headers = {
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Pragma": "no-cache",
-        "Referer": "https://www.dailymotion.com/sg",
-        "Sec-Fetch-Dest": "document",
-        "Sec-Fetch-Mode": "navigate",
-        "Sec-Fetch-Site": "same-origin",
-        "Sec-Fetch-User": "?1",
-        "Upgrade-Insecure-Requests": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\""
-    }
+    cookies = {
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+    }
    url = "https://www.dailymotion.com/zendesk"
    params = {
        "return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
-        "timestamp": str(time.time()),
+        "timestamp": str(int(time.time())),
    }
-    response = http_client.get(url, headers=headers, params=params, allow_redirects=True)
+    session.get(url, cookies=cookies, params=params, allow_redirects=True)
-    data = http_client.session.cookies.get_dict()
+    cookies_dict = {"update_time": int(time.time()), "cookies": session.cookies.get_dict()}
-    data['update_time'] = int(time.time())
+    save_report_token('cookies', cookies_dict)
-    save_report_token('cookies', data)
+    return cookies_dict


def get_csrftoken():
-    try:
    url = "https://faq.dailymotion.com/hc/api/internal/csrf_token.json"
-    headers = {
+    response = session.get(url)
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "Accept-Language": "zh-CN,zh;q=0.9",
-        "Cache-Control": "no-cache",
-        "Connection": "keep-alive",
-        "Pragma": "no-cache",
-        "Referer": "https://www.dailymotion.com/sg",
-        "Sec-Fetch-Dest": "document",
-        "Sec-Fetch-Mode": "navigate",
-        "Sec-Fetch-Site": "same-origin",
-        "Sec-Fetch-User": "?1",
-        "Upgrade-Insecure-Requests": "1",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\""
-    }
-    response = http_client.get(url, headers=headers)
    data = {"update_time": int(time.time()), "csrf_token": response.json()}
    save_report_token('csrf_token', data)
    return data
-    except Exception as e:
-        print(f"Failed to fetch CSRF token: {str(e)}")
-        raise


-def report(csrf_token: str, v_url, title):
+def report(csrf_token: str, cookies: dict):
-    try:
    headers = {
-        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-        "accept-language": "zh-CN,zh;q=0.9",
-        "cache-control": "no-cache",
-        "content-type": "application/x-www-form-urlencoded",
-        "origin": "https://faq.dailymotion.com",
-        "pragma": "no-cache",
-        "priority": "u=0, i",
-        "referer": "https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048",
-        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": "\"Windows\"",
-        "sec-fetch-dest": "document",
-        "sec-fetch-mode": "navigate",
-        "sec-fetch-site": "same-origin",
-        "sec-fetch-user": "?1",
-        "upgrade-insecure-requests": "1",
-        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
+        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+        'accept-language': 'zh-CN,zh;q=0.9',
+        'cache-control': 'no-cache',
+        'content-type': 'application/x-www-form-urlencoded',
+        'origin': 'https://faq.dailymotion.com',
+        'pragma': 'no-cache',
+        'priority': 'u=0, i',
+        'referer': 'https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048',
+        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"Windows"',
+        'sec-fetch-dest': 'document',
+        'sec-fetch-mode': 'navigate',
+        'sec-fetch-site': 'same-origin',
+        'sec-fetch-user': '?1',
+        'upgrade-insecure-requests': '1',
+        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    }
-    url = "https://faq.dailymotion.com/hc/en-us/requests"
    data = {
-        "utf8": "✓",
-        "authenticity_token": csrf_token,
-        "request%5Bticket_form_id%5D": "136048",
-        "request%5Bcollaborators%5D%5B%5D": "duke.chen@dailymotion.com",
-        "request%5Bcustom_fields%5D%5B360008684839%5D": "__dc.copyright_user_protection_-_copyright__",
-        "request%5Bcustom_fields%5D%5B30150188%5D": "copyrightform-notification",
-        "request%5Bcustom_fields%5D%5B25089567%5D": "legal_entity",
-        "request%5Bcustom_fields%5D%5B25159868%5D": "Beijing iQIYI Science & Technology Co.,Ltd",
-        "request%5Bcustom_fields%5D%5B4869133282962%5D": "Legal Department",
-        "request%5Bsubject%5D": "Copyright infringement Notification",
-        "request%5Bcustom_fields%5D%5B25613698%5D": v_url,
-        "request%5Bdescription%5D": f"We request that you take immediate action to stop the infringing activity, take steps to ensure that iQIYI Content is not re-posted on, re-linked to, or otherwise available through your site. Please inform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are \"{title}\"\r\n",
-        "request%5Bdescription_mimetype%5D": "text/plain",
-        "request%5Bcustom_fields%5D%5B4769880845586%5D": "on",
-        "request%5Bcustom_fields%5D%5B25626417%5D": "on",
-        "request%5Bcustom_fields%5D%5B4769797363346%5D": "on",
-        "request%5Bcustom_fields%5D%5B25159848%5D": "on",
-        "request%5Bcustom_fields%5D%5B4769658191250%5D": "on"
+        'utf8': '✓',
+        'authenticity_token': csrf_token,
+        'request[ticket_form_id]': '136048',
+        'request[collaborators][]': 'duke.chen@dailymotion.com',
+        'request[custom_fields][360008684839]': '__dc.copyright_user_protection_-_copyright__',
+        'request[custom_fields][30150188]': 'copyrightform-notification',
+        'request[custom_fields][25089567]': 'legal_entity',
+        'request[custom_fields][25159868]': 'Beijing iQIYI Science & Technology Co.,Ltd',
+        'request[custom_fields][4869133282962]': 'Legal Department',
+        'request[subject]': 'Copyright infringement Notification',
+        'request[custom_fields][25613698]': 'url',
+        'request[description]': 'We request that you take immediate action to stop the infringing activity, take steps to ensure that iQIYI Content is not re-posted on, re-linked to, or otherwise available through your site. Please inform us of the actions you have taken and their results.\r\n1) please help remove these videos\r\n2) The drama series titles are <drama title>\r\n',
+        'request[description_mimetype]': 'text/plain',
+        'request[custom_fields][4769880845586]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][25626417]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][4769797363346]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][25159848]': [
+            'off',
+            'on',
+        ],
+        'request[custom_fields][4769658191250]': [
+            'off',
+            'on',
+        ],
    }
-    response = requests.post(url, headers=headers, data=data)
-    print(response.status_code)
+    response = requests.post('https://faq.dailymotion.com/hc/en-us/requests', cookies=cookies, headers=headers, data=data)
-    print(response.text)
-    print(response)
-    return response.status_code == 200
-    except Exception as e:
-        print(f"Failed to submit the report: {str(e)}")
-        raise


+def prepare_data():
+    token = get_report_token('token')
+    cookies = get_report_token('cookies')
+    csrf_token = get_report_token('csrf_token')
+    max_update_time = max(d.get('update_time', 0) for d in (token, cookies, csrf_token) if d)
+    if max_update_time + (24 * 60 * 60) < time.time():
+        token = get_report_token('token')
+        access_token = token['token']['access_token']
+        refresh_token = token['token']['refresh_token']
+        get_cookies(access_token, refresh_token)
+        get_cookies1(access_token, refresh_token)
+        csrf_token = get_csrftoken()
+
-if __name__ == '__main__':
+    report(csrf_token['csrf_token']['current_session']['csrf_token'], cookies['cookies'])
-    cookies = get_report_token('cookies')['cookies']
-    http_client.session.cookies = requests.utils.cookiejar_from_dict(cookies)
-    csrf_token = get_csrftoken()['csrf_token']['current_session']['csrf_token']
-    report(csrf_token, 'Hunter X Hunter', 'https://www.dailymotion.com/video/x8kjx7v')
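For context on what the deleted HttpClient wrapped: mounting a urllib3 Retry adapter on a requests.Session makes the session re-issue failed requests on its own. A minimal standalone sketch of the same pattern (the URL is illustrative only); note that by default urllib3's Retry only replays idempotent methods such as GET, so the POST calls in this file were likely never retried unless allowed_methods was widened:

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retry = Retry(
        total=3,                                     # at most 3 retries per request
        backoff_factor=0.5,                          # exponential backoff between attempts
        status_forcelist=[500, 502, 503, 504, 429],  # transient statuses worth retrying
    )
    session.mount("https://", HTTPAdapter(max_retries=retry))
    session.mount("http://", HTTPAdapter(max_retries=retry))

    # Retries happen transparently; once they are exhausted the call raises
    # requests.exceptions.RetryError (wrapping urllib3's MaxRetryError).
    resp = session.get("https://example.com/health", timeout=10)

The new code's checkbox fields posting both 'off' and 'on' mirror how a browser serializes the Zendesk form's hidden inputs; that is an observation from the captured request, not documented API behavior.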
@@ -4,7 +4,8 @@ charset-normalizer==3.4.2
et-xmlfile==1.1.0
idna==3.10
importlib-metadata==6.7.0
-numpy==2.3.0
+lxml==5.4.0
+numpy==1.21.6
openpyxl==3.1.3
pandas==1.3.5
pkg_resources==0.0.0
@@ -1,35 +0,0 @@
from flask import Flask, send_file, abort, request, jsonify
from pathlib import Path

app = Flask(__name__)

PROJECT_ROOT = Path(__file__).parent.resolve()
SCREENSHOTS_DIR = Path("/opt/ql/daily_com/bin/screenshots").resolve()


@app.route('/image/screenshots/<path:filename>')
def serve_image(filename):
    file_path = SCREENSHOTS_DIR / filename

    # Guard against path traversal outside the screenshots directory
    try:
        file_path.resolve().relative_to(SCREENSHOTS_DIR.resolve())
    except ValueError:
        abort(403, description=f"Access to files outside the directory is forbidden: {file_path.resolve()}")

    if not file_path.exists():
        abort(404, description=f"File does not exist: {file_path.resolve()}")

    return send_file(file_path, as_attachment=False)


# Custom 404 error response
@app.errorhandler(404)
def handle_404(e):
    return f"404 error: {e.description}", 404


# Custom 403 error response
@app.errorhandler(403)
def handle_403(e):
    return f"403 error: {e.description}", 403


if __name__ == '__main__':
    app.run(host='0.0.0.0', debug=False, port=5000)
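The `relative_to` check above is what blocks path traversal: `Path.relative_to` raises `ValueError` whenever the resolved candidate path is not inside the base directory. A standalone sketch of the same guard (paths are illustrative):

    from pathlib import Path

    BASE = Path("/opt/ql/daily_com/bin/screenshots").resolve()

    def is_safe(filename: str) -> bool:
        candidate = (BASE / filename).resolve()
        try:
            candidate.relative_to(BASE)  # raises ValueError if outside BASE
            return True
        except ValueError:
            return False

    # is_safe("shot.png")         -> True
    # is_safe("../../etc/passwd") -> False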
test2.py
@@ -1,6 +0,0 @@
from DB import DBVidcon

db = DBVidcon()

account = db.get_account_info('4')
print(account)
@@ -1,10 +0,0 @@
from DB import DBVidcon
from logger import logger


db = DBVidcon()

logger.info("Starting to update video report status")
db.update_video_ts_status()
db.close()
logger.info("Finished updating video report status")