feat: 添加视频举报ID和创建时间戳提取功能

This commit is contained in:
晓丰 2025-06-12 20:10:12 +08:00
parent 72794508fb
commit 03b746aecb

View File

@@ -1,6 +1,10 @@
import time
import functools
import os
import re
from datetime import datetime
import requests
from playwright.sync_api import (
sync_playwright,
TimeoutError as PlaywrightTimeoutError,
@@ -87,7 +91,7 @@ class DailymotionClient:
EMAIL = "copyright@qiyi.com"
PASSWORD = "ppsIQIYI2018@"
def __init__(self, headless: bool = True):
def __init__(self, headless: bool = False):
self.email = self.EMAIL
self.password = self.PASSWORD
self.headless = headless
@@ -96,7 +100,7 @@ class DailymotionClient:
self._pw = sync_playwright().start()
self.browser: Browser = self._pw.chromium.launch(
headless=self.headless,
# proxy={'server':'http://127.0.0.1:7890'}
proxy={'server':'http://127.0.0.1:7890'}
)
self.context = self.browser.new_context(
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -242,6 +246,20 @@ class DailymotionClient:
self.page.get_by_role("button", name="Submit").click()
time.sleep(2)
file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
locator = self.page.locator("//dt[normalize-space(.)='Id']/following-sibling::dd[1]")
raw_text = locator.text_content()
match = re.search(r'\d+', raw_text or '')
report_id = match.group() if match else None
status_raw = self.page.locator("span.status-label").text_content()
subsequent_status = status_raw.strip().lower() if status_raw else None
time_elem = self.page.locator("dt", has_text="Created").locator("xpath=following-sibling::dd[1]/time")
datetime_str = time_elem.get_attribute("datetime") # e.g. 2025-06-12T06:15:33+00:00
if datetime_str:
dt = datetime.fromisoformat(datetime_str.replace("Z", "+00:00")) # 安全处理 ISO 时间
timestamp = int(dt.timestamp())
else:
timestamp = None
self.page.screenshot(path=file_path)
if self.page.url != self.url:
self.page.goto(self.url, timeout=30000)