feat: 添加视频举报ID和创建时间戳提取功能
This commit is contained in:
parent
72794508fb
commit
03b746aecb
@ -1,6 +1,10 @@
|
|||||||
import time
|
import time
|
||||||
import functools
|
import functools
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import requests
|
||||||
from playwright.sync_api import (
|
from playwright.sync_api import (
|
||||||
sync_playwright,
|
sync_playwright,
|
||||||
TimeoutError as PlaywrightTimeoutError,
|
TimeoutError as PlaywrightTimeoutError,
|
||||||
@ -87,7 +91,7 @@ class DailymotionClient:
|
|||||||
EMAIL = "copyright@qiyi.com"
|
EMAIL = "copyright@qiyi.com"
|
||||||
PASSWORD = "ppsIQIYI2018@"
|
PASSWORD = "ppsIQIYI2018@"
|
||||||
|
|
||||||
def __init__(self, headless: bool = True):
|
def __init__(self, headless: bool = False):
|
||||||
self.email = self.EMAIL
|
self.email = self.EMAIL
|
||||||
self.password = self.PASSWORD
|
self.password = self.PASSWORD
|
||||||
self.headless = headless
|
self.headless = headless
|
||||||
@ -96,7 +100,7 @@ class DailymotionClient:
|
|||||||
self._pw = sync_playwright().start()
|
self._pw = sync_playwright().start()
|
||||||
self.browser: Browser = self._pw.chromium.launch(
|
self.browser: Browser = self._pw.chromium.launch(
|
||||||
headless=self.headless,
|
headless=self.headless,
|
||||||
# proxy={'server':'http://127.0.0.1:7890'}
|
proxy={'server':'http://127.0.0.1:7890'}
|
||||||
)
|
)
|
||||||
self.context = self.browser.new_context(
|
self.context = self.browser.new_context(
|
||||||
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
@ -242,6 +246,20 @@ class DailymotionClient:
|
|||||||
self.page.get_by_role("button", name="Submit").click()
|
self.page.get_by_role("button", name="Submit").click()
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
file_path = f'screenshots/{str(int(time.time()))}_{title}_{link.split("/")[-1]}.png'
|
||||||
|
locator = self.page.locator("//dt[normalize-space(.)='Id']/following-sibling::dd[1]")
|
||||||
|
raw_text = locator.text_content()
|
||||||
|
match = re.search(r'\d+', raw_text or '')
|
||||||
|
report_id = match.group() if match else None
|
||||||
|
status_raw = self.page.locator("span.status-label").text_content()
|
||||||
|
subsequent_status = status_raw.strip().lower() if status_raw else None
|
||||||
|
time_elem = self.page.locator("dt", has_text="Created").locator("xpath=following-sibling::dd[1]/time")
|
||||||
|
|
||||||
|
datetime_str = time_elem.get_attribute("datetime") # e.g. 2025-06-12T06:15:33+00:00
|
||||||
|
if datetime_str:
|
||||||
|
dt = datetime.fromisoformat(datetime_str.replace("Z", "+00:00")) # 安全处理 ISO 时间
|
||||||
|
timestamp = int(dt.timestamp())
|
||||||
|
else:
|
||||||
|
timestamp = None
|
||||||
self.page.screenshot(path=file_path)
|
self.page.screenshot(path=file_path)
|
||||||
if self.page.url != self.url:
|
if self.page.url != self.url:
|
||||||
self.page.goto(self.url, timeout=30000)
|
self.page.goto(self.url, timeout=30000)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user