feat: add video info processing and time formatting functionality

晓丰 2025-07-17 14:21:15 +08:00
parent e9ef87fe62
commit fe96e23cc2

oneget.py (114 lines changed)

@@ -1,3 +1,6 @@
import base64
from datetime import datetime
import requests
import uuid
import random
@@ -7,11 +10,39 @@ from threading import Lock
import logging
from DB import DBVidcon
import json
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from dateutil import parser as date_parser  # assumed: format_create_time() below relies on date_parser.isoparse
MACHINE_ID = 3
logger = logging.getLogger(__name__)
db = DBVidcon()
proxiesdict = db.get_proxy_agent_dict()
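
# Shared module-level state: one DBVidcon handle plus the proxy mapping it
# provides. The helpers below normalise scraped values: dash/empty counts,
# ISO timestamps and durations given in seconds.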

def clean_dash_to_zero(val):
    if val in ('-', '', None):
        return 0
    try:
        return int(val)
    except (ValueError, TypeError) as e:
        logger.exception(f"[field error] val = {val}: {e}")
        return 0
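
# Illustrative behaviour of clean_dash_to_zero:
#   clean_dash_to_zero("123") -> 123
#   clean_dash_to_zero("-")   -> 0
#   clean_dash_to_zero(None)  -> 0
#   clean_dash_to_zero("abc") -> 0 (logged as a field error)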

def format_create_time(timestr):
    try:
        dt = date_parser.isoparse(timestr)
        return dt.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        logger.exception(f"[time format error] {timestr}: {e}")
        return "1970-01-01 00:00:00"

def format_duration(seconds):
    try:
        seconds = int(seconds)
        return f"{seconds // 60:02}:{seconds % 60:02}"
    except Exception:
        return "00:00"

class DMHeaderManager:
    _headers_template = {
@@ -124,6 +155,63 @@ class DMHeaderManager:
        return new_headers

class DMVideoInfo:
    def __init__(self, proxies: dict = None, max_retries: int = 3, backoff_factor: float = 0.5):
        self.proxies = proxies
        self.max_retries = max_retries
        self.backoff_factor = backoff_factor
        self.session = self._create_session()

    def _create_session(self):
        session = requests.Session()
        retry = Retry(
            total=self.max_retries,
            connect=self.max_retries,
            read=self.max_retries,
            backoff_factor=self.backoff_factor,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET"]
        )
        adapter = HTTPAdapter(max_retries=retry)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        if self.proxies:
            session.proxies.update(self.proxies)
        return session
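
    # Retry/proxy behaviour of the session built above: only GETs are retried
    # (allowed_methods=["GET"]), up to max_retries times on connection/read
    # errors and on HTTP 500/502/503/504, with exponential backoff scaled by
    # backoff_factor; proxies passed to the constructor apply to every request
    # made through this session.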

    def get_video_info(self, data: dict) -> dict:
        v_xid = data.get('v_xid')
        url = f'https://api.dailymotion.com/video/{v_xid}'
        params = {
            'fields': 'id,title,created_time,thumbnail_240_url,duration,'
                      'owner.id,owner.screenname,likes_total,views_total,'
                      'owner.avatar_60_url,owner.followers_total,owner.videos_total'
        }
        try:
            resp = self.session.get(url, params=params, timeout=10)
            resp.raise_for_status()
            r_data = resp.json()
            xid = r_data["id"]
            # GraphQL-style IDs are the base64 of "Video:<xid>" / "Channel:<owner id>"
            vid = base64.b64encode(f"Video:{xid}".encode('utf-8')).decode('utf-8')
            uxid = r_data["owner.id"]
            uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8')
            data["v_id"] = vid
            data["v_title"] = r_data["title"]
            data["link"] = "https://www.dailymotion.com/video/" + xid
            data["duration"] = r_data["duration"]
            data['createdtime'] = datetime.fromtimestamp(r_data.get("created_time")).strftime("%Y-%m-%d %H:%M:%S")
            # remaining owner / stats fields from r_data (views, likes, followers, ...)
            # would be mapped onto data here
            return data
        except requests.RequestException as e:
            print(f"[ERROR] request failed vxid={v_xid} : {e}")
            return None
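
# Usage sketch for DMVideoInfo (illustrative only; the proxy URL and xid are made up):
#   info = DMVideoInfo(proxies={"https": "http://user:pass@proxy:8080"})
#   row = {"v_xid": "x8example"}
#   info.get_video_info(row)   # fills row with v_id, v_title, link, duration, createdtime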

def main():
    kwdata = db.get_web_items()
    if not kwdata:
        logger.error("No keyword data retrieved")
@@ -138,12 +226,11 @@ proxies = {
        'https': proxies_str
    }
    kw = kwdata['keyword']
    print(kw)
    print("=" * 30)
    dmheader_manager = DMHeaderManager(proxies=proxies)
    headers = dmheader_manager.get_headers()
    for i in range(1, 11):
        data = {
            "operationName": "SEARCH_QUERY",
            "variables": {
@@ -154,7 +241,7 @@ data = {
                "shouldIncludeHashtags": False,  # whether to include hashtags
                "shouldIncludeVideos": False,  # whether to include videos
                "shouldIncludeLives": False,  # whether to include live streams
                "page": 1,
                "page": i,
                "limit": 20,
                "recaptchaToken": None
            },
@@ -518,6 +605,21 @@ response = requests.post('https://graphql.api.dailymotion.com/', headers=headers
        data = response.json()
        edges = data['data']['search']['stories']['edges']
        edges_len = len(edges)
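        # SEARCH_QUERY results are nested as data.search.stories.edges; each edge
        # wraps a "node" whose xid feeds the REST lookup in DMVideoInfo.get_video_info.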
        dm_video_info = DMVideoInfo(proxies=proxies)
        tancks = []
        for j, edge in enumerate(edges):
            node = edge.get("node", {})
            tancks.append({
                "keyword": kw,
                "v_name": kwdata.get("v_name", ""),
                "v_xid": node.get("xid"),
                "batch": kwdata.get("batch"),
                "rn": kwdata.get("rn"),
                "machine_id": MACHINE_ID,
                "index": (i - 1) * 20 + j + 1,
                "level": 0,
            })
        for i, edge in enumerate(edges):
            print(i, edge['node']['xid'])
        if edges_len < 20:
            break
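
        # Each page returns at most 20 edges, so "index" is a global 1-based rank
        # across pages ((i - 1) * 20 + j + 1); a short page means the last page of
        # results was reached and the pagination loop ends early.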