diff --git a/main.py b/main.py index afb313c..3ee4209 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +import base64 import json import random import traceback @@ -252,38 +253,30 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2): if r == 2: logger.info(f"NET处理->{keyword},\trn->{proxy_name},\tlevel->{level}") video_list = [] - max_page = 4 - limit = 10 + max_page = 2 + limit = 30 + endpoint = 'https://api.dailymotion.com/videos' if level == 0 or level == 1: - max_page = 10 - limit = 20 + max_page = 3 + limit = 100 for j in range(1, max_page): - # 别展开 = = ! - data = ( - '{"operationName":"SEARCH_QUERY","variables":{"query":"%s","shouldIncludeTopResults":true,"shouldIncludeChannels":false,"shouldIncludePlaylists":false,"shouldIncludeHashtags":false,"shouldIncludeVideos":false,"shouldIncludeLives":false,"page":%d,"limit":%d,"recaptchaToken":null},"query":"fragment VIDEO_BASE_FRAGMENT on Video {\\n id\\n xid\\n title\\n createdAt\\n duration\\n aspectRatio\\n thumbnail(height: PORTRAIT_240) {\\n id\\n url\\n __typename\\n }\\n creator {\\n id\\n xid\\n name\\n displayName\\n accountType\\n avatar(height: SQUARE_60) {\\n id\\n url\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment CHANNEL_BASE_FRAG on Channel {\\n id\\n xid\\n name\\n displayName\\n accountType\\n isFollowed\\n avatar(height: SQUARE_120) {\\n id\\n url\\n __typename\\n }\\n followerEngagement {\\n id\\n followDate\\n __typename\\n }\\n metrics {\\n id\\n engagement {\\n id\\n followers {\\n edges {\\n node {\\n id\\n total\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment PLAYLIST_BASE_FRAG on Collection {\\n id\\n xid\\n name\\n description\\n thumbnail(height: PORTRAIT_240) {\\n id\\n url\\n __typename\\n }\\n creator {\\n id\\n xid\\n name\\n displayName\\n accountType\\n avatar(height: SQUARE_60) {\\n id\\n url\\n __typename\\n }\\n __typename\\n }\\n metrics {\\n id\\n engagement {\\n id\\n videos(filter: {visibility: {eq: PUBLIC}}) {\\n edges {\\n node {\\n id\\n total\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment HASHTAG_BASE_FRAG on Hashtag {\\n id\\n xid\\n name\\n metrics {\\n id\\n engagement {\\n id\\n videos {\\n edges {\\n node {\\n id\\n total\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment LIVE_BASE_FRAGMENT on Live {\\n id\\n xid\\n title\\n audienceCount\\n aspectRatio\\n isOnAir\\n thumbnail(height: PORTRAIT_240) {\\n id\\n url\\n __typename\\n }\\n creator {\\n id\\n xid\\n name\\n displayName\\n accountType\\n avatar(height: SQUARE_60) {\\n id\\n url\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nquery SEARCH_QUERY($query: String!, $shouldIncludeTopResults: Boolean!, $shouldIncludeVideos: Boolean!, $shouldIncludeChannels: Boolean!, $shouldIncludePlaylists: Boolean!, $shouldIncludeHashtags: Boolean!, $shouldIncludeLives: Boolean!, $page: Int, $limit: Int, $sortByVideos: SearchVideoSort, $durationMinVideos: Int, $durationMaxVideos: Int, $createdAfterVideos: DateTime, $recaptchaToken: String) {\\n search(token: $recaptchaToken) {\\n id\\n stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n ...VIDEO_BASE_FRAGMENT\\n ...CHANNEL_BASE_FRAG\\n ...PLAYLIST_BASE_FRAG\\n ...HASHTAG_BASE_FRAG\\n ...LIVE_BASE_FRAGMENT\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n videos(\\n query: $query\\n first: $limit\\n page: $page\\n sort: $sortByVideos\\n durationMin: $durationMinVideos\\n durationMax: $durationMaxVideos\\n createdAfter: $createdAfterVideos\\n ) @include(if: $shouldIncludeVideos) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...VIDEO_BASE_FRAGMENT\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...LIVE_BASE_FRAGMENT\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...CHANNEL_BASE_FRAG\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...PLAYLIST_BASE_FRAG\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...HASHTAG_BASE_FRAG\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n}\\n"}' % ( - keyword, - j, limit)).encode() - response = post_with_retry( - "https://graphql.api.dailymotion.com/", - data=data, - headers=headers, - proxy_name=proxy_name - ) - if response is None: - return None + params = { + 'search': keyword, + 'fields': 'id,title,created_time,thumbnail_240_url,duration,owner.id,owner.screenname,likes_total,views_total,owner.avatar_60_url,owner.followers_total,owner.videos_total', + 'limit': limit, + 'page': j, + 'sort': "relevance" + } + proxy_string = db.get_proxy(proxy_name) + logger.info(f"代理: {proxy_string}") + proxies = { + 'http':proxy_string, + 'https':proxy_string, + } + response = requests.get(endpoint, params=params, proxies=proxies) jsondata = response.json() try: - errors = jsondata.get("errors") # GraphQL errors 数组 - stories = jsondata.get("data", {}).get("search", {}).get("stories") - - if errors or stories is None: # 有错误 或 stories 为 null - if r == 0: - logger.info("连续 3 次错误或空结果:", json.dumps(jsondata, ensure_ascii=False)) - return None - time.sleep((3 - r) * 5) - return get_searchInfo(keyword, level, headers, proxy_name, r - 1) - resinfo = stories["edges"] - logger.info(f"resinfo: {len(resinfo)}") + resinfo=jsondata.get("list") except Exception: if r < 0: logger.exception("[搜索接口] 未知:未处理", response.text) @@ -294,29 +287,29 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2): return get_searchInfo(keyword, level, headers, proxy_name, r - 1) for index, iteminfo in enumerate(resinfo): calculated_index = index + 1 + (j - 1) * limit - node = iteminfo['node'] - if node['__typename'] != "Video": - continue - creator = node['creator'] - duration = node.get('duration') + xid = iteminfo["id"] + vid = base64.b64encode(f"Video:{xid}".encode('utf-8')).decode('utf-8') + uxid = iteminfo["owner"] + uid = base64.b64encode(f"Channel:{uxid}".encode('utf-8')).decode('utf-8') + duration = iteminfo.get('duration') if duration <= 300: continue v_data = { "index": calculated_index, - "v_id": node.get("id"), - "v_xid": node.get('xid'), - "link": "https://www.dailymotion.com/video/" + node.get('xid'), - "title": node.get("title"), - "createtime": node.get("createdAt"), - "duration": node.get("duration"), - "pic": node.get("thumbnail", {}).get("url"), - "view": 0, - "fans": 0, - "videos": 0, - "u_id": creator.get('id'), - "u_xid": creator.get('xid'), - "u_name": creator.get('name'), - "u_pic": node.get('thumbnail').get('url') + "v_id": vid, + "v_xid": xid, + "link": "https://www.dailymotion.com/video/" + xid, + "title": iteminfo.get("title"), + "createtime": iteminfo.get('created_time'), + "duration": iteminfo.get('duration'), + "pic": iteminfo.get('thumbnail_240_url'), + "view": iteminfo.get('views_total'), + "fans": iteminfo.get('owner.followers_total'), + "videos": iteminfo.get('owner.videos_total'), + "u_id": uid, + "u_xid": uxid, + "u_name": iteminfo.get('owner.screenname'), + "u_pic": iteminfo.get('owner.avatar_60_url') } video_list.append(v_data) time.sleep(1)