diff --git a/onoe.py b/onoe.py index 7c47966..6566459 100644 --- a/onoe.py +++ b/onoe.py @@ -1,7 +1,7 @@ import json import random import traceback -from urllib.parse import quote,urlparse +from urllib.parse import quote, urlparse import argparse import time import uuid @@ -18,7 +18,6 @@ from logger import logger import os import urllib3 - db = DBVidcon() MACHINE_ID = None MAX_WORKERS = 10 @@ -249,22 +248,22 @@ def gettoken(proxy, r=2): def solve_recaptcha_v3_with_proxy( - proxy_str: str, - keyword: str, - max_task_retries: int = 3, - polling_interval: float = 3, - max_poll_attempts: int = 5 - ) -> str: + proxy_str: str, + keyword: str, + max_task_retries: int = 3, + polling_interval: float = 3, + max_poll_attempts: int = 5 +) -> str: parsed = urlparse(proxy_str) proxy_kwargs = { - "proxyType": parsed.scheme, + "proxyType": parsed.scheme, "proxyAddress": parsed.hostname, - "proxyPort": parsed.port + "proxyPort": parsed.port } if parsed.username and parsed.password: proxy_kwargs.update({ - "proxyLogin": parsed.username, + "proxyLogin": parsed.username, "proxyPassword": parsed.password }) @@ -272,7 +271,7 @@ def solve_recaptcha_v3_with_proxy( result_url = "https://api.capsolver.com/getTaskResult" headers = { "Content-Type": "application/json", - "Accept": "application/json" + "Accept": "application/json" } last_error = None @@ -281,11 +280,11 @@ def solve_recaptcha_v3_with_proxy( payload = { "clientKey": "CAP-A76C932D4C6CCB3CA748F77FDC07D996", "task": { - "type": "ReCaptchaV3Task", - "websiteURL": f"https://www.dailymotion.com/search/{keyword}/top-results", - "websiteKey": "6LeOJBIrAAAAAPMIjyYvo-eN_9W1HDOkrEqHR8tM", - "pageAction": "___grecaptcha_cfg.clients['100000']['L']['L']['promise-callback'](gRecaptchaResponse)", - "minScore": 0.3, + "type": "ReCaptchaV3Task", + "websiteURL": f"https://www.dailymotion.com/search/{keyword}/top-results", + "websiteKey": "6LeOJBIrAAAAAPMIjyYvo-eN_9W1HDOkrEqHR8tM", + "pageAction": "___grecaptcha_cfg.clients['100000']['L']['L']['promise-callback'](gRecaptchaResponse)", + "minScore": 0.3, **proxy_kwargs } } @@ -331,8 +330,8 @@ def get_searchInfo(keyword, level, headers, proxy_name, r=2): recaptchaToken = solve_recaptcha_v3_with_proxy(proxy_str, keyword) data = ( '{"operationName":"SEARCH_QUERY","variables":{"query":"%s","shouldIncludeTopResults":true,"shouldIncludeChannels":false,"shouldIncludePlaylists":false,"shouldIncludeHashtags":false,"shouldIncludeVideos":false,"shouldIncludeLives":false,"page":%d,"limit":%d,"recaptchaToken":"%s"},"query":"fragment VIDEO_BASE_FRAGMENT on Video {\\n id\\n xid\\n title\\n createdAt\\n duration\\n aspectRatio\\n thumbnail(height: PORTRAIT_240) {\\n id\\n url\\n __typename\\n }\\n creator {\\n id\\n xid\\n name\\n displayName\\n accountType\\n avatar(height: SQUARE_60) {\\n id\\n url\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment CHANNEL_BASE_FRAG on Channel {\\n id\\n xid\\n name\\n displayName\\n accountType\\n isFollowed\\n avatar(height: SQUARE_120) {\\n id\\n url\\n __typename\\n }\\n followerEngagement {\\n id\\n followDate\\n __typename\\n }\\n metrics {\\n id\\n engagement {\\n id\\n followers {\\n edges {\\n node {\\n id\\n total\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment PLAYLIST_BASE_FRAG on Collection {\\n id\\n xid\\n name\\n description\\n thumbnail(height: PORTRAIT_240) {\\n id\\n url\\n __typename\\n }\\n creator {\\n id\\n xid\\n name\\n displayName\\n accountType\\n avatar(height: SQUARE_60) {\\n id\\n url\\n __typename\\n }\\n __typename\\n }\\n metrics {\\n id\\n engagement {\\n id\\n videos(filter: {visibility: {eq: PUBLIC}}) {\\n edges {\\n node {\\n id\\n total\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment HASHTAG_BASE_FRAG on Hashtag {\\n id\\n xid\\n name\\n metrics {\\n id\\n engagement {\\n id\\n videos {\\n edges {\\n node {\\n id\\n total\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nfragment LIVE_BASE_FRAGMENT on Live {\\n id\\n xid\\n title\\n audienceCount\\n aspectRatio\\n isOnAir\\n thumbnail(height: PORTRAIT_240) {\\n id\\n url\\n __typename\\n }\\n creator {\\n id\\n xid\\n name\\n displayName\\n accountType\\n avatar(height: SQUARE_60) {\\n id\\n url\\n __typename\\n }\\n __typename\\n }\\n __typename\\n}\\n\\nquery SEARCH_QUERY($query: String!, $shouldIncludeTopResults: Boolean!, $shouldIncludeVideos: Boolean!, $shouldIncludeChannels: Boolean!, $shouldIncludePlaylists: Boolean!, $shouldIncludeHashtags: Boolean!, $shouldIncludeLives: Boolean!, $page: Int, $limit: Int, $sortByVideos: SearchVideoSort, $durationMinVideos: Int, $durationMaxVideos: Int, $createdAfterVideos: DateTime, $recaptchaToken: String) {\\n search(token: $recaptchaToken) {\\n id\\n stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n ...VIDEO_BASE_FRAGMENT\\n ...CHANNEL_BASE_FRAG\\n ...PLAYLIST_BASE_FRAG\\n ...HASHTAG_BASE_FRAG\\n ...LIVE_BASE_FRAGMENT\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n videos(\\n query: $query\\n first: $limit\\n page: $page\\n sort: $sortByVideos\\n durationMin: $durationMinVideos\\n durationMax: $durationMaxVideos\\n createdAfter: $createdAfterVideos\\n ) @include(if: $shouldIncludeVideos) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...VIDEO_BASE_FRAGMENT\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...LIVE_BASE_FRAGMENT\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...CHANNEL_BASE_FRAG\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...PLAYLIST_BASE_FRAG\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {\\n metadata {\\n id\\n algorithm {\\n uuid\\n __typename\\n }\\n __typename\\n }\\n pageInfo {\\n hasNextPage\\n nextPage\\n __typename\\n }\\n edges {\\n node {\\n id\\n ...HASHTAG_BASE_FRAG\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n}\\n"}' % ( - keyword, - j, limit, recaptchaToken)).encode() + keyword, + j, limit, recaptchaToken)).encode() response = post_with_retry( "https://graphql.api.dailymotion.com/", data=data, @@ -424,7 +423,7 @@ def integrate_data_parallel(): while True: global proxiesdict proxiesdict = db.get_proxy_agent_dict() - tasks, flag = db.item_keyword() + tasks, flag = db.item_keyword(1) if not tasks: time.sleep(10) continue @@ -463,13 +462,7 @@ def integrate_data_parallel(): "level": kitem["level"], }) DBSA.flush() - if rollback[0]: - db.rollback_l0(rollback[0]) - if rollback[1]: - db.rollback_l1(rollback[1]) - if rollback[2]: - db.rollback_l2(rollback[2]) - time.sleep(10) + def parse_args() -> argparse.Namespace: global MACHINE_ID, MAX_WORKERS