201 lines
8.1 KiB
Python
201 lines
8.1 KiB
Python
import json
|
||
import re
|
||
import requests
|
||
import logging
|
||
import time
|
||
from lxml import etree
|
||
from types import SimpleNamespace
|
||
|
||
# Configure the root logger at import time: DEBUG level, with each record
# rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
|
||
|
||
|
||
class ExtendedResponse(requests.Response):
    """``requests.Response`` subclass with body-parsing and cookie helpers.

    Every helper converts any failure into a ``ValueError`` whose message
    carries a short prefix naming the failed operation; the original
    exception is preserved as ``__cause__``.
    """

    def xpath(self):
        """Parse the response text as HTML with lxml.

        Returns:
            The value returned by ``etree.HTML(self.text)``. Run XPath
            queries by calling ``.xpath(...)`` on that result.

        Raises:
            ValueError: If parsing raises any exception.
        """
        try:
            return etree.HTML(self.text)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e)) from e

    def to_Dict(self):
        """Decode the body as JSON and expose it through attribute access.

        Returns:
            The decoded JSON passed through :meth:`dict_to_obj` (dicts become
            ``SimpleNamespace`` objects, lists are converted element-wise).

        Raises:
            ValueError: If JSON decoding or the conversion fails.
        """
        try:
            return self.dict_to_obj(self.json())
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e)) from e

    def to_Re_findall(self, regex):
        """Run ``re.findall(regex, self.text)`` and return its result.

        Args:
            regex: Pattern accepted by ``re.findall``.

        Raises:
            ValueError: If the pattern is invalid or the search fails.
        """
        try:
            return re.findall(regex, self.text)
        except Exception as e:
            raise ValueError("Re搜索错误: " + str(e)) from e

    def cookies_dict(self):
        """Return the response cookies as a plain ``{name: value}`` dict.

        Starts from ``self.cookies.get_dict()`` and, when a ``Set-Cookie``
        header is present, parses it with ``SimpleCookie`` and writes those
        name/value pairs on top.

        Raises:
            ValueError: If reading or parsing the cookies fails.
        """
        try:
            # Cookies already collected in the response's cookie jar.
            cookie_dict = self.cookies.get_dict()
            # Supplement them with whatever the raw Set-Cookie header yields.
            if 'Set-Cookie' in self.headers:
                from http.cookies import SimpleCookie

                parsed = SimpleCookie()
                parsed.load(self.headers['Set-Cookie'])
                for name, morsel in parsed.items():
                    cookie_dict[name] = morsel.value
            return cookie_dict
        except Exception as e:
            raise ValueError("Cookies转换错误: " + str(e)) from e

    def save_cookies(self, filepath, format='json'):
        """Write the cookies of this response to ``filepath``.

        Args:
            filepath (str): Destination file path.
            format (str): Output format, matched case-insensitively. One of
                ``'json'`` (indented UTF-8 JSON), ``'pickle'`` (binary
                pickle) or ``'txt'`` (one ``name: value`` line per cookie).
                Defaults to ``'json'``.

        Raises:
            ValueError: If the format is unsupported or anything goes wrong
                while collecting or writing the cookies.
        """
        try:
            cookie_dict = self.cookies_dict()
            # Normalise once instead of lower-casing in every branch.
            fmt = format.lower()
            if fmt == 'json':
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(cookie_dict, f, ensure_ascii=False, indent=4)
            elif fmt == 'pickle':
                import pickle

                with open(filepath, 'wb') as f:
                    pickle.dump(cookie_dict, f)
            elif fmt == 'txt':
                with open(filepath, 'w', encoding='utf-8') as f:
                    for key, value in cookie_dict.items():
                        f.write(f"{key}: {value}\n")
            else:
                raise ValueError("不支持的格式,请选择 'json'、'pickle' 或 'txt'")
        except Exception as e:
            raise ValueError("保存cookies出错: " + str(e)) from e

    @staticmethod
    def dict_to_obj(d):
        """Recursively convert dicts to ``SimpleNamespace`` objects.

        Args:
            d: Any value. Dicts become ``SimpleNamespace`` instances whose
                attributes are the converted values, lists become lists of
                converted items, and everything else is returned unchanged.
        """
        if isinstance(d, dict):
            return SimpleNamespace(**{k: ExtendedResponse.dict_to_obj(v) for k, v in d.items()})
        if isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(item) for item in d]
        return d
|
||
|
||
|
||
class MyRequests:
    """Thin wrapper around ``requests.Session`` bound to one base URL.

    Adds relative-URL resolution, retry with exponential back-off, and
    persistence of the session cookies to a local JSON file.
    """

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10,
                 default_cookies=None):
        """Create the session and load previously saved cookies.

        ``default_cookies`` (if given) are applied to the session first. The
        cookie file derived from ``base_url`` (for example
        ``"www_zhrczp_com_cookies.json"``) is then read and, if it exists,
        its cookies are applied afterwards.

        Args:
            base_url (str): Host (optionally with a path prefix), without
                scheme. Trailing slashes are stripped.
            protocol (str): Scheme used when building URLs. Defaults to
                ``'http'``.
            retries (int): Number of retries after a failed request.
            proxy_options (bool): When truthy, route HTTP and HTTPS traffic
                through ``http://127.0.0.1:7890``.
            default_timeout (int): Timeout used when a call does not pass
                its own ``timeout``.
            default_cookies (dict): Cookies to seed the session with.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()

        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}

        # Seed the session with caller-supplied cookies; cookies from the
        # local file (below) are applied after these.
        if default_cookies:
            self.session.cookies.update(default_cookies)

        self.cookie_file = self._cookie_filename(self.base_url)
        # Try to restore cookies saved by an earlier run.
        try:
            with open(self.cookie_file, 'r', encoding='utf-8') as f:
                loaded_cookies = json.load(f)
            self.session.cookies.update(loaded_cookies)
            logging.info("成功加载本地 cookies")
        except FileNotFoundError:
            logging.info("本地 cookies 文件不存在,将在请求后自动保存")
        except Exception as e:
            # Best effort: a corrupt or unreadable file must not prevent
            # construction.
            logging.error("加载本地 cookies 失败:" + str(e))

    @staticmethod
    def _cookie_filename(base_url):
        """Return the cookie file name for ``base_url``.

        Dots and path separators are replaced with underscores so that a
        base URL containing a path prefix still yields a single flat file
        name instead of a path into a sub-directory.
        """
        safe_name = re.sub(r'[./\\]', '_', base_url)
        return f"{safe_name}_cookies.json"

    def _save_cookies(self):
        """Write the session cookies to ``self.cookie_file`` as JSON.

        Failures are logged and swallowed so that a read-only working
        directory does not break requests.
        """
        try:
            with open(self.cookie_file, 'w', encoding='utf-8') as f:
                json.dump(self.session.cookies.get_dict(), f, ensure_ascii=False, indent=4)
            logging.info("cookies 已保存到本地文件:" + self.cookie_file)
        except Exception as e:
            logging.error("保存 cookies 文件失败:" + str(e))

    def _build_url(self, url):
        """Return ``url`` unchanged if absolute, else join it onto the base URL."""
        if url.startswith(("http://", "https://")):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge ``headers`` into the session's default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge ``cookies`` into the session and persist them to disk."""
        self.session.cookies.update(cookies)
        self._save_cookies()

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; ``url`` may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; ``url`` may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; ``url`` may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; ``url`` may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _request(self, method, url, retries=None, **kwargs):
        """Send a request through the session, retrying on failure.

        Args:
            method (str): HTTP method name.
            url (str): Absolute URL to request.
            retries (int | None): Number of retries after the first failed
                attempt; ``None`` means ``self.retries``.
            **kwargs: Forwarded to ``Session.request``. ``timeout`` defaults
                to ``self.default_timeout``. A relative ``referer`` header
                is resolved against the base URL.

        Returns:
            The response, with its class switched to ``ExtendedResponse``.

        Raises:
            Exception: The error of the last attempt (including HTTP error
                statuses raised by ``raise_for_status``) once all retries
                are used up.
        """
        if retries is None:
            retries = self.retries
        kwargs.setdefault('timeout', self.default_timeout)

        headers = kwargs.get('headers')
        if headers:
            # Header names are case-insensitive, so accept "Referer" too.
            referer_key = next((k for k in headers if k.lower() == 'referer'), None)
            if referer_key is not None:
                # Work on a copy so the caller's dict is never modified.
                headers = headers.copy()
                headers[referer_key] = self._build_url(headers[referer_key])
                kwargs['headers'] = headers

        attempt = 0
        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
                break
            except Exception as e:
                remaining = retries - attempt
                if remaining <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {remaining},错误: {e}")
                # Exponential back-off based on the attempt index: 1s, 2s, 4s, ...
                time.sleep(2 ** attempt)
                attempt += 1

        # Post-processing happens outside the retried section so that a
        # local failure here can never cause the HTTP request to be re-sent.
        self.session.cookies.update(response.cookies)
        self._save_cookies()
        # Re-type the response so callers get the ExtendedResponse helpers.
        response.__class__ = ExtendedResponse
        return response

    def get_cookies(self):
        """Return the session cookies as a plain ``{name: value}`` dict.

        Raises:
            ValueError: If the cookies cannot be read from the session.
        """
        try:
            return self.session.cookies.get_dict()
        except Exception as e:
            raise ValueError("获取 cookies 失败:" + str(e)) from e
|
||
|
||
|
||
class MR(MyRequests):
    """Short alias of ``MyRequests``; adds no behavior of its own."""