import json
import logging
import re
import time
from http.cookies import CookieError, SimpleCookie
from types import SimpleNamespace

import requests
from lxml import etree

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')


class ExtendedResponse(requests.Response):
    """A ``requests.Response`` with convenience helpers for parsing and cookies.

    Instances are not constructed directly in this module: ``MyRequests._request``
    re-tags an ordinary ``requests.Response`` by assigning its ``__class__``.
    """

    def xpath(self):
        """Parse the response text as HTML and return the lxml root element.

        Despite the name, no XPath expression is evaluated here; the caller
        runs ``.xpath(...)`` on the returned tree.

        Raises:
            ValueError: if the body cannot be parsed.
        """
        try:
            return etree.HTML(self.text)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e)) from e

    def to_Dict(self):
        """Decode the body as JSON and convert it with ``dict_to_obj``.

        Raises:
            ValueError: if decoding or conversion fails.
        """
        try:
            return self.dict_to_obj(self.json())
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e)) from e

    def to_Re_findall(self, regex):
        """Return ``re.findall(regex, self.text)``.

        Raises:
            ValueError: if the pattern is invalid or the search fails.
        """
        try:
            return re.findall(regex, self.text)
        except Exception as e:
            raise ValueError("Re搜索错误: " + str(e)) from e

    def cookies_dict(self):
        """Return the response cookies as a plain ``{name: value}`` dict.

        Starts from ``self.cookies.get_dict()`` and then overlays whatever can
        be parsed out of the raw ``Set-Cookie`` header, if one is present.

        Raises:
            ValueError: if the cookies cannot be collected or parsed.
        """
        try:
            cookie_dict = self.cookies.get_dict()
            if 'Set-Cookie' in self.headers:
                parsed = SimpleCookie()
                parsed.load(self.headers['Set-Cookie'])
                for key, morsel in parsed.items():
                    cookie_dict[key] = morsel.value
            return cookie_dict
        except Exception as e:
            raise ValueError("Cookies转换错误: " + str(e)) from e

    def save_cookies(self, filepath, format='json'):
        """Write the cookies of this response to ``filepath``.

        Args:
            filepath (str): Destination file path.
            format (str): ``'json'``, ``'pickle'`` or ``'txt'`` (case-insensitive).
                Defaults to ``'json'``. The ``'txt'`` format writes one
                ``key: value`` pair per line.

        Raises:
            ValueError: if the format is unsupported or writing fails.
        """
        try:
            cookie_dict = self.cookies_dict()
            fmt = format.lower()
            if fmt == 'json':
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(cookie_dict, f, ensure_ascii=False, indent=4)
            elif fmt == 'pickle':
                import pickle
                with open(filepath, 'wb') as f:
                    pickle.dump(cookie_dict, f)
            elif fmt == 'txt':
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.writelines(f"{key}: {value}\n" for key, value in cookie_dict.items())
            else:
                raise ValueError("不支持的格式,请选择 'json'、'pickle' 或 'txt'")
        except Exception as e:
            raise ValueError("保存cookies出错: " + str(e)) from e

    @staticmethod
    def dict_to_obj(d):
        """Recursively convert dicts to ``SimpleNamespace`` objects.

        Lists are converted element by element; any other value is returned
        unchanged.
        """
        if isinstance(d, dict):
            return SimpleNamespace(**{k: ExtendedResponse.dict_to_obj(v) for k, v in d.items()})
        if isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(item) for item in d]
        return d


class MyRequests:
    """Thin wrapper around ``requests.Session`` with retries and cookie persistence."""

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10,
                 default_cookies=None):
        """Create the session and load any locally saved cookies.

        ``default_cookies`` (if given) are applied first; cookies loaded from
        the local cookie file are applied afterwards and therefore override
        defaults with the same name.

        Args:
            base_url (str): Host (and optional path prefix) without scheme;
                trailing slashes are stripped.
            protocol (str): Scheme used when building URLs. Defaults to ``'http'``.
            retries (int): Default number of retries after a failed request.
            proxy_options (bool): When truthy, route traffic through the
                hard-coded local proxy ``http://127.0.0.1:7890``.
            default_timeout (int): Timeout used when a call passes none.
            default_cookies (dict): Initial cookies for the session.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()
        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}

        if default_cookies:
            self.session.cookies.update(default_cookies)

        # Build the cookie file name from base_url. Every character that is not
        # a word character or '-' becomes '_', so a base_url containing '/',
        # ':' or '..' cannot produce a path into another directory or an
        # invalid file name. Plain host names ("www.example.com") map to the
        # same name as a simple '.' -> '_' replacement.
        safe_name = re.sub(r'[^\w-]', '_', self.base_url)
        self.cookie_file = f"{safe_name}_cookies.json"

        try:
            with open(self.cookie_file, 'r', encoding='utf-8') as f:
                loaded_cookies = json.load(f)
            self.session.cookies.update(loaded_cookies)
            logging.info("成功加载本地 cookies")
        except FileNotFoundError:
            logging.info("本地 cookies 文件不存在,将在请求后自动保存")
        except Exception as e:
            # Best effort: a corrupt cookie file must not prevent construction.
            logging.error("加载本地 cookies 失败:" + str(e))

    def _save_cookies(self):
        """Dump the session cookies to ``self.cookie_file`` as JSON.

        Failures are logged and swallowed; this method never raises.
        """
        try:
            with open(self.cookie_file, 'w', encoding='utf-8') as f:
                json.dump(self.session.cookies.get_dict(), f, ensure_ascii=False, indent=4)
            logging.info("cookies 已保存到本地文件:" + self.cookie_file)
        except Exception as e:
            logging.error("保存 cookies 文件失败:" + str(e))

    def _build_url(self, url):
        """Return ``url`` unchanged if absolute, else join it onto the base URL."""
        if url.startswith(("http://", "https://")):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge ``headers`` into the session's default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge ``cookies`` into the session and persist them to the cookie file."""
        self.session.cookies.update(cookies)
        self._save_cookies()

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _request(self, method, url, retries=None, autosave=False, **kwargs):
        """Send a request through the session, retrying on request failures.

        Args:
            method (str): HTTP method.
            url (str): Absolute URL.
            retries (int): Retries allowed after the first failure; defaults
                to ``self.retries``.
            autosave (bool): Persist session cookies after a successful call.
            **kwargs: Passed to ``requests.Session.request``. ``timeout``
                defaults to ``self.default_timeout`` when not supplied.

        Returns:
            ExtendedResponse: The successful response, re-tagged in place.

        Raises:
            requests.RequestException: The last failure (connection error,
                timeout, or non-2xx status via ``raise_for_status``) once the
                retries are used up. Any other exception propagates
                immediately without retrying.
        """
        if retries is None:
            retries = self.retries
        kwargs.setdefault('timeout', self.default_timeout)

        remaining = retries
        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
                break
            except requests.RequestException as e:
                # Only transport/HTTP failures are retried. Programming errors
                # (e.g. bad kwargs) surface at once instead of re-sending.
                if remaining <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {remaining},错误: {e}")
                # Exponential back-off: 1s, 2s, 4s, ... relative to self.retries.
                time.sleep(2 ** (self.retries - remaining))
                remaining -= 1

        # Post-processing happens outside the retry loop so that a failure here
        # can never cause the (possibly non-idempotent) request to be re-sent.
        self.session.cookies.update(response.cookies)
        if 'Set-Cookie' in response.headers:
            try:
                parsed = SimpleCookie()
                parsed.load(response.headers['Set-Cookie'])
            except CookieError as e:
                logging.warning(f"解析 Set-Cookie 响应头失败,已跳过: {e}")
            else:
                for key, morsel in parsed.items():
                    # Skip cookies whose value is the literal "deleted".
                    if morsel.value.lower() != 'deleted':
                        self.session.cookies.set(key, morsel.value)
        if autosave:
            self._save_cookies()
        response.__class__ = ExtendedResponse
        return response

    def get_cookies(self):
        """Return the session cookies as a plain dict.

        Raises:
            ValueError: if the cookies cannot be read.
        """
        try:
            return self.session.cookies.get_dict()
        except Exception as e:
            raise ValueError("获取 cookies 失败:" + str(e)) from e


class MR(MyRequests):
    """Short alias for ``MyRequests``."""
    pass