import requests
import logging
import time
from lxml import etree
from types import SimpleNamespace

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')


class ExtendedResponse(requests.Response):
    """requests.Response extended with XPath querying and dict-to-object conversion."""

    def xpath(self, xpath_expr):
        # Parse the response body as HTML and evaluate the XPath expression.
        try:
            tree = etree.HTML(self.text)
            return tree.xpath(xpath_expr)
        except Exception as e:
            raise ValueError("XPath parsing error: " + str(e))

    def to_Dict(self):
        # Convert the JSON body into nested SimpleNamespace objects
        # so fields can be accessed as attributes.
        try:
            data = self.json()
            return self.dict_to_obj(data)
        except Exception as e:
            raise ValueError("JSON conversion error: " + str(e))

    @staticmethod
    def dict_to_obj(d):
        if isinstance(d, dict):
            return SimpleNamespace(**{k: ExtendedResponse.dict_to_obj(v) for k, v in d.items()})
        elif isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(item) for item in d]
        else:
            return d


class MyRequests:
    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True,
                 default_timeout=10, default_cookies=None):
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()
        if proxy_options:
            # Route traffic through a local proxy (e.g. Clash listening on port 7890).
            self.session.proxies = {"http": "http://127.0.0.1:7890",
                                    "https": "http://127.0.0.1:7890"}
        if default_cookies:
            self.session.cookies.update(default_cookies)

    def _build_url(self, url):
        # Absolute URLs pass through unchanged; relative paths are joined to the base URL.
        if url.startswith("http://") or url.startswith("https://"):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        self.session.cookies.update(cookies)

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers,
                             cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers,
                             cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers,
                             cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers,
                             cookies=cookies, **kwargs)

    def _request(self, method, url, retries=None, **kwargs):
        if retries is None:
            retries = self.retries
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.default_timeout
        if 'headers' in kwargs and kwargs['headers']:
            headers = kwargs['headers']
            # Allow a relative referer and expand it against the base URL.
            if 'referer' in headers:
                headers['referer'] = self._build_url(headers['referer'])
        try:
            response = self.session.request(method, url, **kwargs)
            response.raise_for_status()
            # Re-type the response so the ExtendedResponse helpers become available.
            response.__class__ = ExtendedResponse
            return response
        except Exception as e:
            if retries > 0:
                logging.warning(f"Request {method} {url} failed, {retries} retries left, error: {e}")
                # Exponential backoff: 1s, 2s, 4s, ... as retries are used up.
                time.sleep(2 ** (self.retries - retries))
                return self._request(method, url, retries=retries - 1, **kwargs)
            else:
                logging.error(f"Request {method} {url} failed, retries exhausted")
                raise

    def close(self):
        self.session.close()


class MR(MyRequests):
    """Short alias for MyRequests."""
    pass


if __name__ == '__main__':
    req = MyRequests("httpbin.org", protocol="https", retries=3, proxy_options=True,
                     default_timeout=5, default_cookies={"session": "abc"})
    req.set_default_headers({"User-Agent": "MyRequests/1.0"})
    try:
        resp = req.get("/get", headers={"referer": "/page"})
        logging.info("Status code: %s", resp.status_code)
        logging.info("JSON: %s", resp.json())
        logging.info("XPath: %s", resp.xpath('//title/text()'))
        obj = resp.to_Dict()
        logging.info("Converted object: %s", obj.url)
    except Exception as ex:
        logging.error("Request failed: %s", ex)
    finally:
        req.close()