import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import pandas as pd
import requests
import urllib3

# Requests below are sent with verify=False, which makes urllib3 emit an
# InsecureRequestWarning on every call; silence it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class ResumeAPI:
    """Small client for the resume endpoints under ``https://www.qj050.com/api/v1``.

    Every request is preceded by a random 1-3 second pause and is retried up
    to ``max_retries`` times on ``requests`` errors.
    """

    def __init__(self, timeout=30):
        """Set up the base URL, request headers and retry policy.

        Args:
            timeout: Value passed as ``timeout=`` to every ``requests`` call
                (seconds, or a ``(connect, read)`` tuple). Without a timeout
                a stalled connection would block a worker thread forever.
        """
        self.base_url = 'https://www.qj050.com/api/v1'
        # NOTE(review): the bearer token and cookie below are credentials
        # committed in source. They should be loaded from configuration or
        # the environment and rotated; left unchanged here so existing
        # callers keep working.
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6NDgxNTMsInVzZXJuYW1lIjoi55yf6LSkODg4OCIsInB3ZCI6IjFiYmJjNzc5OGRkMTFiNTI2YWQ4ZTVmYTYyNWY5MjVkIiwiaWF0IjoxNzQyODgzNzU3LCJleHAiOjE3NzQ0MTk3NTd9.sLsOLcTnxoB0iWbks7_9IVp9OmDPlo0cKOwL6qHcID8',
            'Connection': 'keep-alive',
            'Cookie': 'token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6NDgxNTMsInVzZXJuYW1lIjoi55yf6LSkODg4OCIsInB3ZCI6IjFiYmJjNzc5OGRkMTFiNTI2YWQ4ZTVmYTYyNWY5MjVkIiwiaWF0IjoxNzQyODgzNzU3LCJleHAiOjE3NzQ0MTk3NTd9.sLsOLcTnxoB0iWbks7_9IVp9OmDPlo0cKOwL6qHcID8;token.sig=SiletSGnwThzp8gd2-IEaawgh0aMNhG8ZduDjcH5syA;x-trace-id=5cbbd6e2d49347e2893925bbf110eb37',
            'Host': 'www.qj050.com',
            'User-Agent': 'PostmanRuntime-ApipostRuntime/1.1.0',
            'x-platform': '1'
        }
        self.max_retries = 3
        self.retry_delay = 2
        self.timeout = timeout

    def _make_request(self, url, params=None, method='get'):
        """Send one HTTP request with retries and return the decoded JSON body.

        Args:
            url: Absolute URL to call.
            params: Sent as the query string for GET, or as the JSON body
                for any other ``method`` value.
            method: ``'get'`` issues a GET; anything else issues a POST.

        Returns:
            The result of ``response.json()``.

        Raises:
            requests.exceptions.RequestException: Re-raised after the last
                failed attempt.
        """
        for attempt in range(self.max_retries):
            try:
                # Random pause before every attempt to avoid hammering the server.
                time.sleep(random.uniform(1, 3))
                # NOTE(review): verify=False disables TLS certificate
                # validation; kept as in the original, but it exposes the
                # credentials above to man-in-the-middle interception.
                if method == 'get':
                    response = requests.get(
                        url,
                        headers=self.headers,
                        params=params,
                        verify=False,
                        timeout=self.timeout,
                    )
                else:
                    response = requests.post(
                        url,
                        headers=self.headers,
                        json=params,
                        verify=False,
                        timeout=self.timeout,
                    )
                response.raise_for_status()
                return response.json()
            except requests.exceptions.RequestException as e:
                if attempt == self.max_retries - 1:
                    print(f"请求失败 ({url}): {str(e)}")
                    raise
                print(f"重试请求 {attempt + 1}/{self.max_retries}")
                # Linear back-off: wait longer after each failed attempt.
                time.sleep(self.retry_delay * (attempt + 1))

    def get_name(self, resume_id, blurred_name):
        """Fetch the ``name`` field of a resume's detail record.

        Args:
            resume_id: Identifier interpolated into ``/resume/{resume_id}``.
            blurred_name: Fallback returned when the lookup fails.

        Returns:
            ``data['data']['name']`` from the response (``''`` when the key
            is absent), or ``blurred_name`` if the request or parsing raised.
        """
        url = f'{self.base_url}/resume/{resume_id}'
        params = {'_': int(time.time() * 1000)}
        try:
            data = self._make_request(url, params)
            # Further detail fields of the resume can be read from this response.
            return data.get('data', {}).get('name', '')
        except Exception:
            # Best-effort lookup: fall back to the masked name. Exception (not
            # a bare except) so Ctrl-C / SystemExit still propagate.
            return blurred_name

    def get_contact_info(self, resume_id):
        """Fetch the contact record of a resume.

        Args:
            resume_id: Identifier interpolated into
                ``/resume/{resume_id}/contact``.

        Returns:
            A dict with keys ``'phone'`` and ``'real_name'``; both are ``''``
            when the request fails or the fields are missing.
        """
        url = f'{self.base_url}/resume/{resume_id}/contact'
        params = {'_': int(time.time() * 1000)}
        try:
            data = self._make_request(url, params)
            return {
                'phone': data.get('data', {}).get('phone', ''),
                'real_name': data.get('data', {}).get('real_name', '')
            }
        except Exception:
            # Best-effort lookup; Exception (not a bare except) so
            # Ctrl-C / SystemExit still propagate.
            return {'phone': '', 'real_name': ''}

    def _process_resume(self, item):
        """Turn one search-result item into a flat row, or None on failure."""
        try:
            resume_id = item.get('id')
            blurred_name = item.get('name_value', '')

            contact_info = self.get_contact_info(resume_id)
            name_value = self.get_name(resume_id, blurred_name)

            # 'infoCateforyArrObj' is the key as spelled in the original code.
            # `or []` keeps the resume when the value is present but null.
            category_names = [
                c.get('name', '') for c in (item.get('infoCateforyArrObj') or [])
            ]
            categories_str = ','.join(category_names)

            return {
                'name_value': blurred_name,
                'age': item.get('age', ''),
                'edu_value': item.get('edu_value', ''),
                'job_instant_value': item.get('job_instant_value', ''),
                'job_salary_from': item.get('job_salary_from', ''),
                'job_salary_to': item.get('job_salary_to', ''),
                'categories': categories_str,
                'phone': contact_info['phone'],
                'real_name': name_value
            }
        except Exception as e:
            print(f"处理简历失败: {str(e)}")
            return None

    def _save_csv(self, resumes):
        """Write the rows to a timestamped CSV in the working directory.

        Returns the filename on success, or None if writing failed.
        """
        filename = f'resumes_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        try:
            # utf-8-sig writes a BOM at the start of the file.
            pd.DataFrame(resumes).to_csv(filename, index=False, encoding='utf-8-sig')
        except Exception as e:
            # A failed save must not discard the data that was already fetched.
            print(f"保存 CSV 失败 ({filename}): {str(e)}")
            return None
        print(f'数据已保存到 {filename}')
        return filename

    def fetch_resumes(self, keyword='护工', page_size=10, page_index=1, save_csv=True):
        """Search resumes and enrich each hit with contact details.

        Args:
            keyword: Search keyword sent as ``keyword``.
            page_size: Sent as ``pageSize``.
            page_index: Sent as ``pageIndex``.
            save_csv: When true and at least one row was produced, also write
                the rows to ``resumes_<timestamp>.csv``.

        Returns:
            A list of row dicts in completion order (not necessarily the
            order returned by the search), or ``[]`` when the search fails
            or yields no items.
        """
        url = f'{self.base_url}/resumes'
        now_ms = int(time.time() * 1000)
        params = {
            '_': now_ms,
            'tab': 'resume',
            'keyword': keyword,
            't': now_ms,
            'info_subarea': '',
            'info_category': '',
            'pageSize': page_size,
            'pageIndex': page_index,
            'showStatus': 'true'
        }

        try:
            data = self._make_request(url, params)
            items = data.get('data', {}).get('items', [])
            if not items:
                return []

            resumes = []
            # Each item needs two extra HTTP calls, so fan them out on threads.
            with ThreadPoolExecutor(max_workers=10) as executor:
                futures = [executor.submit(self._process_resume, item) for item in items]
                for idx, future in enumerate(as_completed(futures), 1):
                    result = future.result()
                    print(f"已处理: {idx}/{len(items)}")
                    if result:
                        resumes.append(result)
        except Exception as e:
            print(f"获取简历数据失败: {str(e)}")
            return []

        # Optional CSV export; done outside the try so a write error cannot
        # turn a successful fetch into an empty result.
        if resumes and save_csv:
            self._save_csv(resumes)

        return resumes


if __name__ == '__main__':
    api = ResumeAPI()
    api.fetch_resumes()