"""Fetch a single job-posting page and print its company-id attribute.

The script puts the project root (two directories above this file) on
``sys.path`` so the project-local ``web.Requests_Except`` module can be
imported, configures a shared ``MR`` client with browser-like headers, and
then runs one XPath query against the fetched page.
"""
import os
import re
import sys

# The project root is two levels above this file; it must be importable
# before the project-local import below is attempted.
project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "..")
)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from web.Requests_Except import MR

base_url = 'www.yutian.top'
protocol = 'https'

# Headers installed as the client's defaults via set_default_headers().
default_headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.yutian.top',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://www.yutian.top/enterprise/resume_store/list',
    'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
}

# Shared client used by main(); created at import time.
Requests = MR(base_url, protocol)
Requests.set_default_headers(default_headers)


def main():
    """Request one position page and print the ``data-io-company-id`` value(s).

    The page URL is fixed. The result of the XPath query is printed as
    returned by the client's XPath object (presumably a list of attribute
    strings -- TODO confirm against ``MR``).
    """
    # NOTE(review): this host (zp.yutian.top) differs from base_url
    # (www.yutian.top); how MR.get treats an absolute URL is not visible here.
    url = "https://zp.yutian.top/position/9076.html"
    page = Requests.get(url)
    tree = page.xpath()

    # Disabled draft: pairs each job-info label with its value and picks out
    # individual fields. The label literals are the page's Chinese field
    # names and are kept verbatim so the code still matches the page if it is
    # re-enabled. Glosses: 工作性质 = job nature, 职位类别 = job category,
    # 工作区域 = work region, 招聘人数 = number of openings,
    # 工作年限 = years of experience, 学历要求 = education requirement.
    #
    # infokey = xpathobj.xpath("//span[@class='job-info-item']/span[@class='label']/text()")
    # infovalue = xpathobj.xpath("//span[@class='job-info-item']/span[@class='value hide-text']/text()")
    # for index, key in enumerate(infokey):
    #     if '工作性质' in key:
    #         nature = infovalue[index].strip()
    #     if "职位类别" in key:
    #         category = infovalue[index].strip()
    #     if "工作区域" in key:
    #         region = infovalue[index].strip()
    #     if "招聘人数" in key:
    #         openings = infovalue[index].strip()
    #     if "工作年限" in key:
    #         experience = infovalue[index].strip()
    #     if "学历要求" in key:
    #         education = infovalue[index].strip()

    company_ids = tree.xpath("//div[@class='job-detail']/@data-io-company-id")
    print(company_ids)


if __name__ == '__main__':
    main()