# Crawler/web/yutian_top/get_position.py

import re
import sys, os

# Put the directory two levels above this file on the import path (at the
# front, and only once) so the project-local import that follows can resolve.
project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from web.Requests_Except import MR

# Host and scheme handed to the shared HTTP client.
base_url = "www.yutian.top"
protocol = "https"

# Browser-like request headers (they mirror a desktop Chrome 134 session on
# Windows). Key order is kept exactly as captured.
default_headers = {
    # Content negotiation and caching.
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "no-cache",
    "content-type": "application/json;charset=UTF-8",
    # Request provenance.
    "origin": "https://www.yutian.top",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.yutian.top/enterprise/resume_store/list",
    # Client hints and fetch metadata.
    "sec-ch-ua": '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    # Split across lines for readability; the pieces concatenate to a single
    # user-agent string.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/134.0.0.0 Safari/537.36"
    ),
}

# Module-wide client built from the project's MR wrapper, given the host and
# scheme defined above. Created at import time, so importing this module
# already instantiates the client.
Requests = MR(base_url, protocol)
# Register the browser-like headers as the client's defaults.
# NOTE(review): presumably these are sent with every request made through
# this client - confirm against web.Requests_Except.
Requests.set_default_headers(default_headers)


def main():
    """Fetch one hard-coded position page and print its company-id attribute.

    The page is requested through the module-level ``Requests`` client, the
    response is turned into an XPath-capable object via ``response.xpath()``,
    and the ``data-io-company-id`` attribute of ``div`` elements whose class
    is exactly ``job-detail`` is queried and printed.

    NOTE(review): the query result is presumably a list of attribute strings
    (lxml-style) - confirm against the project's response wrapper.
    """
    position_url = "https://zp.yutian.top/position/9076.html"
    page = Requests.get(position_url).xpath()
    company_ids = page.xpath("//div[@class='job-detail']/@data-io-company-id")
    print(company_ids)

    # Disabled draft, kept for reference: read the label spans and the value
    # spans of each job-info item, then pick individual fields by matching
    # the label text shown on the page.
    # infokey = page.xpath("//span[@class='job-info-item']/span[@class='label']/text()")
    # infovalue = page.xpath("//span[@class='job-info-item']/span[@class='value hide-text']/text()")
    # for index, key in enumerate(infokey):
    #     if '工作性质' in key:  # nature of work
    #         nature = infovalue[index].strip()
    #     if "职位类别" in key:  # position category
    #         category = infovalue[index].strip()
    #     if "工作区域" in key:  # work region
    #         region = infovalue[index].strip()
    #     if "招聘人数" in key:  # number of openings
    #         openings = infovalue[index].strip()
    #     if "工作年限" in key:  # years of experience
    #         experience = infovalue[index].strip()
    #     if "学历要求" in key:  # education requirement
    #         education = infovalue[index].strip()


# Run the fetch only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()