# Crawler/web/yutian_top/get_position.py
# (file-viewer metadata at time of capture: 59 lines, 2.1 KiB, Python)

import re
import sys, os
# Make the directory two levels above this file's directory importable, so
# that the `web.*` package import that follows resolves when this file is
# run directly as a script.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if project_root not in sys.path:
    # Prepend so this checkout takes precedence over any same-named package
    # found later on the search path.
    sys.path.insert(0, project_root)
from web.Requests_Except import MR
# Host and URL scheme passed to the MR client constructor.
base_url = 'www.yutian.top'
protocol = 'https'

# Request headers that mimic an XHR call made by Chrome 134 on Windows from
# the site's enterprise resume-store page. Key order is kept as captured.
default_headers = {
    # Content negotiation and caching.
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'content-type': 'application/json;charset=UTF-8',

    # Request provenance.
    'origin': 'https://www.yutian.top',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://www.yutian.top/enterprise/resume_store/list',

    # Client hints and fetch metadata.
    'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',

    # Browser identification.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
}
# Module-level client shared by this script, built from the host and scheme
# defined above.
# NOTE(review): MR is imported from web.Requests_Except, which is not visible
# here; presumably it wraps an HTTP session — confirm against that module.
Requests = MR(base_url, protocol)
# Register the browser-like headers as the client's defaults; this must run
# after the client is constructed.
Requests.set_default_headers(default_headers)
def main(url: str = "https://zp.yutian.top/position/9076.html"):
    """Fetch a job-posting page and print the company id embedded in it.

    Args:
        url: Absolute URL of the position page to fetch. Defaults to the
            sample posting this script was originally written against.

    Returns:
        Whatever the XPath query for the ``data-io-company-id`` attribute of
        ``div[@class='job-detail']`` yields. The same value is printed to
        stdout. NOTE(review): presumably a list of strings, as with lxml —
        confirm against the MR response wrapper.
    """
    response = Requests.get(url)
    # NOTE(review): `response.xpath()` is supplied by the project's MR
    # wrapper; it appears to return a parsed document that supports
    # `.xpath(expr)` queries — confirm against web.Requests_Except.
    page = response.xpath()

    # TODO: job-info field extraction is currently disabled. Labels are at
    #   //span[@class='job-info-item']/span[@class='label']/text()
    # and the matching values (same index) at
    #   //span[@class='job-info-item']/span[@class='value hide-text']/text()
    # Labels of interest: 工作性质 (nature), 职位类别 (category),
    # 工作区域 (region), 招聘人数 (openings), 工作年限 (experience),
    # 学历要求 (education). Values were stripped of surrounding whitespace.

    company_ids = page.xpath("//div[@class='job-detail']/@data-io-company-id")
    print(company_ids)
    return company_ids
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()