diff --git a/TS_resume_spider/spiders/yutian_top_compary.py b/TS_resume_spider/spiders/yutian_top_compary.py index 581d9e5..abff592 100644 --- a/TS_resume_spider/spiders/yutian_top_compary.py +++ b/TS_resume_spider/spiders/yutian_top_compary.py @@ -2,8 +2,7 @@ import scrapy import json import re -from lxml.extensions import xpath_code -from openpyxl.styles.builtins import title +from sympy.benchmarks.bench_meijerint import bench def first_or_empty(xpobj, path): @@ -36,7 +35,7 @@ class YutianTopSpider(scrapy.Spider): def start_requests(self): for i in range(1, 39): yield scrapy.Request( - url=self.start_urls[0], + url=f'https://zp.yutian.top/search?keywords=&page={i}', method='GET', headers=self.headers, dont_filter=True, @@ -88,22 +87,42 @@ class YutianTopSpider(scrapy.Spider): experience = infovalue[index].strip() if "学历要求" in key: education = infovalue[index].strip() - # TODO: 未完继续 - - yield { - "title": title, # 职位名称 - "nature": nature, # 职位性质 - "category": category, # 职位类别 - "region": region, # 职位区域 - "experience": experience, # 工作经历要求 - "education": education, # 学历要求 - "salary": salary, # 职位薪资 - "position_status": position_status, # 职位状态 - "description": description, # 职位描述(详情) - "contact_name": contact_name, # 联系人姓名 - "contact_info": contact_info, # 联系方式 - "benefits": benefits, # 职位福利 - "openings": openings, # 招聘人数 - "website_id": 2, # 网站ID - "company_name": company_name, # 所属企业名称 + salary = first_or_empty(response.xpath, "//div[@class='salary']/text()") + description = first_or_empty(response.xpath, "//div[@class='job-describe']/text()") + contact_name = first_or_empty(response.xpath, + "//div[@class='bg-mask']/div[@class='connect-info-item']/span[@class='value']/text()") + contact_info = first_or_empty(response.xpath, + "(//div[@class='bg-mask']//div[@class='connect-info-item'])[2]/span[@class='value']/text()") + benefits = "" + if openings == "若干": + openings = 1 + company_name = first_or_empty(response.xpath, "//div[@class='company-name']/a/text()") + meta = { + "title": title, # 职位名称 + "nature": nature, # 职位性质 + "category": category, # 职位类别 + "region": region, # 职位区域 + "experience": experience, # 工作经历要求 + "education": education, # 学历要求 + "salary": salary, # 职位薪资 + "position_status": 1, # 职位状态 + "description": description, # 职位描述(详情) + "contact_name": contact_name, # 联系人姓名 + "contact_info": contact_info, # 联系方式 + "benefits": benefits, # 职位福利 + "openings": openings, # 招聘人数 + "website_id": 2, # 网站ID + "company_name": company_name, # 所属企业名称 } + company_id = first_or_empty(response.xpath, "//div[@class='job-detail']/@data-io-company-id") + yield scrapy.Request( + url=f"https://zp.yutian.top/company/{company_id}.html", + headers=self.headers, + callback=self.parse_company, + meta=meta, + dont_filter=True, + ) + + def parse_company(self, response): + pass + # TODO: 解析公司信息