From 15be08c866581f97f6ca5e97740870961fa151e0 Mon Sep 17 00:00:00 2001 From: Franklin-F Date: Tue, 27 May 2025 23:09:47 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0YutianTopSpider=E7=88=AC?= =?UTF-8?q?=E8=99=AB=E4=BB=A5=E6=8F=90=E5=8F=96=E8=81=8C=E4=BD=8D=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=EF=BC=8C=E5=A2=9E=E5=8A=A0=E5=85=AC=E5=8F=B8ID?= =?UTF-8?q?=E7=9A=84=E8=8E=B7=E5=8F=96=E5=92=8C=E5=85=AC=E5=8F=B8=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E8=A7=A3=E6=9E=90=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../spiders/yutian_top_compary.py | 61 ++++++++++++------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/TS_resume_spider/spiders/yutian_top_compary.py b/TS_resume_spider/spiders/yutian_top_compary.py index 581d9e5..abff592 100644 --- a/TS_resume_spider/spiders/yutian_top_compary.py +++ b/TS_resume_spider/spiders/yutian_top_compary.py @@ -2,8 +2,7 @@ import scrapy import json import re -from lxml.extensions import xpath_code -from openpyxl.styles.builtins import title +from sympy.benchmarks.bench_meijerint import bench def first_or_empty(xpobj, path): @@ -36,7 +35,7 @@ class YutianTopSpider(scrapy.Spider): def start_requests(self): for i in range(1, 39): yield scrapy.Request( - url=self.start_urls[0], + url=f'https://zp.yutian.top/search?keywords=&page={i}', method='GET', headers=self.headers, dont_filter=True, @@ -88,22 +87,42 @@ class YutianTopSpider(scrapy.Spider): experience = infovalue[index].strip() if "学历要求" in key: education = infovalue[index].strip() - # TODO: 未完继续 - - yield { - "title": title, # 职位名称 - "nature": nature, # 职位性质 - "category": category, # 职位类别 - "region": region, # 职位区域 - "experience": experience, # 工作经历要求 - "education": education, # 学历要求 - "salary": salary, # 职位薪资 - "position_status": position_status, # 职位状态 - "description": description, # 职位描述(详情) - "contact_name": contact_name, # 联系人姓名 - "contact_info": contact_info, # 联系方式 - "benefits": benefits, # 职位福利 - "openings": openings, # 招聘人数 - "website_id": 2, # 网站ID - "company_name": company_name, # 所属企业名称 + salary = first_or_empty(response.xpath, "//div[@class='salary']/text()") + description = first_or_empty(response.xpath, "//div[@class='job-describe']/text()") + contact_name = first_or_empty(response.xpath, + "//div[@class='bg-mask']/div[@class='connect-info-item']/span[@class='value']/text()") + contact_info = first_or_empty(response.xpath, + "(//div[@class='bg-mask']//div[@class='connect-info-item'])[2]/span[@class='value']/text()") + benefits = "" + if openings == "若干": + openings = 1 + company_name = first_or_empty(response.xpath, "//div[@class='company-name']/a/text()") + meta = { + "title": title, # 职位名称 + "nature": nature, # 职位性质 + "category": category, # 职位类别 + "region": region, # 职位区域 + "experience": experience, # 工作经历要求 + "education": education, # 学历要求 + "salary": salary, # 职位薪资 + "position_status": 1, # 职位状态 + "description": description, # 职位描述(详情) + "contact_name": contact_name, # 联系人姓名 + "contact_info": contact_info, # 联系方式 + "benefits": benefits, # 职位福利 + "openings": openings, # 招聘人数 + "website_id": 2, # 网站ID + "company_name": company_name, # 所属企业名称 } + company_id = first_or_empty(response.xpath, "//div[@class='job-detail']/@data-io-company-id") + yield scrapy.Request( + url=f"https://zp.yutian.top/company/{company_id}.html", + headers=self.headers, + callback=self.parse_company, + meta=meta, + dont_filter=True, + ) + + def parse_company(self, response): + pass + # TODO: 解析公司信息