From dc80fb6c72b88bac924e01e0509de0ebcb18c94c Mon Sep 17 00:00:00 2001 From: Franklin-F Date: Mon, 26 May 2025 22:10:45 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=85=AC=E5=8F=B8=E4=BB=8B?= =?UTF-8?q?=E7=BB=8D=E6=8F=90=E5=8F=96=E9=80=BB=E8=BE=91=E4=BB=A5=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E8=81=8C=E4=BD=8D=E4=BF=A1=E6=81=AF=E7=9A=84=E5=88=A4?= =?UTF-8?q?=E6=96=AD=EF=BC=9B=E8=B0=83=E6=95=B4=E7=88=AC=E8=99=AB=E8=AF=B7?= =?UTF-8?q?=E6=B1=82=E9=A1=B5=E6=95=B0=E8=87=B396?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TS_resume_spider/spiders/zhrczp_com_compary.py | 4 +++- TS_resume_spider/spiders/zhrczp_com_position.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/TS_resume_spider/spiders/zhrczp_com_compary.py b/TS_resume_spider/spiders/zhrczp_com_compary.py index 8c31bfa..2b6ffcd 100644 --- a/TS_resume_spider/spiders/zhrczp_com_compary.py +++ b/TS_resume_spider/spiders/zhrczp_com_compary.py @@ -15,11 +15,13 @@ def extract_company_data(xpathobj): intro_list = [t.strip() for t in xpathobj.xpath('//div[@class="company_img_auto"]/text()') if t.strip()] if not intro_list: intro_list = [t.strip() for t in xpathobj.xpath('//div[@class="company_img_auto"]/p/span/text()') if t.strip()] + # 判断一下有无职位 + job_list = xpathobj.xpath('//div[@class="comshow_job"]/div[@class="firm_post"]') introduction = "\r\n".join(intro_list) # 如果没有名称或介绍,直接忽略 - if not (name and introduction): + if not (name and (introduction or job_list)): return None # 公司详情信息 diff --git a/TS_resume_spider/spiders/zhrczp_com_position.py b/TS_resume_spider/spiders/zhrczp_com_position.py index 561c518..23f73ed 100644 --- a/TS_resume_spider/spiders/zhrczp_com_position.py +++ b/TS_resume_spider/spiders/zhrczp_com_position.py @@ -86,7 +86,7 @@ class ZunHuaComSpider(scrapy.Spider): } def start_requests(self): - for page in range(1, 2): + for page in range(1, 97): yield scrapy.Request( url=f"https://www.zhrczp.com/job/list/0-0-0-0_0_0_0_0_0_0_0-0-0-0-{page}.html", headers=self.headers,