From 15be08c866581f97f6ca5e97740870961fa151e0 Mon Sep 17 00:00:00 2001
From: Franklin-F <dewujie64@gmail.com>
Date: Tue, 27 May 2025 23:09:47 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0YutianTopSpider=E7=88=AC?=
 =?UTF-8?q?=E8=99=AB=E4=BB=A5=E6=8F=90=E5=8F=96=E8=81=8C=E4=BD=8D=E4=BF=A1?=
 =?UTF-8?q?=E6=81=AF=EF=BC=8C=E5=A2=9E=E5=8A=A0=E5=85=AC=E5=8F=B8ID?=
 =?UTF-8?q?=E7=9A=84=E8=8E=B7=E5=8F=96=E5=92=8C=E5=85=AC=E5=8F=B8=E4=BF=A1?=
 =?UTF-8?q?=E6=81=AF=E8=A7=A3=E6=9E=90=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../spiders/yutian_top_compary.py             | 61 ++++++++++++-------
 1 file changed, 40 insertions(+), 21 deletions(-)

diff --git a/TS_resume_spider/spiders/yutian_top_compary.py b/TS_resume_spider/spiders/yutian_top_compary.py
index 581d9e5..abff592 100644
--- a/TS_resume_spider/spiders/yutian_top_compary.py
+++ b/TS_resume_spider/spiders/yutian_top_compary.py
@@ -2,8 +2,7 @@ import scrapy
 import json
 import re
 
-from lxml.extensions import xpath_code
-from openpyxl.styles.builtins import title
+from sympy.benchmarks.bench_meijerint import bench
 
 
 def first_or_empty(xpobj, path):
@@ -36,7 +35,7 @@ class YutianTopSpider(scrapy.Spider):
     def start_requests(self):
         for i in range(1, 39):
             yield scrapy.Request(
-                url=self.start_urls[0],
+                url=f'https://zp.yutian.top/search?keywords=&page={i}',
                 method='GET',
                 headers=self.headers,
                 dont_filter=True,
@@ -88,22 +87,42 @@ class YutianTopSpider(scrapy.Spider):
                     experience = infovalue[index].strip()
                 if "学历要求" in key:
                     education = infovalue[index].strip()
-        # TODO: 未完继续
-
-        yield {
-            "title": title, # 职位名称
-            "nature": nature, # 职位性质
-            "category": category, # 职位类别
-            "region": region, # 职位区域
-            "experience": experience, # 工作经历要求
-            "education": education, # 学历要求
-            "salary": salary, # 职位薪资
-            "position_status": position_status, # 职位状态
-            "description": description, # 职位描述(详情)
-            "contact_name": contact_name, # 联系人姓名
-            "contact_info": contact_info, # 联系方式
-            "benefits": benefits, # 职位福利
-            "openings": openings, # 招聘人数
-            "website_id": 2, # 网站ID
-            "company_name": company_name, # 所属企业名称
+        salary = first_or_empty(response.xpath, "//div[@class='salary']/text()")
+        description = first_or_empty(response.xpath, "//div[@class='job-describe']/text()")
+        contact_name = first_or_empty(response.xpath,
+                                      "//div[@class='bg-mask']/div[@class='connect-info-item']/span[@class='value']/text()")
+        contact_info = first_or_empty(response.xpath,
+                                      "(//div[@class='bg-mask']//div[@class='connect-info-item'])[2]/span[@class='value']/text()")
+        benefits = ""
+        if openings == "若干":
+            openings = 1
+        company_name = first_or_empty(response.xpath, "//div[@class='company-name']/a/text()")
+        meta = {
+            "title": title,  # 职位名称
+            "nature": nature,  # 职位性质
+            "category": category,  # 职位类别
+            "region": region,  # 职位区域
+            "experience": experience,  # 工作经历要求
+            "education": education,  # 学历要求
+            "salary": salary,  # 职位薪资
+            "position_status": 1,  # 职位状态
+            "description": description,  # 职位描述(详情)
+            "contact_name": contact_name,  # 联系人姓名
+            "contact_info": contact_info,  # 联系方式
+            "benefits": benefits,  # 职位福利
+            "openings": openings,  # 招聘人数
+            "website_id": 2,  # 网站ID
+            "company_name": company_name,  # 所属企业名称
         }
+        company_id = first_or_empty(response.xpath, "//div[@class='job-detail']/@data-io-company-id")
+        yield scrapy.Request(
+            url=f"https://zp.yutian.top/company/{company_id}.html",
+            headers=self.headers,
+            callback=self.parse_company,
+            meta=meta,
+            dont_filter=True,
+        )
+
+    def parse_company(self, response):
+        pass
+        # TODO: parse the company page; the job fields gathered by the requesting callback are passed along via the request's meta (response.meta)