From 688e7fb3f7daa8f09bf7fe41d5416d497ea1e4bf Mon Sep 17 00:00:00 2001
From: Franklin-F <dewujie64@gmail.com>
Date: Sun, 25 May 2025 22:59:19 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0.gitignore=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E4=BB=A5=E6=8E=92=E9=99=A4=E4=B8=8D=E5=BF=85=E8=A6=81=E7=9A=84?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=92=8C=E7=9B=AE=E5=BD=95=EF=BC=9B=E6=9B=B4?=
 =?UTF-8?q?=E6=96=B0=E7=88=AC=E8=99=AB=E4=BB=A3=E7=A0=81=E4=BB=A5=E5=A4=84?=
 =?UTF-8?q?=E7=90=86=E5=85=AC=E5=8F=B8=E4=BB=8B=E7=BB=8D=E7=9A=84=E6=8F=90?=
 =?UTF-8?q?=E5=8F=96=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                                    | 56 +++++++++++++++++++
 .../spiders/zhrczp_com_compary.py             |  2 +
 .../spiders/zhrczp_com_position.py            |  4 --
 3 files changed, 58 insertions(+), 4 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..59fa06a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,56 @@
+# === Python 缓存 ===
+__pycache__/
+*.py[cod]
+*$py.class
+
+# === 环境变量文件 ===
+.env
+.env.*
+
+# === 虚拟环境目录 ===
+venv/
+.venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+
+# === 安装构建缓存 ===
+*.egg
+*.egg-info/
+.eggs/
+dist/
+build/
+pip-log.txt
+
+# === 测试相关缓存文件 ===
+.coverage
+.tox/
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+
+# === 数据库相关 ===
+*.sqlite3
+db.sqlite3
+
+# === 日志文件 ===
+*.log
+logs/
+
+# === 静态与媒体文件（Django） ===
+media/
+static/
+staticfiles/
+
+# === IDE 配置（.idea/ 为 PyCharm，.vscode/ 为 VS Code） ===
+.idea/
+*.iml
+*.ipr
+*.iws
+.vscode/
+
+# === 系统自动生成文件（.DS_Store 为 macOS，Thumbs.db 为 Windows） ===
+.DS_Store
+Thumbs.db
diff --git a/TS_resume_spider/spiders/zhrczp_com_compary.py b/TS_resume_spider/spiders/zhrczp_com_compary.py
index 96f85d6..ac5c505 100644
--- a/TS_resume_spider/spiders/zhrczp_com_compary.py
+++ b/TS_resume_spider/spiders/zhrczp_com_compary.py
@@ -11,6 +11,8 @@ def extract_company_data(xpathobj):
     name = first_or_empty('//h1/a/text()')
     # 公司介绍段落
     intro_list = [t.strip() for t in xpathobj.xpath('//div[@class="company_img_auto"]/p/text()') if t.strip()]
+    if not intro_list:
+        intro_list = [t.strip() for t in xpathobj.xpath('//div[@class="company_img_auto"]/text()') if t.strip()]
     introduction = "\r\n".join(intro_list)
 
     # 如果没有名称或介绍，直接忽略
diff --git a/TS_resume_spider/spiders/zhrczp_com_position.py b/TS_resume_spider/spiders/zhrczp_com_position.py
index 075253e..561c518 100644
--- a/TS_resume_spider/spiders/zhrczp_com_position.py
+++ b/TS_resume_spider/spiders/zhrczp_com_position.py
@@ -8,9 +8,7 @@ def first_or_empty(xpobj,path):
     return lst[0].strip() if lst else ""
 
 def extract_position_data(xpathobj):
-    print("aaa")
     title = first_or_empty(xpathobj, '//h1[@class="job_details_name"]/text()')
-    print(title)
     if not title:
         return None
     nature = "全职"
@@ -64,7 +62,6 @@ def extract_position_data(xpathobj):
 
 def get_position_href(xpathobj):
     hrefs = xpathobj.xpath("//div[@class='yunjoblist_newname']/a/@href")
-    print(hrefs)
     return [href.strip() for href in hrefs if href.strip()]
 
 
@@ -98,7 +95,6 @@ class ZunHuaComSpider(scrapy.Spider):
             )
 
     def parse(self, response):
-        self.logger.info(f"Parsing page: {response.url}")
         xpathobj = etree.HTML(response.text)
         position_hrefs = get_position_href(xpathobj)
         if position_hrefs: