更新YutianTopSpider爬虫以处理公司和职位信息的保存逻辑,增加错误处理和公司ID回写功能
This commit is contained in:
parent
5e4ff46c80
commit
f864ea6bb3
@ -120,6 +120,24 @@ class YTSavePipeline:
|
|||||||
|
|
||||||
class CompanySavePipeline:
|
class CompanySavePipeline:
|
||||||
def process_item(self, item, spider):
|
def process_item(self, item, spider):
|
||||||
|
if spider.name == 'yutian_top_compary':
|
||||||
|
company = item.get("company")
|
||||||
|
if not company:
|
||||||
|
return item
|
||||||
|
if 'website' in company:
|
||||||
|
company['website_id'] = company.pop('website')
|
||||||
|
company_name = company.get("name")
|
||||||
|
website_id = company.get("website_id")
|
||||||
|
if not company_name or not website_id:
|
||||||
|
return item
|
||||||
|
try:
|
||||||
|
DB.insert_company(company)
|
||||||
|
# 设置 company_id 回写
|
||||||
|
item["position"]["company_id"] = DB.get_company_id(company_name)
|
||||||
|
except Exception as e:
|
||||||
|
spider.logger.warning(f"❌ 写入失败:company_name={company_name}, 错误={e}")
|
||||||
|
return item
|
||||||
|
|
||||||
if spider.name not in ['zhrczp_com_compary']:
|
if spider.name not in ['zhrczp_com_compary']:
|
||||||
return item
|
return item
|
||||||
|
|
||||||
@ -138,6 +156,19 @@ class CompanySavePipeline:
|
|||||||
|
|
||||||
class PositionSavePipeline:
|
class PositionSavePipeline:
|
||||||
def process_item(self, item, spider):
|
def process_item(self, item, spider):
|
||||||
|
if spider.name == 'yutian_top_compary':
|
||||||
|
position = item.get("position")
|
||||||
|
if not position:
|
||||||
|
return item
|
||||||
|
title = position.get("title")
|
||||||
|
if not title:
|
||||||
|
return item
|
||||||
|
try:
|
||||||
|
DB.insert_position(position)
|
||||||
|
except Exception as e:
|
||||||
|
spider.logger.warning(f"❌ 写入失败:title={title}, company_id={position.get('company_id')}, 错误={e}")
|
||||||
|
return item
|
||||||
|
|
||||||
if spider.name not in ['zhrczp_com_position']:
|
if spider.name not in ['zhrczp_com_position']:
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user