diff --git a/TS_resume_spider/pipelines.py b/TS_resume_spider/pipelines.py index b872d0c..76ab4bb 100644 --- a/TS_resume_spider/pipelines.py +++ b/TS_resume_spider/pipelines.py @@ -120,6 +120,24 @@ class YTSavePipeline: class CompanySavePipeline: def process_item(self, item, spider): + if spider.name == 'yutian_top_compary': + company = item.get("company") + if not company: + return item + if 'website' in company: + company['website_id'] = company.pop('website') + company_name = company.get("name") + website_id = company.get("website_id") + if not company_name or not website_id: + return item + try: + DB.insert_company(company) + # 设置 company_id 回写 + item["position"]["company_id"] = DB.get_company_id(company_name) + except Exception as e: + spider.logger.warning(f"❌ 写入失败:company_name={company_name}, 错误={e}") + return item + if spider.name not in ['zhrczp_com_compary']: return item @@ -138,6 +156,19 @@ class CompanySavePipeline: class PositionSavePipeline: def process_item(self, item, spider): + if spider.name == 'yutian_top_compary': + position = item.get("position") + if not position: + return item + title = position.get("title") + if not title: + return item + try: + DB.insert_position(position) + except Exception as e: + spider.logger.warning(f"❌ 写入失败:title={title}, company_id={position.get('company_id')}, 错误={e}") + return item + if spider.name not in ['zhrczp_com_position']: return item