"""Django management command that imports résumé rows from an Excel sheet.

Each spreadsheet row is normalised (Chinese headers renamed to model field
names, missing cells turned into ``None``, timestamps converted to native
``datetime`` objects) and then written to ``ResumeBasic`` via
``get_or_create`` keyed on ``resume_id``.
"""
from datetime import datetime, timedelta
import re
import traceback

from django.core.management.base import BaseCommand
import pandas as pd
from pandas._libs.tslibs.timestamps import Timestamp

from resumes.models import ResumeBasic

# Value stored in ``update_time`` whenever the sheet has no usable timestamp.
DEFAULT_UPDATE_TIME = datetime(2019, 12, 12)

# Compiled once; used to pull the first run of digits out of an age cell.
_AGE_DIGITS = re.compile(r'\d+')

# Spreadsheet header -> model field name. Several headers intentionally map
# to the same field because different export formats use different labels.
COLUMN_RENAMES = {
    '姓名': 'name',
    '性别': 'gender',
    '年龄': 'age',
    '求职区域': 'job_location',
    '生日': 'birthday',
    '学校': 'school',
    '期望职务': 'expected_position',
    '手机': 'phone',
    '婚姻': 'marital_status',
    '身高': 'height',
    '体重': 'weight',
    '电话': 'phone',
    '学历': 'education',
    '毕业学校': 'school',
    '工作经验': 'work_years',
    '现居住地': 'current_location',
    '工作地点': 'job_location',
    '到岗时间': 'available_time',
    '最后活跃时间': 'update_time',
    '最高学历': 'education',
    '婚姻状态': 'marital_status',
    '民族': 'ethnicity',
    '工作职能': 'job_function',
    '意向岗位': 'intended_position',
    '从事行业': 'industry',
    '期望薪资': 'expected_salary',
    '求职类型': 'job_property',
    '现居地': 'current_location',
    '求职状态': 'job_status',
    '工作1经历': 'work_1_experience',
    '工作1时间': 'work_1_time',
    '工作1内容': 'work_1_description',
    '工作2经历': 'work_2_experience',
    '工作2时间': 'work_2_time',
    '工作2内容': 'work_2_description',
    '工作3经历': 'work_3_experience',
    '工作3时间': 'work_3_time',
    '工作3内容': 'work_3_description',
    '工作4经历': 'work_4_experience',
    '工作4时间': 'work_4_time',
    '工作4内容': 'work_4_description',
}

# Model fields written for every row, in the order they are assigned.
MODEL_FIELDS = (
    'name',
    'gender',
    'age',
    'phone',
    'marital_status',
    'height',
    'weight',
    'education',
    'school',
    'work_years',
    'current_location',
    'job_location',
    'available_time',
    'update_time',
    'ethnicity',
    'job_function',
    'intended_position',
    'industry',
    'expected_salary',
    'job_property',
    'job_status',
    'source_id',
    'crawl_keywords',
    'birthday',
    'expected_position',
    'work_1_experience',
    'work_1_time',
    'work_1_description',
    'work_2_experience',
    'work_2_time',
    'work_2_description',
    'work_3_experience',
    'work_3_time',
    'work_3_description',
    'work_4_experience',
    'work_4_time',
    'work_4_description',
)


def _first_value(cell):
    """Collapse *cell* to a scalar.

    When two sheet headers are renamed to the same field the frame has
    duplicate column labels and ``row.get(label)`` yields a ``Series``
    instead of a scalar. In that case return the first non-null entry
    (or ``None`` if all are null); scalars are returned unchanged.
    """
    if isinstance(cell, pd.Series):
        present = cell.dropna()
        return None if present.empty else present.iloc[0]
    return cell


def _coerce_update_time(value):
    """Return *value* as a native ``datetime``, or ``DEFAULT_UPDATE_TIME``.

    Accepts pandas ``Timestamp``, ``datetime`` and parseable strings; anything
    missing, unparseable or of another type falls back to the default.
    """
    if value is None or pd.isna(value):
        return DEFAULT_UPDATE_TIME
    if isinstance(value, Timestamp):
        return value.to_pydatetime()
    if isinstance(value, str):
        try:
            parsed = pd.to_datetime(value)
        except (ValueError, TypeError, OverflowError):
            return DEFAULT_UPDATE_TIME
        # An empty/blank string can parse to NaT rather than raising.
        if pd.isna(parsed):
            return DEFAULT_UPDATE_TIME
        return parsed.to_pydatetime()
    if isinstance(value, datetime):
        return value
    return DEFAULT_UPDATE_TIME


def _coerce_value(value, field=None):
    """Normalise one spreadsheet cell for assignment to a model field.

    Missing cells (``None``/NaN/NaT) become ``None``; pandas ``Timestamp``
    becomes a native ``datetime``; everything else is passed through.
    ``field='update_time'`` switches to the stricter timestamp handling of
    ``_coerce_update_time``, which never returns ``None``.
    """
    value = _first_value(value)
    if field == 'update_time':
        return _coerce_update_time(value)
    if value is None or pd.isna(value):
        return None
    if isinstance(value, Timestamp):
        return value.to_pydatetime()
    return value


def _extract_age(raw):
    """Return the first integer found in an age cell (e.g. "38岁" -> 38).

    Returns ``None`` when the cell is missing, falsy (empty string or 0) or
    contains no digits.
    """
    raw = _first_value(raw)
    if raw is None or pd.isna(raw) or not raw:
        return None
    found = _AGE_DIGITS.search(str(raw))
    return int(found.group()) if found else None


def _build_defaults(row):
    """Build the ``{field: value}`` mapping to write for one sheet *row*.

    Every lookup uses ``row.get`` so a column that is absent from the sheet
    yields ``None`` (or the default timestamp for ``update_time``) instead of
    raising ``KeyError``.
    """
    defaults = {}
    for field in MODEL_FIELDS:
        cell = row.get(field)
        if field == 'age':
            defaults[field] = _extract_age(cell)
        else:
            defaults[field] = _coerce_value(cell, field=field)
    return defaults


class Command(BaseCommand):
    """Import résumé rows from an Excel file into ``ResumeBasic``."""

    help = "导入会计类简历(支持 --keyword 和 --source 参数)"

    def add_arguments(self, parser):
        """Register ``--file`` (required), ``--keyword`` and ``--source``."""
        parser.add_argument('--file', required=True, help='Excel 文件路径')
        parser.add_argument('--keyword', default='', help='crawl_keywords 值')
        parser.add_argument('--source', type=int, default=1, help='source_id 值')

    def handle(self, *args, **options):
        """Read the Excel file and upsert one ``ResumeBasic`` per row.

        A failing row is reported (message, field dump, traceback) and
        counted, but does not stop the import. A summary is written at the
        end, followed by the list of failed rows if there were any.
        """
        filepath = options['file']
        default_keyword = options['keyword']
        default_source = options['source']

        df = pd.read_excel(filepath)
        present_renames = {
            header: field
            for header, field in COLUMN_RENAMES.items()
            if header in df.columns
        }
        df.rename(columns=present_renames, inplace=True)
        # The command-line values apply to every row of the sheet.
        df['source_id'] = default_source
        df['crawl_keywords'] = default_keyword

        success_count = 0
        fail_count = 0
        errors = []

        for i, row in df.iterrows():
            # Reset per row so the error report below never touches an
            # unbound name or shows values left over from a previous row.
            resume_id = None
            defaults = {}
            try:
                # NOTE(review): no sheet header is renamed to ``resume_id``,
                # so this is None unless the sheet already has a column with
                # that exact name — confirm that is the intended key.
                resume_id = _coerce_value(row.get('resume_id'))
                defaults = _build_defaults(row)

                # Safe approach: get_or_create, then set field by field so a
                # bad value can be attributed to the field that rejected it.
                obj, _ = ResumeBasic.objects.get_or_create(resume_id=resume_id)
                for k, v in defaults.items():
                    try:
                        setattr(obj, k, v)
                    except Exception as field_error:
                        self.stdout.write(
                            f"[字段设置错误] {k} = {v!r} ({type(v)}) → {field_error}"
                        )
                        raise
                obj.save()
                success_count += 1
            except Exception as e:
                # Top-level per-row boundary: record and continue.
                # ``i + 2`` maps the 0-based frame index to the Excel row
                # number (1-based plus the header row).
                fail_count += 1
                errors.append((i + 2, str(e)))
                self.stdout.write(f"\n❌ 第 {i + 2} 行出错:{e}")
                self.stdout.write(
                    f"resume_id: {repr(resume_id)} ({type(resume_id).__name__})"
                )
                for k, v in defaults.items():
                    self.stdout.write(
                        f"{k:<20} | {repr(v):<30} | {type(v).__name__}"
                    )
                traceback.print_exc()

        self.stdout.write(
            self.style.SUCCESS(
                f"导入完成!总数:{len(df)},成功:{success_count},失败:{fail_count}"
            )
        )
        if errors:
            self.stdout.write(self.style.WARNING("失败记录如下:"))
            for line_no, msg in errors:
                self.stdout.write(f"  第 {line_no} 行出错:{msg}")