# TS-ResHub/resumes/management/commands/import_nursing_resumes.py
# (148 lines, 7.3 KiB, Python)
#
# Repository-viewer notice: this file contains Unicode characters that might be
# confused with other characters (full-width punctuation inside string literals
# and comments). If this is intentional, the notice can be safely ignored.

from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from pandas._libs.tslibs.timestamps import Timestamp
import pandas as pd
from resumes.models import ResumeBasic
import re
import traceback
class Command(BaseCommand):
    """Import resume rows from an Excel sheet into ``ResumeBasic``.

    The sheet's Chinese column headers are renamed to model field names,
    every row is tagged with the ``--source`` and ``--keyword`` values, and
    each row is written with ``get_or_create`` keyed on ``resume_id`` followed
    by a field-by-field assignment and ``save()``.

    A failing row never aborts the run: it is reported, counted, and listed
    again in the summary printed at the end.
    """

    # NOTE(review): the help text says accounting resumes ("会计类") while the
    # module is named import_nursing_resumes -- confirm which one is intended.
    help = "导入会计类简历(支持 --keyword 和 --source 参数)"

    # Value stored in ``update_time`` when the cell is missing or unparsable.
    _FALLBACK_UPDATE_TIME = datetime(2019, 12, 12)

    # First run of digits in an age cell, e.g. "38岁" -> "38".
    _AGE_DIGITS = re.compile(r'\d+')

    # Excel header -> model field.  Several headers share one target; when a
    # sheet contains more than one of them, the header listed first here wins
    # and the later ones only fill its empty cells (see _normalise_columns).
    _COLUMN_RENAMES = {
        '姓名': 'name', '性别': 'gender', '年龄': 'age', '求职区域': 'job_location', '生日': 'birthday',
        '学校': 'school', '期望职务': 'expected_position',
        '手机': 'phone', '婚姻': 'marital_status', '身高': 'height', '体重': 'weight', '电话': 'phone',
        '学历': 'education', '毕业学校': 'school', '工作经验': 'work_years',
        '现居住地': 'current_location', '工作地点': 'job_location', '到岗时间': 'available_time',
        '最后活跃时间': 'update_time', '最高学历': 'education', '婚姻状态': 'marital_status',
        '民族': 'ethnicity', '工作职能': 'job_function', '意向岗位': 'intended_position',
        '从事行业': 'industry', '期望薪资': 'expected_salary', '求职类型': 'job_property',
        '现居地': 'current_location',
        '求职状态': 'job_status', '工作1经历': 'work_1_experience', '工作1时间': 'work_1_time',
        '工作1内容': 'work_1_description',
        '工作2经历': 'work_2_experience', '工作2时间': 'work_2_time', '工作2内容': 'work_2_description',
        '工作3经历': 'work_3_experience', '工作3时间': 'work_3_time', '工作3内容': 'work_3_description',
        '工作4经历': 'work_4_experience', '工作4时间': 'work_4_time', '工作4内容': 'work_4_description',
    }

    # Model fields written for every row, in the order they are reported when
    # a row fails.
    _MODEL_FIELDS = (
        'name', 'gender', 'age', 'phone', 'marital_status', 'height', 'weight',
        'education', 'school', 'work_years', 'current_location', 'job_location',
        'available_time', 'update_time', 'ethnicity', 'job_function',
        'intended_position', 'industry', 'expected_salary', 'job_property',
        'job_status', 'source_id', 'crawl_keywords', 'birthday',
        'expected_position',
        'work_1_experience', 'work_1_time', 'work_1_description',
        'work_2_experience', 'work_2_time', 'work_2_description',
        'work_3_experience', 'work_3_time', 'work_3_description',
        'work_4_experience', 'work_4_time', 'work_4_description',
    )

    def add_arguments(self, parser):
        """Register ``--file`` (required), ``--keyword`` and ``--source``."""
        parser.add_argument('--file', required=True, help='Excel 文件路径')
        parser.add_argument('--keyword', default='', help='crawl_keywords 值')
        parser.add_argument('--source', type=int, default=1, help='source_id 值')

    def handle(self, *args, **options):
        """Read the Excel file and import it row by row.

        Every row is attempted independently; failures are reported as they
        happen and summarised after the last row.
        """
        df = self._normalise_columns(pd.read_excel(options['file']))
        # Every row of one run shares the same source and crawl keyword.
        df['source_id'] = options['source']
        df['crawl_keywords'] = options['keyword']

        success_count = 0
        errors = []
        # Row numbers are reported as spreadsheet rows: the header is row 1,
        # so the first data row is row 2.  Counting positions instead of using
        # the index label keeps this correct for any index type.
        for row_number, (_, row) in enumerate(df.iterrows(), start=2):
            # Reset per row so the failure report below never reads unbound
            # names or values left over from the previous row.
            resume_id = None
            defaults = {}
            try:
                resume_id = self._clean_value(row.get('resume_id'))
                defaults = self._build_defaults(row)
                self._save_resume(resume_id, defaults)
            except Exception as exc:
                # Top-level boundary for one row: report it and keep going.
                errors.append((row_number, str(exc)))
                self._report_row_failure(row_number, exc, resume_id, defaults)
            else:
                success_count += 1

        fail_count = len(errors)
        self.stdout.write(self.style.SUCCESS(f"导入完成!总数:{len(df)},成功:{success_count},失败:{fail_count}"))
        if errors:
            self.stdout.write(self.style.WARNING("失败记录如下:"))
            for line_no, msg in errors:
                self.stdout.write(f"{line_no} 行出错:{msg}")

    @classmethod
    def _normalise_columns(cls, df):
        """Rename known Excel headers to model field names.

        When two headers present in the sheet map to the same field, the
        columns are merged instead of producing duplicate column names:
        the first one (in ``_COLUMN_RENAMES`` order) is kept and its empty
        cells are filled from the later one.  Duplicate names would make
        ``row.get(field)`` return a Series and break every row.
        """
        for source, target in cls._COLUMN_RENAMES.items():
            if source not in df.columns:
                continue
            if target in df.columns:
                df[target] = df[target].combine_first(df[source])
                df = df.drop(columns=[source])
            else:
                df = df.rename(columns={source: target})
        return df

    @classmethod
    def _clean_value(cls, value, field=None):
        """Convert one cell into a value suitable for assignment to the model.

        Missing cells (``None``/NaN/NaT) become ``None`` and pandas
        ``Timestamp`` objects become ``datetime``.  For ``update_time`` the
        result is always a ``datetime``: strings are parsed, and anything
        missing, unparsable or of another type yields the fallback date.
        """
        is_update_time = field == 'update_time'
        if value is None or pd.isna(value):
            return cls._FALLBACK_UPDATE_TIME if is_update_time else None
        if isinstance(value, Timestamp):
            return value.to_pydatetime()
        if not is_update_time:
            return value
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                parsed = pd.to_datetime(value)
            except Exception:
                # Deliberate best effort: any parse failure uses the fallback.
                return cls._FALLBACK_UPDATE_TIME
            if pd.isna(parsed):
                # Parsed to NaT: there is no usable date in the string.
                return cls._FALLBACK_UPDATE_TIME
            return parsed.to_pydatetime()
        return cls._FALLBACK_UPDATE_TIME

    @classmethod
    def _extract_age(cls, raw_age):
        """Return the first integer found in an age cell, e.g. "38岁" -> 38.

        Returns ``None`` for missing or falsy cells and for text without
        any digits.
        """
        if pd.isna(raw_age) or not raw_age:
            return None
        found = cls._AGE_DIGITS.search(str(raw_age))
        return int(found.group()) if found else None

    @classmethod
    def _build_defaults(cls, row):
        """Map one DataFrame row to a ``{model field: value}`` dict.

        Columns absent from the sheet yield ``None`` (or the fallback date
        for ``update_time``) instead of raising ``KeyError``.
        """
        defaults = {}
        for field in cls._MODEL_FIELDS:
            raw = row.get(field)
            if field == 'age':
                defaults[field] = cls._extract_age(raw)
            else:
                defaults[field] = cls._clean_value(raw, field)
        return defaults

    def _save_resume(self, resume_id, defaults):
        """Fetch or create the record for ``resume_id`` and store ``defaults``.

        Fields are assigned one at a time so that a failing assignment can be
        reported with the offending field and value before it is re-raised.
        """
        # NOTE(review): when the sheet has no resume_id column this looks up
        # resume_id=None for every row -- confirm that is the intended key.
        obj, _ = ResumeBasic.objects.get_or_create(resume_id=resume_id)
        for k, v in defaults.items():
            try:
                setattr(obj, k, v)
            except Exception as field_error:
                self.stdout.write(f"[字段设置错误] {k} = {v!r} ({type(v)}) → {field_error}")
                raise
        obj.save()

    def _report_row_failure(self, row_number, exc, resume_id, defaults):
        """Print the error, the values built for the row, and the traceback.

        Must be called from inside the ``except`` block that caught ``exc`` so
        that ``traceback.format_exc()`` still sees the active exception.
        """
        self.stdout.write(f"\n❌ 第 {row_number} 行出错:{exc}")
        self.stdout.write(f"resume_id: {repr(resume_id)} ({type(resume_id).__name__})")
        for k, v in defaults.items():
            self.stdout.write(f"{k:<20} | {repr(v):<30} | {type(v).__name__}")
        self.stderr.write(traceback.format_exc())