TS-ResHub/resumes/management/commands/import_accounting_resumes.py

140 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from pandas._libs.tslibs.timestamps import Timestamp
import pandas as pd
from resumes.models import ResumeBasic
import re
import traceback
class Command(BaseCommand):
    """Import accounting resumes from an Excel sheet into ``ResumeBasic``.

    The sheet is read with ``pandas.read_excel``; its Chinese column headers
    are renamed to model field names, every row is stamped with the
    ``--source`` and ``--keyword`` values, and each row is written through
    ``ResumeBasic.objects.get_or_create(resume_id=...)`` followed by a
    field-by-field update and ``save()``.

    A failing row never aborts the run: it is counted, its values are dumped
    for diagnosis, and a summary of all failures is printed at the end.
    """

    help = "导入会计类简历(支持 --keyword 和 --source 参数)"

    # Placeholder stored whenever a row has no usable update time.
    _FALLBACK_UPDATE_TIME = datetime(2019, 12, 12)

    # Excel header -> model field name. Some fields have two alternative
    # headers (e.g. '学历' / '最高学历'); only the first one present is renamed.
    _COLUMN_RENAMES = {
        '姓名': 'name', '性别': 'gender', '年龄': 'age',
        '手机': 'phone', '婚姻状况': 'marital_status', '身高': 'height', '体重': 'weight',
        '学历': 'education', '毕业学校': 'school', '工作经验': 'work_years',
        '现居住地': 'current_location', '工作地点': 'job_location', '到岗时间': 'available_time',
        '更新时间': 'update_time', '最高学历': 'education', '婚姻状态': 'marital_status',
        '民族': 'ethnicity', '工作职能': 'job_function', '意向岗位': 'intended_position',
        '从事行业': 'industry', '期望薪资': 'expected_salary', '工作性质': 'job_property',
        '求职状态': 'job_status',
    }

    # Fields copied from each row onto the model instance, in write order.
    _FIELD_NAMES = (
        'name', 'gender', 'age', 'phone', 'marital_status', 'height', 'weight',
        'education', 'school', 'work_years', 'current_location', 'job_location',
        'available_time', 'update_time', 'ethnicity', 'job_function',
        'intended_position', 'industry', 'expected_salary', 'job_property',
        'job_status', 'source_id', 'crawl_keywords',
    )

    def add_arguments(self, parser):
        """Register ``--file`` (required), ``--keyword`` and ``--source``."""
        parser.add_argument('--file', required=True, help='Excel 文件路径')
        parser.add_argument('--keyword', default='', help='crawl_keywords 值')
        parser.add_argument('--source', type=int, default=2, help='source_id 值')

    @classmethod
    def _parse_update_time(cls, raw):
        """Turn one raw update-time cell into a naive ``datetime``.

        Recognises "刚刚" (now), "<n>小时前" (n hours ago) and "<n>天前"
        (n days ago); anything else is handed to ``pandas.to_datetime``.
        Missing or unparseable input yields the fallback timestamp instead of
        raising, so a single malformed cell cannot abort the whole import.
        """
        if pd.isna(raw):
            return cls._FALLBACK_UPDATE_TIME
        text = str(raw)
        now = datetime.now()
        if "刚刚" in text:
            return now
        # "小时前" is tested before "天前", matching the original precedence.
        for marker, unit in (("小时前", "hours"), ("天前", "days")):
            if marker not in text:
                continue
            amount = re.search(r'\d+', text)
            if amount is None:
                # Relative phrase without digits (e.g. "几小时前").
                return cls._FALLBACK_UPDATE_TIME
            try:
                return now - timedelta(**{unit: int(amount.group())})
            except OverflowError:
                # Absurdly large count; treat as unparseable.
                return cls._FALLBACK_UPDATE_TIME
        try:
            parsed = pd.to_datetime(text)
        except Exception:
            return cls._FALLBACK_UPDATE_TIME
        if pd.isna(parsed):
            return cls._FALLBACK_UPDATE_TIME
        return parsed.to_pydatetime()

    @classmethod
    def _clean(cls, value, field=None):
        """Convert one cell into a value suitable for assigning to the model.

        Missing values (``None`` / NaN / NaT) become ``None``, except for
        ``field == 'update_time'`` where they become the fallback timestamp.
        ``pandas.Timestamp`` values are converted to plain ``datetime``.
        For ``update_time`` any value that is not a timestamp, a parseable
        string or a ``datetime`` also becomes the fallback timestamp.
        """
        is_update_time = field == 'update_time'
        if value is None or pd.isna(value):
            return cls._FALLBACK_UPDATE_TIME if is_update_time else None
        # Checked before the plain-datetime test: Timestamp subclasses datetime.
        if isinstance(value, pd.Timestamp):
            return value.to_pydatetime()
        if not is_update_time:
            return value
        if isinstance(value, str):
            try:
                return pd.to_datetime(value).to_pydatetime()
            except Exception:
                return cls._FALLBACK_UPDATE_TIME
        if isinstance(value, datetime):
            return value
        return cls._FALLBACK_UPDATE_TIME

    def handle(self, *args, **options):
        """Read the Excel file and create or update one record per row."""
        df = pd.read_excel(options['file'])

        # Rename only the first header found for each target field, and never
        # onto a name that already exists. Duplicate column labels would make
        # ``row.get(name)`` return a Series and break every row.
        taken = set(df.columns)
        renames = {}
        for header, field_name in self._COLUMN_RENAMES.items():
            if header in df.columns and field_name not in taken:
                renames[header] = field_name
                taken.add(field_name)
        df.rename(columns=renames, inplace=True)

        df['source_id'] = options['source']
        df['crawl_keywords'] = options['keyword']

        # The update-time column is optional; rows fall back to the
        # placeholder timestamp in ``_clean`` when it is absent.
        if 'update_time' in df.columns:
            df['update_time'] = df['update_time'].apply(self._parse_update_time)

        success_count = 0
        fail_count = 0
        errors = []
        # Row numbers are reported as spreadsheet rows: +1 for the header
        # line, +1 because spreadsheets count from 1.
        for line_no, (_, row) in enumerate(df.iterrows(), start=2):
            # Reset per row so the error dump never shows a previous row's data.
            resume_id = None
            defaults = {}
            try:
                # NOTE(review): 'resume_id' is not produced by the header
                # mapping; if the sheet has no such column every row resolves
                # to resume_id=None and targets the same record -- confirm
                # against the real input files.
                resume_id = self._clean(row.get('resume_id'))
                defaults = {
                    name: self._clean(row.get(name), field=name)
                    for name in self._FIELD_NAMES
                }
                # Fetch-or-create first, then assign field by field so that a
                # failing assignment can be reported with its field name.
                obj, _ = ResumeBasic.objects.get_or_create(resume_id=resume_id)
                for k, v in defaults.items():
                    try:
                        setattr(obj, k, v)
                    except Exception as field_error:
                        self.stdout.write(
                            f"[字段设置错误] {k} = {v!r} ({type(v)}) → {field_error}"
                        )
                        raise
                obj.save()
                success_count += 1
            except Exception as e:
                fail_count += 1
                errors.append((line_no, str(e)))
                self.stdout.write(f"\n❌ 第 {line_no} 行出错:{e}")
                self.stdout.write(
                    f"resume_id: {repr(resume_id)} ({type(resume_id).__name__})"
                )
                for k, v in defaults.items():
                    self.stdout.write(
                        f"{k:<20} | {repr(v):<30} | {type(v).__name__}"
                    )
                self.stderr.write(traceback.format_exc())

        self.stdout.write(self.style.SUCCESS(
            f"导入完成!总数:{len(df)},成功:{success_count},失败:{fail_count}"
        ))
        if errors:
            self.stdout.write(self.style.WARNING("失败记录如下:"))
            for line_no, msg in errors:
                self.stdout.write(f"{line_no} 行出错:{msg}")