140 lines
6.1 KiB
Python
140 lines
6.1 KiB
Python
from datetime import datetime, timedelta
|
||
from django.core.management.base import BaseCommand
|
||
from pandas._libs.tslibs.timestamps import Timestamp
|
||
import pandas as pd
|
||
from resumes.models import ResumeBasic
|
||
import re
|
||
import traceback
|
||
|
||
|
||
class Command(BaseCommand):
    """Import resume rows from an Excel sheet into ``ResumeBasic``.

    The sheet's Chinese column headers are renamed to model field names,
    every row is stamped with the ``--source`` / ``--keyword`` values, and
    each row is written with ``get_or_create`` keyed on ``resume_id``
    followed by per-field assignment and ``save()``.  A row that raises is
    counted, reported with its values and traceback, and skipped; the run
    continues with the next row.
    """

    help = "导入会计类简历(支持 --keyword 和 --source 参数)"

    # Stored whenever the sheet's update time is missing or cannot be parsed.
    FALLBACK_UPDATE_TIME = datetime(2019, 12, 12)

    # Excel header -> model field name.  Several headers are alternative
    # spellings of the same field (e.g. '学历' / '最高学历').
    HEADER_TO_FIELD = {
        '姓名': 'name', '性别': 'gender', '年龄': 'age',
        '手机': 'phone', '婚姻状况': 'marital_status', '身高': 'height', '体重': 'weight',
        '学历': 'education', '毕业学校': 'school', '工作经验': 'work_years',
        '现居住地': 'current_location', '工作地点': 'job_location', '到岗时间': 'available_time',
        '更新时间': 'update_time', '最高学历': 'education', '婚姻状态': 'marital_status',
        '民族': 'ethnicity', '工作职能': 'job_function', '意向岗位': 'intended_position',
        '从事行业': 'industry', '期望薪资': 'expected_salary', '工作性质': 'job_property',
        '求职状态': 'job_status',
    }

    # Fields copied from each row onto the model, in reporting order.
    ROW_FIELDS = (
        'name', 'gender', 'age', 'phone', 'marital_status', 'height', 'weight',
        'education', 'school', 'work_years', 'current_location', 'job_location',
        'available_time', 'update_time', 'ethnicity', 'job_function',
        'intended_position', 'industry', 'expected_salary', 'job_property',
        'job_status', 'source_id', 'crawl_keywords',
    )

    def add_arguments(self, parser):
        """Register ``--file`` (required), ``--keyword`` and ``--source``."""
        parser.add_argument('--file', required=True, help='Excel 文件路径')
        parser.add_argument('--keyword', default='', help='crawl_keywords 值')
        parser.add_argument('--source', type=int, default=2, help='source_id 值')

    def handle(self, *args, **options):
        """Read the Excel file and create/update one ``ResumeBasic`` per row.

        Errors from reading the file propagate to the caller; errors while
        processing an individual row are caught, reported and counted.
        """
        df = pd.read_excel(options['file'])

        # Rename only the first header found for each target field.  Renaming
        # two headers to the same name would create duplicate column labels,
        # and row lookups on a duplicated label return a Series instead of a
        # scalar, which breaks the per-cell cleaning below.
        present = set(df.columns)
        renames = {}
        for header, field in self.HEADER_TO_FIELD.items():
            if header in present and field not in present:
                renames[header] = field
                present.add(field)
        df.rename(columns=renames, inplace=True)

        df['source_id'] = options['source']
        df['crawl_keywords'] = options['keyword']

        # A sheet without an update-time column gets the fallback timestamp
        # instead of aborting the import with a KeyError.
        if 'update_time' in df.columns:
            df['update_time'] = df['update_time'].apply(self._parse_update_time)
        else:
            df['update_time'] = self.FALLBACK_UPDATE_TIME

        success_count = 0
        fail_count = 0
        errors = []

        # Row numbers are reported as spreadsheet rows: +1 for the header
        # line, +1 because spreadsheets count from 1.
        for row_no, (_, row) in enumerate(df.iterrows(), start=2):
            # Reset per row so the error report below never shows values
            # left over from the previous row (or hits an unbound name).
            resume_id = None
            defaults = {}
            try:
                resume_id = self._clean(row.get('resume_id'))
                for field in self.ROW_FIELDS:
                    defaults[field] = self._clean(row.get(field), field=field)

                # Safe approach: get_or_create, then set fields one by one.
                # NOTE(review): rows with an empty resume_id all go through
                # get_or_create(resume_id=None) - confirm that is intended.
                obj, _ = ResumeBasic.objects.get_or_create(resume_id=resume_id)

                for field, value in defaults.items():
                    try:
                        setattr(obj, field, value)
                    except Exception as field_error:
                        self.stdout.write(
                            f"[字段设置错误] {field} = {value!r} ({type(value)}) → {field_error}"
                        )
                        raise

                obj.save()
                success_count += 1

            except Exception as e:
                # Per-row boundary: report the failure and keep importing.
                fail_count += 1
                errors.append((row_no, str(e)))
                self.stdout.write(f"\n❌ 第 {row_no} 行出错:{e}")
                self.stdout.write(f"resume_id: {repr(resume_id)} ({type(resume_id).__name__})")
                for field, value in defaults.items():
                    self.stdout.write(f"{field:<20} | {repr(value):<30} | {type(value).__name__}")
                traceback.print_exc()

        self.stdout.write(self.style.SUCCESS(f"导入完成!总数:{len(df)},成功:{success_count},失败:{fail_count}"))
        if errors:
            self.stdout.write(self.style.WARNING("失败记录如下:"))
            for line_no, msg in errors:
                self.stdout.write(f" 第 {line_no} 行出错:{msg}")

    def _parse_update_time(self, value):
        """Convert one raw update-time cell into a ``datetime``.

        Handles "刚刚" (just now), "N小时前" (N hours ago) and "N天前"
        (N days ago) relative to the current time; any other text is handed
        to ``pd.to_datetime``.  Missing or unparseable input - including a
        relative phrase that contains no number - yields
        ``FALLBACK_UPDATE_TIME``.
        """
        if pd.isna(value):
            return self.FALLBACK_UPDATE_TIME

        text = str(value)
        now = datetime.now()
        if "刚刚" in text:
            return now

        for marker, unit in (("小时前", "hours"), ("天前", "days")):
            if marker in text:
                amount = re.search(r'\d+', text)
                if amount is None:
                    # e.g. "半小时前": no digits to read, so use the fallback
                    # rather than crash on ``None.group()``.
                    return self.FALLBACK_UPDATE_TIME
                return now - timedelta(**{unit: int(amount.group())})

        try:
            parsed = pd.to_datetime(text)
        except Exception:
            return self.FALLBACK_UPDATE_TIME
        if pd.isna(parsed):
            return self.FALLBACK_UPDATE_TIME
        return parsed.to_pydatetime()

    def _clean(self, value, field=None):
        """Normalise one cell value before it is assigned to the model.

        Missing values (``None`` / NaN / NaT) become ``None``, or
        ``FALLBACK_UPDATE_TIME`` when ``field`` is ``'update_time'``.
        pandas timestamps become plain ``datetime`` objects.  For
        ``'update_time'`` strings are parsed and anything that is not a
        usable date falls back; other values pass through unchanged.
        """
        is_update_time = field == 'update_time'

        if value is None or pd.isna(value):
            return self.FALLBACK_UPDATE_TIME if is_update_time else None

        if isinstance(value, pd.Timestamp):
            return value.to_pydatetime()

        if not is_update_time:
            return value

        if isinstance(value, str):
            try:
                parsed = pd.to_datetime(value)
            except Exception:
                return self.FALLBACK_UPDATE_TIME
            if pd.isna(parsed):
                return self.FALLBACK_UPDATE_TIME
            return parsed.to_pydatetime()

        if isinstance(value, datetime):
            return value

        return self.FALLBACK_UPDATE_TIME
|