导入玉田招聘网站成功 更改模型类工作时间为串

This commit is contained in:
晓丰 2025-04-15 14:01:13 +08:00
parent 8d3719206f
commit 1b4248e865
3 changed files with 152 additions and 5 deletions

View File

@ -12,7 +12,7 @@ class Command(BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument('--file', required=True, help='Excel 文件路径') parser.add_argument('--file', required=True, help='Excel 文件路径')
parser.add_argument('--keyword', default='会计', help='crawl_keywords 值') parser.add_argument('--keyword', default='', help='crawl_keywords 值')
parser.add_argument('--source', type=int, default=2, help='source_id 值') parser.add_argument('--source', type=int, default=2, help='source_id 值')
def handle(self, *args, **options): def handle(self, *args, **options):

View File

@ -0,0 +1,147 @@
from datetime import datetime, timedelta
from django.core.management.base import BaseCommand
from pandas._libs.tslibs.timestamps import Timestamp
import pandas as pd
from resumes.models import ResumeBasic
import re
import traceback
class Command(BaseCommand):
    """Import resume rows from an Excel sheet into ``ResumeBasic``.

    The sheet's Chinese column headers are renamed to model field names,
    every row is tagged with the ``--source`` and ``--keyword`` values, and
    each row is written through ``get_or_create`` followed by per-field
    assignment.  A failing row is reported and counted; it never aborts the
    remaining rows.
    """

    help = "导入会计类简历(支持 --keyword 和 --source 参数)"

    # Stored as update_time whenever the sheet has no usable value for it.
    _FALLBACK_UPDATE_TIME = datetime(2019, 12, 12)

    # Excel header -> model field.  Several headers deliberately share one
    # target because different exports name the same column differently.
    _COLUMN_RENAMES = {
        '姓名': 'name', '性别': 'gender', '年龄': 'age', '求职区域': 'job_location', '生日': 'birthday',
        '学校': 'school', '期望职务': 'expected_position',
        '手机': 'phone', '婚姻': 'marital_status', '身高': 'height', '体重': 'weight', '电话': 'phone',
        '学历': 'education', '毕业学校': 'school', '工作经验': 'work_years',
        '现居住地': 'current_location', '工作地点': 'job_location', '到岗时间': 'available_time',
        '最后活跃时间': 'update_time', '最高学历': 'education', '婚姻状态': 'marital_status',
        '民族': 'ethnicity', '工作职能': 'job_function', '意向岗位': 'intended_position',
        '从事行业': 'industry', '期望薪资': 'expected_salary', '求职类型': 'job_property',
        '现居地': 'current_location',
        '求职状态': 'job_status', '工作1经历': 'work_1_experience', '工作1时间': 'work_1_time',
        '工作1内容': 'work_1_description',
        '工作2经历': 'work_2_experience', '工作2时间': 'work_2_time', '工作2内容': 'work_2_description',
        '工作3经历': 'work_3_experience', '工作3时间': 'work_3_time', '工作3内容': 'work_3_description',
        '工作4经历': 'work_4_experience', '工作4时间': 'work_4_time', '工作4内容': 'work_4_description',
    }

    # Fields written to the model, in the order they are assigned and printed.
    _MODEL_FIELDS = (
        'name', 'gender', 'age', 'phone', 'marital_status', 'height', 'weight',
        'education', 'school', 'work_years', 'current_location', 'job_location',
        'available_time', 'update_time', 'ethnicity', 'job_function',
        'intended_position', 'industry', 'expected_salary', 'job_property',
        'job_status', 'source_id', 'crawl_keywords', 'birthday', 'expected_position',
        'work_1_experience', 'work_1_time', 'work_1_description',
        'work_2_experience', 'work_2_time', 'work_2_description',
        'work_3_experience', 'work_3_time', 'work_3_description',
        'work_4_experience', 'work_4_time', 'work_4_description',
    )

    def add_arguments(self, parser):
        """Register the ``--file``, ``--keyword`` and ``--source`` options."""
        parser.add_argument('--file', required=True, help='Excel 文件路径')
        parser.add_argument('--keyword', default='', help='crawl_keywords 值')
        parser.add_argument('--source', type=int, default=1, help='source_id 值')

    @staticmethod
    def _coalesce(value):
        """Return the first non-missing entry when a lookup yields a Series.

        Two source headers renamed to the same field leave the frame with
        duplicate column labels, and ``row.get(label)`` then returns a Series
        instead of a scalar.  Scalars are returned unchanged.
        """
        if not isinstance(value, pd.Series):
            return value
        for item in value:
            if item is not None and not pd.isna(item):
                return item
        return None

    @classmethod
    def _clean(cls, value, field=None):
        """Convert one spreadsheet cell into a value suitable for the model.

        Missing cells (``None``/NaN/NaT) become ``None``; pandas timestamps
        become plain ``datetime`` objects; anything else passes through.
        For ``field='update_time'`` the result is always a ``datetime``:
        strings are parsed, and missing or unparseable input falls back to
        ``_FALLBACK_UPDATE_TIME``.
        """
        value = cls._coalesce(value)
        wants_datetime = field == 'update_time'

        if value is None or pd.isna(value):
            return cls._FALLBACK_UPDATE_TIME if wants_datetime else None
        if isinstance(value, pd.Timestamp):
            return value.to_pydatetime()
        if not wants_datetime:
            return value

        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                parsed = pd.to_datetime(value)
            except Exception:
                # Best effort: any parse failure means "no usable timestamp".
                return cls._FALLBACK_UPDATE_TIME
            # An empty/blank string parses to NaT rather than raising.
            if pd.isna(parsed):
                return cls._FALLBACK_UPDATE_TIME
            return parsed.to_pydatetime()
        return cls._FALLBACK_UPDATE_TIME

    @classmethod
    def _extract_age(cls, raw_age):
        """Pull the first run of digits out of an age cell (e.g. "38岁" -> 38).

        Returns ``None`` when the cell is missing, falsy, or has no digits.
        """
        raw_age = cls._coalesce(raw_age)
        # A falsy cell (including a literal 0) is treated as "no age", as before.
        if raw_age is None or pd.isna(raw_age) or not raw_age:
            return None
        match = re.search(r'\d+', str(raw_age))
        return int(match.group()) if match else None

    @classmethod
    def _build_defaults(cls, row):
        """Map one DataFrame row to a ``{model_field: value}`` dict.

        Columns absent from the sheet resolve to ``None`` (or to the fallback
        date for ``update_time``) instead of raising ``KeyError``.
        """
        defaults = {}
        for field in cls._MODEL_FIELDS:
            cell = row.get(field)
            if field == 'age':
                defaults[field] = cls._extract_age(cell)
            else:
                defaults[field] = cls._clean(cell, field=field)
        return defaults

    def handle(self, *args, **options):
        """Read the Excel file and upsert every row, reporting failures at the end."""
        df = pd.read_excel(options['file'])
        present = {src: dst for src, dst in self._COLUMN_RENAMES.items() if src in df.columns}
        df.rename(columns=present, inplace=True)
        df['source_id'] = options['source']
        df['crawl_keywords'] = options['keyword']

        success_count = 0
        fail_count = 0
        errors = []
        for i, row in df.iterrows():
            # Reset per row so the error report below never reads an unbound
            # name or a previous row's values.
            resume_id = None
            defaults = {}
            try:
                # NOTE(review): the rename map has no source header for
                # resume_id, so unless the sheet already has a `resume_id`
                # column every row looks up resume_id=None — confirm intended.
                resume_id = self._clean(row.get('resume_id'))
                defaults = self._build_defaults(row)

                # get_or_create + per-field setattr, so a bad field can be
                # named in the output before the error propagates.
                obj, _ = ResumeBasic.objects.get_or_create(resume_id=resume_id)
                for k, v in defaults.items():
                    try:
                        setattr(obj, k, v)
                    except Exception as field_error:
                        print(f"[字段设置错误] {k} = {v!r} ({type(v)}) → {field_error}")
                        raise
                obj.save()
                success_count += 1
            except Exception as e:
                fail_count += 1
                # +2: one for the header row, one for 1-based spreadsheet rows.
                errors.append((i + 2, str(e)))
                print(f"\n❌ 第 {i + 2} 行出错:{e}")
                print(f"resume_id: {repr(resume_id)} ({type(resume_id).__name__})")
                for k, v in defaults.items():
                    print(f"{k:<20} | {repr(v):<30} | {type(v).__name__}")
                traceback.print_exc()

        self.stdout.write(self.style.SUCCESS(f"导入完成!总数:{len(df)},成功:{success_count},失败:{fail_count}"))
        if errors:
            self.stdout.write(self.style.WARNING("失败记录如下:"))
            for line_no, msg in errors:
                self.stdout.write(f"{line_no} 行出错:{msg}")

View File

@ -30,19 +30,19 @@ class ResumeBasic(models.Model):
help_text="求职状态") help_text="求职状态")
work_1_experience = models.TextField(null=True, blank=True, verbose_name="工作1经历", help_text="工作1经历") work_1_experience = models.TextField(null=True, blank=True, verbose_name="工作1经历", help_text="工作1经历")
work_1_time = models.DateTimeField(null=True, blank=True, verbose_name="工作1时间", help_text="工作1时间") work_1_time = models.CharField(max_length=255, null=True, blank=True, verbose_name="工作1时间", help_text="工作1时间")
work_1_description = models.TextField(null=True, blank=True, verbose_name="工作1内容", help_text="工作1内容") work_1_description = models.TextField(null=True, blank=True, verbose_name="工作1内容", help_text="工作1内容")
work_2_experience = models.TextField(null=True, blank=True, verbose_name="工作2经历", help_text="工作2经历") work_2_experience = models.TextField(null=True, blank=True, verbose_name="工作2经历", help_text="工作2经历")
work_2_time = models.DateTimeField(null=True, blank=True, verbose_name="工作2时间", help_text="工作2时间") work_2_time = models.CharField(max_length=255, null=True, blank=True, verbose_name="工作2时间", help_text="工作2时间")
work_2_description = models.TextField(null=True, blank=True, verbose_name="工作2内容", help_text="工作2内容") work_2_description = models.TextField(null=True, blank=True, verbose_name="工作2内容", help_text="工作2内容")
work_3_experience = models.TextField(null=True, blank=True, verbose_name="工作3经历", help_text="工作3经历") work_3_experience = models.TextField(null=True, blank=True, verbose_name="工作3经历", help_text="工作3经历")
work_3_time = models.DateTimeField(null=True, blank=True, verbose_name="工作3时间", help_text="工作3时间") work_3_time = models.CharField(max_length=255, null=True, blank=True, verbose_name="工作3时间", help_text="工作3时间")
work_3_description = models.TextField(null=True, blank=True, verbose_name="工作3内容", help_text="工作3内容") work_3_description = models.TextField(null=True, blank=True, verbose_name="工作3内容", help_text="工作3内容")
work_4_experience = models.TextField(null=True, blank=True, verbose_name="工作4经历", help_text="工作4经历") work_4_experience = models.TextField(null=True, blank=True, verbose_name="工作4经历", help_text="工作4经历")
work_4_time = models.DateTimeField(null=True, blank=True, verbose_name="工作4时间", help_text="工作4时间") work_4_time = models.CharField(max_length=255, null=True, blank=True, verbose_name="工作4时间", help_text="工作4时间")
work_4_description = models.TextField(null=True, blank=True, verbose_name="工作4内容", help_text="工作4内容") work_4_description = models.TextField(null=True, blank=True, verbose_name="工作4内容", help_text="工作4内容")
height = models.IntegerField(null=True, blank=True, verbose_name="身高", help_text="身高") height = models.IntegerField(null=True, blank=True, verbose_name="身高", help_text="身高")