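# Scrape resume listings from www.yutian.top (company-side resume search),
# flatten the fields of interest into column lists, and export the result to
# an Excel file named "<keyword>_<timestamp>.xlsx".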

import time
import pandas as pd
from Requests_Except import MR
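# NOTE: Requests_Except.MR is a project-local helper (not a published package),
# presumably a thin wrapper around requests.Session. The calls below assume it
# provides roughly this interface:
#   MR(base_url, protocol, proxy_options=...)        construct the client
#   .set_default_headers(dict) / .set_default_cookies(dict)
#   .post(path, json=...)                            POST to protocol://base_url + path
#   <response>.to_Dict()                             attribute-style access to the JSON body
# Adjust the calls if your version of the wrapper differs.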
base_url = 'www.yutian.top'
protocol = 'https'
default_headers = {
"accept": "application/json, text/plain, */*",
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
"cache-control": "no-cache",
"content-type": "application/json;charset=UTF-8",
"origin": "https://www.yutian.top",
"pragma": "no-cache",
"priority": "u=1, i",
"referer": "https://www.yutian.top/enterprise/resume_store/list",
"sec-ch-ua": "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Google Chrome\";v=\"138\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
}
default_cookies = {
"PHPSESSID": "8622ac2f6caf545585d9b3c4537bc036",
"auth-token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NTQ4NzUzOTksImp0aSI6IjMxMzY5YmQ3LTIwOTMtNGI4Ni04ZGY3LWUzZTY1NDhjOTg0OCIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIwM2M2MmI5ODM4Yjk3Y2UzYmQxZTQwNDllZGVlNmI0OCIsInRlbmFudF90b2tlbiI6IjY1OTAxM2RlNjAxZmJmNjg1MzZmYTU0OTc4ODVkMTA2In0.TYpA94cCO7-HCeeksicrtpBDJB2AsbBvsuGBrJiFVWU",
"company_sign": "",
"company_nonce": "",
"cuid": ""
}
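# PHPSESSID and auth-token (a JWT with an expiry claim) are copied from a
# logged-in browser session; refresh them with current values before running.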
Requests = MR(base_url, protocol, proxy_options=False)
Requests.set_default_headers(default_headers)
Requests.set_default_cookies(default_cookies)
# print(Requests.session.proxies)
pd_data = {
    'resume_id': [],
    '姓名': [], # user_name
    '求职区域': [], # area_show
    '生日': [], # birthday
    '学历': [], # education_level_msg
    '学校': [], # education.school
    '期望职务': [], # expect_job
    '最后活跃时间': [], # last_edit_time
    '婚姻': [], # marry_status_show
    '现居地': [], # residence
    '年龄': [], # user_age
    '电话': [], # phone_encrypt
    '性别': [], # sex_show
    '求职类型': [], # work_type_show
    '求职状态': [], # work_status_show
    '工作1经历': [], # experience[0].company
    '工作1时间': [], # experience[0].time_line
    '工作1内容': [], # experience[0].content
    '工作2经历': [],
    '工作2时间': [],
    '工作2内容': [],
    '工作3经历': [],
    '工作3时间': [],
    '工作3内容': [],
    '工作4经历': [],
    '工作4时间': [],
    '工作4内容': [],
}
resume_list = []
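# Fetch one page of resume search results for `key_word`. Only the first page
# is requested; `step` controls how many records come back per page.
# (`resume_list` above is declared but not used in this script.)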
def get_page(key_word, step=100):
    # Filter fields are left blank/default; the *_show entries appear to mirror
    # the site's filter placeholder labels (e.g. '求职状态' = job-seeking status)
    # and are sent unchanged when the corresponding filter is unset.
    json_data = {
        'step': step,
        'page': 1,
        'education_level': [],
        'arrival_time': [],
        'work_time': [],
        'area_id': [],
        'keywords': key_word,
        'work_status': '',
        'work_status_show': '求职状态',
        'category_id': '',
        'work_type': '',
        'work_type_show': '是否兼职',
        'sex': '',
        'sex_show': '性别',
        'is_head': '',
        'is_head_show': '有无照片',
        'job_id': '',
        'age': [],
        'age_show': '年龄',
        'refresh_time': 0,
        'site_id': '',
        'site_id2': '',
        'province': '',
        'city': '',
        'county': '',
        'provinceArr': [],
        'cityArr': [],
        'countyArr': [],
        'only_job_category': 0,
    }
    url = '/job/company/v1/resume/page'
    resp = Requests.post(url, json=json_data)
    return resp.to_Dict()
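# Flatten one page of results into the pd_data columns. Work experience is
# padded (or truncated) to exactly four slots so every column ends up the
# same length before DataFrame construction.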
def organize_information_into_to_pandas(keyword):
    resp_obj = get_page(keyword, 100)
    for i in resp_obj.data:
        # resume_info = get_resume_info(i.resume_id)
        pd_data['resume_id'].append(i.resume_id)
        pd_data['姓名'].append(i.user_name)
        pd_data['求职区域'].append(i.area_show)
        pd_data['生日'].append(i.birthday)
        pd_data['学历'].append(i.education_level_msg)
        pd_data['学校'].append(';'.join([edu.school for edu in i.education]))
        pd_data['期望职务'].append(i.expect_job)
        pd_data['最后活跃时间'].append(i.last_edit_time)
        pd_data['婚姻'].append(i.marry_status_show)
        pd_data['现居地'].append(i.residence)
        pd_data['年龄'].append(i.user_age)
        pd_data['电话'].append(i.phone_encrypt)
        pd_data['性别'].append(i.sex_show)
        pd_data['求职类型'].append(i.work_type_show)
        pd_data['求职状态'].append(i.work_status_show)
        experience = i.experience
        for j in range(4):
            if j < len(experience) and experience[j].company:
                company = experience[j].company
                time_line = experience[j].time_line
                content = experience[j].content
            else:
                company = ''
                time_line = ''
                content = ''
            pd_data[f'工作{j + 1}经历'].append(company)
            pd_data[f'工作{j + 1}时间'].append(time_line)
            pd_data[f'工作{j + 1}内容'].append(content)
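# Run one keyword search and dump the collected columns to an Excel file.
# Writing .xlsx with DataFrame.to_excel requires an engine such as openpyxl.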
def main(keyword):
    organize_information_into_to_pandas(keyword)
    df = pd.DataFrame(pd_data)
    df.to_excel(keyword + "_" + str(int(time.time())) + '.xlsx', index=False)
if __name__ == '__main__':
main("看护")