149 lines
4.9 KiB
Python
149 lines
4.9 KiB
Python
import os
import time

import pandas as pd

from Requests_Except import MR
|
|
|
|
# Target site; the request client is built from these two values.
base_url = 'www.yutian.top'
protocol = 'https'

# Request headers sent with every call. The values appear to mirror a desktop
# Chrome 138 session on the site's resume-store page (see user-agent/referer).
default_headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "content-type": "application/json;charset=UTF-8",
    # Derived from protocol/base_url so it cannot drift from the target host.
    "origin": f"{protocol}://{base_url}",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": f"{protocol}://{base_url}/enterprise/resume_store/list",
    "sec-ch-ua": "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Google Chrome\";v=\"138\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
}

# Session cookies.
# NOTE(security): the fallback literals below are real session credentials
# committed to source. The auth token is a JWT carrying an expiry claim, so it
# goes stale; supply fresh values through the YUTIAN_PHPSESSID and
# YUTIAN_AUTH_TOKEN environment variables instead of editing this file.
default_cookies = {
    "PHPSESSID": os.environ.get(
        "YUTIAN_PHPSESSID",
        "8622ac2f6caf545585d9b3c4537bc036",
    ),
    "auth-token": os.environ.get(
        "YUTIAN_AUTH_TOKEN",
        "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NTQ4NzUzOTksImp0aSI6IjMxMzY5YmQ3LTIwOTMtNGI4Ni04ZGY3LWUzZTY1NDhjOTg0OCIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIwM2M2MmI5ODM4Yjk3Y2UzYmQxZTQwNDllZGVlNmI0OCIsInRlbmFudF90b2tlbiI6IjY1OTAxM2RlNjAxZmJmNjg1MzZmYTU0OTc4ODVkMTA2In0.TYpA94cCO7-HCeeksicrtpBDJB2AsbBvsuGBrJiFVWU",
    ),
    "company_sign": "",
    "company_nonce": "",
    "cuid": ""
}

# Shared HTTP client used by every request in this module.
# NOTE(review): proxy_options=False presumably disables proxying -- confirm
# against Requests_Except.MR.
Requests = MR(base_url, protocol, proxy_options=False)
Requests.set_default_headers(default_headers)
Requests.set_default_cookies(default_cookies)
|
|
# Column-oriented accumulator that is later turned into a DataFrame: one list
# per output column, in output order. The trailing comment on each entry names
# the result-record attribute that feeds the column.
pd_data = {
    column_name: []
    for column_name in (
        'resume_id',
        '姓名',  # user_name
        '求职区域',  # area_show
        '生日',  # birthday
        '学历',  # education_level_msg
        '学校',  # education.school
        '期望职务',  # expect_job
        '最后活跃时间',  # last_edit_time
        '婚姻',  # marry_status_show
        '现居地',  # residence
        '年龄',  # user_age
        '电话',  # phone_encrypt
        '性别',  # sex_show
        '求职类型',  # work_type_show
        '求职状态',  # work_status_show
        # Up to four work-experience entries: company / period / description.
        '工作1经历',
        '工作1时间',
        '工作1内容',
        '工作2经历',
        '工作2时间',
        '工作2内容',
        '工作3经历',
        '工作3时间',
        '工作3内容',
        '工作4经历',
        '工作4时间',
        '工作4内容',
    )
}

# Not referenced by any code visible in this file.
resume_list = []
|
|
|
|
|
|
def get_page(key_word: str, step: int = 100, page: int = 1):
    """Request one page of resume search results for a keyword.

    Args:
        key_word: Search keyword, sent as the ``keywords`` field.
        step: Sent as the ``step`` field (presumably the page size --
            confirm against the API).
        page: Page number to request. Defaults to 1, which is what this
            function always requested before the parameter existed.

    Returns:
        The result of ``to_Dict()`` on the response object returned by the
        shared ``Requests`` client. Callers in this file read ``.data`` from
        it.
    """
    # All filters are left empty so only the keyword narrows the search.
    # NOTE(review): the ``*_show`` values look like the web client's
    # placeholder labels for unset filters -- confirm before changing them.
    json_data = {
        'step': step,
        'page': page,
        'education_level': [],
        'arrival_time': [],
        'work_time': [],
        'area_id': [],
        'keywords': key_word,
        'work_status': '',
        'work_status_show': '求职状态',
        'category_id': '',
        'work_type': '',
        'work_type_show': '是否兼职',
        'sex': '',
        'sex_show': '性别',
        'is_head': '',
        'is_head_show': '有无照片',
        'job_id': '',
        'age': [],
        'age_show': '年龄',
        'refresh_time': 0,
        'site_id': '',
        'site_id2': '',
        'province': '',
        'city': '',
        'county': '',
        'provinceArr': [],
        'cityArr': [],
        'countyArr': [],
        'only_job_category': 0,
    }
    url = '/job/company/v1/resume/page'
    resp = Requests.post(url, json=json_data)
    return resp.to_Dict()
|
|
|
|
|
|
def organize_information_into_to_pandas(keyword):
    """Fetch search results for ``keyword`` and append them to ``pd_data``.

    Calls ``get_page(keyword, 100)`` and, for every record in the response's
    ``data``, appends exactly one value to every column list in the
    module-level ``pd_data`` so that all columns stay the same length.

    A record whose ``education`` or ``experience`` is null or empty, or whose
    school name is null, contributes empty strings for the affected columns
    instead of raising.

    Args:
        keyword: Search keyword forwarded to ``get_page``.

    Returns:
        None. The function works by mutating ``pd_data``.
    """
    # (output column, record attribute) pairs that are copied over unchanged.
    plain_columns = (
        ('resume_id', 'resume_id'),
        ('姓名', 'user_name'),
        ('求职区域', 'area_show'),
        ('生日', 'birthday'),
        ('学历', 'education_level_msg'),
        ('期望职务', 'expect_job'),
        ('最后活跃时间', 'last_edit_time'),
        ('婚姻', 'marry_status_show'),
        ('现居地', 'residence'),
        ('年龄', 'user_age'),
        ('电话', 'phone_encrypt'),
        ('性别', 'sex_show'),
        ('求职类型', 'work_type_show'),
        ('求职状态', 'work_status_show'),
    )
    # pd_data has columns for exactly four work-experience slots.
    experience_slots = 4

    resp_obj = get_page(keyword, 100)
    # resp_obj.data is intentionally not guarded: a failed request should
    # surface as an error rather than produce an empty spreadsheet.
    for record in resp_obj.data:
        for column, attribute in plain_columns:
            pd_data[column].append(getattr(record, attribute))

        # All school names joined with ';'. Null names become '' so that
        # str.join does not raise.
        schools = [edu.school or '' for edu in (record.education or [])]
        pd_data['学校'].append(';'.join(schools))

        experience = record.experience or []
        for slot in range(experience_slots):
            entry = experience[slot] if slot < len(experience) else None
            if entry is not None and entry.company:
                company = entry.company
                time_line = entry.time_line
                content = entry.content
            else:
                # Missing slot, or an entry without a company name: pad with
                # blanks so every column still receives one value per record.
                company = time_line = content = ''
            pd_data[f'工作{slot + 1}经历'].append(company)
            pd_data[f'工作{slot + 1}时间'].append(time_line)
            pd_data[f'工作{slot + 1}内容'].append(content)
|
|
|
|
|
|
def main(keyword):
    """Collect search results for ``keyword`` and write them to an Excel file.

    The output file is written to the current working directory and is named
    ``<keyword>_<unix timestamp>.xlsx``.
    """
    organize_information_into_to_pandas(keyword)
    frame = pd.DataFrame(pd_data)
    # Whole-second timestamp keeps successive runs from overwriting each other.
    stamp = str(int(time.time()))
    output_name = keyword + "_" + stamp + '.xlsx'
    frame.to_excel(output_name, index=False)
|
|
|
|
|
|
# Script entry point: run a single export for the default search keyword.
if __name__ == "__main__":
    main(keyword="看护")
|