"""Export resume search results from www.yutian.top to an Excel workbook.

The script posts one keyword search to the enterprise resume endpoint,
flattens every returned record into the column lists of ``pd_data`` and
writes the result to ``<keyword>_<unix-timestamp>.xlsx``.
"""
import time

import pandas as pd

from Requests_Except import MR

base_url = 'www.yutian.top'
protocol = 'https'

# Browser-like headers sent with every request.
default_headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.yutian.top",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.yutian.top/enterprise/resume_store/list",
    "sec-ch-ua": "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\", \"Google Chrome\";v=\"138\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
}

# NOTE(security): a session id and a JWT are committed here in plain text.
# They are kept byte-for-byte so behavior is unchanged, but they should be
# rotated and loaded from the environment or an untracked config file.
default_cookies = {
    "PHPSESSID": "8622ac2f6caf545585d9b3c4537bc036",
    "auth-token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NTQ4NzUzOTksImp0aSI6IjMxMzY5YmQ3LTIwOTMtNGI4Ni04ZGY3LWUzZTY1NDhjOTg0OCIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIwM2M2MmI5ODM4Yjk3Y2UzYmQxZTQwNDllZGVlNmI0OCIsInRlbmFudF90b2tlbiI6IjY1OTAxM2RlNjAxZmJmNjg1MzZmYTU0OTc4ODVkMTA2In0.TYpA94cCO7-HCeeksicrtpBDJB2AsbBvsuGBrJiFVWU",
    "company_sign": "",
    "company_nonce": "",
    "cuid": "",
}

# Shared client used by every request in this module.
# NOTE(review): MR is project-local; presumably a session wrapper - confirm.
Requests = MR(base_url, protocol, proxy_options=False)
Requests.set_default_headers(default_headers)
Requests.set_default_cookies(default_cookies)

# Column name -> list of cell values; each processed resume appends exactly
# one value to every list. Trailing comments name the source field.
pd_data = {
    'resume_id': [],
    '姓名': [],  # user_name
    '求职区域': [],  # area_show
    '生日': [],  # birthday
    '学历': [],  # education_level_msg
    '学校': [],  # education.school
    '期望职务': [],  # expect_job
    '最后活跃时间': [],  # last_edit_time
    '婚姻': [],  # marry_status_show
    '现居地': [],  # residence
    '年龄': [],  # user_age
    '电话': [],  # phone_encrypt
    '性别': [],  # sex_show
    '求职类型': [],  # work_type_show
    '求职状态': [],  # work_status_show
    '工作1经历': [],
    '工作1时间': [],
    '工作1内容': [],
    '工作2经历': [],
    '工作2时间': [],
    '工作2内容': [],
    '工作3经历': [],
    '工作3时间': [],
    '工作3内容': [],
    '工作4经历': [],
    '工作4时间': [],
    '工作4内容': [],
}

# Number of work-experience column triples declared in pd_data above.
_MAX_WORK_ENTRIES = 4

resume_list = []


def get_page(key_word: str, step: int = 100, page: int = 1):
    """Request one page of resume search results.

    Args:
        key_word: Search text sent as the ``keywords`` filter.
        step: Value sent as ``step`` (presumably the page size - confirm).
        page: Value sent as ``page``. Defaults to 1, the page that was
            previously hard-coded.

    Returns:
        Whatever ``to_Dict()`` of the response object returns. The caller in
        this module iterates its ``data`` attribute.
    """
    json_data = {
        'step': step,
        'page': page,
        'education_level': [],
        'arrival_time': [],
        'work_time': [],
        'area_id': [],
        'keywords': key_word,
        'work_status': '',
        'work_status_show': '求职状态',
        'category_id': '',
        'work_type': '',
        'work_type_show': '是否兼职',
        'sex': '',
        'sex_show': '性别',
        'is_head': '',
        'is_head_show': '有无照片',
        'job_id': '',
        'age': [],
        'age_show': '年龄',
        'refresh_time': 0,
        'site_id': '',
        'site_id2': '',
        'province': '',
        'city': '',
        'county': '',
        'provinceArr': [],
        'cityArr': [],
        'countyArr': [],
        'only_job_category': 0,
    }
    url = '/job/company/v1/resume/page'
    resp = Requests.post(url, json=json_data)
    return resp.to_Dict()


def organize_information_into_to_pandas(keyword: str) -> None:
    """Fetch the first 100 results for *keyword* and append them to ``pd_data``.

    Every record adds one value to each column list. Only the first four
    work-experience entries are kept; missing slots, and entries whose
    ``company`` is empty, are written as empty strings.

    Note:
        ``pd_data`` is module-level state, so repeated calls accumulate rows.
    """
    resp_obj = get_page(keyword, 100)
    for record in resp_obj.data:
        pd_data['resume_id'].append(record.resume_id)
        pd_data['姓名'].append(record.user_name)
        pd_data['求职区域'].append(record.area_show)
        pd_data['生日'].append(record.birthday)
        pd_data['学历'].append(record.education_level_msg)
        # A missing education list or school name becomes an empty string
        # instead of raising TypeError inside join().
        schools = [edu.school or '' for edu in (record.education or [])]
        pd_data['学校'].append(';'.join(schools))
        pd_data['期望职务'].append(record.expect_job)
        pd_data['最后活跃时间'].append(record.last_edit_time)
        pd_data['婚姻'].append(record.marry_status_show)
        pd_data['现居地'].append(record.residence)
        pd_data['年龄'].append(record.user_age)
        pd_data['电话'].append(record.phone_encrypt)
        pd_data['性别'].append(record.sex_show)
        pd_data['求职类型'].append(record.work_type_show)
        pd_data['求职状态'].append(record.work_status_show)

        # Treat a missing experience list as empty so len() cannot fail.
        experience = record.experience or []
        for slot in range(_MAX_WORK_ENTRIES):
            entry = experience[slot] if slot < len(experience) else None
            if entry is not None and entry.company:
                company = entry.company
                time_line = entry.time_line
                content = entry.content
            else:
                # Pad so that every column list stays the same length.
                company = time_line = content = ''
            pd_data[f'工作{slot + 1}经历'].append(company)
            pd_data[f'工作{slot + 1}时间'].append(time_line)
            pd_data[f'工作{slot + 1}内容'].append(content)


def main(keyword: str) -> None:
    """Collect resumes for *keyword* and write them to an .xlsx file.

    The file is created in the current working directory and is named
    ``<keyword>_<unix timestamp>.xlsx``.
    """
    organize_information_into_to_pandas(keyword)
    df = pd.DataFrame(pd_data)
    df.to_excel(f"{keyword}_{int(time.time())}.xlsx", index=False)


if __name__ == '__main__':
    main("看护")