141 lines
4.5 KiB
Python
141 lines
4.5 KiB
Python
import datetime
import time

import pandas as pd

from Requests_Except import *
|
|
|
|
# Target site, kept as separate scheme / host values.
protocol = 'https'
base_url = 'www.96rz.com'

# NOTE(review): this is a session credential committed in source. The same
# value is sent both as the Authorization bearer token and as the `token`
# cookie, so it is defined once here. Consider loading it from the
# environment or a secrets store instead.
_SESSION_TOKEN = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
    ".eyJpZCI6MTI4MTMwLCJ1c2VybmFtZSI6IueUhOePjV8xNTgyMzMiLCJwd2QiOiJiNGUzZDQyODUwMTA3YzRkMzBlNmRkYjU4N2IzZTM3ZCIsImlhdCI6MTc1MTI2NzY5MiwiZXhwIjoxNzgyODAzNjkyfQ"
    ".Q_u73JFMxjZESQC9yUAwb7V7La5bM9OT37iGl3UO_cY"
)

# Default request headers (they look like a capture of a desktop Chrome
# session on the enterprise resume-library page).
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "authorization": "Bearer " + _SESSION_TOKEN,
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.96rz.com/uc/enterprise/resume-library?tab=resume&keyword=%E6%9C%8D%E5%8A%A1%E5%91%98&t=1751267805224",
    "sec-ch-ua": '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
    "x-platform": "1",
    "x-site-id": "undefined",
}

# Default cookies that accompany the headers above.
cookies = {
    "Hm_lvt_0fcb5413ca26ff9fe1a29c6f98b5e6d0": "1751267674",
    "HMACCOUNT": "52014CC932A93E9B",
    "Hm_lpvt_0fcb5413ca26ff9fe1a29c6f98b5e6d0": "1751267677",
    "token": _SESSION_TOKEN,
    "token.sig": "LdkOB3mNW_a59rzyTefnRuTybegvcDHEUd4hRXc-lO8",
    "x-trace-id": "9587d2f9a1e84ae783cb2a5f055a7a51",
}
|
|
# Shared HTTP client used by every function in this module. `MR` is not
# defined in this file; presumably it is supplied by the wildcard
# `Requests_Except` import. It is constructed from the host, the scheme and
# the default headers defined above.
Requests = MR(base_url, protocol, headers)
# Register the cookie dict above as the client's default cookies.
Requests.set_default_cookies(cookies=cookies)
|
|
# Keyword of the most recent search. It is overwritten by
# get_page_for_keyword() and integration().
_keyword = ""

# Columns of the exported sheet, in output order. The names must match the
# keys produced by get_resumes_info(), which appends into these lists by key.
_RESUME_COLUMNS = (
    'resume_id',
    '姓名',
    '年龄',
    '生日',
    '工作经验',
    '最高学历',
    '婚姻状态',
    '电话',
    '意向岗位',
    '期望薪资',
    '工作性质',
    '求职状态',
    '工作地点',
    '工作经历1',
    '工作经历2',
    '工作经历3',
    '工作经历4',
)

# Column-oriented accumulator: one independent, initially empty list per column.
pd_data = {column: [] for column in _RESUME_COLUMNS}
|
|
|
|
|
|
def login(account='18244681207', password='zhenxian8888'):
    """Submit the quick-login form through the shared ``Requests`` client.

    Args:
        account: Account identifier sent in the ``account`` form field.
        password: Password sent in the ``password`` form field.

    The defaults reproduce the values that used to be hard-coded in the
    body, so ``login()`` with no arguments sends the same request as before.

    NOTE(review): real credentials are still committed as defaults. Prefer
    passing them in from the environment or a secrets store.
    """
    form = {
        '_type': '1',
        '_from': 'quick',
        'account': account,
        'password': password,
    }
    response = Requests.post(
        '/account/login',
        params={'ref': '/?from=h5'},
        data=form,
        autosave=True,  # presumably persists the session on the client; confirm in Requests_Except
    )
    # The result is intentionally left unused, exactly as before.
    # NOTE(review): confirm whether cookies_dict() has a needed side effect or
    # whether its value was meant to be returned.
    response.cookies_dict()
|
|
|
|
|
|
def get_page_for_keyword(keyword, page_index=1, page_size=100):
    """Fetch one page of resume search results for ``keyword``.

    Args:
        keyword: Search term sent as the ``keyword`` query parameter. It is
            also stored in the module-level ``_keyword``.
        page_index: Page number to request. Defaults to 1, the value that
            was previously hard-coded.
        page_size: Number of results per page. Defaults to 100, the value
            that was previously hard-coded.

    Returns:
        Whatever ``response.to_Dict()`` produces for the
        ``/api/v1/resumes`` response. ``integration()`` reads
        ``.data.items`` from it.
    """
    global _keyword
    _keyword = keyword

    # Read the clock once so both timestamp parameters derive from the same
    # instant. `_` is sent 10 seconds behind `t`, as before (presumably to
    # mimic the browser's cache-busting parameters).
    now_ms = int(time.time() * 1000)
    params = {
        '_': str(now_ms - 10000),
        'tab': 'resume',
        'keyword': keyword,
        't': str(now_ms),
        'pageSize': str(page_size),
        'pageIndex': str(page_index),
        'showStatus': 'true',
    }
    response = Requests.get('/api/v1/resumes', params=params)
    return response.to_Dict()
|
|
|
|
|
|
def get_resumes_info(resumes_id):
    """Fetch one resume and flatten it into a single export row.

    Args:
        resumes_id: Identifier interpolated into ``/api/v1/resume/<id>``.

    Returns:
        A dict with one value per ``pd_data`` column: the resume id, the
        personal and job-intention fields read from the response's ``data``
        object, and exactly four work-history entries. Missing work-history
        entries are empty strings.
    """
    params = {
        '_': str(int(time.time() * 1000)),
        'view_type': 'resumeLibrary',
        'privacy_description': '1',
    }
    response = Requests.get(f'/api/v1/resume/{resumes_id}', params=params)
    info = response.to_Dict().data

    # A resume may have no intended posts or no work history. Treat a
    # null/empty value as an empty list instead of crashing on len() or
    # iteration.
    intended_posts = info.infoCateforyArrObj or []
    works = info.works or []

    data = {
        'resume_id': resumes_id,
        '姓名': info.name,
        '年龄': info.age,
        '生日': info.birthday,
        '工作经验': info.work_exp_value,
        '最高学历': info.edu_value,
        '婚姻状态': info.marriage_value,
        '电话': info.phone,
        '意向岗位': ','.join([item.name for item in intended_posts]),
        '期望薪资': info.salaryDesc,
        '工作性质': info.work_type_value,
        '求职状态': info.job_instant_value,
        '工作地点': info.job_region_value,
    }

    # The sheet has four fixed work-history columns. Fill them from the
    # first four entries and pad the rest with empty strings so every row
    # has the same keys.
    for slot in range(4):
        column = f'工作经历{slot + 1}'
        if slot < len(works):
            work = works[slot]
            data[column] = f"{work.company}:{work.content}"
        else:
            data[column] = ''

    return data
|
|
|
|
|
|
def integration(keyword):
    """Search resumes for ``keyword`` and export the results to an Excel file.

    Fetches the first page of search results, loads each listed resume via
    ``get_resumes_info()``, collects the rows into the module-level
    ``pd_data`` columns, and writes them to
    ``<YYYYMMDD>_滦南_<keyword>.xlsx`` in the current working directory.

    Args:
        keyword: Search term. It is also stored in the module-level
            ``_keyword``.
    """
    global _keyword
    _keyword = keyword

    # pd_data is shared module state. Empty it first so a second call in the
    # same process does not write the previous keyword's rows into this
    # keyword's file.
    for column in pd_data.values():
        column.clear()

    page = get_page_for_keyword(keyword)
    # NOTE(review): this relies on the to_Dict() result exposing the "items"
    # key as an attribute rather than the dict method of the same name.
    # Confirm against Requests_Except.
    for item in page.data.items:
        resumes_info = get_resumes_info(item.id)
        for key, value in resumes_info.items():
            pd_data[key].append(value)

    df = pd.DataFrame(pd_data)
    date_stamp = datetime.datetime.now().strftime("%Y%m%d")
    df.to_excel(f'{date_stamp}_滦南_{keyword}.xlsx', index=False)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: export the resumes matching one job keyword.
    # To spot-check a single resume instead, call get_resumes_info('36859').
    integration("维修工")
|