# Crawler/web/fnrc_vip/main.py

from web.Requests_Except import *
import datetime
import os
import pandas as pd

# Default request headers. The values imitate an XHR issued by Chrome 137 on
# Windows from the site's resume-store page (see the referer and user-agent).
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'cache-control': 'no-cache',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.fnrc.vip',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://www.fnrc.vip/enterprise/resume_store/list',
    # Single-quoted so the embedded double quotes need no escaping.
    'sec-ch-ua': '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36'
    ),
}
# Session cookies sent with every request.
#
# SECURITY: the session id and the JWT below are credentials committed to
# source control. They can now be supplied through the FNRC_PHPSESSID and
# FNRC_AUTH_TOKEN environment variables; the literals remain only as
# backward-compatible fallbacks and should be rotated and removed once the
# environment variables are provisioned.
cookies = {
    "PHPSESSID": os.environ.get(
        "FNRC_PHPSESSID",
        "7e50a60cd4544448634f6f2a77c2e17d",
    ),
    "auth-token": os.environ.get(
        "FNRC_AUTH_TOKEN",
        "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NTMwMDMyNTIsImp0aSI6IjAxNDU1NjA1LTlhZDUtNDFlNS1iYzk5LWQwZGUyZTZkMWZjOCIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.ZCRc25o9J4DVykGriAXpEG5sQuJBwTrd-FpUKjnaq6Q",
    ),
    "company_sign": "",
    "company_nonce": "",
    "cuid": "",
}
# Host name and URL scheme handed to the request wrapper below.
base_url = 'www.fnrc.vip'
protocol = 'https'
# MR is not defined in this file; it arrives via the star import of
# web.Requests_Except. Presumably an HTTP client bound to base_url/protocol
# -- TODO confirm against that module.
Requests = MR(base_url, protocol)
# Register the module-level headers and cookies as the client's defaults.
Requests.set_default_headers(headers)
Requests.set_default_cookies(cookies)
# Search keyword shared by the functions below; main() overwrites it.
keyword = ""

# Column-oriented accumulator that is later turned into a DataFrame.
# Every column gets its own empty list; key order is the column order of the
# exported sheet. Trailing comments name the response field feeding the column.
pd_data = {
    column: []
    for column in (
        'resume_id',
        '姓名',  # user_name
        '求职区域',  # area_show
        '生日',  # birthday
        '学历',  # education_level_msg
        '学校',  # education.school
        '期望职务',  # expect_job
        '最后活跃时间',  # last_edit_time
        '婚姻',  # marry_status_show
        '现居地',  # residence
        '年龄',  # user_age
        '电话',  # phone_encrypt
        '性别',  # sex_show
        '求职类型',  # work_type_show
        '求职状态',  # work_status_show
        # Four work-history slots, three columns each: 经历, 时间, 内容.
        *(
            f'工作{slot}{suffix}'
            for slot in range(1, 5)
            for suffix in ('经历', '时间', '内容')
        ),
    )
}
resume_list = []


def getpageforkeyword(keyword: str, step: int = 100, page: int = 1):
    """Request one page of the company resume search for ``keyword``.

    Args:
        keyword: Text sent as the ``keywords`` filter.
        step: Value sent as ``step`` (presumably the page size -- confirm
            against the API).
        page: Value sent as ``page``. Defaults to 1, the value that used to
            be hard-coded, so existing callers are unaffected.

    Returns:
        The result of ``to_Dict()`` on the response returned by the
        module-level ``Requests`` client. The caller in this file reads its
        ``data`` attribute.
    """
    # Every filter other than keyword/step/page is sent with the same fixed
    # value on each call.
    payload = {
        "step": step,
        "page": page,
        "education_level": [],
        "arrival_time": [],
        "work_time": [],
        "area_id": [],
        "keywords": keyword,
        "work_status": "",
        "work_status_show": "求职状态",
        "category_id": "",
        "work_type": "",
        "work_type_show": "是否兼职",
        "sex": "",
        "sex_show": "性别",
        "is_head": "",
        "is_head_show": "有无照片",
        "job_id": "",
        "age": [],
        "age_show": "年龄",
        "refresh_time": 0,
        "site_id": "",
        "site_id2": "",
        "province": "",
        "city": "",
        "county": "",
        "provinceArr": [],
        "cityArr": [],
        "countyArr": [],
        "only_job_category": 0,
    }
    url = "/job/company/v1/resume/page"
    res = Requests.post(url, json=payload)
    return res.to_Dict()


def organize_information_into_to_pandas():
    """Fetch resumes for the module-level ``keyword`` and append them to ``pd_data``.

    Calls ``getpageforkeyword(keyword, 1000)`` and, for every item in the
    response's ``data``, appends one value to each column list of ``pd_data``.
    Up to four work-experience entries are exported; missing slots, and slots
    whose ``company`` is falsy, are filled with empty strings.

    A record's row is built completely before anything is appended, so an
    error while reading one record cannot leave the columns with unequal
    lengths. ``education`` / ``experience`` values of ``None`` are treated as
    empty, and a ``None`` school is exported as an empty string instead of
    raising ``TypeError`` inside ``str.join``.

    Returns:
        None. The result is the mutation of the module-level ``pd_data``.
    """
    # (output column, attribute read from the resume item)
    scalar_fields = (
        ('resume_id', 'resume_id'),
        ('姓名', 'user_name'),
        ('求职区域', 'area_show'),
        ('生日', 'birthday'),
        ('学历', 'education_level_msg'),
        ('期望职务', 'expect_job'),
        ('最后活跃时间', 'last_edit_time'),
        ('婚姻', 'marry_status_show'),
        ('现居地', 'residence'),
        ('年龄', 'user_age'),
        ('电话', 'phone_encrypt'),
        ('性别', 'sex_show'),
        ('求职类型', 'work_type_show'),
        ('求职状态', 'work_status_show'),
    )
    resp_obj = getpageforkeyword(keyword, 1000)
    for resume in resp_obj.data:
        row = {column: getattr(resume, attr) for column, attr in scalar_fields}

        # All schools of the record joined into one cell.
        schools = [edu.school or '' for edu in (resume.education or [])]
        row['学校'] = ';'.join(schools)

        jobs = resume.experience or []
        for slot in range(4):
            job = jobs[slot] if slot < len(jobs) else None
            if job is not None and job.company:
                company, time_line, content = job.company, job.time_line, job.content
            else:
                company = time_line = content = ''
            row[f'工作{slot + 1}经历'] = company
            row[f'工作{slot + 1}时间'] = time_line
            row[f'工作{slot + 1}内容'] = content

        # Commit the finished row in one go so all columns stay aligned.
        for column, value in row.items():
            pd_data[column].append(value)


def main(keywords):
    """Export the resumes matching ``keywords`` to an Excel file.

    Stores ``keywords`` in the module-level ``keyword``, collects the
    matching resumes into ``pd_data`` and writes them to
    ``<YYYYMMDD>_丰南_<keyword>.xlsx`` in the current working directory.

    Args:
        keywords: Search text; also embedded in the output file name.
    """
    global keyword
    keyword = keywords
    # pd_data is module-level and only ever appended to. Clear it so a second
    # call in the same process does not re-export the previous keyword's rows.
    for column_values in pd_data.values():
        column_values.clear()
    organize_information_into_to_pandas()
    df = pd.DataFrame(pd_data)
    date_tag = datetime.datetime.now().strftime("%Y%m%d")
    df.to_excel(f'{date_tag}_丰南_{keyword}.xlsx', index=False)


if __name__ == "__main__":
    # Script entry point: run one export for a fixed sample keyword.
    main(keywords="维修工")