# Crawler/web/qj050_com/main.py

import datetime

from Requests_Except import *
import pandas as pd

# Scheme and host of the site being crawled; both are handed to the MR client.
protocol = 'https'
base_url = 'www.qj050.com'

# Request headers sent with every call. They imitate a desktop Chrome 135
# browser on Windows (see 'user-agent' and the 'sec-ch-ua*' entries).
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'cache-control': 'no-cache',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://www.qj050.com',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'referer': 'https://www.qj050.com/account/quick?login_type=1&ref=/?from=h5',
    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'iframe',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
}

# Shared HTTP client used by every function in this module.
# NOTE(review): MR is presumably provided by the star import from
# Requests_Except -- confirm.
Requests = MR(base_url, protocol, headers)

# Keyword of the most recent search; also used in the export file name.
_keyword = ''

# Column-oriented accumulator for the exported sheet: one independent list per
# column, in the order the columns appear in the output.
pd_data = {
    column: []
    for column in (
        'resume_id',
        '姓名',
        '年龄',
        '生日',
        '工作经验',
        '最高学历',
        '婚姻状态',
        '电话',
        '意向岗位',
        '期望薪资',
        '工作性质',
        '求职状态',
        '工作地点',
        '工作经历1',
        '工作经历2',
        '工作经历3',
        '工作经历4',
    )
}


def login(account=None, password=None):
    """Log in to the site by POSTing the credentials to ``/account/login``.

    Args:
        account: Login name. When omitted, the ``QJ050_ACCOUNT`` environment
            variable is used, falling back to the legacy built-in account.
        password: Login password. When omitted, the ``QJ050_PASSWORD``
            environment variable is used, falling back to the legacy
            built-in password.

    Returns:
        None. The request is sent with ``autosave=True`` and
        ``cookies_dict()`` is then called on the response.
        NOTE(review): presumably this is what persists the session cookies
        for later requests -- confirm against Requests_Except.
    """
    import os  # local import so this block does not depend on file-level imports

    # NOTE(review): the fallback credentials are still embedded in the source
    # for backward compatibility; they should be moved out of version control
    # (and rotated) once callers supply them via arguments or environment.
    if account is None:
        account = os.environ.get('QJ050_ACCOUNT', '真贤8888')
    if password is None:
        password = os.environ.get('QJ050_PASSWORD', 'zhenxian8888')

    url = '/account/login'
    params = {
        'ref': '/?from=h5',
    }
    data = {
        '_type': '1',
        '_from': 'quick',
        'account': account,
        'password': password,
    }
    response = Requests.post(url, params=params, data=data, autosave=True)
    response.cookies_dict()


def get_page_for_keyword(keyword, page_index=1, page_size=100):
    """Fetch one page of resume search results for ``keyword``.

    Also records ``keyword`` in the module-level ``_keyword``.

    Args:
        keyword: Search term sent as the ``keyword`` query parameter.
        page_index: Page number to request, sent as ``pageIndex``
            (default 1, the value previously hard-coded).
        page_size: Number of results per page, sent as ``pageSize``
            (default 100, the value previously hard-coded).

    Returns:
        Whatever ``response.to_Dict()`` yields for ``GET /api/v1/resumes``.
    """
    global _keyword
    # Imported explicitly: the file never imports ``time`` itself and would
    # otherwise rely on the star import happening to expose it.
    import time

    _keyword = keyword

    # Read the clock once so both timestamps derive from the same instant;
    # '_' is sent 10 seconds (10000 ms) behind 't', as before.
    now_ms = int(time.time() * 1000)
    url = '/api/v1/resumes'
    params = {
        '_': str(now_ms - 10000),
        'tab': 'resume',
        'keyword': keyword,
        't': str(now_ms),
        'pageSize': str(page_size),
        'pageIndex': str(page_index),
        'showStatus': 'true',
    }
    response = Requests.get(url, params=params)
    return response.to_Dict()


def get_resumes_info(resumes_id):
    """Fetch one resume and flatten it into a single export row.

    Args:
        resumes_id: Identifier interpolated into ``/api/v1/resume/{id}``.

    Returns:
        dict: One value per export column -- ``resume_id``, the personal and
        job-intention fields read from the response's ``data`` object, and
        ``工作经历1`` .. ``工作经历4``. Each work-history column holds
        ``"<company>:<content>"`` for the corresponding entry of ``works``,
        or ``''`` when the resume has fewer entries.
    """
    # Imported explicitly: the file never imports ``time`` itself and would
    # otherwise rely on the star import happening to expose it.
    import time

    url = '/api/v1/resume/{}'.format(resumes_id)
    params = {
        '_': str(int(time.time() * 1000)),
        'view_type': 'resumeLibrary',
        'privacy_description': '1',
    }
    response = Requests.get(url, params=params)
    info = response.to_Dict().data

    # Treat a null/absent list as empty so a sparse resume does not abort
    # the whole export with a TypeError.
    categories = info.infoCateforyArrObj or []
    works = info.works or []

    data = {
        'resume_id': resumes_id,
        '姓名': info.name,
        '年龄': info.age,
        '生日': info.birthday,
        '工作经验': info.work_exp_value,
        '最高学历': info.edu_value,
        '婚姻状态': info.marriage_value,
        '电话': info.phone,
        '意向岗位': ','.join([item.name for item in categories]),
        '期望薪资': info.salaryDesc,
        '工作性质': info.work_type_value,
        '求职状态': info.job_instant_value,
        '工作地点': info.job_region_value,
    }

    # Always emit exactly four work-history columns, padding with ''.
    work_count = len(works)
    for slot in range(4):
        column = f'工作经历{slot + 1}'
        if slot < work_count:
            work = works[slot]
            data[column] = f"{work.company}:{work.content}"
        else:
            data[column] = ''

    return data


def integration(keyword):
    """Search resumes for ``keyword`` and export their details to Excel.

    Fetches the first result page, loads the detail of every listed resume,
    collects the rows into the module-level ``pd_data`` and writes them to
    ``<YYYYmmdd_HHMMSS>_<keyword>.xlsx`` in the current working directory.

    Args:
        keyword: Search term; also stored in the module-level ``_keyword``
            and used in the output file name.
    """
    global _keyword
    _keyword = keyword

    # pd_data lives at module level; without this reset a second call in the
    # same process would write the previous keyword's rows into the new file.
    for column in pd_data.values():
        column.clear()

    page = get_page_for_keyword(_keyword)
    for item in page.data.items:
        resumes_info = get_resumes_info(item.id)
        for key, value in resumes_info.items():
            pd_data[key].append(value)

    df = pd.DataFrame(pd_data)
    df.to_excel(f'{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}_{_keyword}.xlsx', index=False)

if __name__ == '__main__':
    # Script entry point: export every resume matching the search keyword.
    # For debugging a single resume, call get_resumes_info(<id>) directly.
    search_keyword = "财务"
    integration(search_keyword)