43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
from cfd_zp import ResumeAPI
|
|
import pandas as pd
|
|
from datetime import datetime
|
|
|
|
def fetch_multiple_pages(keyword, total_pages=10, page_size=10):
    """Fetch several result pages for ``keyword`` and checkpoint them to a CSV file.

    Pages ``1`` through ``total_pages`` are requested one at a time through
    ``ResumeAPI.fetch_resumes``. After every page that returns data, everything
    collected so far is rewritten to a timestamped CSV file (relative path), so
    a run that stops midway still leaves the pages fetched up to that point on
    disk.

    Args:
        keyword: Search keyword passed to the API; also embedded in the CSV
            file name (with file-name-unsafe characters replaced by ``_``).
        total_pages: Number of pages to request, starting at page 1.
        page_size: Number of records requested per page.

    Returns:
        A list holding the records of every page that returned data, in page
        order.
    """
    api = ResumeAPI()
    all_resumes = []

    # Path separators and characters reserved in file names on common
    # platforms. Left as-is, a keyword such as "a/b" would make the file name
    # point into a non-existent directory and the CSV write would fail.
    unsafe_chars = '\\/:*?"<>|'
    safe_keyword = "".join(
        "_" if ch in unsafe_chars else ch for ch in str(keyword)
    )

    # Build a unique CSV file name from the keyword and the current time.
    filename = f'resumes_{safe_keyword}_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
    print(f"开始采集关键词 '{keyword}' 的数据,将保存到文件: {filename}")

    for page in range(1, total_pages + 1):
        print(f"\n正在采集第 {page}/{total_pages} 页")
        resumes = api.fetch_resumes(
            keyword=keyword,
            page_size=page_size,
            page_index=page,
            save_csv=False,  # per-page CSV output is disabled; this function writes the file
        )

        if not resumes:
            print(f"第 {page} 页数据获取失败或为空")
            continue

        all_resumes.extend(resumes)
        # Rewrite the whole file with everything collected so far, so the CSV
        # on disk is always a complete snapshot of the pages fetched.
        df = pd.DataFrame(all_resumes)
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"已保存 {len(all_resumes)} 条数据到 {filename}")

    print(f"\n采集完成,共获取 {len(all_resumes)} 条数据")
    return all_resumes
|
|
|
|
def main():
    """Run one batch collection using the script's built-in settings."""
    # Search keyword, number of pages and records per page for this run.
    fetch_multiple_pages(keyword='护工', total_pages=10, page_size=10)
|
|
|
|
# Script entry point: only start collecting when executed directly, not on import.
if __name__ == "__main__":
    main()