正常提交,忽略警告
60
.gitignore
vendored
Normal file
@ -0,0 +1,60 @@
|
||||
*.xlsx
.idea/
**/.idea/

# === Python caches ===
__pycache__/
*.py[cod]
*$py.class

# === Environment variable files ===
.env
.env.*

# === Virtual environment directories ===
venv/
.venv/
env/
ENV/
env.bak/
venv.bak/

# === Install / build caches ===
*.egg
*.egg-info/
.eggs/
dist/
build/
pip-log.txt

# === Test-related cache files ===
.coverage
.tox/
nosetests.xml
coverage.xml
*.cover
*.py,cover

# === Databases ===
*.sqlite3
db.sqlite3

# === Log files ===
*.log
logs/
**/logs/

# === Static and media files (Django) ===
media/
static/
staticfiles/

# === IDE configuration ===
# gitignore has no trailing-comment syntax: a "#" only starts a comment at the
# beginning of a line, so each label below sits on its own line.
# PyCharm
.idea/*
*.iml
*.ipr
*.iws
# VS Code
.vscode/

# === OS-generated files ===
# macOS
.DS_Store
# Windows
Thumbs.db
|
37
APP/frida_attach.py
Normal file
@ -0,0 +1,37 @@
|
||||
# frida_attach.py
|
||||
import frida
|
||||
|
||||
APP_PACKAGE_NAME = "com.huoketuoke.www"

# Connect to the USB-attached device.
device = frida.get_usb_device()

# Find the PID of the running target.
# NOTE(review): this matches the *process name* against the package identifier.
# Depending on the Frida version the process name may be the app's display
# label instead - confirm on the target device.
pid = next(
    (proc.pid for proc in device.enumerate_processes() if proc.name == APP_PACKAGE_NAME),
    None,
)

if pid is None:
    raise RuntimeError(f"[-] 找不到运行中的 {APP_PACKAGE_NAME}")

print(f"[+] 找到进程 {APP_PACKAGE_NAME},PID: {pid}")

# Attach to the process.
session = device.attach(pid)


def on_message(message, data):
    """Print every message the injected script sends back."""
    print("[*] JS 消息:", message)


# Everything after a successful attach runs under try/finally so the session
# is detached even when script creation, loading, or input() raises.
try:
    # Inject the JS script.
    script = session.create_script("""
console.log("Frida 注入成功 - attach 模式");
""")
    script.on("message", on_message)
    script.load()

    input("[*] 按 Enter 键退出...")
finally:
    session.detach()
|
31
APP/frida_spawn.py
Normal file
@ -0,0 +1,31 @@
|
||||
import frida
|
||||
|
||||
APP_PACKAGE_NAME = "com.huoketuoke.www"

device = frida.get_usb_device()

# Cold-start the app; spawn() leaves it suspended until resume() is called.
pid = device.spawn([APP_PACKAGE_NAME])
print(f"[+] Spawned {APP_PACKAGE_NAME} with PID {pid}")


def on_message(message, data):
    """Print every message the injected script sends back."""
    print("[*] JS 消息:", message)


session = None
try:
    # Attach to the suspended process.
    session = device.attach(pid)

    # JS payload; could be replaced by reading a file.
    script = session.create_script("""
console.log("Frida 注入成功 - spawn 模式");
""")
    script.on("message", on_message)
    script.load()
except BaseException:
    # Instrumentation failed before resume(): without this cleanup the app
    # would stay suspended on the device and the session would leak.
    if session is not None:
        session.detach()
    device.kill(pid)
    raise

# Let the app run with the script in place.
device.resume(pid)
print("[+] App resumed. You can now interact with it.")

try:
    input("[*] 按 Enter 键退出...")
finally:
    session.detach()
|
15
APP/hookjs/dexclass.js
Normal file
@ -0,0 +1,15 @@
|
||||
Java.perform(function () {
    console.log("[*] 正在hook DexClassLoader");

    // Hook loadClass(String) as seen through DexClassLoader: log the requested
    // class name and return whatever this.loadClass(...) returns.
    var dexLoader = Java.use("dalvik.system.DexClassLoader");
    var dexLoadClass = dexLoader.loadClass.overload('java.lang.String');
    dexLoadClass.implementation = function (className) {
        console.log("[+] DexClassLoader.loadClass: " + className);
        return this.loadClass(className);
    };

    // Same for loadClass(String, boolean) as seen through PathClassLoader.
    var pathLoader = Java.use("dalvik.system.PathClassLoader");
    var pathLoadClass = pathLoader.loadClass.overload('java.lang.String', 'boolean');
    pathLoadClass.implementation = function (className, shouldResolve) {
        console.log("[+] PathClassLoader.loadClass: " + className);
        return this.loadClass(className, shouldResolve);
    };
});
|
46
APP/hookjs/ssl_hook.js
Normal file
@ -0,0 +1,46 @@
|
||||
// Java-layer SSL pinning bypass.
Java.perform(function () {
    console.log("[+] Start SSL Pinning Bypass (Java layer)");

    // Each hook gets its own try/catch: if one target class is missing or its
    // method cannot be hooked, the other hook is still installed.
    try {
        var TrustManagerImpl = Java.use("com.android.org.conscrypt.TrustManagerImpl");
        TrustManagerImpl.verifyChain.implementation = function (chain, authType, host, clientAuth, ocspData, tlsSctData) {
            console.log("[+] TrustManagerImpl.verifyChain bypassed for host: " + host);
            // Hand the presented chain back unchanged instead of validating it.
            return chain;
        };
    } catch (e) {
        console.log("[-] TrustManagerImpl.verifyChain hook failed: " + e);
    }

    try {
        var CertificatePinner = Java.use("okhttp3.CertificatePinner");
        CertificatePinner.check.overload("java.lang.String", "java.util.List").implementation = function (str, list) {
            console.log("[+] OkHttp3 CertificatePinner.check() bypassed for: " + str);
            // Returning without throwing makes the pin check pass.
            return;
        };
    } catch (e) {
        console.log("[-] OkHttp3 not found.");
    }
});
|
||||
|
||||
// Native-layer bypass for libssl.so.
setImmediate(function () {
    var sslModule = Process.findModuleByName("libssl.so");
    if (!sslModule) {
        // Nothing to patch when libssl.so is not loaded.
        return;
    }

    console.log("[*] libssl.so base address: " + sslModule.base);

    // Replace SSL_get_verify_result() with a stub that always returns 0.
    var verifyResultAddr = sslModule.findExportByName("SSL_get_verify_result");
    if (verifyResultAddr) {
        var alwaysOk = new NativeCallback(function (ssl) {
            console.log("[+] SSL_get_verify_result() bypassed");
            return 0;
        }, 'int', ['pointer']);
        Interceptor.replace(verifyResultAddr, alwaysOk);
    }

    // Overwrite the second argument (the verify mode) of
    // SSL_CTX_set_custom_verify() with 0 on every call.
    var customVerifyAddr = sslModule.findExportByName("SSL_CTX_set_custom_verify");
    if (customVerifyAddr) {
        Interceptor.attach(customVerifyAddr, {
            onEnter: function (args) {
                console.log("[+] SSL_CTX_set_custom_verify() called - force mode to 0");
                args[1] = ptr('0');
            }
        });
    }
});
|
63
APP/main.py
Normal file
@ -0,0 +1,63 @@
|
||||
import frida
|
||||
import time
|
||||
import sys
|
||||
|
||||
# Make sure stdout/stderr are encoded as UTF-8.
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')

# Package that main() spawns, and the Frida script file it loads into it.
APP_PACKAGE_NAME = "com.lmhl.yituoke"
SCRIPT_FILE = "./hookjs/dexclass.js"
|
||||
|
||||
def on_message(message, data):
    """Print a message received from the injected script.

    Args:
        message (dict): Message envelope; ``type`` selects the handling.
            ``'send'`` prints ``payload``; ``'error'`` prints ``stack``,
            falling back to ``description`` when no stack is present.
            Any other type is ignored.
        data: Optional binary payload accompanying the message (unused).
    """
    kind = message.get('type')
    if kind == 'send':
        print(f"[消息] {message.get('payload')}")
    elif kind == 'error':
        # Not every error message carries a stack; don't let the callback
        # itself fail with a KeyError while reporting a script error.
        detail = message.get('stack') or message.get('description', '')
        print(f"[错误] {detail}")
|
||||
|
||||
def main():
    """Spawn the target app, inject ``SCRIPT_FILE`` and keep running until Ctrl+C.

    Exits with status 0 on Ctrl+C.  Any other exception is printed with its
    traceback and swallowed.  The Frida session, once attached, is detached
    on every exit path.
    """
    # Pre-bind so the cleanup below is well-defined even when the failure
    # (or Ctrl+C) happens before attach() has returned.
    session = None
    try:
        # Connect to the device.
        device = frida.get_usb_device(timeout=5)
        print(f"[连接成功] 已连接到设备:{device.name}")

        # Start the app (spawn).
        print(f"[启动应用] 准备启动应用:{APP_PACKAGE_NAME}")
        pid = device.spawn([APP_PACKAGE_NAME])

        # Attach to the new process.
        session = device.attach(pid)
        print(f"[附加成功] 已附加到应用,PID: {pid}")

        # Load the script; read explicitly as UTF-8.
        with open(SCRIPT_FILE, encoding="utf-8") as f:
            script = session.create_script(f.read())

        script.on('message', on_message)
        script.load()
        print(f"[脚本加载成功] {SCRIPT_FILE} 脚本已成功加载!")

        # Resume the app.
        device.resume(pid)
        print("[应用恢复] 应用已恢复运行,可以开始操作了。")

        # Stay alive so the script keeps running.
        print("[保持运行] 按 Ctrl+C 退出...")
        while True:
            time.sleep(1)

    except KeyboardInterrupt:
        print("\n[退出] 正在断开连接...")
        sys.exit(0)

    except Exception as e:
        import traceback
        print(f"[出现异常] {e}")
        traceback.print_exc()

    finally:
        if session is not None:
            try:
                session.detach()
            except Exception:
                # Best-effort cleanup: a failed detach must not mask the
                # exit path already in progress.
                pass


if __name__ == '__main__':
    main()
|
25
MiniApp/鲜平鲜食/main.py
Normal file
@ -0,0 +1,25 @@
|
||||
import requests
|
||||
|
||||
|
||||
# Request headers sent with the goods-info query.
headers = {
    "Host": "www.parfresh.com",
    "authToken": "",
    "Accept": "application/json",
    "xweb_xhr": "1",
    "uuid": "b303ba60-6323-4a65-9e12-f067ba723dea",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090c11)XWEB/11275",
    "Content-Type": "application/json",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Dest": "empty",
    "Referer": "https://servicewechat.com/wx2b90b97b6821b332/19/page-frame.html",
    "Accept-Language": "zh-CN,zh;q=0.9"
}
url = "https://www.parfresh.com/api/goods/goodsInfo"
params = {
    "goods_id": "5774"
}
# requests applies no timeout by default; without one an unresponsive server
# would block this script indefinitely.
response = requests.get(url, headers=headers, params=params, timeout=10)

print(response.text)
print(response)
|
26
PDF/main.py
Normal file
@ -0,0 +1,26 @@
|
||||
import os
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
|
||||
# Source and destination PDF paths.
input_pdf_path = r"C:\Users\Franklin_Kali\Desktop\保险保单.pdf"
output_pdf_path = r"C:\Users\Franklin_Kali\Desktop\四平保单_无签名.pdf"

# Stop early when the source file is missing.
if not os.path.exists(input_pdf_path):
    raise FileNotFoundError(f"文件未找到: {input_pdf_path}")

# Open the source document and prepare an empty output document.
reader = PdfReader(input_pdf_path)
writer = PdfWriter()

# Copy the pages across, deleting each page's "/Annots" entry (all of its
# annotations, which is where the signature annotation lives) first.
for source_page in reader.pages:
    if "/Annots" in source_page:
        del source_page["/Annots"]
    writer.add_page(source_page)

# Write the annotation-free copy.
with open(output_pdf_path, "wb") as destination:
    writer.write(destination)

print(f"处理完成,新文件已保存: {output_pdf_path}")
|
BIN
web/96rz_com/96rz_com.zip
Normal file
208
web/96rz_com/Requests_Except.py
Normal file
@ -0,0 +1,208 @@
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
import logging
|
||||
import time
|
||||
from lxml import etree
|
||||
from types import SimpleNamespace
|
||||
from http.cookies import SimpleCookie
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
|
||||
class ExtendedResponse(requests.Response):
    """``requests.Response`` with parsing and cookie-export helpers."""

    def xpath(self):
        """Parse the response text as HTML and return the lxml tree.

        Raises:
            ValueError: If parsing fails.
        """
        try:
            return etree.HTML(self.text)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e))

    def to_Dict(self):
        """Decode the JSON body and convert it with ``dict_to_obj``.

        Raises:
            ValueError: If decoding or conversion fails.
        """
        try:
            return self.dict_to_obj(self.json())
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e))

    def to_Re_findall(self, regex):
        """Return ``re.findall(regex, <response text>)``.

        Raises:
            ValueError: If the search fails (e.g. an invalid pattern).
        """
        try:
            return re.findall(regex, self.text)
        except Exception as e:
            raise ValueError("Re搜索错误: " + str(e))

    def cookies_dict(self):
        """Return the response cookies as a plain dict.

        Starts from the response's cookie jar and, when a ``Set-Cookie``
        header is present, overlays the name/value pairs parsed from it.

        Raises:
            ValueError: If the cookies cannot be converted.
        """
        try:
            merged = self.cookies.get_dict()
            if 'Set-Cookie' in self.headers:
                parsed = SimpleCookie()
                parsed.load(self.headers['Set-Cookie'])
                merged.update({name: morsel.value for name, morsel in parsed.items()})
            return merged
        except Exception as e:
            raise ValueError("Cookies转换错误: " + str(e))

    def save_cookies(self, filepath, format='json'):
        """Write the cookies of this response to ``filepath``.

        Args:
            filepath (str): Destination file path.
            format (str): ``'json'``, ``'pickle'`` or ``'txt'``
                (case-insensitive); defaults to ``'json'``.

        Raises:
            ValueError: On an unsupported format or any failure while saving.
        """
        try:
            cookie_dict = self.cookies_dict()
            fmt = format.lower()
            if fmt == 'json':
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(cookie_dict, f, ensure_ascii=False, indent=4)
            elif fmt == 'pickle':
                import pickle
                with open(filepath, 'wb') as f:
                    pickle.dump(cookie_dict, f)
            elif fmt == 'txt':
                with open(filepath, 'w', encoding='utf-8') as f:
                    # One "name: value" line per cookie.
                    f.writelines(f"{key}: {value}\n" for key, value in cookie_dict.items())
            else:
                raise ValueError("不支持的格式,请选择 'json'、'pickle' 或 'txt'")
        except Exception as e:
            raise ValueError("保存cookies出错: " + str(e))

    @staticmethod
    def dict_to_obj(d):
        """Recursively turn dicts into ``SimpleNamespace`` objects.

        Lists are converted element by element; any other value is returned
        unchanged.
        """
        if isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(element) for element in d]
        if isinstance(d, dict):
            converted = {key: ExtendedResponse.dict_to_obj(value) for key, value in d.items()}
            return SimpleNamespace(**converted)
        return d
|
||||
|
||||
|
||||
class MyRequests:
    """Session-based HTTP client with retries and on-disk cookie persistence.

    Relative URLs are resolved against ``protocol://base_url``.  Successful
    responses are returned re-classed as ``ExtendedResponse``.
    """

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10,
                 default_cookies=None):
        """Create the session and seed its cookies.

        ``default_cookies`` (if given) are applied first; cookies found in the
        local cookie file are then applied on top, so a saved cookie replaces
        a default with the same name.  The cookie file name is ``base_url``
        with every ``.`` replaced by ``_`` plus ``_cookies.json``
        (e.g. ``www_zhrczp_com_cookies.json``).

        Args:
            base_url (str): Host part of the URL, without the scheme.
            protocol (str): URL scheme, ``'http'`` by default.
            retries (int): Number of retries after a failed request.
            proxy_options (bool): When true, route traffic through the proxy
                at ``http://127.0.0.1:7890``.
            default_timeout (int): Timeout used when a call passes none.
            default_cookies (dict): Initial cookies for the session.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()

        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}

        # Apply caller-supplied defaults first ...
        if default_cookies:
            self.session.cookies.update(default_cookies)

        # ... then overlay whatever was saved locally for this base_url.
        self.cookie_file = f"{self.base_url.replace('.', '_')}_cookies.json"
        try:
            with open(self.cookie_file, 'r', encoding='utf-8') as f:
                loaded_cookies = json.load(f)
                self.session.cookies.update(loaded_cookies)
            logging.info("成功加载本地 cookies")
        except FileNotFoundError:
            logging.info("本地 cookies 文件不存在,将在请求后自动保存")
        except Exception as e:
            logging.error("加载本地 cookies 失败:" + str(e))

    def _save_cookies(self):
        """Dump the session cookies to ``self.cookie_file`` as JSON.

        Failures are logged, not raised.
        """
        try:
            with open(self.cookie_file, 'w', encoding='utf-8') as f:
                json.dump(self.session.cookies.get_dict(), f, ensure_ascii=False, indent=4)
            logging.info("cookies 已保存到本地文件:" + self.cookie_file)
        except Exception as e:
            logging.error("保存 cookies 文件失败:" + str(e))

    def _build_url(self, url):
        """Return ``url`` unchanged if absolute, else join it onto the base URL."""
        if url.startswith("http://") or url.startswith("https://"):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge ``headers`` into the session's default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge ``cookies`` into the session and save them to the cookie file."""
        self.session.cookies.update(cookies)
        self._save_cookies()

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _absorb_cookies(self, response):
        """Copy the cookies carried by ``response`` into the session."""
        from http.cookies import CookieError, SimpleCookie

        self.session.cookies.update(response.cookies)

        raw_header = response.headers.get('Set-Cookie')
        if not raw_header:
            return
        parsed = SimpleCookie()
        try:
            parsed.load(raw_header)
        except CookieError as e:
            # The jar above already holds what requests itself accepted; an
            # unparseable header is not a reason to fail the request.
            logging.warning(f"Set-Cookie header could not be parsed: {e}")
            return
        for key, morsel in parsed.items():
            # Skip cookies whose value is the literal 'deleted'.
            if morsel.value.lower() != 'deleted':
                self.session.cookies.set(key, morsel.value)

    def _request(self, method, url, retries=None, autosave=False, **kwargs):
        """Send a request, retrying with exponential back-off on failure.

        Only the HTTP exchange itself (including ``raise_for_status``) is
        retried.  Cookie bookkeeping runs once, after a successful response,
        so a problem there can no longer cause the request to be sent again.

        Args:
            method (str): HTTP method.
            url (str): Absolute URL.
            retries (int | None): Retries left; defaults to ``self.retries``.
            autosave (bool): Save the session cookies to disk on success.
            **kwargs: Passed to ``Session.request``; ``timeout`` defaults to
                ``self.default_timeout``.

        Returns:
            ExtendedResponse: The successful response.

        Raises:
            Exception: The last error once all retries are used up.
        """
        if retries is None:
            retries = self.retries
        kwargs.setdefault('timeout', self.default_timeout)

        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
                break
            except Exception as e:
                if retries <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {retries},错误: {e}")
                # Back-off doubles with every retry already consumed: 1s, 2s, 4s, ...
                time.sleep(2 ** (self.retries - retries))
                retries -= 1

        self._absorb_cookies(response)

        if autosave:
            self._save_cookies()

        # Re-class in place so callers get the helper methods.
        response.__class__ = ExtendedResponse
        return response

    def get_cookies(self):
        """Return the session cookies as a plain dict.

        Raises:
            ValueError: If the cookies cannot be read.
        """
        try:
            return self.session.cookies.get_dict()
        except Exception as e:
            raise ValueError("获取 cookies 失败:" + str(e))


class MR(MyRequests):
    """Short alias for ``MyRequests``."""
    pass
|
140
web/96rz_com/main.py
Normal file
@ -0,0 +1,140 @@
|
||||
import datetime
|
||||
|
||||
from Requests_Except import *
|
||||
import pandas as pd
|
||||
|
||||
base_url = 'www.96rz.com'
|
||||
protocol = 'https'
|
||||
headers = {
|
||||
"accept": "application/json, text/plain, */*",
|
||||
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
|
||||
"authorization": "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6MTI4MTMwLCJ1c2VybmFtZSI6IueUhOePjV8xNTgyMzMiLCJwd2QiOiJiNGUzZDQyODUwMTA3YzRkMzBlNmRkYjU4N2IzZTM3ZCIsImlhdCI6MTc1MTI2NzY5MiwiZXhwIjoxNzgyODAzNjkyfQ.Q_u73JFMxjZESQC9yUAwb7V7La5bM9OT37iGl3UO_cY",
|
||||
"cache-control": "no-cache",
|
||||
"pragma": "no-cache",
|
||||
"priority": "u=1, i",
|
||||
"referer": "https://www.96rz.com/uc/enterprise/resume-library?tab=resume&keyword=%E6%9C%8D%E5%8A%A1%E5%91%98&t=1751267805224",
|
||||
"sec-ch-ua": "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\"",
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": "\"Windows\"",
|
||||
"sec-fetch-dest": "empty",
|
||||
"sec-fetch-mode": "cors",
|
||||
"sec-fetch-site": "same-origin",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
|
||||
"x-platform": "1",
|
||||
"x-site-id": "undefined"
|
||||
}
|
||||
cookies = {
|
||||
"Hm_lvt_0fcb5413ca26ff9fe1a29c6f98b5e6d0": "1751267674",
|
||||
"HMACCOUNT": "52014CC932A93E9B",
|
||||
"Hm_lpvt_0fcb5413ca26ff9fe1a29c6f98b5e6d0": "1751267677",
|
||||
"token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6MTI4MTMwLCJ1c2VybmFtZSI6IueUhOePjV8xNTgyMzMiLCJwd2QiOiJiNGUzZDQyODUwMTA3YzRkMzBlNmRkYjU4N2IzZTM3ZCIsImlhdCI6MTc1MTI2NzY5MiwiZXhwIjoxNzgyODAzNjkyfQ.Q_u73JFMxjZESQC9yUAwb7V7La5bM9OT37iGl3UO_cY",
|
||||
"token.sig": "LdkOB3mNW_a59rzyTefnRuTybegvcDHEUd4hRXc-lO8",
|
||||
"x-trace-id": "9587d2f9a1e84ae783cb2a5f055a7a51"
|
||||
}
|
||||
# MR's third positional parameter is ``retries``, not headers, so the headers
# are installed on the session explicitly instead of being passed positionally.
Requests = MR(base_url, protocol)
Requests.set_default_headers(headers)
Requests.set_default_cookies(cookies=cookies)
|
||||
_keyword = ""
|
||||
pd_data = {
|
||||
'resume_id': [],
|
||||
'姓名': [],
|
||||
'年龄': [],
|
||||
'生日': [],
|
||||
'工作经验': [],
|
||||
'最高学历': [],
|
||||
'婚姻状态': [],
|
||||
'电话': [],
|
||||
'意向岗位': [],
|
||||
'期望薪资': [],
|
||||
'工作性质': [],
|
||||
'求职状态': [],
|
||||
'工作地点': [],
|
||||
'工作经历1': [],
|
||||
'工作经历2': [],
|
||||
'工作经历3': [],
|
||||
'工作经历4': [],
|
||||
}
|
||||
|
||||
|
||||
def login():
    """Post the account credentials to ``/account/login``.

    The request is sent with ``autosave=True`` so the session cookies are
    written to the local cookie file on success.

    Returns:
        dict: The cookies of the login response, as built by
        ``cookies_dict()``.  Previously this value was computed and discarded.
    """
    # NOTE(review): the account and password are hard-coded below; consider
    # loading them from the environment or an untracked config file.
    url = '/account/login'
    params = {
        'ref': '/?from=h5',
    }
    data = {
        '_type': '1',
        '_from': 'quick',
        'account': '18244681207',
        'password': 'zhenxian8888',
    }
    response = Requests.post(url, params=params, data=data, autosave=True)
    return response.cookies_dict()
|
||||
|
||||
|
||||
def get_page_for_keyword(keyword):
    """Fetch the first page (100 entries) of resumes matching ``keyword``.

    Also stores ``keyword`` in the module-level ``_keyword``.

    Returns:
        The decoded response as produced by ``to_Dict()``.
    """
    global _keyword
    _keyword = keyword

    # Millisecond timestamps; the '_' value is set 10 seconds in the past.
    cache_buster = str(int(time.time() * 1000 - 10000))
    timestamp = str(int(time.time() * 1000))
    query = {
        '_': cache_buster,
        'tab': 'resume',
        'keyword': keyword,
        't': timestamp,
        'pageSize': '100',
        'pageIndex': '1',
        'showStatus': 'true',
    }
    return Requests.get('/api/v1/resumes', params=query).to_Dict()
|
||||
|
||||
|
||||
def get_resumes_info(resumes_id):
    """Fetch one resume and flatten it into a single export row.

    Args:
        resumes_id: Identifier placed in the ``/api/v1/resume/<id>`` path.

    Returns:
        dict: One value per export column.  The four work-history columns
        are always present; missing entries are empty strings.
    """
    query = {
        '_': str(int(time.time() * 1000)),
        'view_type': 'resumeLibrary',
        'privacy_description': '1',
    }
    response = Requests.get(f'/api/v1/resume/{resumes_id}', params=query)
    info = response.to_Dict().data

    record = {
        'resume_id': resumes_id,
        '姓名': info.name,
        '年龄': info.age,
        '生日': info.birthday,
        '工作经验': info.work_exp_value,
        '最高学历': info.edu_value,
        '婚姻状态': info.marriage_value,
        '电话': info.phone,
        '意向岗位': ','.join(category.name for category in info.infoCateforyArrObj),
        '期望薪资': info.salaryDesc,
        '工作性质': info.work_type_value,
        '求职状态': info.job_instant_value,
        '工作地点': info.job_region_value,
    }

    # Fill columns 1..4 from the first four work entries, padding with ''.
    work_history = info.works
    for slot in range(1, 5):
        column = f'工作经历{slot}'
        if slot <= len(work_history):
            job = work_history[slot - 1]
            record[column] = f"{job.company}:{job.content}"
        else:
            record[column] = ''

    return record
|
||||
|
||||
|
||||
def integration(keyword):
    """Export every resume on the first result page for ``keyword`` to Excel.

    The rows are collected in the module-level ``pd_data`` columns and written
    to ``<YYYYMMDD>_滦南_<keyword>.xlsx`` in the current directory.
    """
    global _keyword
    _keyword = keyword

    # pd_data is shared module state: empty it first so rows collected by an
    # earlier call do not end up in this keyword's export.
    for column in pd_data.values():
        column.clear()

    page = get_page_for_keyword(_keyword)
    for item in page.data.items:
        resumes_info = get_resumes_info(item.id)
        for key, value in resumes_info.items():
            pd_data[key].append(value)

    df = pd.DataFrame(pd_data)
    df.to_excel(f'{datetime.datetime.now().strftime("%Y%m%d")}_滦南_{_keyword}.xlsx', index=False)


if __name__ == '__main__':
    integration("维修工")
    # get_resumes_info('36859')
|
8
web/96rz_com/www_96rz_com_cookies.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"HMACCOUNT": "52014CC932A93E9B",
|
||||
"Hm_lpvt_0fcb5413ca26ff9fe1a29c6f98b5e6d0": "1751267677",
|
||||
"Hm_lvt_0fcb5413ca26ff9fe1a29c6f98b5e6d0": "1751267674",
|
||||
"token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6MTI4MTMwLCJ1c2VybmFtZSI6IueUhOePjV8xNTgyMzMiLCJwd2QiOiJiNGUzZDQyODUwMTA3YzRkMzBlNmRkYjU4N2IzZTM3ZCIsImlhdCI6MTc1MTI2NzY5MiwiZXhwIjoxNzgyODAzNjkyfQ.Q_u73JFMxjZESQC9yUAwb7V7La5bM9OT37iGl3UO_cY",
|
||||
"token.sig": "LdkOB3mNW_a59rzyTefnRuTybegvcDHEUd4hRXc-lO8",
|
||||
"x-trace-id": "9587d2f9a1e84ae783cb2a5f055a7a51"
|
||||
}
|
1
web/DailyMotion
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit ecfd2d227af68e5c223782e8d3538f8c1f80dbb1
|
208
web/Requests_Except.py
Normal file
@ -0,0 +1,208 @@
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
import logging
|
||||
import time
|
||||
from lxml import etree
|
||||
from types import SimpleNamespace
|
||||
from http.cookies import SimpleCookie
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
|
||||
class ExtendedResponse(requests.Response):
    """``requests.Response`` with parsing and cookie-export helpers."""

    def xpath(self):
        """Parse the response text as HTML and return the lxml tree.

        Raises:
            ValueError: If parsing fails.
        """
        try:
            return etree.HTML(self.text)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e))

    def to_Dict(self):
        """Decode the JSON body and convert it with ``dict_to_obj``.

        Raises:
            ValueError: If decoding or conversion fails.
        """
        try:
            return self.dict_to_obj(self.json())
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e))

    def to_Re_findall(self, regex):
        """Return ``re.findall(regex, <response text>)``.

        Raises:
            ValueError: If the search fails (e.g. an invalid pattern).
        """
        try:
            return re.findall(regex, self.text)
        except Exception as e:
            raise ValueError("Re搜索错误: " + str(e))

    def cookies_dict(self):
        """Return the response cookies as a plain dict.

        Starts from the response's cookie jar and, when a ``Set-Cookie``
        header is present, overlays the name/value pairs parsed from it.

        Raises:
            ValueError: If the cookies cannot be converted.
        """
        try:
            merged = self.cookies.get_dict()
            if 'Set-Cookie' in self.headers:
                parsed = SimpleCookie()
                parsed.load(self.headers['Set-Cookie'])
                merged.update({name: morsel.value for name, morsel in parsed.items()})
            return merged
        except Exception as e:
            raise ValueError("Cookies转换错误: " + str(e))

    def save_cookies(self, filepath, format='json'):
        """Write the cookies of this response to ``filepath``.

        Args:
            filepath (str): Destination file path.
            format (str): ``'json'``, ``'pickle'`` or ``'txt'``
                (case-insensitive); defaults to ``'json'``.

        Raises:
            ValueError: On an unsupported format or any failure while saving.
        """
        try:
            cookie_dict = self.cookies_dict()
            fmt = format.lower()
            if fmt == 'json':
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(cookie_dict, f, ensure_ascii=False, indent=4)
            elif fmt == 'pickle':
                import pickle
                with open(filepath, 'wb') as f:
                    pickle.dump(cookie_dict, f)
            elif fmt == 'txt':
                with open(filepath, 'w', encoding='utf-8') as f:
                    # One "name: value" line per cookie.
                    f.writelines(f"{key}: {value}\n" for key, value in cookie_dict.items())
            else:
                raise ValueError("不支持的格式,请选择 'json'、'pickle' 或 'txt'")
        except Exception as e:
            raise ValueError("保存cookies出错: " + str(e))

    @staticmethod
    def dict_to_obj(d):
        """Recursively turn dicts into ``SimpleNamespace`` objects.

        Lists are converted element by element; any other value is returned
        unchanged.
        """
        if isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(element) for element in d]
        if isinstance(d, dict):
            converted = {key: ExtendedResponse.dict_to_obj(value) for key, value in d.items()}
            return SimpleNamespace(**converted)
        return d
|
||||
|
||||
|
||||
class MyRequests:
    """Session-based HTTP client with retries and on-disk cookie persistence.

    Relative URLs are resolved against ``protocol://base_url``.  Successful
    responses are returned re-classed as ``ExtendedResponse``.
    """

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10,
                 default_cookies=None):
        """Create the session and seed its cookies.

        ``default_cookies`` (if given) are applied first; cookies found in the
        local cookie file are then applied on top, so a saved cookie replaces
        a default with the same name.  The cookie file name is ``base_url``
        with every ``.`` replaced by ``_`` plus ``_cookies.json``
        (e.g. ``www_zhrczp_com_cookies.json``).

        Args:
            base_url (str): Host part of the URL, without the scheme.
            protocol (str): URL scheme, ``'http'`` by default.
            retries (int): Number of retries after a failed request.
            proxy_options (bool): When true, route traffic through the proxy
                at ``http://127.0.0.1:7890``.
            default_timeout (int): Timeout used when a call passes none.
            default_cookies (dict): Initial cookies for the session.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()

        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}

        # Apply caller-supplied defaults first ...
        if default_cookies:
            self.session.cookies.update(default_cookies)

        # ... then overlay whatever was saved locally for this base_url.
        self.cookie_file = f"{self.base_url.replace('.', '_')}_cookies.json"
        try:
            with open(self.cookie_file, 'r', encoding='utf-8') as f:
                loaded_cookies = json.load(f)
                self.session.cookies.update(loaded_cookies)
            logging.info("成功加载本地 cookies")
        except FileNotFoundError:
            logging.info("本地 cookies 文件不存在,将在请求后自动保存")
        except Exception as e:
            logging.error("加载本地 cookies 失败:" + str(e))

    def _save_cookies(self):
        """Dump the session cookies to ``self.cookie_file`` as JSON.

        Failures are logged, not raised.
        """
        try:
            with open(self.cookie_file, 'w', encoding='utf-8') as f:
                json.dump(self.session.cookies.get_dict(), f, ensure_ascii=False, indent=4)
            logging.info("cookies 已保存到本地文件:" + self.cookie_file)
        except Exception as e:
            logging.error("保存 cookies 文件失败:" + str(e))

    def _build_url(self, url):
        """Return ``url`` unchanged if absolute, else join it onto the base URL."""
        if url.startswith("http://") or url.startswith("https://"):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge ``headers`` into the session's default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge ``cookies`` into the session and save them to the cookie file."""
        self.session.cookies.update(cookies)
        self._save_cookies()

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; see ``_request`` for retry/cookie handling."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _absorb_cookies(self, response):
        """Copy the cookies carried by ``response`` into the session."""
        from http.cookies import CookieError, SimpleCookie

        self.session.cookies.update(response.cookies)

        raw_header = response.headers.get('Set-Cookie')
        if not raw_header:
            return
        parsed = SimpleCookie()
        try:
            parsed.load(raw_header)
        except CookieError as e:
            # The jar above already holds what requests itself accepted; an
            # unparseable header is not a reason to fail the request.
            logging.warning(f"Set-Cookie header could not be parsed: {e}")
            return
        for key, morsel in parsed.items():
            # Skip cookies whose value is the literal 'deleted'.
            if morsel.value.lower() != 'deleted':
                self.session.cookies.set(key, morsel.value)

    def _request(self, method, url, retries=None, autosave=False, **kwargs):
        """Send a request, retrying with exponential back-off on failure.

        Only the HTTP exchange itself (including ``raise_for_status``) is
        retried.  Cookie bookkeeping runs once, after a successful response,
        so a problem there can no longer cause the request to be sent again.

        Args:
            method (str): HTTP method.
            url (str): Absolute URL.
            retries (int | None): Retries left; defaults to ``self.retries``.
            autosave (bool): Save the session cookies to disk on success.
            **kwargs: Passed to ``Session.request``; ``timeout`` defaults to
                ``self.default_timeout``.

        Returns:
            ExtendedResponse: The successful response.

        Raises:
            Exception: The last error once all retries are used up.
        """
        if retries is None:
            retries = self.retries
        kwargs.setdefault('timeout', self.default_timeout)

        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
                break
            except Exception as e:
                if retries <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {retries},错误: {e}")
                # Back-off doubles with every retry already consumed: 1s, 2s, 4s, ...
                time.sleep(2 ** (self.retries - retries))
                retries -= 1

        self._absorb_cookies(response)

        if autosave:
            self._save_cookies()

        # Re-class in place so callers get the helper methods.
        response.__class__ = ExtendedResponse
        return response

    def get_cookies(self):
        """Return the session cookies as a plain dict.

        Raises:
            ValueError: If the cookies cannot be read.
        """
        try:
            return self.session.cookies.get_dict()
        except Exception as e:
            raise ValueError("获取 cookies 失败:" + str(e))


class MR(MyRequests):
    """Short alias for ``MyRequests``."""
    pass
|
1
web/TS_resume_spider
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit f41404e1fdc6e2018245ecdea85148a3c9e99069
|
1
web/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from web import Requests_Except
|
137
web/cfdzp/cfd_zp.py
Normal file
@ -0,0 +1,137 @@
|
||||
import requests
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import time
|
||||
import random
|
||||
import urllib3
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
# 禁用 SSL 警告
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
class ResumeAPI:
|
||||
def __init__(self):
    # Prefix used to build every endpoint URL in this class.
    self.base_url = 'https://www.qj050.com/api/v1'
    # Headers sent with every request.
    # NOTE(review): the Authorization and Cookie values are hard-coded
    # credentials committed to the repository - consider loading them from
    # the environment or an untracked config file.
    self.headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6NDgxNTMsInVzZXJuYW1lIjoi55yf6LSkODg4OCIsInB3ZCI6IjFiYmJjNzc5OGRkMTFiNTI2YWQ4ZTVmYTYyNWY5MjVkIiwiaWF0IjoxNzQyODgzNzU3LCJleHAiOjE3NzQ0MTk3NTd9.sLsOLcTnxoB0iWbks7_9IVp9OmDPlo0cKOwL6qHcID8',
        'Connection': 'keep-alive',
        'Cookie': 'token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6NDgxNTMsInVzZXJuYW1lIjoi55yf6LSkODg4OCIsInB3ZCI6IjFiYmJjNzc5OGRkMTFiNTI2YWQ4ZTVmYTYyNWY5MjVkIiwiaWF0IjoxNzQyODgzNzU3LCJleHAiOjE3NzQ0MTk3NTd9.sLsOLcTnxoB0iWbks7_9IVp9OmDPlo0cKOwL6qHcID8;token.sig=SiletSGnwThzp8gd2-IEaawgh0aMNhG8ZduDjcH5syA;x-trace-id=5cbbd6e2d49347e2893925bbf110eb37',
        'Host': 'www.qj050.com',
        'User-Agent': 'PostmanRuntime-ApipostRuntime/1.1.0',
        'x-platform': '1'
    }
    # Attempts per request, and the base delay in seconds between retries
    # (both read by _make_request).
    self.max_retries = 3
    self.retry_delay = 2
|
||||
|
||||
def _make_request(self, url, params=None, method='get'):
    """Send a request with retries and return the decoded JSON body.

    Args:
        url (str): Absolute URL.
        params (dict | None): Query parameters for ``'get'``; sent as the
            JSON body for any other ``method``.
        method (str): ``'get'`` issues a GET; anything else issues a POST.

    Returns:
        The decoded JSON response.

    Raises:
        requests.exceptions.RequestException: When the last attempt fails.
    """
    # requests applies no timeout by default; without one a stalled
    # connection would block the calling thread forever.
    timeout = 30
    for attempt in range(self.max_retries):
        try:
            # Random pause before every attempt to avoid hammering the server.
            time.sleep(random.uniform(1, 3))

            # NOTE: certificate verification is deliberately disabled here.
            if method == 'get':
                response = requests.get(url, headers=self.headers, params=params,
                                        verify=False, timeout=timeout)
            else:
                response = requests.post(url, headers=self.headers, json=params,
                                         verify=False, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            if attempt == self.max_retries - 1:
                print(f"请求失败 ({url}): {str(e)}")
                raise
            print(f"重试请求 {attempt + 1}/{self.max_retries}")
            # Linear back-off: retry_delay, 2 * retry_delay, ...
            time.sleep(self.retry_delay * (attempt + 1))
|
||||
|
||||
def get_name(self, resume_id, blurred_name):
|
||||
url = f'{self.base_url}/resume/{resume_id}'
|
||||
params = {'_': int(time.time() * 1000)}
|
||||
try:
|
||||
data = self._make_request(url, params) # 后续详细信息可以从这获取
|
||||
return data.get('data', {}).get('name', '')
|
||||
except:
|
||||
return blurred_name
|
||||
|
||||
def get_contact_info(self, resume_id):
|
||||
url = f'{self.base_url}/resume/{resume_id}/contact'
|
||||
params = {'_': int(time.time() * 1000)}
|
||||
try:
|
||||
data = self._make_request(url, params)
|
||||
return {
|
||||
'phone': data.get('data', {}).get('phone', ''),
|
||||
'real_name': data.get('data', {}).get('real_name', '')
|
||||
}
|
||||
except:
|
||||
return {'phone': '', 'real_name': ''}
|
||||
|
||||
def fetch_resumes(self, keyword='护工', page_size=10, page_index=1, save_csv=True):
|
||||
url = f'{self.base_url}/resumes'
|
||||
params = {
|
||||
'_': int(time.time() * 1000),
|
||||
'tab': 'resume',
|
||||
'keyword': keyword,
|
||||
't': int(time.time() * 1000),
|
||||
'info_subarea': '',
|
||||
'info_category': '',
|
||||
'pageSize': page_size,
|
||||
'pageIndex': page_index,
|
||||
'showStatus': 'true'
|
||||
}
|
||||
|
||||
try:
|
||||
data = self._make_request(url, params)
|
||||
|
||||
items = data.get('data', {}).get('items', [])
|
||||
if not items:
|
||||
return []
|
||||
|
||||
def process_resume(item):
|
||||
try:
|
||||
resume_id = item.get('id')
|
||||
blurred_name = item.get('name_value', '')
|
||||
contact_info = self.get_contact_info(resume_id)
|
||||
name_value = self.get_name(resume_id, blurred_name)
|
||||
|
||||
category_names = [c.get('name', '') for c in item.get('infoCateforyArrObj', [])]
|
||||
categories_str = ','.join(category_names)
|
||||
|
||||
return {
|
||||
'name_value': blurred_name,
|
||||
'age': item.get('age', ''),
|
||||
'edu_value': item.get('edu_value', ''),
|
||||
'job_instant_value': item.get('job_instant_value', ''),
|
||||
'job_salary_from': item.get('job_salary_from', ''),
|
||||
'job_salary_to': item.get('job_salary_to', ''),
|
||||
'categories': categories_str,
|
||||
'phone': contact_info['phone'],
|
||||
'real_name': name_value
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"处理简历失败: {str(e)}")
|
||||
return None
|
||||
|
||||
resumes = []
|
||||
with ThreadPoolExecutor(max_workers=10) as executor:
|
||||
future_to_resume = {executor.submit(process_resume, item): item for item in items}
|
||||
for idx, future in enumerate(as_completed(future_to_resume), 1):
|
||||
result = future.result()
|
||||
print(f"已处理: {idx}/{len(items)}")
|
||||
if result:
|
||||
resumes.append(result)
|
||||
|
||||
# 保存 CSV(可选)
|
||||
if resumes and save_csv:
|
||||
df = pd.DataFrame(resumes)
|
||||
filename = f'resumes_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
|
||||
df.to_csv(filename, index=False, encoding='utf-8-sig')
|
||||
print(f'数据已保存到 {filename}')
|
||||
|
||||
return resumes
|
||||
|
||||
except Exception as e:
|
||||
print(f"获取简历数据失败: {str(e)}")
|
||||
return []
|
||||
|
||||
if __name__ == '__main__':
    # Ad-hoc run: fetch a single page with the default arguments.
    ResumeAPI().fetch_resumes()
|
43
web/cfdzp/main.py
Normal file
@ -0,0 +1,43 @@
|
||||
from cfd_zp import ResumeAPI
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
def fetch_multiple_pages(keyword, total_pages=10, page_size=10):
    """Collect several result pages for ``keyword`` into a single CSV file.

    Pages ``1..total_pages`` are fetched one after another. After every page
    that yields rows, the whole accumulated result is rewritten to the CSV,
    so a partial run still leaves a usable file.

    Args:
        keyword: Search keyword; it is also embedded in the output file name.
        total_pages: Number of pages to request.
        page_size: Number of items requested per page.

    Returns:
        The list of all collected resume rows.
    """
    client = ResumeAPI()
    all_resumes = []

    # One output file per run; the timestamp keeps runs from colliding.
    filename = f'resumes_{keyword}_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
    print(f"开始采集关键词 '{keyword}' 的数据,将保存到文件: {filename}")

    for page in range(1, total_pages + 1):
        print(f"\n正在采集第 {page}/{total_pages} 页")
        # Per-page CSV export is switched off; this function owns the file.
        resumes = client.fetch_resumes(
            keyword=keyword,
            page_size=page_size,
            page_index=page,
            save_csv=False,
        )
        if not resumes:
            print(f"第 {page} 页数据获取失败或为空")
            continue

        all_resumes.extend(resumes)
        # Rewrite the file with everything gathered so far.
        pd.DataFrame(all_resumes).to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"已保存 {len(all_resumes)} 条数据到 {filename}")

    print(f"\n采集完成,共获取 {len(all_resumes)} 条数据")
    return all_resumes
|
||||
|
||||
def main():
    """Run the batch collection with this script's fixed keyword and paging."""
    # Keyword and paging settings for the run.
    settings = {
        'keyword': '护工',
        'total_pages': 10,
        'page_size': 10,
    }

    # Start the batch collection.
    fetch_multiple_pages(**settings)


if __name__ == '__main__':
    main()
|
11
web/cfdzp/resumes_20250327_185144.csv
Normal file
@ -0,0 +1,11 @@
|
||||
name_value,age,edu_value,job_instant_value,job_salary_from,job_salary_to,categories,phone,real_name
|
||||
李政伟,36,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,"招聘经理/主任,测试工程师,经理助理,项目经理,储备干部",17600364716,
|
||||
李先生,39,大专,"我目前已离职, 可快速到岗",6000.0,8000.0,"物业管理,项目经理,招投标",18033572860,
|
||||
刘先生,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,部门主管,16643562515,
|
||||
孙晓磊,36,本科,我目前正在职,考虑换个环境,4000.0,8000.0,工业工厂其他相关职位,18332778778,
|
||||
于萍,23,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事文员,行政文员,前台接待,文员,文职文员其他相关职位",15132578638,
|
||||
王德梅,58,高中,"我目前已离职, 可快速到岗",,,其它相关职位,17736517991,
|
||||
李女士,35,大专,"我目前已离职, 可快速到岗",4000.0,7000.0,"保安,文员,招投标",18203357869,
|
||||
嘉嘉,24,高中,"我目前已离职, 可快速到岗",0.0,0.0,销售助理,18134247015,
|
||||
王女士_324932,46,不限,"我目前已离职, 可快速到岗",,,护士/护理,15830585203,
|
||||
张伟,31,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,设备修理",15075586374,
|
|
101
web/cfdzp/resumes_护工_20250327_180952.csv
Normal file
@ -0,0 +1,101 @@
|
||||
name_value,age,edu_value,job_instant_value,job_salary_from,job_salary_to,categories,phone,real_name
|
||||
李先生,36,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,"招聘经理/主任,测试工程师,经理助理,项目经理,储备干部",17600364716,
|
||||
李先生,39,大专,"我目前已离职, 可快速到岗",6000.0,8000.0,"物业管理,项目经理,招投标",18033572860,
|
||||
刘先生,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,部门主管,16643562515,
|
||||
孙先生,36,本科,我目前正在职,考虑换个环境,4000.0,8000.0,工业工厂其他相关职位,18332778778,
|
||||
于女士,23,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事文员,行政文员,前台接待,文员,文职文员其他相关职位",15132578638,
|
||||
王女士,58,高中,"我目前已离职, 可快速到岗",,,其它相关职位,17736517991,
|
||||
李女士,35,大专,"我目前已离职, 可快速到岗",4000.0,7000.0,"保安,文员,招投标",18203357869,
|
||||
嘉女士,24,高中,"我目前已离职, 可快速到岗",0.0,0.0,销售助理,18134247015,
|
||||
王女士,46,不限,"我目前已离职, 可快速到岗",,,护士/护理,15830585203,
|
||||
张先生,31,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,设备修理",15075586374,
|
||||
杨女士,33,大专,我目前正在职,考虑换个环境,2000.0,4000.0,文员,17796957849,
|
||||
张女士,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,物业管理,人事文员,行政文员",13472959755,
|
||||
卫先生,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,司机,15127515220,
|
||||
韩先生,34,本科,我目前正在职,考虑换个环境,7000.0,12000.0,电工/锅炉工,18903152186,
|
||||
陈先生,33,大专,我目前正在职,考虑换个环境,5000.0,6000.0,操作工/普工,15032902921,
|
||||
董先生,36,本科,"我目前已离职, 可快速到岗",7000.0,8000.0,"行政经理,行政助理,行政外联专员",15833464006,
|
||||
高女士,24,本科,我目前正在职,考虑换个环境,4000.0,7000.0,"行政文员,市场销售其他相关职位,应届毕业生,教务/教务管理,文体培训其他相关职位",19930024957,
|
||||
李先生,34,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"产品工艺/制程工程师,工业工厂其他相关职位",15632548320,
|
||||
刘先生,24,大专,我目前正在职,考虑换个环境,4000.0,5000.0,"电气工程师,电工/锅炉工,电器工程师,电气维修,自动控制",13230526763,
|
||||
成先生,34,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,会计,16631565969,
|
||||
李女士,39,高中以下,"我目前已离职, 可快速到岗",,,"物流主管,推(营、促)销员,市场销售其他相关",15732594271,
|
||||
艾先生,31,大专,我目前正在职,考虑换个环境,6000.0,8000.0,"操作工/普工,组长/拉长,计划员/调度员",15931517465,
|
||||
夏先生,25,大专,我目前正在职,考虑换个环境,6000.0,9000.0,电工/锅炉工,18132674564,
|
||||
王先生,38,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,生产经理,15832531755,
|
||||
郑女士,49,高中以下,"我目前已离职, 可快速到岗",1000.0,2000.0,餐饮休闲其他相关职位,13131592702,
|
||||
王先生,27,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,"工艺工程师,无机化工,有机化工,精细化工",18713472122,
|
||||
刘先生,30,大专,我目前正在职,考虑换个环境,5000.0,6000.0,"文员,硬件工程师,网络硬件其他相关职位",18631558615,
|
||||
姚先生,23,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"操作工/普工,应届毕业生,普工技工其他相关职位",18633325443,
|
||||
高先生,41,本科,"我目前已离职, 可快速到岗",9000.0,10000.0,"机电一体化,工程经理/主管,工程设备工程师,设备经理/主管,工业工厂其他相关职位",13933476359,
|
||||
张先生,29,不限,我目前正在职,考虑换个环境,0.0,0.0,电工/锅炉工,18714117412,
|
||||
高先生,25,大专,"我目前已离职, 可快速到岗",0.0,0.0,档案管理,15373158665,
|
||||
孙女士,29,本科,"我目前已离职, 可快速到岗",0.0,0.0,"会计助理,文员,出纳",15633977516,
|
||||
高女士,32,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"人事经理,行政经理,行政人事其他相关职位",18532503671,
|
||||
女女士,47,高中以下,"我目前已离职, 可快速到岗",4000.0,9000.0,工业工厂其他相关职位,15001005401,
|
||||
毕先生,28,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,物业管理,仓库管理员",17736578597,
|
||||
李先生,22,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,电气工程师,18134151697,
|
||||
王先生,28,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"销售经理,销售主管",15100595409,
|
||||
郑先生,16,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"电工/锅炉工,文员,管道(水、电),机电一体化,安全员",13273553623,
|
||||
邢先生,28,中专/技校,"我目前已离职, 可快速到岗",5000.0,8000.0,操作工/普工,17332835118,
|
||||
赵女士,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,行政文员,文员",13303349597,
|
||||
李先生,33,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,能源环保其他相关职位,18232580171,
|
||||
宋女士,30,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事助理,行政助理,人事文员,行政文员,行政人事其他相关职位",17736543160,
|
||||
刘先生,36,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电气工程师,电工/锅炉工",19930055542,
|
||||
李女士,24,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"营业员/店员,置业顾问,物业管理,服务员/收银员/迎宾,仓库管理员",13031553751,
|
||||
乐先生,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"电气工程师,电工/锅炉工,工业工厂其他相关职位",15511951165,
|
||||
杨先生,36,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"电工/锅炉工,操作工/普工",15033463533,
|
||||
王先生,37,大专,"我目前已离职, 可快速到岗",0.0,0.0,"行政经理,厂长/副厂长,生产经理",13832558785,
|
||||
孙先生,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,7000.0,司机后勤其他相关职位,15102556611,
|
||||
金先生,30,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"生管主管/督导,计划员/调度员,安全主任,安全员,工业工厂其他相关职位",18532521663,
|
||||
孙先生,45,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,操作工/普工,普工技工其他相关职位",13103258527,
|
||||
江先生,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"水利、水电,核电、火电,电厂、电力,能源环保其他相关职位",13722578791,
|
||||
赵女士,32,大专,我目前正在职,考虑换个环境,3000.0,5000.0,"行政助理,行政文员,客户服务",15033383977,
|
||||
任先生,53,中专/技校,我目前正在职,考虑换个环境,0.0,0.0,普工技工其他相关职位,18032511802,
|
||||
先先生,62,不限,"我目前已离职, 可快速到岗",0.0,0.0,"保安,人事文员,行政文员,安全员",13582958606,
|
||||
郭女士,25,大专,"我目前已离职, 可快速到岗",,,其它相关职位,13231577871,
|
||||
邢女士,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,人事文员,17331579095,
|
||||
曹先生,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,电脑操作员/打字员,文员,文职文员其他相关职位,测试工程师",13121174103,
|
||||
聂先生,31,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"操作工/普工,司机",19930010655,
|
||||
王女士,35,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"销售主管,行政助理,招聘专员,行政人事其他相关职位,市场销售其他相关",13426090458,
|
||||
赵先生,38,大专,"我目前已离职, 可快速到岗",,,"电气维修,自动控制",15833534957,
|
||||
聂先生,23,大专,我是应届毕业生,0.0,0.0,应届毕业生,15503327285,
|
||||
侯先生,26,本科,"我目前已离职, 可快速到岗",0.0,0.0,应届毕业生,13031970785,
|
||||
冯女士,38,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计助理,出纳,会计财务其他相关职位,文体培训其他相关职位",15931510441,
|
||||
刘先生,28,不限,"我目前已离职, 可快速到岗",6000.0,8000.0,售前/售后支持,18713822511,
|
||||
刘女士,29,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事经理,档案管理,会计",18231596068,
|
||||
孙女士,30,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,"行政文员,招投标,统计员",15930531215,
|
||||
李先生,33,本科,"我目前已离职, 可快速到岗",6000.0,11000.0,销售经理,13091088301,
|
||||
王女士,28,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,"产品经理,产品/品牌企划,售前/售后支持,家具设计,采购员",18802419065,
|
||||
白女士,26,大专,"我目前已离职, 可快速到岗",,,其它相关职位,19931423082,
|
||||
刘先生,19,高中以下,我目前正在职,考虑换个环境,3000.0,5000.0,"大堂经理/副理,服务员/收银员/迎宾",15230538512,
|
||||
王女士,24,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,热线咨询,话务员,文员,投诉处理",15383150933,
|
||||
付先生,56,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,"物业管理,管理运营其他相关职位",15033982787,
|
||||
李女士,30,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"文职文员其他相关职位,行政人事其他相关职位,文体培训其他相关职位",13262759509,
|
||||
冯先生,45,高中以下,"我目前已离职, 可快速到岗",5000.0,7000.0,"水工/木工/油漆工,司机,护士/护理,叉车工,针灸推拿",18931483153,
|
||||
蔡女士,23,大专,"我目前已离职, 可快速到岗",3000.0,4000.0,"仓库管理员,人事文员,行政文员,文员",19833993360,
|
||||
赵女士,31,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,人事文员,文员,仓库管理员,文体培训其他相关职位",13171520203,
|
||||
张先生,27,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,普工技工其他相关职位,,
|
||||
张先生,31,硕士,"我目前已离职, 可快速到岗",8000.0,9000.0,大学教师,,
|
||||
孙先生,26,本科,"我目前已离职, 可快速到岗",5000.0,6000.0,"电工/锅炉工,电厂、电力,电脑操作员/打字员",,
|
||||
王女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"会计,电工/锅炉工",,
|
||||
周先生,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,人事文员",,
|
||||
邢女士,32,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事经理,人事助理,文化艺术,行政助理,文员",,
|
||||
张女士,31,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"人事文员,行政文员",,
|
||||
马先生,49,高中,"我目前已离职, 可快速到岗",6000.0,7000.0,"电工/锅炉工,操作工/普工,钳、钣、铆、冲、焊、铸,普工技工其他相关职位",,
|
||||
陶先生,37,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,操作工/普工,,
|
||||
刘先生,44,本科,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,自动控制",,
|
||||
毕先生,37,大专,我目前正在职,考虑换个环境,6000.0,8000.0,电工/锅炉工,,
|
||||
董女士,27,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"商务人员,国际业务,外贸员",,
|
||||
静女士,27,本科,"我目前已离职, 可快速到岗",0.0,0.0,"行政文员,行政人事其他相关职位",,
|
||||
李女士,35,本科,我目前正在职,考虑换个环境,0.0,0.0,"档案管理,大学教师,教务/教务管理,文体培训其他相关职位",,
|
||||
齐先生,38,大专,我目前正在职,考虑换个环境,12000.0,20000.0,"机电一体化,机械仪表其他相关职位,工业工厂其他相关职位",,
|
||||
李先生,37,高中,"我目前已离职, 可快速到岗",5000.0,8000.0,"制冷、暖通,能源环保其他相关职位,部门主管,项目经理,管理运营其他相关职位",,
|
||||
郑先生,28,大专,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,测试工程师",,
|
||||
林先生,40,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"工程经理/主管,工程设备工程师,工业工厂其他相关职位",,
|
||||
周女士,38,大专,"我目前已离职, 可快速到岗",,,"文职文员其他相关职位,行政人事其他相关职位,市场销售其他相关职位,客户服务其他相关职位",,
|
||||
李先生,35,中专/技校,我目前正在职,考虑换个环境,5000.0,8000.0,"操作工/普工,石油/天燃气/储运,仓库管理员,工业工厂其他相关职位,普工技工其他相关职位",,
|
||||
安女士,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计,会计助理,出纳,统计,其它相关职位",,
|
||||
董先生,30,中专/技校,"我目前已离职, 可快速到岗",,,"操作工/普工,生管员,司机,叉车工,仓库管理员",,
|
||||
丁女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事文员,行政人事其他相关职位",,
|
||||
李女士,28,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,行政文员,,
|
|
101
web/cfdzp/resumes_护工_20250327_182930.csv
Normal file
@ -0,0 +1,101 @@
|
||||
name_value,age,edu_value,job_instant_value,job_salary_from,job_salary_to,categories,phone,real_name
|
||||
李政伟,36,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,"招聘经理/主任,测试工程师,经理助理,项目经理,储备干部",17600364716,
|
||||
李先生,39,大专,"我目前已离职, 可快速到岗",6000.0,8000.0,"物业管理,项目经理,招投标",18033572860,
|
||||
刘先生,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,部门主管,16643562515,
|
||||
孙晓磊,36,本科,我目前正在职,考虑换个环境,4000.0,8000.0,工业工厂其他相关职位,18332778778,
|
||||
于萍,23,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事文员,行政文员,前台接待,文员,文职文员其他相关职位",15132578638,
|
||||
王德梅,58,高中,"我目前已离职, 可快速到岗",,,其它相关职位,17736517991,
|
||||
李女士,35,大专,"我目前已离职, 可快速到岗",4000.0,7000.0,"保安,文员,招投标",18203357869,
|
||||
嘉嘉,24,高中,"我目前已离职, 可快速到岗",0.0,0.0,销售助理,18134247015,
|
||||
王女士_324932,46,不限,"我目前已离职, 可快速到岗",,,护士/护理,15830585203,
|
||||
张伟,31,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,设备修理",15075586374,
|
||||
杨小雪,33,大专,我目前正在职,考虑换个环境,2000.0,4000.0,文员,17796957849,
|
||||
张女士,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,物业管理,人事文员,行政文员",13472959755,
|
||||
卫庆,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,司机,15127515220,
|
||||
韩子建,34,本科,我目前正在职,考虑换个环境,7000.0,12000.0,电工/锅炉工,18903152186,
|
||||
陈光,33,大专,我目前正在职,考虑换个环境,5000.0,6000.0,操作工/普工,15032902921,
|
||||
董鑫,36,本科,"我目前已离职, 可快速到岗",7000.0,8000.0,"行政经理,行政助理,行政外联专员",15833464006,
|
||||
高倩,24,本科,我目前正在职,考虑换个环境,4000.0,7000.0,"行政文员,市场销售其他相关职位,应届毕业生,教务/教务管理,文体培训其他相关职位",19930024957,
|
||||
李先生,34,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"产品工艺/制程工程师,工业工厂其他相关职位",15632548320,
|
||||
刘志杰,24,大专,我目前正在职,考虑换个环境,4000.0,5000.0,"电气工程师,电工/锅炉工,电器工程师,电气维修,自动控制",13230526763,
|
||||
成浩,34,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,会计,16631565969,
|
||||
李小芹,39,高中以下,"我目前已离职, 可快速到岗",,,"物流主管,推(营、促)销员,市场销售其他相关",15732594271,
|
||||
艾顺坤,31,大专,我目前正在职,考虑换个环境,6000.0,8000.0,"操作工/普工,组长/拉长,计划员/调度员",15931517465,
|
||||
夏铭飞,25,大专,我目前正在职,考虑换个环境,6000.0,9000.0,电工/锅炉工,18132674564,
|
||||
王朝,38,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,生产经理,15832531755,
|
||||
郑忆新,49,高中以下,"我目前已离职, 可快速到岗",1000.0,2000.0,餐饮休闲其他相关职位,13131592702,
|
||||
王荣辉,27,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,"工艺工程师,无机化工,有机化工,精细化工",18713472122,
|
||||
刘同禹,30,大专,我目前正在职,考虑换个环境,5000.0,6000.0,"文员,硬件工程师,网络硬件其他相关职位",18631558615,
|
||||
姚希烨,23,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"操作工/普工,应届毕业生,普工技工其他相关职位",18633325443,
|
||||
高先生,41,本科,"我目前已离职, 可快速到岗",9000.0,10000.0,"机电一体化,工程经理/主管,工程设备工程师,设备经理/主管,工业工厂其他相关职位",13933476359,
|
||||
张芳铭,29,不限,我目前正在职,考虑换个环境,0.0,0.0,电工/锅炉工,18714117412,
|
||||
高宇,25,大专,"我目前已离职, 可快速到岗",0.0,0.0,档案管理,15373158665,
|
||||
孙飞宇,29,本科,"我目前已离职, 可快速到岗",0.0,0.0,"会计助理,文员,出纳",15633977516,
|
||||
高女士,32,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"人事经理,行政经理,行政人事其他相关职位",18532503671,
|
||||
女士,47,高中以下,"我目前已离职, 可快速到岗",4000.0,9000.0,工业工厂其他相关职位,15001005401,
|
||||
毕文轩,28,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,物业管理,仓库管理员",17736578597,
|
||||
李明阳,22,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,电气工程师,18134151697,
|
||||
王猛,28,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"销售经理,销售主管",15100595409,
|
||||
郑咏徽,16,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"电工/锅炉工,文员,管道(水、电),机电一体化,安全员",13273553623,
|
||||
邢宝代,28,中专/技校,"我目前已离职, 可快速到岗",5000.0,8000.0,操作工/普工,17332835118,
|
||||
赵曼,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,行政文员,文员",13303349597,
|
||||
李帆,33,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,能源环保其他相关职位,18232580171,
|
||||
宋女士,30,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事助理,行政助理,人事文员,行政文员,行政人事其他相关职位",17736543160,
|
||||
刘景欣,36,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电气工程师,电工/锅炉工",19930055542,
|
||||
李xx,24,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"营业员/店员,置业顾问,物业管理,服务员/收银员/迎宾,仓库管理员",13031553751,
|
||||
乐子强,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"电气工程师,电工/锅炉工,工业工厂其他相关职位",15511951165,
|
||||
杨建乐,36,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"电工/锅炉工,操作工/普工",15033463533,
|
||||
王朝,37,大专,"我目前已离职, 可快速到岗",0.0,0.0,"行政经理,厂长/副厂长,生产经理",13832558785,
|
||||
孙季阳,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,7000.0,司机后勤其他相关职位,15102556611,
|
||||
金先生,30,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"生管主管/督导,计划员/调度员,安全主任,安全员,工业工厂其他相关职位",18532521663,
|
||||
孙泽泉,45,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,操作工/普工,普工技工其他相关职位",13103258527,
|
||||
江临,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"水利、水电,核电、火电,电厂、电力,能源环保其他相关职位",13722578791,
|
||||
赵影,32,大专,我目前正在职,考虑换个环境,3000.0,5000.0,"行政助理,行政文员,客户服务",15033383977,
|
||||
任长青,53,中专/技校,我目前正在职,考虑换个环境,0.0,0.0,普工技工其他相关职位,18032511802,
|
||||
先生,62,不限,"我目前已离职, 可快速到岗",0.0,0.0,"保安,人事文员,行政文员,安全员",13582958606,
|
||||
郭一帆,25,大专,"我目前已离职, 可快速到岗",,,其它相关职位,13231577871,
|
||||
邢雅婷,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,人事文员,17331579095,
|
||||
曹文琪,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,电脑操作员/打字员,文员,文职文员其他相关职位,测试工程师",13121174103,
|
||||
聂灿,31,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"操作工/普工,司机",19930010655,
|
||||
王建霞,35,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"销售主管,行政助理,招聘专员,行政人事其他相关职位,市场销售其他相关",13426090458,
|
||||
赵绍鑫,38,大专,"我目前已离职, 可快速到岗",,,"电气维修,自动控制",15833534957,
|
||||
聂新旺,23,大专,我是应届毕业生,0.0,0.0,应届毕业生,15503327285,
|
||||
侯德宇,26,本科,"我目前已离职, 可快速到岗",0.0,0.0,应届毕业生,13031970785,
|
||||
冯野,38,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计助理,出纳,会计财务其他相关职位,文体培训其他相关职位",15931510441,
|
||||
刘子晔,28,不限,"我目前已离职, 可快速到岗",6000.0,8000.0,售前/售后支持,18713822511,
|
||||
刘雅静,29,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事经理,档案管理,会计",18231596068,
|
||||
孙秋悦,30,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,"行政文员,招投标,统计员",15930531215,
|
||||
李先生,33,本科,"我目前已离职, 可快速到岗",6000.0,11000.0,销售经理,13091088301,
|
||||
王心瑜,28,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,"产品经理,产品/品牌企划,售前/售后支持,家具设计,采购员",18802419065,
|
||||
白紫祎,26,大专,"我目前已离职, 可快速到岗",,,其它相关职位,19931423082,
|
||||
刘建铖,19,高中以下,我目前正在职,考虑换个环境,3000.0,5000.0,"大堂经理/副理,服务员/收银员/迎宾",15230538512,
|
||||
王丹雨,24,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,热线咨询,话务员,文员,投诉处理",15383150933,
|
||||
付建军,56,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,"物业管理,管理运营其他相关职位",15033982787,
|
||||
李鹏雪,30,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"文职文员其他相关职位,行政人事其他相关职位,文体培训其他相关职位",13262759509,
|
||||
冯工,45,高中以下,"我目前已离职, 可快速到岗",5000.0,7000.0,"水工/木工/油漆工,司机,护士/护理,叉车工,针灸推拿",18931483153,
|
||||
蔡颖,23,大专,"我目前已离职, 可快速到岗",3000.0,4000.0,"仓库管理员,人事文员,行政文员,文员",19833993360,
|
||||
赵乃萱,31,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,人事文员,文员,仓库管理员,文体培训其他相关职位",13171520203,
|
||||
张先生,27,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,普工技工其他相关职位,,
|
||||
张先生,31,硕士,"我目前已离职, 可快速到岗",8000.0,9000.0,大学教师,,
|
||||
孙先生,26,本科,"我目前已离职, 可快速到岗",5000.0,6000.0,"电工/锅炉工,电厂、电力,电脑操作员/打字员",,
|
||||
王女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"会计,电工/锅炉工",,
|
||||
周先生,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,人事文员",,
|
||||
邢女士,32,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事经理,人事助理,文化艺术,行政助理,文员",,
|
||||
张女士,31,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"人事文员,行政文员",,
|
||||
马先生,49,高中,"我目前已离职, 可快速到岗",6000.0,7000.0,"电工/锅炉工,操作工/普工,钳、钣、铆、冲、焊、铸,普工技工其他相关职位",,
|
||||
陶先生,37,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,操作工/普工,,
|
||||
刘先生,44,本科,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,自动控制",,
|
||||
毕先生,37,大专,我目前正在职,考虑换个环境,6000.0,8000.0,电工/锅炉工,,
|
||||
董女士,27,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"商务人员,国际业务,外贸员",,
|
||||
静女士,27,本科,"我目前已离职, 可快速到岗",0.0,0.0,"行政文员,行政人事其他相关职位",,
|
||||
李女士,35,本科,我目前正在职,考虑换个环境,0.0,0.0,"档案管理,大学教师,教务/教务管理,文体培训其他相关职位",,
|
||||
齐先生,38,大专,我目前正在职,考虑换个环境,12000.0,20000.0,"机电一体化,机械仪表其他相关职位,工业工厂其他相关职位",,
|
||||
李先生,37,高中,"我目前已离职, 可快速到岗",5000.0,8000.0,"制冷、暖通,能源环保其他相关职位,部门主管,项目经理,管理运营其他相关职位",,
|
||||
郑先生,28,大专,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,测试工程师",,
|
||||
林先生,40,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"工程经理/主管,工程设备工程师,工业工厂其他相关职位",,
|
||||
周女士,38,大专,"我目前已离职, 可快速到岗",,,"文职文员其他相关职位,行政人事其他相关职位,市场销售其他相关职位,客户服务其他相关职位",,
|
||||
李先生,35,中专/技校,我目前正在职,考虑换个环境,5000.0,8000.0,"操作工/普工,石油/天燃气/储运,仓库管理员,工业工厂其他相关职位,普工技工其他相关职位",,
|
||||
安女士,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计,会计助理,出纳,统计,其它相关职位",,
|
||||
董先生,30,中专/技校,"我目前已离职, 可快速到岗",,,"操作工/普工,生管员,司机,叉车工,仓库管理员",,
|
||||
丁女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事文员,行政人事其他相关职位",,
|
||||
李女士,28,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,行政文员,,
|
|
101
web/cfdzp/resumes_护工_20250327_185425.csv
Normal file
@ -0,0 +1,101 @@
|
||||
name_value,age,edu_value,job_instant_value,job_salary_from,job_salary_to,categories,phone,real_name
|
||||
李政伟,36,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,"招聘经理/主任,测试工程师,经理助理,项目经理,储备干部",17600364716,
|
||||
张伟,31,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,设备修理",15075586374,
|
||||
孙晓磊,36,本科,我目前正在职,考虑换个环境,4000.0,8000.0,工业工厂其他相关职位,18332778778,
|
||||
李女士,35,大专,"我目前已离职, 可快速到岗",4000.0,7000.0,"保安,文员,招投标",18203357869,
|
||||
刘先生,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,部门主管,16643562515,
|
||||
嘉嘉,24,高中,"我目前已离职, 可快速到岗",0.0,0.0,销售助理,18134247015,
|
||||
王女士_324932,46,不限,"我目前已离职, 可快速到岗",,,护士/护理,15830585203,
|
||||
李先生,39,大专,"我目前已离职, 可快速到岗",6000.0,8000.0,"物业管理,项目经理,招投标",18033572860,
|
||||
于萍,23,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事文员,行政文员,前台接待,文员,文职文员其他相关职位",15132578638,
|
||||
王德梅,58,高中,"我目前已离职, 可快速到岗",,,其它相关职位,17736517991,
|
||||
董鑫,36,本科,"我目前已离职, 可快速到岗",7000.0,8000.0,"行政经理,行政助理,行政外联专员",15833464006,
|
||||
杨小雪,33,大专,我目前正在职,考虑换个环境,2000.0,4000.0,文员,17796957849,
|
||||
卫庆,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,司机,15127515220,
|
||||
韩子建,34,本科,我目前正在职,考虑换个环境,7000.0,12000.0,电工/锅炉工,18903152186,
|
||||
陈光,33,大专,我目前正在职,考虑换个环境,5000.0,6000.0,操作工/普工,15032902921,
|
||||
高倩,24,本科,我目前正在职,考虑换个环境,4000.0,7000.0,"行政文员,市场销售其他相关职位,应届毕业生,教务/教务管理,文体培训其他相关职位",19930024957,
|
||||
张女士,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,物业管理,人事文员,行政文员",13472959755,
|
||||
成浩,34,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,会计,16631565969,
|
||||
刘志杰,24,大专,我目前正在职,考虑换个环境,4000.0,5000.0,"电气工程师,电工/锅炉工,电器工程师,电气维修,自动控制",13230526763,
|
||||
李先生,34,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"产品工艺/制程工程师,工业工厂其他相关职位",15632548320,
|
||||
高先生,41,本科,"我目前已离职, 可快速到岗",9000.0,10000.0,"机电一体化,工程经理/主管,工程设备工程师,设备经理/主管,工业工厂其他相关职位",13933476359,
|
||||
姚希烨,23,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"操作工/普工,应届毕业生,普工技工其他相关职位",18633325443,
|
||||
王朝,38,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,生产经理,15832531755,
|
||||
夏铭飞,25,大专,我目前正在职,考虑换个环境,6000.0,9000.0,电工/锅炉工,18132674564,
|
||||
刘同禹,30,大专,我目前正在职,考虑换个环境,5000.0,6000.0,"文员,硬件工程师,网络硬件其他相关职位",18631558615,
|
||||
艾顺坤,31,大专,我目前正在职,考虑换个环境,6000.0,8000.0,"操作工/普工,组长/拉长,计划员/调度员",15931517465,
|
||||
郑忆新,49,高中以下,"我目前已离职, 可快速到岗",1000.0,2000.0,餐饮休闲其他相关职位,13131592702,
|
||||
李小芹,39,高中以下,"我目前已离职, 可快速到岗",,,"物流主管,推(营、促)销员,市场销售其他相关",15732594271,
|
||||
王荣辉,27,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,"工艺工程师,无机化工,有机化工,精细化工",18713472122,
|
||||
张芳铭,29,不限,我目前正在职,考虑换个环境,0.0,0.0,电工/锅炉工,18714117412,
|
||||
赵曼,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,行政文员,文员",13303349597,
|
||||
邢宝代,28,中专/技校,"我目前已离职, 可快速到岗",5000.0,8000.0,操作工/普工,17332835118,
|
||||
王猛,28,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"销售经理,销售主管",15100595409,
|
||||
孙飞宇,29,本科,"我目前已离职, 可快速到岗",0.0,0.0,"会计助理,文员,出纳",15633977516,
|
||||
女士,47,高中以下,"我目前已离职, 可快速到岗",4000.0,9000.0,工业工厂其他相关职位,15001005401,
|
||||
高女士,32,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"人事经理,行政经理,行政人事其他相关职位",18532503671,
|
||||
李明阳,22,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,电气工程师,18134151697,
|
||||
郑咏徽,16,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"电工/锅炉工,文员,管道(水、电),机电一体化,安全员",13273553623,
|
||||
高宇,25,大专,"我目前已离职, 可快速到岗",0.0,0.0,档案管理,15373158665,
|
||||
毕文轩,28,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,物业管理,仓库管理员",17736578597,
|
||||
孙泽泉,45,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,操作工/普工,普工技工其他相关职位",13103258527,
|
||||
宋女士,30,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事助理,行政助理,人事文员,行政文员,行政人事其他相关职位",17736543160,
|
||||
杨建乐,36,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"电工/锅炉工,操作工/普工",15033463533,
|
||||
李帆,33,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,能源环保其他相关职位,18232580171,
|
||||
乐子强,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"电气工程师,电工/锅炉工,工业工厂其他相关职位",15511951165,
|
||||
李xx,24,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"营业员/店员,置业顾问,物业管理,服务员/收银员/迎宾,仓库管理员",13031553751,
|
||||
王朝,37,大专,"我目前已离职, 可快速到岗",0.0,0.0,"行政经理,厂长/副厂长,生产经理",13832558785,
|
||||
刘景欣,36,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电气工程师,电工/锅炉工",19930055542,
|
||||
孙季阳,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,7000.0,司机后勤其他相关职位,15102556611,
|
||||
金先生,30,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"生管主管/督导,计划员/调度员,安全主任,安全员,工业工厂其他相关职位",18532521663,
|
||||
赵影,32,大专,我目前正在职,考虑换个环境,3000.0,5000.0,"行政助理,行政文员,客户服务",15033383977,
|
||||
先生,62,不限,"我目前已离职, 可快速到岗",0.0,0.0,"保安,人事文员,行政文员,安全员",13582958606,
|
||||
江临,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"水利、水电,核电、火电,电厂、电力,能源环保其他相关职位",13722578791,
|
||||
任长青,53,中专/技校,我目前正在职,考虑换个环境,0.0,0.0,普工技工其他相关职位,18032511802,
|
||||
赵绍鑫,38,大专,"我目前已离职, 可快速到岗",,,"电气维修,自动控制",15833534957,
|
||||
郭一帆,25,大专,"我目前已离职, 可快速到岗",,,其它相关职位,13231577871,
|
||||
聂灿,31,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"操作工/普工,司机",19930010655,
|
||||
王建霞,35,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"销售主管,行政助理,招聘专员,行政人事其他相关职位,市场销售其他相关",13426090458,
|
||||
邢雅婷,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,人事文员,17331579095,
|
||||
曹文琪,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,电脑操作员/打字员,文员,文职文员其他相关职位,测试工程师",13121174103,
|
||||
李先生,33,本科,"我目前已离职, 可快速到岗",6000.0,11000.0,销售经理,13091088301,
|
||||
刘雅静,29,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事经理,档案管理,会计",18231596068,
|
||||
王心瑜,28,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,"产品经理,产品/品牌企划,售前/售后支持,家具设计,采购员",18802419065,
|
||||
冯野,38,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计助理,出纳,会计财务其他相关职位,文体培训其他相关职位",15931510441,
|
||||
刘建铖,19,高中以下,我目前正在职,考虑换个环境,3000.0,5000.0,"大堂经理/副理,服务员/收银员/迎宾",15230538512,
|
||||
白紫祎,26,大专,"我目前已离职, 可快速到岗",,,其它相关职位,19931423082,
|
||||
刘子晔,28,不限,"我目前已离职, 可快速到岗",6000.0,8000.0,售前/售后支持,18713822511,
|
||||
侯德宇,26,本科,"我目前已离职, 可快速到岗",0.0,0.0,应届毕业生,13031970785,
|
||||
孙秋悦,30,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,"行政文员,招投标,统计员",15930531215,
|
||||
聂新旺,23,大专,我是应届毕业生,0.0,0.0,应届毕业生,15503327285,
|
||||
张先生,31,硕士,"我目前已离职, 可快速到岗",8000.0,9000.0,大学教师,,
|
||||
冯工,45,高中以下,"我目前已离职, 可快速到岗",5000.0,7000.0,"水工/木工/油漆工,司机,护士/护理,叉车工,针灸推拿",18931483153,
|
||||
王丹雨,24,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,热线咨询,话务员,文员,投诉处理",15383150933,
|
||||
蔡颖,23,大专,"我目前已离职, 可快速到岗",3000.0,4000.0,"仓库管理员,人事文员,行政文员,文员",19833993360,
|
||||
付建军,56,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,"物业管理,管理运营其他相关职位",15033982787,
|
||||
赵乃萱,31,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,人事文员,文员,仓库管理员,文体培训其他相关职位",13171520203,
|
||||
李鹏雪,30,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"文职文员其他相关职位,行政人事其他相关职位,文体培训其他相关职位",13262759509,
|
||||
王女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"会计,电工/锅炉工",,
|
||||
张先生,27,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,普工技工其他相关职位,,
|
||||
孙先生,26,本科,"我目前已离职, 可快速到岗",5000.0,6000.0,"电工/锅炉工,电厂、电力,电脑操作员/打字员",,
|
||||
静女士,27,本科,"我目前已离职, 可快速到岗",0.0,0.0,"行政文员,行政人事其他相关职位",,
|
||||
陶先生,37,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,操作工/普工,,
|
||||
马先生,49,高中,"我目前已离职, 可快速到岗",6000.0,7000.0,"电工/锅炉工,操作工/普工,钳、钣、铆、冲、焊、铸,普工技工其他相关职位",,
|
||||
李女士,35,本科,我目前正在职,考虑换个环境,0.0,0.0,"档案管理,大学教师,教务/教务管理,文体培训其他相关职位",,
|
||||
董女士,27,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"商务人员,国际业务,外贸员",,
|
||||
周先生,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,人事文员",,
|
||||
张女士,31,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"人事文员,行政文员",,
|
||||
毕先生,37,大专,我目前正在职,考虑换个环境,6000.0,8000.0,电工/锅炉工,,
|
||||
邢女士,32,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事经理,人事助理,文化艺术,行政助理,文员",,
|
||||
刘先生,44,本科,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,自动控制",,
|
||||
齐先生,38,大专,我目前正在职,考虑换个环境,12000.0,20000.0,"机电一体化,机械仪表其他相关职位,工业工厂其他相关职位",,
|
||||
郑先生,28,大专,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,测试工程师",,
|
||||
董先生,30,中专/技校,"我目前已离职, 可快速到岗",,,"操作工/普工,生管员,司机,叉车工,仓库管理员",,
|
||||
丁女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事文员,行政人事其他相关职位",,
|
||||
李先生,35,中专/技校,我目前正在职,考虑换个环境,5000.0,8000.0,"操作工/普工,石油/天燃气/储运,仓库管理员,工业工厂其他相关职位,普工技工其他相关职位",,
|
||||
安女士,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计,会计助理,出纳,统计,其它相关职位",,
|
||||
周女士,38,大专,"我目前已离职, 可快速到岗",,,"文职文员其他相关职位,行政人事其他相关职位,市场销售其他相关职位,客户服务其他相关职位",,
|
||||
李女士,28,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,行政文员,,
|
||||
李先生,37,高中,"我目前已离职, 可快速到岗",5000.0,8000.0,"制冷、暖通,能源环保其他相关职位,部门主管,项目经理,管理运营其他相关职位",,
|
||||
林先生,40,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"工程经理/主管,工程设备工程师,工业工厂其他相关职位",,
|
|
101
web/cfdzp/resumes_护工_20250327_192414.csv
Normal file
@ -0,0 +1,101 @@
|
||||
name_value,age,edu_value,job_instant_value,job_salary_from,job_salary_to,categories,phone,real_name
|
||||
孙先生,36,本科,我目前正在职,考虑换个环境,4000.0,8000.0,工业工厂其他相关职位,18332778778,孙晓磊
|
||||
李先生,39,大专,"我目前已离职, 可快速到岗",6000.0,8000.0,"物业管理,项目经理,招投标",18033572860,李先生
|
||||
张先生,31,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,设备修理",15075586374,张伟
|
||||
王女士,58,高中,"我目前已离职, 可快速到岗",,,其它相关职位,17736517991,王德梅
|
||||
嘉女士,24,高中,"我目前已离职, 可快速到岗",0.0,0.0,销售助理,18134247015,嘉嘉
|
||||
李女士,35,大专,"我目前已离职, 可快速到岗",4000.0,7000.0,"保安,文员,招投标",18203357869,李女士
|
||||
于女士,23,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事文员,行政文员,前台接待,文员,文职文员其他相关职位",15132578638,于萍
|
||||
王女士,46,不限,"我目前已离职, 可快速到岗",,,护士/护理,15830585203,王女士_324932
|
||||
刘先生,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,部门主管,16643562515,刘先生
|
||||
李先生,36,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,"招聘经理/主任,测试工程师,经理助理,项目经理,储备干部",17600364716,李政伟
|
||||
卫先生,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,司机,15127515220,卫庆
|
||||
董先生,36,本科,"我目前已离职, 可快速到岗",7000.0,8000.0,"行政经理,行政助理,行政外联专员",15833464006,董鑫
|
||||
高女士,24,本科,我目前正在职,考虑换个环境,4000.0,7000.0,"行政文员,市场销售其他相关职位,应届毕业生,教务/教务管理,文体培训其他相关职位",19930024957,高倩
|
||||
韩先生,34,本科,我目前正在职,考虑换个环境,7000.0,12000.0,电工/锅炉工,18903152186,韩子建
|
||||
陈先生,33,大专,我目前正在职,考虑换个环境,5000.0,6000.0,操作工/普工,15032902921,陈光
|
||||
李先生,34,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"产品工艺/制程工程师,工业工厂其他相关职位",15632548320,李先生
|
||||
刘先生,24,大专,我目前正在职,考虑换个环境,4000.0,5000.0,"电气工程师,电工/锅炉工,电器工程师,电气维修,自动控制",13230526763,刘志杰
|
||||
成先生,34,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,会计,16631565969,成浩
|
||||
杨女士,33,大专,我目前正在职,考虑换个环境,2000.0,4000.0,文员,17796957849,杨小雪
|
||||
张女士,34,大专,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,物业管理,人事文员,行政文员",13472959755,张女士
|
||||
高先生,41,本科,"我目前已离职, 可快速到岗",9000.0,10000.0,"机电一体化,工程经理/主管,工程设备工程师,设备经理/主管,工业工厂其他相关职位",13933476359,高先生
|
||||
艾先生,31,大专,我目前正在职,考虑换个环境,6000.0,8000.0,"操作工/普工,组长/拉长,计划员/调度员",15931517465,艾顺坤
|
||||
郑女士,49,高中以下,"我目前已离职, 可快速到岗",1000.0,2000.0,餐饮休闲其他相关职位,13131592702,郑忆新
|
||||
王先生,27,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,"工艺工程师,无机化工,有机化工,精细化工",18713472122,王荣辉
|
||||
王先生,38,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,生产经理,15832531755,王朝
|
||||
姚先生,23,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"操作工/普工,应届毕业生,普工技工其他相关职位",18633325443,姚希烨
|
||||
张先生,29,不限,我目前正在职,考虑换个环境,0.0,0.0,电工/锅炉工,18714117412,张芳铭
|
||||
李女士,39,高中以下,"我目前已离职, 可快速到岗",,,"物流主管,推(营、促)销员,市场销售其他相关",15732594271,李小芹
|
||||
刘先生,30,大专,我目前正在职,考虑换个环境,5000.0,6000.0,"文员,硬件工程师,网络硬件其他相关职位",18631558615,刘同禹
|
||||
夏先生,25,大专,我目前正在职,考虑换个环境,6000.0,9000.0,电工/锅炉工,18132674564,夏铭飞
|
||||
邢先生,28,中专/技校,"我目前已离职, 可快速到岗",5000.0,8000.0,操作工/普工,17332835118,邢宝代
|
||||
郑先生,16,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"电工/锅炉工,文员,管道(水、电),机电一体化,安全员",13273553623,郑咏徽
|
||||
孙女士,29,本科,"我目前已离职, 可快速到岗",0.0,0.0,"会计助理,文员,出纳",15633977516,孙飞宇
|
||||
李先生,22,本科,"我目前已离职, 可快速到岗",8000.0,10000.0,电气工程师,18134151697,李明阳
|
||||
赵女士,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,行政文员,文员",13303349597,赵曼
|
||||
毕先生,28,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,物业管理,仓库管理员",17736578597,毕文轩
|
||||
王先生,28,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"销售经理,销售主管",15100595409,王猛
|
||||
高先生,25,大专,"我目前已离职, 可快速到岗",0.0,0.0,档案管理,15373158665,高宇
|
||||
高女士,32,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"人事经理,行政经理,行政人事其他相关职位",18532503671,高女士
|
||||
女女士,47,高中以下,"我目前已离职, 可快速到岗",4000.0,9000.0,工业工厂其他相关职位,15001005401,女士
|
||||
王先生,37,大专,"我目前已离职, 可快速到岗",0.0,0.0,"行政经理,厂长/副厂长,生产经理",13832558785,王朝
|
||||
杨先生,36,大专,"我目前已离职, 可快速到岗",8000.0,12000.0,"电工/锅炉工,操作工/普工",15033463533,杨建乐
|
||||
李女士,24,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,"营业员/店员,置业顾问,物业管理,服务员/收银员/迎宾,仓库管理员",13031553751,李xx
|
||||
乐先生,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"电气工程师,电工/锅炉工,工业工厂其他相关职位",15511951165,乐子强
|
||||
金先生,30,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"生管主管/督导,计划员/调度员,安全主任,安全员,工业工厂其他相关职位",18532521663,金先生
|
||||
刘先生,36,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电气工程师,电工/锅炉工",19930055542,刘景欣
|
||||
孙先生,45,大专,我目前正在职,考虑换个环境,5000.0,8000.0,"电工/锅炉工,操作工/普工,普工技工其他相关职位",13103258527,孙泽泉
|
||||
宋女士,30,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事助理,行政助理,人事文员,行政文员,行政人事其他相关职位",17736543160,宋女士
|
||||
孙先生,32,中专/技校,"我目前已离职, 可快速到岗",5000.0,7000.0,司机后勤其他相关职位,15102556611,孙季阳
|
||||
李先生,33,本科,"我目前已离职, 可快速到岗",10000.0,15000.0,能源环保其他相关职位,18232580171,李帆
|
||||
聂先生,31,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"操作工/普工,司机",19930010655,聂灿
|
||||
江先生,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"水利、水电,核电、火电,电厂、电力,能源环保其他相关职位",13722578791,江临
|
||||
赵先生,38,大专,"我目前已离职, 可快速到岗",,,"电气维修,自动控制",15833534957,赵绍鑫
|
||||
王女士,35,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,"销售主管,行政助理,招聘专员,行政人事其他相关职位,市场销售其他相关",13426090458,王建霞
|
||||
郭女士,25,大专,"我目前已离职, 可快速到岗",,,其它相关职位,13231577871,郭一帆
|
||||
先先生,62,不限,"我目前已离职, 可快速到岗",0.0,0.0,"保安,人事文员,行政文员,安全员",13582958606,先生
|
||||
赵女士,32,大专,我目前正在职,考虑换个环境,3000.0,5000.0,"行政助理,行政文员,客户服务",15033383977,赵影
|
||||
任先生,53,中专/技校,我目前正在职,考虑换个环境,0.0,0.0,普工技工其他相关职位,18032511802,任长青
|
||||
邢女士,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,人事文员,17331579095,邢雅婷
|
||||
曹先生,33,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,电脑操作员/打字员,文员,文职文员其他相关职位,测试工程师",13121174103,曹文琪
|
||||
侯先生,26,本科,"我目前已离职, 可快速到岗",0.0,0.0,应届毕业生,13031970785,侯德宇
|
||||
刘先生,19,高中以下,我目前正在职,考虑换个环境,3000.0,5000.0,"大堂经理/副理,服务员/收银员/迎宾",15230538512,刘建铖
|
||||
王女士,28,本科,"我目前已离职, 可快速到岗",4000.0,6000.0,"产品经理,产品/品牌企划,售前/售后支持,家具设计,采购员",18802419065,王心瑜
|
||||
白女士,26,大专,"我目前已离职, 可快速到岗",,,其它相关职位,19931423082,白紫祎
|
||||
聂先生,23,大专,我是应届毕业生,0.0,0.0,应届毕业生,15503327285,聂新旺
|
||||
李先生,33,本科,"我目前已离职, 可快速到岗",6000.0,11000.0,销售经理,13091088301,李先生
|
||||
刘先生,28,不限,"我目前已离职, 可快速到岗",6000.0,8000.0,售前/售后支持,18713822511,刘子晔
|
||||
孙女士,30,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,"行政文员,招投标,统计员",15930531215,孙秋悦
|
||||
冯女士,38,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计助理,出纳,会计财务其他相关职位,文体培训其他相关职位",15931510441,冯野
|
||||
刘女士,29,大专,我目前正在职,考虑换个环境,0.0,0.0,"人事经理,档案管理,会计",18231596068,刘雅静
|
||||
王女士,24,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"人事文员,热线咨询,话务员,文员,投诉处理",15383150933,王丹雨
|
||||
付先生,56,中专/技校,"我目前已离职, 可快速到岗",5000.0,6000.0,"物业管理,管理运营其他相关职位",15033982787,付建军
|
||||
孙先生,26,本科,"我目前已离职, 可快速到岗",5000.0,6000.0,"电工/锅炉工,电厂、电力,电脑操作员/打字员",,孙先生
|
||||
冯先生,45,高中以下,"我目前已离职, 可快速到岗",5000.0,7000.0,"水工/木工/油漆工,司机,护士/护理,叉车工,针灸推拿",18931483153,冯工
|
||||
李女士,30,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"文职文员其他相关职位,行政人事其他相关职位,文体培训其他相关职位",13262759509,李鹏雪
|
||||
张先生,31,硕士,"我目前已离职, 可快速到岗",8000.0,9000.0,大学教师,,张先生
|
||||
蔡女士,23,大专,"我目前已离职, 可快速到岗",3000.0,4000.0,"仓库管理员,人事文员,行政文员,文员",19833993360,蔡颖
|
||||
王女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"会计,电工/锅炉工",,王女士
|
||||
赵女士,31,本科,"我目前已离职, 可快速到岗",0.0,0.0,"档案管理,人事文员,文员,仓库管理员,文体培训其他相关职位",13171520203,赵乃萱
|
||||
张先生,27,大专,"我目前已离职, 可快速到岗",5000.0,8000.0,普工技工其他相关职位,,张先生
|
||||
刘先生,44,本科,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,自动控制",,刘先生
|
||||
静女士,27,本科,"我目前已离职, 可快速到岗",0.0,0.0,"行政文员,行政人事其他相关职位",,静女士
|
||||
邢女士,32,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事经理,人事助理,文化艺术,行政助理,文员",,邢女士
|
||||
毕先生,37,大专,我目前正在职,考虑换个环境,6000.0,8000.0,电工/锅炉工,,毕先生
|
||||
董女士,27,本科,"我目前已离职, 可快速到岗",5000.0,8000.0,"商务人员,国际业务,外贸员",,董女士
|
||||
张女士,31,大专,"我目前已离职, 可快速到岗",5000.0,6000.0,"人事文员,行政文员",,张女士
|
||||
马先生,49,高中,"我目前已离职, 可快速到岗",6000.0,7000.0,"电工/锅炉工,操作工/普工,钳、钣、铆、冲、焊、铸,普工技工其他相关职位",,马先生
|
||||
周先生,23,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事助理,人事文员",,周先生
|
||||
李女士,35,本科,我目前正在职,考虑换个环境,0.0,0.0,"档案管理,大学教师,教务/教务管理,文体培训其他相关职位",,李女士
|
||||
陶先生,37,大专,"我目前已离职, 可快速到岗",4000.0,5000.0,操作工/普工,,陶先生
|
||||
李先生,35,中专/技校,我目前正在职,考虑换个环境,5000.0,8000.0,"操作工/普工,石油/天燃气/储运,仓库管理员,工业工厂其他相关职位,普工技工其他相关职位",,李先生
|
||||
丁女士,33,大专,"我目前已离职, 可快速到岗",0.0,0.0,"人事文员,行政人事其他相关职位",,丁女士
|
||||
林先生,40,本科,我目前正在职,考虑换个环境,5000.0,8000.0,"工程经理/主管,工程设备工程师,工业工厂其他相关职位",,林先生
|
||||
周女士,38,大专,"我目前已离职, 可快速到岗",,,"文职文员其他相关职位,行政人事其他相关职位,市场销售其他相关职位,客户服务其他相关职位",,周女士
|
||||
李女士,28,本科,"我目前已离职, 可快速到岗",3000.0,5000.0,行政文员,,李女士
|
||||
李先生,37,高中,"我目前已离职, 可快速到岗",5000.0,8000.0,"制冷、暖通,能源环保其他相关职位,部门主管,项目经理,管理运营其他相关职位",,李先生
|
||||
安女士,26,大专,"我目前已离职, 可快速到岗",3000.0,5000.0,"会计,会计助理,出纳,统计,其它相关职位",,安女士
|
||||
齐先生,38,大专,我目前正在职,考虑换个环境,12000.0,20000.0,"机电一体化,机械仪表其他相关职位,工业工厂其他相关职位",,齐先生
|
||||
董先生,30,中专/技校,"我目前已离职, 可快速到岗",,,"操作工/普工,生管员,司机,叉车工,仓库管理员",,董先生
|
||||
郑先生,28,大专,我目前正在职,考虑换个环境,0.0,0.0,"电气工程师,测试工程师",,郑先生
|
|
39
web/dailymotion_com/conversion_time.py
Normal file
@ -0,0 +1,39 @@
|
||||
import pandas as pd
|
||||
import pytz
|
||||
|
||||
input_path = "xid_dedup.xlsx"
|
||||
output_path = "id_dedup_时间格式化.xlsx"
|
||||
|
||||
video_df = pd.read_excel(input_path, sheet_name="视频信息")
|
||||
user_df = pd.read_excel(input_path, sheet_name="用户信息")
|
||||
|
||||
def convert_to_east8(dt):
    """Parse *dt* as a UTC timestamp and return it converted to Asia/Shanghai.

    The value is parsed with ``utc=True`` and ``errors='coerce'``, so input
    that cannot be parsed becomes NaT. Returns None for NaT/missing values
    and whenever any step of the conversion raises.
    """
    try:
        parsed = pd.to_datetime(dt, errors='coerce', utc=True)
        if not pd.notna(parsed):
            return None
        return parsed.tz_convert("Asia/Shanghai")
    except Exception:
        return None
|
||||
|
||||
# 1. 转东八区
|
||||
video_df["添加时间"] = video_df["添加时间"].apply(convert_to_east8)
|
||||
|
||||
# 2. 去除时区并格式化为字符串
|
||||
video_df["添加时间"] = video_df["添加时间"].dt.tz_localize(None).dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
# 3. 转换时长为 mm:ss
|
||||
def format_duration(seconds):
    """Format a duration in seconds as ``"M:SS"``.

    Minutes are not zero-padded and are not rolled over into hours; the
    seconds part is always two digits. Returns None when *seconds* cannot be
    converted with ``int()``.
    """
    try:
        minutes, remain = divmod(int(seconds), 60)
        return f"{minutes}:{remain:02d}"
    except Exception:
        return None
|
||||
|
||||
video_df["时长 (秒)"] = video_df["时长 (秒)"].apply(format_duration)
|
||||
|
||||
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
|
||||
video_df.to_excel(writer, sheet_name="视频信息", index=False)
|
||||
user_df.to_excel(writer, sheet_name="用户信息", index=False)
|
||||
|
||||
print(f"✅ 处理完成,结果保存为:{output_path}")
|
34
web/dailymotion_com/csrf_token.py
Normal file
@ -0,0 +1,34 @@
|
||||
import requests
|
||||
|
||||
# Fetch a CSRF token from the Dailymotion help-center API and print it.
#
# The request is sent without cookies (the original call passed
# cookies=None), so the previously hard-coded dict of captured session
# cookies was dead code and has been removed.

# Browser-like request headers for the help-center "new request" page.
headers = {
    'accept': '*/*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://faq.dailymotion.com/hc/en-us/requests/new?ticket_form_id=136048',
    'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
}

# Local HTTP proxy used for both schemes.
proxies = {
    'http': 'http://127.0.0.1:10808',
    'https': 'http://127.0.0.1:10808',
}

response = requests.get(
    'https://faq.dailymotion.com/hc/api/internal/csrf_token.json',
    headers=headers,
    proxies=proxies,
    timeout=30,  # without a timeout the call can block indefinitely
)
# Fail with a clear HTTP error instead of a confusing JSON/KeyError below.
response.raise_for_status()

json_data = response.json()
csrf_token = json_data['current_session']['csrf_token']
print(csrf_token)
|
23
web/dailymotion_com/dailymotion_api/test.py
Normal file
@ -0,0 +1,23 @@
|
||||
import requests
|
||||
|
||||
# 1. Public REST endpoint (no token required).
endpoint = 'https://api.dailymotion.com/videos'

# 2. Query parameters:
#    - search: keyword to search for
#    - fields: video and owner fields to include in each result
#    - limit:  maximum number of results per page (5)
#    - page:   which page of results to request
#    - sort:   result ordering
params = {
    'search': '郭德纲',
    'fields': 'id,title,created_time,thumbnail_240_url,duration,owner.id,owner.screenname,likes_total,views_total,owner.avatar_60_url,owner.followers_total,owner.videos_total',
    'limit': 5,
    'page': 2,
    'sort': "relevance",
}

# 3. Send the GET request; the timeout keeps the script from hanging forever.
response = requests.get(endpoint, params=params, timeout=30)

# 4. Decode the JSON body and print it.
data = response.json()
print(data)
|
BIN
web/dailymotion_com/dailymotion_com.zip
Normal file
20
web/dailymotion_com/deduplicateby_xid.py
Normal file
@ -0,0 +1,20 @@
|
||||
import pandas as pd
|
||||
|
||||
# Source workbook and the destination for the de-duplicated copy.
input_path = "merge.xlsx"
output_path = "xid_dedup.xlsx"

# Load both sheets of the source workbook.
video_df = pd.read_excel(input_path, sheet_name="视频信息")
user_df = pd.read_excel(input_path, sheet_name="用户信息")

# Drop rows that repeat an xid, keeping the first occurrence of each.
video_df_dedup = video_df.drop_duplicates(subset=["xid"], keep="first")
user_df_dedup = user_df.drop_duplicates(subset=["xid"], keep="first")

# Write both de-duplicated sheets into a new workbook.
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    for sheet_name, frame in (("视频信息", video_df_dedup), ("用户信息", user_df_dedup)):
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"去重完成,结果保存为:{output_path}")
|
38
web/dailymotion_com/get_token.py
Normal file
@ -0,0 +1,38 @@
|
||||
import requests
|
||||
import uuid
|
||||
|
||||
|
||||
# Request an anonymous (client_credentials) OAuth token and print the reply.

# A fresh random visitor id is generated for every run.
u = uuid.uuid4()
uuid_with_dash = str(u)

# Browser-like request headers.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://www.dailymotion.com',
    'Pragma': 'no-cache',
    'Referer': 'https://www.dailymotion.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Form fields of the token request.
data = {
    'client_id': 'f1a362d288c1b98099c7',
    'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
    'grant_type': 'client_credentials',
    'traffic_segment': '567786',
    'visitor_id': uuid_with_dash,
}

# Local HTTP proxy used for both schemes.
proxies = {
    "http": 'http://127.0.0.1:7890',
    "https": 'http://127.0.0.1:7890',
}

response = requests.post(
    'https://graphql.api.dailymotion.com/oauth/token',
    headers=headers,
    data=data,
    proxies=proxies,
    timeout=30,  # without a timeout the call can block indefinitely
)

print(response.json())
|
41
web/dailymotion_com/login.py
Normal file
@ -0,0 +1,41 @@
|
||||
import os

import requests

# Log in with the OAuth "password" grant and print the resulting access token.
#
# The account e-mail and password are read from the environment variables
# DAILYMOTION_USERNAME and DAILYMOTION_PASSWORD so that credentials are not
# stored in the repository; a missing variable raises KeyError at start-up.

# Browser-like request headers.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://www.dailymotion.com',
    'Pragma': 'no-cache',
    'Referer': 'https://www.dailymotion.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Local HTTP proxy used for both schemes.
proxies = {
    'http': 'http://127.0.0.1:10808',
    'https': 'http://127.0.0.1:10808',
}

# Form fields of the token request.
data = {
    'client_id': 'f1a362d288c1b98099c7',
    'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
    'grant_type': 'password',
    'username': os.environ['DAILYMOTION_USERNAME'],
    'password': os.environ['DAILYMOTION_PASSWORD'],
    'scope': 'userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos',
    'version': '2',
    'traffic_segment': '440397',
    'visitor_id': 'e1ce77f1-0d3e-493f-89bc-2f22669f8985',
}

response = requests.post(
    'https://graphql.api.dailymotion.com/oauth/token',
    headers=headers,
    data=data,
    proxies=proxies,
    timeout=30,  # without a timeout the call can block indefinitely
)
json_data = response.json()

# None when the response carries no 'access_token' (e.g. a failed login).
access_token = json_data.get('access_token')

# The original ended with a bare print(), which printed only an empty line.
print(access_token)
|
332
web/dailymotion_com/main1.py
Normal file
@ -0,0 +1,332 @@
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
import uuid
|
||||
import concurrent.futures
|
||||
import logging
|
||||
from random import uniform
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
import os
|
||||
import urllib3
|
||||
from requests import RequestException
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
# Logging: INFO and above go to both 'dailymotion.log' (UTF-8) and the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('dailymotion.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# Silence urllib3's InsecureRequestWarning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Paths, all resolved relative to the directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
KW_PATH = os.path.join(BASE_DIR, 'data', 'keyword.xlsx')
OUTPUT_DIR = os.path.join(BASE_DIR, 'out_put_CNTW')

# Create the output directory on first run.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
    logger.info(f'创建输出目录: {OUTPUT_DIR}')

# Request settings.
MAX_RETRIES = 3       # attempts per request
BASE_DELAY = 2        # base of the exponential retry delay, in seconds
MAX_WORKERS = 5       # upper bound on concurrent worker threads
REQUEST_TIMEOUT = 30  # per-request timeout, in seconds
|
||||
|
||||
class DailymotionAPI:
|
||||
def __init__(self):
    """Create the HTTP session with its default headers and proxy settings."""
    session = requests.Session()

    # Default headers sent with every request made through this session.
    session.headers.update({
        'Accept': '*/*, */*',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json, application/json',
        'Host': 'graphql.api.dailymotion.com',
        'Origin': 'https://www.dailymotion.com',
        'Referer': 'https://www.dailymotion.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'X-DM-AppInfo-Id': 'com.dailymotion.neon',
        'X-DM-AppInfo-Type': 'website',
        'X-DM-AppInfo-Version': 'v2025-04-28T12:37:52.391Z',
        'X-DM-Neon-SSR': '0',
        'X-DM-Preferred-Country': 'us',
        'accept-language': 'zh-CN',
    })

    # Both schemes go through the same local HTTP proxy.
    local_proxy = 'http://127.0.0.1:7890'
    session.proxies = {
        "http": local_proxy,
        "https": local_proxy,
    }

    self.session = session
|
||||
|
||||
def _make_request(self, url: str, json_data: Dict, retries: int = MAX_RETRIES) -> Dict:
    """POST *json_data* to *url* and return the decoded JSON response.

    Each attempt uses a freshly chosen random User-Agent. The header is
    passed per request rather than written into ``self.session.headers``,
    because the same instance is shared by the worker threads started in
    ``process_videos_batch``.

    Args:
        url: Request URL.
        json_data: Payload sent as the JSON body.
        retries: Maximum number of attempts; must be at least 1.

    Returns:
        Dict: The decoded JSON body of the first successful response.

    Raises:
        ValueError: If *retries* is less than 1 (previously this silently
            returned None).
        Exception: The error raised by the final attempt, re-raised as is.
    """
    if retries < 1:
        raise ValueError('retries must be at least 1')

    for attempt in range(retries):
        try:
            response = self.session.post(
                url,
                json=json_data,
                headers={'User-Agent': UserAgent().random},
                timeout=REQUEST_TIMEOUT,
                # NOTE(review): TLS certificate verification is disabled here.
                verify=False,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            if attempt == retries - 1:
                logger.error(f'请求失败: {str(e)}')
                raise
            # Exponential back-off plus 1-3 s of random jitter.
            wait_time = BASE_DELAY * (2 ** attempt) + uniform(1, 3)
            logger.warning(f'请求失败,等待 {wait_time:.2f} 秒后重试...')
            time.sleep(wait_time)
|
||||
|
||||
def get_video_info(self, x_id: str) -> Dict[str, Union[int, str]]:
|
||||
"""获取视频详细信息
|
||||
|
||||
Args:
|
||||
x_id: 视频ID
|
||||
|
||||
Returns:
|
||||
Dict: 包含视频统计信息的字典
|
||||
"""
|
||||
try:
|
||||
payload = {
|
||||
"operationName": "WATCHING_VIDEO",
|
||||
"variables": {"xid": x_id, "isSEO": False},
|
||||
"query": "fragment VIDEO_FRAGMENT on Video {\n id\n xid\n isPublished\n duration\n title\n description\n thumbnailx60: thumbnailURL(size: \"x60\")\n thumbnailx120: thumbnailURL(size: \"x120\")\n thumbnailx240: thumbnailURL(size: \"x240\")\n thumbnailx360: thumbnailURL(size: \"x360\")\n thumbnailx480: thumbnailURL(size: \"x480\")\n thumbnailx720: thumbnailURL(size: \"x720\")\n thumbnailx1080: thumbnailURL(size: \"x1080\")\n aspectRatio\n category\n categories(filter: {category: {eq: CONTENT_CATEGORY}}) {\n edges {\n node { id name slug __typename }\n __typename\n }\n __typename\n }\n iab_categories: categories(\n filter: {category: {eq: IAB_CATEGORY}, percentage: {gte: 70}}\n ) {\n edges {\n node { id slug __typename }\n __typename\n }\n __typename\n }\n bestAvailableQuality\n createdAt\n viewerEngagement {\n id\n liked\n favorited\n __typename\n }\n isPrivate\n isWatched\n isCreatedForKids\n isExplicit\n canDisplayAds\n videoWidth: width\n videoHeight: height\n status\n hashtags {\n edges {\n node { id name __typename }\n __typename\n }\n __typename\n }\n stats {\n id\n views { id total __typename }\n __typename\n }\n channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n logoURLx25: logoURL(size: \"x25\")\n logoURL(size: \"x60\")\n isFollowed\n accountType\n coverURLx375: coverURL(size: \"x375\")\n stats {\n id\n views { id total __typename }\n followers { id total __typename }\n videos { id total __typename }\n __typename\n }\n country { id codeAlpha2 __typename }\n organization @skip(if: $isSEO) {\n id\n xid\n owner { id xid __typename }\n __typename\n }\n }\n language { id codeAlpha2 __typename }\n tags {\n edges {\n node { id label __typename }\n __typename\n }\n __typename\n }\n moderation { id reviewedAt __typename }\n topics(whitelistedOnly: true, first: 3, page: 1) {\n edges {\n node {\n id\n xid\n name\n names {\n edges {\n node {\n id\n name\n language { id codeAlpha2 __typename }\n __typename\n }\n __typename\n }\n __typename\n }\n 
__typename\n }\n __typename\n }\n __typename\n }\n geoblockedCountries {\n id\n allowed\n denied\n __typename\n }\n transcript {\n edges {\n node { id timecode text __typename }\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment LIVE_FRAGMENT on Live {\n id\n xid\n startAt\n endAt\n isPublished\n title\n description\n thumbnailx60: thumbnailURL(size: \"x60\")\n thumbnailx120: thumbnailURL(size: \"x120\")\n thumbnailx240: thumbnailURL(size: \"x240\")\n thumbnailx360: thumbnailURL(size: \"x360\")\n thumbnailx480: thumbnailURL(size: \"x480\")\n thumbnailx720: thumbnailURL(size: \"x720\")\n thumbnailx1080: thumbnailURL(size: \"x1080\")\n aspectRatio\n category\n createdAt\n viewerEngagement { id liked favorited __typename }\n isPrivate\n isExplicit\n isCreatedForKids\n bestAvailableQuality\n canDisplayAds\n videoWidth: width\n videoHeight: height\n stats { id views { id total __typename } __typename }\n channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n logoURLx25: logoURL(size: \"x25\")\n logoURL(size: \"x60\")\n isFollowed\n accountType\n coverURLx375: coverURL(size: \"x375\")\n stats { id views { id total __typename } followers { id total __typename } videos { id total __typename } __typename }\n country { id codeAlpha2 __typename }\n organization @skip(if: $isSEO) { id xid owner { id xid __typename } __typename }\n }\n language { id codeAlpha2 __typename }\n tags { edges { node { id label __typename } __typename } __typename }\n moderation { id reviewedAt __typename }\n topics(whitelistedOnly: true, first: 3, page: 1) {\n edges { node { id xid name names { edges { node { id name language { id codeAlpha2 __typename } __typename } __typename } __typename } __typename } __typename }\n __typename\n }\n geoblockedCountries { id allowed denied __typename }\n __typename\n}\n\nquery WATCHING_VIDEO($xid: String!, $isSEO: Boolean!) {\n video: media(xid: $xid) {\n __typename\n ... on Video { id ...VIDEO_FRAGMENT __typename }\n ... 
on Live { id ...LIVE_FRAGMENT __typename }\n }\n}"
|
||||
}
|
||||
|
||||
response_data = self._make_request('https://graphql.api.dailymotion.com/', payload)
|
||||
v_info = response_data['data']['video']['channel']['stats']
|
||||
|
||||
return {
|
||||
"view": v_info['views']['total'],
|
||||
"fans": v_info['followers']['total'],
|
||||
"videos": v_info['videos']['total'],
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f'获取视频信息失败: {str(e)}')
|
||||
return {"view": '-', "fans": '-', "videos": '-'}
|
||||
|
||||
def process_video(api: DailymotionAPI, node: Dict, calculated_index: int) -> Optional[Dict]:
    """Build the output row for one search-result node.

    Sleeps 1-2 seconds first to space out requests, then asks *api* for the
    statistics of the video identified by the node's 'xid'.

    Args:
        api: DailymotionAPI instance used to fetch the statistics.
        node: Video node taken from the search response.
        calculated_index: Position assigned to this video in the output.

    Returns:
        Optional[Dict]: The assembled row, or None if any step raised.
    """
    xid = node.get('xid')
    try:
        logger.info(f'开始处理视频 {xid} (索引: {calculated_index})')

        # Random delay so requests are not sent too frequently.
        time.sleep(uniform(1, 2))

        v_info = api.get_video_info(xid)

        # The default of dict.get() only applies when the key is missing; a
        # 'thumbnail' that is present but None used to raise AttributeError
        # here, which discarded the whole video.
        thumbnail = node.get('thumbnail') or {}

        result = {
            "index": calculated_index,
            "id": node.get('id'),
            "xid": xid,
            "link": f"https://www.dailymotion.com/video/{xid}",
            "title": node.get('title'),
            "createtime": node.get('createdAt'),
            "duration": node.get('duration'),
            "pic": thumbnail.get('url'),
            "view": v_info['view'],
            "fans": v_info['fans'],
            "videos": v_info['videos'],
        }

        logger.debug(f'视频 {xid} 处理成功')
        return result

    except Exception as e:
        logger.error(f'处理视频 {xid} 出错: {str(e)}')
        return None
|
||||
|
||||
def process_videos_batch(api: DailymotionAPI, videos: List[Dict], start_index: int) -> List[Dict]:
    """Process a list of video nodes on a thread pool.

    Each node is handed to ``process_video`` together with its index, counted
    from *start_index*. Results are collected in completion order, and empty
    results are left out.

    Args:
        api: DailymotionAPI instance shared by all workers.
        videos: Video nodes to process.
        start_index: Index assigned to the first node.

    Returns:
        List[Dict]: The rows that were produced successfully.
    """
    collected = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
        # Remember which index each submitted job belongs to, for error logs.
        index_by_future = {}
        for position, item in enumerate(videos, start=start_index):
            index_by_future[pool.submit(process_video, api, item, position)] = position

        for done in concurrent.futures.as_completed(index_by_future):
            index = index_by_future[done]
            try:
                outcome = done.result()
            except Exception as e:
                logger.error(f'处理视频失败 (索引: {index}): {str(e)}')
                continue
            if outcome:
                collected.append(outcome)

    return collected
|
||||
|
||||
def save_results(results: List[Dict], output_file: str):
    """Write *results* to an Excel workbook.

    Failures are logged and not re-raised.

    Args:
        results: Rows to write, one dict per row.
        output_file: Path of the workbook to create.
    """
    try:
        pd.DataFrame(results).to_excel(output_file, index=False, engine='openpyxl')
    except Exception as e:
        logger.error(f'保存结果失败: {str(e)}')
    else:
        try:
            logger.info(f'结果已保存到: {output_file}')
        except Exception as e:
            logger.error(f'保存结果失败: {str(e)}')
|
||||
|
||||
def search_videos(api: DailymotionAPI, keyword: str, page: int = 1) -> List[Dict]:
    """Run the SEARCH_VIDEOS GraphQL query and return the matching nodes.

    Args:
        api: DailymotionAPI instance used to send the request.
        keyword: Search keyword.
        page: Page number to request; 20 results are requested per page.

    Returns:
        List[Dict]: The 'node' of every returned edge, or an empty list if
        the request or the parsing of its response raised.
    """
    search_variables = {
        "query": keyword,
        "page": page,
        "limit": 20,
        "sort": "relevance"
    }
    graphql_query = "query SEARCH_VIDEOS($query: String!, $page: Int!, $limit: Int!, $sort: String!) {\n videos(\n first: $limit\n page: $page\n search: {query: $query, sort: $sort}\n ) {\n pageInfo { hasNextPage currentPage __typename }\n edges {\n node {\n id\n xid\n title\n createdAt\n duration\n thumbnail { url __typename }\n __typename\n }\n __typename\n }\n __typename\n }\n}"

    try:
        payload = {
            "operationName": "SEARCH_VIDEOS",
            "variables": search_variables,
            "query": graphql_query
        }
        response = api._make_request('https://graphql.api.dailymotion.com/', payload)
        edges = response['data']['videos']['edges']
        return [edge['node'] for edge in edges]
    except Exception as e:
        logger.error(f'搜索视频失败: {str(e)}')
        return []
|
||||
|
||||
def load_progress(keyword: str) -> Dict:
    """Load the saved progress for *keyword*.

    Progress is read from '<keyword>_progress.json' inside OUTPUT_DIR.

    Args:
        keyword: Search keyword the progress belongs to.

    Returns:
        Dict: The stored progress, or a fresh starting state (page 1, empty
        lists) when the file is missing or cannot be read.
    """
    path = os.path.join(OUTPUT_DIR, f'{keyword}_progress.json')

    if not os.path.exists(path):
        return {'page': 1, 'video_data': [], 'user_data': []}

    try:
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as e:
        # An unreadable file is logged and treated like a missing one.
        logger.error(f'加载进度失败: {str(e)}')

    return {'page': 1, 'video_data': [], 'user_data': []}
|
||||
|
||||
def save_progress(keyword: str, progress: Dict):
    """Write *progress* to '<keyword>_progress.json' inside OUTPUT_DIR.

    Failures are logged and not re-raised.

    Args:
        keyword: Search keyword the progress belongs to.
        progress: Progress state to store as JSON.
    """
    path = os.path.join(OUTPUT_DIR, f'{keyword}_progress.json')
    try:
        with open(path, 'w', encoding='utf-8') as handle:
            json.dump(progress, handle)
    except Exception as e:
        logger.error(f'保存进度失败: {str(e)}')
|
||||
|
||||
def main():
    """Collect search results for every keyword in the keyword workbook.

    Keywords come from the '搜索词' column of KW_PATH, or from 'keyword' when
    that column is absent. For each keyword, result pages are fetched until a
    page comes back empty; a checkpoint is written after every page and the
    accumulated rows are saved to '<keyword>_results.xlsx' at the end.
    Errors are logged; a failure on one keyword does not stop the others.
    """
    try:
        # Read the keywords, skipping blank cells (read as NaN).
        df = pd.read_excel(KW_PATH)
        if '搜索词' in df.columns:
            keywords = df['搜索词'].dropna().tolist()
        elif 'keyword' in df.columns:
            keywords = df['keyword'].dropna().tolist()
        else:
            raise ValueError('Excel文件中未找到列名"搜索词"或"keyword",请检查文件格式')

        api = DailymotionAPI()

        for keyword in keywords:
            logger.info(f'开始处理关键词: {keyword}')

            # Resume from the last checkpoint, if there is one.
            progress = load_progress(keyword)
            current_page = progress['page']
            video_data = progress['video_data']

            try:
                while True:
                    videos = search_videos(api, keyword, current_page)
                    if not videos:
                        break

                    results = process_videos_batch(api, videos, len(video_data))
                    video_data.extend(results)

                    # Advance BEFORE checkpointing so the stored page is the
                    # next one to fetch. Storing the page just processed made
                    # a resumed run fetch it again and append duplicate rows.
                    current_page += 1
                    progress['page'] = current_page
                    progress['video_data'] = video_data
                    save_progress(keyword, progress)

                    logger.info(f'已处理 {len(video_data)} 个视频')

                # Write the final workbook for this keyword.
                if video_data:
                    output_file = os.path.join(OUTPUT_DIR, f'{keyword}_results.xlsx')
                    save_results(video_data, output_file)

            except Exception as e:
                logger.error(f'处理关键词 {keyword} 出错: {str(e)}')

    except Exception as e:
        logger.error(f'程序执行出错: {str(e)}')
    finally:
        logger.info('程序执行完成')
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
648
web/dailymotion_com/main2.py
Normal file
@ -0,0 +1,648 @@
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import pandas as pd
|
||||
import requests
|
||||
import os
|
||||
|
||||
from requests import RequestException
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
kw_path = os.path.join(BASE_DIR, 'data', 'keyword1.xlsx')
|
||||
headers1 = {
|
||||
'Accept': '*/*, */*',
|
||||
# 'Accept-Encoding': 'gzip, deflate, br',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
# 'Content-Length': '6237',
|
||||
'Content-Type': 'application/json, application/json',
|
||||
'Host': 'graphql.api.dailymotion.com',
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
'Referer': 'https://www.dailymotion.com/',
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'same-site',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
|
||||
'X-DM-AppInfo-Id': 'com.dailymotion.neon',
|
||||
'X-DM-AppInfo-Type': 'website',
|
||||
'X-DM-AppInfo-Version': 'v2025-04-28T12:37:52.391Z',
|
||||
'X-DM-Neon-SSR': '0',
|
||||
'X-DM-Preferred-Country': 'us',
|
||||
'accept-language': 'zh-CN',
|
||||
'authorization': 'Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhaWQiOiJmMWEzNjJkMjg4YzFiOTgwOTljNyIsInJvbCI6ImNhbi1tYW5hZ2UtcGFydG5lcnMtcmVwb3J0cyBjYW4tcmVhZC12aWRlby1zdHJlYW1zIGNhbi1zcG9vZi1jb3VudHJ5IGNhbi1hZG9wdC11c2VycyBjYW4tcmVhZC1jbGFpbS1ydWxlcyBjYW4tbWFuYWdlLWNsYWltLXJ1bGVzIGNhbi1tYW5hZ2UtdXNlci1hbmFseXRpY3MgY2FuLXJlYWQtbXktdmlkZW8tc3RyZWFtcyBjYW4tZG93bmxvYWQtbXktdmlkZW9zIGFjdC1hcyBhbGxzY29wZXMgYWNjb3VudC1jcmVhdG9yIGNhbi1yZWFkLWFwcGxpY2F0aW9ucyIsInNjbyI6InJlYWQgd3JpdGUgZGVsZXRlIGVtYWlsIHVzZXJpbmZvIGZlZWQgbWFuYWdlX3ZpZGVvcyBtYW5hZ2VfY29tbWVudHMgbWFuYWdlX3BsYXlsaXN0cyBtYW5hZ2VfdGlsZXMgbWFuYWdlX3N1YnNjcmlwdGlvbnMgbWFuYWdlX2ZyaWVuZHMgbWFuYWdlX2Zhdm9yaXRlcyBtYW5hZ2VfbGlrZXMgbWFuYWdlX2dyb3VwcyBtYW5hZ2VfcmVjb3JkcyBtYW5hZ2Vfc3VidGl0bGVzIG1hbmFnZV9mZWF0dXJlcyBtYW5hZ2VfaGlzdG9yeSBpZnR0dCByZWFkX2luc2lnaHRzIG1hbmFnZV9jbGFpbV9ydWxlcyBkZWxlZ2F0ZV9hY2NvdW50X21hbmFnZW1lbnQgbWFuYWdlX2FuYWx5dGljcyBtYW5hZ2VfcGxheWVyIG1hbmFnZV9wbGF5ZXJzIG1hbmFnZV91c2VyX3NldHRpbmdzIG1hbmFnZV9jb2xsZWN0aW9ucyBtYW5hZ2VfYXBwX2Nvbm5lY3Rpb25zIG1hbmFnZV9hcHBsaWNhdGlvbnMgbWFuYWdlX2RvbWFpbnMgbWFuYWdlX3BvZGNhc3RzIiwibHRvIjoiZVdGV1JTSkdXRVZjVGg0eEYyRWpWblFlTHdrdUhTVjVPMGdrWGciLCJhaW4iOjEsImFkZyI6MSwiaWF0IjoxNzQ2MjU3NzI1LCJleHAiOjE3NDYyOTM1NjgsImRtdiI6IjEiLCJhdHAiOiJicm93c2VyIiwiYWRhIjoid3d3LmRhaWx5bW90aW9uLmNvbSIsInZpZCI6IjY0NjMzRDAzMDY1RjQxODZBRDBCMDI3Q0Y3OTVFRjBGIiwiZnRzIjo5MTE0MSwiY2FkIjoyLCJjeHAiOjIsImNhdSI6Miwia2lkIjoiQUY4NDlERDczQTU4NjNDRDdEOTdEMEJBQjA3MjI0M0IifQ.bMzShOLIb6datC92qGPTRVCW9eINTYDFwLtqed2P1d4',
|
||||
'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'x-dm-visit-id': '1745971699160',
|
||||
'x-dm-visitor-id': '64633D03065F4186AD0B027CF795EF0F',
|
||||
}
|
||||
# proxies = None
|
||||
|
||||
proxies = {
|
||||
"http": 'http://127.0.0.1:7890',
|
||||
"https": 'http://127.0.0.1:7890',
|
||||
}
|
||||
|
||||
def post_with_retry(url, json_payload=None, data=None, headers=None, proxies=None,
                    retries=3, timeout=10, backoff_factor=1, cooldown=100):
    """
    Send a POST request to *url*, retrying on request errors.

    The wait between two attempts is ``cooldown`` plus an exponentially
    growing back-off (``backoff_factor * 2 ** (attempt - 1)``). The failure
    is reported immediately, the printed wait is the real total, and no time
    is spent sleeping before the final failure is raised.

    :param url: request URL
    :param json_payload: JSON body to send; takes precedence over ``data``
    :param data: form/body data, used only when ``json_payload`` is None
    :param headers: optional dict of request headers
    :param proxies: optional dict of proxies
    :param retries: maximum number of attempts
    :param timeout: timeout of a single request, in seconds
    :param backoff_factor: base of the exponential part of the wait
    :param cooldown: fixed number of seconds added to every wait (default 100,
        the value that used to be hard-coded)
    :return: the requests.Response of the first successful attempt; None if
        ``retries`` is 0 or negative, since no request is made in that case
    :raises: the RequestException of the last attempt once all have failed
    """
    attempt = 0

    while attempt < retries:
        try:
            if json_payload is not None:
                response = requests.post(
                    url, json=json_payload, headers=headers, proxies=proxies, timeout=timeout
                )
            else:
                response = requests.post(
                    url, data=data, headers=headers, proxies=proxies, timeout=timeout
                )
            response.raise_for_status()
            return response
        except RequestException as e:
            attempt += 1
            print(f"[{attempt}/{retries}] 请求失败: {e}")
            if attempt == retries:
                print("已达最大重试次数,抛出异常。")
                raise
            sleep_time = cooldown + backoff_factor * (2 ** (attempt - 1))
            print(f"等待 {sleep_time} 秒后重试…")
            time.sleep(sleep_time)
|
||||
|
||||
|
||||
def red_keyword_info():
    """Read the first sheet of the keyword workbook as a list of row dicts.

    Prints the number of rows read and returns one dict per row, keyed by
    column name.
    """
    rows = pd.read_excel(kw_path, sheet_name=0).to_dict(orient='records')
    print(f"共 {len(rows)} 行数据:")
    return rows
|
||||
|
||||
|
||||
def gettoken():
    """Fetch an anonymous OAuth token and install it into ``headers1``.

    A new random visitor id is generated for each call. After the token
    request succeeds, the module-level ``headers1`` dict is updated in place
    with the bearer token, a millisecond-timestamp visit id and the visitor
    id (without dashes). Returns nothing.
    """
    visitor = uuid.uuid4()

    token_headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'https://www.dailymotion.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.dailymotion.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    form = {
        'client_id': 'f1a362d288c1b98099c7',
        'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
        'grant_type': 'client_credentials',
        'traffic_segment': '567786',
        # The token endpoint receives the dashed form of the visitor id.
        'visitor_id': str(visitor),
    }

    reply = post_with_retry(
        'https://graphql.api.dailymotion.com/oauth/token',
        headers=token_headers,
        data=form,
        proxies=proxies,
    )
    access_token = reply.json()['access_token']

    headers1['authorization'] = "Bearer " + access_token
    headers1['x-dm-visit-id'] = str(int(time.time() * 1000))
    # The request header uses the 32-character hex form (no dashes).
    headers1['x-dm-visitor-id'] = visitor.hex
|
||||
|
||||
|
||||
def get_searchInfo(keyword):
|
||||
video_list = []
|
||||
user_list = []
|
||||
for j in range(1, 3):
|
||||
# 别展开 = = !
|
||||
data = {
|
||||
"operationName": "SEARCH_QUERY",
|
||||
"variables": {
|
||||
"query": keyword,
|
||||
"shouldIncludeTopResults": True,
|
||||
"shouldIncludeChannels": False,
|
||||
"shouldIncludePlaylists": False,
|
||||
"shouldIncludeHashtags": False,
|
||||
"shouldIncludeVideos": False,
|
||||
"shouldIncludeLives": False,
|
||||
"page": j,
|
||||
"limit": 100,
|
||||
"recaptchaToken": None
|
||||
},
|
||||
"query": """
|
||||
fragment VIDEO_BASE_FRAGMENT on Video {
|
||||
id
|
||||
xid
|
||||
title
|
||||
createdAt
|
||||
duration
|
||||
aspectRatio
|
||||
thumbnail(height: PORTRAIT_240) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
creator {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
avatar(height: SQUARE_60) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment CHANNEL_BASE_FRAG on Channel {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
isFollowed
|
||||
avatar(height: SQUARE_120) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
followerEngagement {
|
||||
id
|
||||
followDate
|
||||
__typename
|
||||
}
|
||||
metrics {
|
||||
id
|
||||
engagement {
|
||||
id
|
||||
followers {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
total
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment PLAYLIST_BASE_FRAG on Collection {
|
||||
id
|
||||
xid
|
||||
name
|
||||
description
|
||||
thumbnail(height: PORTRAIT_240) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
creator {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
avatar(height: SQUARE_60) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
metrics {
|
||||
id
|
||||
engagement {
|
||||
id
|
||||
videos(filter: {visibility: {eq: PUBLIC}}) {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
total
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment HASHTAG_BASE_FRAG on Hashtag {
|
||||
id
|
||||
xid
|
||||
name
|
||||
metrics {
|
||||
id
|
||||
engagement {
|
||||
id
|
||||
videos {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
total
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
fragment LIVE_BASE_FRAGMENT on Live {
|
||||
id
|
||||
xid
|
||||
title
|
||||
audienceCount
|
||||
aspectRatio
|
||||
isOnAir
|
||||
thumbnail(height: PORTRAIT_240) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
creator {
|
||||
id
|
||||
xid
|
||||
name
|
||||
displayName
|
||||
accountType
|
||||
avatar(height: SQUARE_60) {
|
||||
id
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
|
||||
query SEARCH_QUERY($query: String!, $shouldIncludeTopResults: Boolean!, $shouldIncludeVideos: Boolean!, $shouldIncludeChannels: Boolean!, $shouldIncludePlaylists: Boolean!, $shouldIncludeHashtags: Boolean!, $shouldIncludeLives: Boolean!, $page: Int, $limit: Int, $sortByVideos: SearchVideoSort, $durationMinVideos: Int, $durationMaxVideos: Int, $createdAfterVideos: DateTime, $recaptchaToken: String) {
|
||||
search(token: $recaptchaToken) {
|
||||
id
|
||||
stories(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeTopResults) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
...VIDEO_BASE_FRAGMENT
|
||||
...CHANNEL_BASE_FRAG
|
||||
...PLAYLIST_BASE_FRAG
|
||||
...HASHTAG_BASE_FRAG
|
||||
...LIVE_BASE_FRAGMENT
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
videos(
|
||||
query: $query
|
||||
first: $limit
|
||||
page: $page
|
||||
sort: $sortByVideos
|
||||
durationMin: $durationMinVideos
|
||||
durationMax: $durationMaxVideos
|
||||
createdAfter: $createdAfterVideos
|
||||
) @include(if: $shouldIncludeVideos) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...VIDEO_BASE_FRAGMENT
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
lives(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeLives) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...LIVE_BASE_FRAGMENT
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
channels(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeChannels) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...CHANNEL_BASE_FRAG
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
playlists: collections(query: $query, first: $limit, page: $page) @include(if: $shouldIncludePlaylists) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...PLAYLIST_BASE_FRAG
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
hashtags(query: $query, first: $limit, page: $page) @include(if: $shouldIncludeHashtags) {
|
||||
metadata {
|
||||
id
|
||||
algorithm {
|
||||
uuid
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
__typename
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
...HASHTAG_BASE_FRAG
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}
|
||||
"""
|
||||
}
|
||||
gettoken()
|
||||
response = post_with_retry(
|
||||
"https://graphql.api.dailymotion.com/",
|
||||
json_payload=data,
|
||||
headers=headers1,
|
||||
proxies=proxies
|
||||
)
|
||||
|
||||
jsondata = response.json()
|
||||
try:
|
||||
resinfo = jsondata['data']['search']['stories']['edges']
|
||||
print('resinfo :', len(resinfo))
|
||||
except Exception:
|
||||
resinfo = []
|
||||
ValueError("返回字段解析错误!")
|
||||
for index, iteminfo in enumerate(resinfo):
|
||||
calculated_index = index + 1 + (j - 1) * 100
|
||||
print(calculated_index)
|
||||
node = iteminfo['node']
|
||||
__typename = node['__typename']
|
||||
if __typename == "Video":
|
||||
xid = node.get('xid')
|
||||
v_info = get_videoInfo(xid)
|
||||
time.sleep(3)
|
||||
video_list.append({
|
||||
"index": calculated_index,
|
||||
"id": node.get('id'),
|
||||
"xid": xid,
|
||||
"link": "https://www.dailymotion.com/video/" + xid,
|
||||
"title": node.get('title'),
|
||||
"createtime": node.get('createdAt'),
|
||||
"duration": node.get('duration'),
|
||||
"pic": node.get('thumbnail').get('url'),
|
||||
"view": v_info['view'],
|
||||
"fans": v_info['fans'],
|
||||
"videos": v_info['videos']
|
||||
})
|
||||
elif __typename == "Channel":
|
||||
user_list.append({
|
||||
'index': calculated_index,
|
||||
'id': node['id'],
|
||||
'xid': node['xid'],
|
||||
'name': node['name'],
|
||||
'upic': node['avatar']['url']
|
||||
})
|
||||
else:
|
||||
continue
|
||||
|
||||
time.sleep(15)
|
||||
return video_list, user_list
|
||||
|
||||
|
||||
def get_videoInfo(x_id, r=3):
    """Fetch channel statistics for the video identified by ``x_id``.

    Sends the ``WATCHING_VIDEO`` GraphQL operation and reads
    ``data.video.channel.stats`` from the reply.

    Args:
        x_id: Video xid, sent as the ``xid`` query variable.
        r: Number of additional attempts made when a reply cannot be parsed
            (``r=3`` means at most four requests).

    Returns:
        dict with the keys ``view``, ``fans`` and ``videos``. Each value is
        the matching ``total`` from the channel stats, or ``'-'`` when no
        attempt produced usable data.
    """
    payload = {
        "operationName": "WATCHING_VIDEO",
        "variables": {
            "xid": x_id,
            "isSEO": False
        },
        # Adjacent literals are concatenated by the parser; the resulting
        # string is the same single query that was sent before.
        "query": (
            "fragment VIDEO_FRAGMENT on Video {\n id\n xid\n isPublished\n duration\n title\n description\n thumbnailx60: thumbnailURL(size: \"x60\")\n thumbnailx120: thumbnailURL(size: \"x120\")\n thumbnailx240: thumbnailURL(size: \"x240\")\n thumbnailx360: thumbnailURL(size: \"x360\")\n thumbnailx480: thumbnailURL(size: \"x480\")\n thumbnailx720: thumbnailURL(size: \"x720\")\n thumbnailx1080: thumbnailURL(size: \"x1080\")\n aspectRatio\n category\n "
            "categories(filter: {category: {eq: CONTENT_CATEGORY}}) {\n edges {\n node { id name slug __typename }\n __typename\n }\n __typename\n }\n iab_categories: categories(\n filter: {category: {eq: IAB_CATEGORY}, percentage: {gte: 70}}\n ) {\n edges {\n node { id slug __typename }\n __typename\n }\n __typename\n }\n "
            "bestAvailableQuality\n createdAt\n viewerEngagement {\n id\n liked\n favorited\n __typename\n }\n isPrivate\n isWatched\n isCreatedForKids\n isExplicit\n canDisplayAds\n videoWidth: width\n videoHeight: height\n status\n hashtags {\n edges {\n node { id name __typename }\n __typename\n }\n __typename\n }\n stats {\n id\n views { id total __typename }\n __typename\n }\n "
            "channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n logoURLx25: logoURL(size: \"x25\")\n logoURL(size: \"x60\")\n isFollowed\n accountType\n coverURLx375: coverURL(size: \"x375\")\n stats {\n id\n views { id total __typename }\n followers { id total __typename }\n videos { id total __typename }\n __typename\n }\n country { id codeAlpha2 __typename }\n organization @skip(if: $isSEO) {\n id\n xid\n owner { id xid __typename }\n __typename\n }\n }\n "
            "language { id codeAlpha2 __typename }\n tags {\n edges {\n node { id label __typename }\n __typename\n }\n __typename\n }\n moderation { id reviewedAt __typename }\n topics(whitelistedOnly: true, first: 3, page: 1) {\n edges {\n node {\n id\n xid\n name\n names {\n edges {\n node {\n id\n name\n language { id codeAlpha2 __typename }\n "
            "__typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n "
            "geoblockedCountries {\n id\n allowed\n denied\n __typename\n }\n transcript {\n edges {\n node { id timecode text __typename }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
            "fragment LIVE_FRAGMENT on Live {\n id\n xid\n startAt\n endAt\n isPublished\n title\n description\n thumbnailx60: thumbnailURL(size: \"x60\")\n thumbnailx120: thumbnailURL(size: \"x120\")\n thumbnailx240: thumbnailURL(size: \"x240\")\n thumbnailx360: thumbnailURL(size: \"x360\")\n thumbnailx480: thumbnailURL(size: \"x480\")\n thumbnailx720: thumbnailURL(size: \"x720\")\n thumbnailx1080: thumbnailURL(size: \"x1080\")\n aspectRatio\n category\n createdAt\n viewerEngagement { id liked favorited __typename }\n isPrivate\n isExplicit\n isCreatedForKids\n bestAvailableQuality\n canDisplayAds\n videoWidth: width\n videoHeight: height\n stats { id views { id total __typename } __typename }\n "
            "channel {\n __typename\n id\n xid\n name\n displayName\n isArtist\n logoURLx25: logoURL(size: \"x25\")\n logoURL(size: \"x60\")\n isFollowed\n accountType\n coverURLx375: coverURL(size: \"x375\")\n stats { id views { id total __typename } followers { id total __typename } videos { id total __typename } __typename }\n country { id codeAlpha2 __typename }\n organization @skip(if: $isSEO) { id xid owner { id xid __typename } __typename }\n }\n "
            "language { id codeAlpha2 __typename }\n tags { edges { node { id label __typename } __typename } __typename }\n moderation { id reviewedAt __typename }\n topics(whitelistedOnly: true, first: 3, page: 1) {\n edges { node { id xid name names { edges { node { id name language { id codeAlpha2 __typename } __typename } __typename } __typename } __typename } __typename }\n __typename\n }\n geoblockedCountries { id allowed denied __typename }\n __typename\n}\n\n"
            "query WATCHING_VIDEO($xid: String!, $isSEO: Boolean!) {\n video: media(xid: $xid) {\n __typename\n ... on Video { id ...VIDEO_FRAGMENT __typename }\n ... on Live { id ...LIVE_FRAGMENT __typename }\n }\n}"
        )
    }
    url = 'https://graphql.api.dailymotion.com/'

    # One initial attempt plus ``r`` retries; a negative ``r`` still tries once.
    for _ in range(max(r, 0) + 1):
        response = post_with_retry(
            url,
            json_payload=payload,
            headers=headers1,
            proxies=proxies,
        )
        try:
            # Decoding and every nested lookup stay inside the try so that a
            # non-JSON body or a null/missing stats entry triggers a retry
            # instead of aborting the whole crawl.
            stats = response.json()['data']['video']['channel']['stats']
            return {
                "view": stats['views']['total'],
                "fans": stats['followers']['total'],
                "videos": stats['videos']['total'],
            }
        except (KeyError, IndexError, TypeError, ValueError):
            continue

    # Placeholder row used when every attempt failed.
    return {
        "view": '-',
        "fans": '-',
        "videos": '-',
    }
|
||||
|
||||
|
||||
def integrate_data():
    """Crawl every configured keyword and write one Excel workbook per keyword.

    For each entry returned by ``red_keyword_info()`` the token is refreshed,
    the search is run (with up to three retries when it returns nothing), and
    the video and channel results are written to
    ``out_put_CNTW/<片名>_<搜索词>.xlsx`` as the sheets ``视频信息`` and
    ``用户信息``.
    """
    import os  # local import: the file's import block is outside this block

    # (Excel column, key in the result dict) pairs, in output column order.
    video_fields = (
        ("ID", "id"),
        ("xid", "xid"),
        ("连接地址", "link"),
        ("标题", "title"),
        ("时长 (秒)", "duration"),
        ("关注数", "fans"),
        ("视频数", "videos"),
        ("浏览数", "view"),
        ("添加时间", "createtime"),
        ("封面图片", "pic"),
        ("Index", "index"),
    )
    user_fields = (
        ("ID", "id"),
        ("xid", "xid"),
        ("名称", "name"),
        ("头像", "upic"),
        ("Index", "index"),
    )

    # Create the target directory up front so a missing folder cannot discard
    # the results of a long crawl at write time.
    os.makedirs("out_put_CNTW", exist_ok=True)

    for key_word_item in red_keyword_info():
        gettoken()
        film_name = key_word_item['片名']
        key_word = key_word_item['搜索词']
        print(key_word)

        v_list, u_list = get_searchInfo(key_word)
        if not v_list and not u_list:
            for attempt in range(3):
                time.sleep(attempt * 5)
                v_list, u_list = get_searchInfo(key_word)
                # Any result counts as success; the previous ``> 1`` test
                # kept retrying when exactly one item had been found.
                if v_list or u_list:
                    print(len(v_list), len(u_list))
                    break
        time.sleep(2)

        video_table = {
            "片名": [film_name] * len(v_list),
            "搜索词": [key_word] * len(v_list),
        }
        for column, key in video_fields:
            video_table[column] = [item.get(key) for item in v_list]

        user_table = {
            "片名": [film_name] * len(u_list),
            "搜索词": [key_word] * len(u_list),
        }
        for column, key in user_fields:
            user_table[column] = [item.get(key) for item in u_list]

        df_vido = pd.DataFrame(video_table)
        df_user = pd.DataFrame(user_table)

        output_path = "out_put_CNTW/{}_{}.xlsx".format(film_name, key_word)

        with pd.ExcelWriter(output_path, engine="openpyxl") as w:
            df_vido.to_excel(w, sheet_name="视频信息", index=False)
            df_user.to_excel(w, sheet_name="用户信息", index=False)
|
||||
|
||||
|
||||
# Script entry point: run the keyword crawl and Excel export.
if __name__ == '__main__':
    # gettoken()
    integrate_data()
    # print(get_searchInfo('Running Man'))
|
37
web/dailymotion_com/merge_video_user_data.py
Normal file
@ -0,0 +1,37 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
# Merge the per-keyword workbooks in ``folder_path`` into a single workbook
# with one sheet for videos and one for users.
video_data_list = []
user_data_list = []

folder_path = "out_put_US"

# sorted(): os.listdir returns entries in arbitrary order; sorting makes the
# row order of the merged workbook reproducible.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".xlsx"):
        continue
    file_path = os.path.join(folder_path, filename)

    try:
        video_df = pd.read_excel(file_path, sheet_name="视频信息")
        user_df = pd.read_excel(file_path, sheet_name="用户信息")

        # Record the source workbook in a new column; the existing Index
        # column is left untouched.
        video_df["来源文件"] = filename
        user_df["来源文件"] = filename

        video_data_list.append(video_df)
        user_data_list.append(user_df)

    except Exception as e:
        # Best effort: report the unreadable workbook and keep going.
        print(f"❌ 读取失败: {filename}, 错误信息: {e}")

# pd.concat raises ValueError on an empty list; stop with an explicit message
# (and without touching any existing output file) when nothing was read.
if not video_data_list:
    raise SystemExit(f"❌ 未找到可合并的 Excel 文件: {folder_path}")

# Concatenate all workbooks.
all_video_df = pd.concat(video_data_list, ignore_index=True)
all_user_df = pd.concat(user_data_list, ignore_index=True)

# Write both tables into one Excel file, one sheet each.
output_path = "合并视频用户信息.xlsx"
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    all_video_df.to_excel(writer, sheet_name="视频信息", index=False)
    all_user_df.to_excel(writer, sheet_name="用户信息", index=False)

print(f"✅ 合并完成,文件保存为:{output_path}")
|
0
web/dailymotion_com/out_put_CNTW/你的天空 特别篇_Your Sky
Normal file
0
web/dailymotion_com/out_put_CNTW/恋爱学园_Boys in Love
Normal file
0
web/dailymotion_com/out_put_US/恋爱学园_Boys in Love
Normal file
96
web/dailymotion_com/read_excel_.py
Normal file
@ -0,0 +1,96 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
# Directory scanned for per-keyword .xlsx workbooks.
folder_path = "out_put_CNTW"
# Path of the merged workbook written at the end of the script.
output_path = "out_put_CNTW.xlsx"
|
||||
|
||||
def convert_to_east8(dt):
    """Parse ``dt`` as a UTC timestamp and convert it to Asia/Shanghai time.

    Returns the converted timestamp, or ``None`` when the value cannot be
    parsed (``errors='coerce'`` yields a missing value) or any step raises.
    """
    try:
        parsed = pd.to_datetime(dt, errors='coerce', utc=True)
        if not pd.notna(parsed):
            return None
        return parsed.tz_convert("Asia/Shanghai")
    except Exception:
        return None
|
||||
|
||||
def safe_format_datetime(val):
    """Format a timezone-aware timestamp as ``YYYY-MM-DD HH:MM:SS``.

    The timezone is stripped with ``tz_localize(None)`` before formatting.
    Missing values, and values for which either call fails, give ``""``.
    """
    if not pd.isna(val):
        try:
            naive = val.tz_localize(None)
            return naive.strftime("%Y-%m-%d %H:%M:%S")
        except Exception:
            pass
    return ""
|
||||
|
||||
def format_duration(seconds):
    """Render a duration in seconds as ``M:SS``.

    ``seconds`` is converted with ``int()``; when that conversion fails the
    result is ``""``.
    """
    try:
        minutes, remain = divmod(int(seconds), 60)
    except Exception:
        return ""
    return f"{minutes}:{remain:02d}"
|
||||
|
||||
# Merge the video and user sheets of every workbook in ``folder_path`` into a
# single sheet, mark rows that share an xid, and export the result.
merged_list = []

# sorted(): os.listdir order is arbitrary; sorting keeps the generated 序号
# values stable between runs.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".xlsx"):
        continue
    path = os.path.join(folder_path, file)
    try:
        video_df = pd.read_excel(path, sheet_name="视频信息")
        user_df = pd.read_excel(path, sheet_name="用户信息")

        # Rename the per-type xid columns, then expose a shared "xid" column.
        video_df = video_df.rename(columns={"xid": "v_xid"})
        user_df = user_df.rename(columns={"xid": "u_xid"})

        video_df["xid"] = video_df["v_xid"]
        user_df["xid"] = user_df["u_xid"]

        # Source workbook and row-type markers.
        video_df["来源文件"] = file
        user_df["来源文件"] = file

        video_df["数据类型"] = "视频"
        user_df["数据类型"] = "用户"

        # Video-only fields: timestamps to Asia/Shanghai text, seconds to M:SS.
        video_df["添加时间"] = video_df["添加时间"].apply(convert_to_east8)
        video_df["添加时间"] = video_df["添加时间"].apply(safe_format_datetime)
        video_df["时长 (秒)"] = video_df["时长 (秒)"].apply(format_duration)

        # Empty columns added to both tables.
        for col in ["操作员", "是否盗版", "投诉时间", "历史状态", "是否重复", "重复对象"]:
            video_df[col] = ""
            user_df[col] = ""

        # Combine this workbook's videos and users, ordered by Index.
        combined = pd.concat([video_df, user_df], ignore_index=True, sort=False)
        if "Index" in combined.columns:
            combined = combined.sort_values(by="Index", ignore_index=True)

        merged_list.append(combined)

    except Exception as e:
        # Best effort: report the unreadable workbook and keep going.
        print(f"❌ 读取失败: {file} 错误: {e}")

# pd.concat raises ValueError on an empty list; stop with an explicit message
# (and without touching any existing output file) when nothing was read.
if not merged_list:
    raise SystemExit(f"❌ 未找到可合并的 Excel 文件: {folder_path}")

merged_df = pd.concat(merged_list, ignore_index=True, sort=False)

# Running serial number as the first column.
merged_df.insert(0, "序号", range(1, len(merged_df) + 1))

# ========= Duplicate detection ==========
if "xid" in merged_df.columns:
    xid_rows = merged_df[merged_df["xid"].notna()]
    for xid_val, group in xid_rows.groupby("xid"):
        if len(group) > 1:
            serials = group["序号"].tolist()
            for idx, row in group.iterrows():
                # Every row of the group lists the serials of the other rows.
                others = [str(s) for s in serials if s != row["序号"]]
                merged_df.at[idx, "是否重复"] = "√"
                merged_df.at[idx, "重复对象"] = ",".join(others)

# Drop the helper v_xid/u_xid columns (optional).
merged_df.drop(columns=["v_xid", "u_xid"], inplace=True, errors='ignore')

# ========= Export ==========
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    merged_df.to_excel(writer, sheet_name="合并信息", index=False)

print(f"✅ 合并完成,保存为:{output_path}")
|
240
web/dailymotion_com/report.py
Normal file
@ -0,0 +1,240 @@
|
||||
import json
import os
import time
from typing import Optional, Dict, Any, Union

import redis
import requests
import urllib3
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
|
||||
|
||||
class HttpClient:
    """Thin wrapper around a ``requests.Session`` with a retry policy mounted."""

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Create the session and mount a retrying adapter for both schemes.

        Args:
            max_retries: Passed to ``Retry`` as ``total``.
            backoff_factor: Passed to ``Retry`` as ``backoff_factor``.
        """
        self.session = requests.Session()

        # Responses with these status codes are retried by the adapter.
        retrying_adapter = HTTPAdapter(
            max_retries=Retry(
                total=max_retries,
                backoff_factor=backoff_factor,
                status_forcelist=[500, 502, 503, 504, 429],
            )
        )
        for scheme in ("http://", "https://"):
            self.session.mount(scheme, retrying_adapter)

    def request(self,
                method: str,
                url: str,
                headers: Optional[Dict] = None,
                params: Optional[Dict] = None,
                data: Optional[Union[Dict, str]] = None,
                cookies: Optional[Dict] = None,
                allow_redirects: bool = True,
                timeout: int = 30,
                **kwargs) -> requests.Response:
        """Send a request through the shared session.

        Returns the response once ``raise_for_status()`` has passed.

        Raises:
            requests.exceptions.RequestException: Printed, then re-raised.
        """
        try:
            reply = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                data=data,
                cookies=cookies,
                allow_redirects=allow_redirects,
                timeout=timeout,
                **kwargs
            )
            reply.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {url}, 错误: {str(e)}")
            raise
        return reply

    def get(self, url: str, **kwargs) -> requests.Response:
        """Shortcut for ``request("GET", url, **kwargs)``."""
        return self.request("GET", url, **kwargs)

    def post(self, url: str, **kwargs) -> requests.Response:
        """Shortcut for ``request("POST", url, **kwargs)``."""
        return self.request("POST", url, **kwargs)
|
||||
|
||||
# Module-level HTTP client instance; the functions below all send their
# requests through it.
http_client = HttpClient()
|
||||
|
||||
_REDIS_CONF = {
|
||||
"host": "192.144.230.75",
|
||||
"port": 6379,
|
||||
"password": "qwert@$123!&",
|
||||
"decode_responses": True,
|
||||
"db": 1,
|
||||
}
|
||||
|
||||
def save_report_token(key_name: str, json_data: dict):
    """Store ``json_data`` as a JSON string under ``key_name`` in Redis.

    A new client is built from ``_REDIS_CONF`` on every call. Non-ASCII
    characters are written unescaped (``ensure_ascii=False``).
    """
    client = redis.Redis(**_REDIS_CONF)
    serialized = json.dumps(json_data, ensure_ascii=False)
    client.set(key_name, serialized)
    print(f"已在 Redis(DB {_REDIS_CONF['db']}) 中写入 key -> {key_name}")
|
||||
|
||||
|
||||
def get_report_token(key_name: str):
    """Load and JSON-decode the value stored under ``key_name`` in Redis.

    Returns ``None`` when the key holds no (or an empty) value.
    """
    raw = redis.Redis(**_REDIS_CONF).get(key_name)
    return json.loads(raw) if raw else None
|
||||
|
||||
|
||||
def login(username: str = "copyright@qiyi.com", password: str = "ppsIQIYI2018@"):
    """Obtain an OAuth token with the password grant and cache it in Redis.

    Args:
        username: Account name sent to the token endpoint. Defaults to the
            account that was previously hard-coded.
        password: Password for ``username``.

    Returns:
        dict with ``update_time`` (epoch seconds), ``username``, ``password``
        and ``token`` (the decoded JSON reply of the token endpoint). The same
        dict is stored under the Redis key ``token``.

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    # NOTE(review): the default credentials and the client secret below are
    # committed to source control, and the password is persisted in plaintext
    # in the Redis record. The record layout is kept for compatibility with
    # existing readers — confirm whether the password field is still needed.
    try:
        headers = {
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Origin": "https://www.dailymotion.com",
            "Pragma": "no-cache",
            "Referer": "https://www.dailymotion.com/",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
            "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\""
        }
        url = "https://graphql.api.dailymotion.com/oauth/token"
        form = {
            "client_id": "f1a362d288c1b98099c7",
            "client_secret": "eea605b96e01c796ff369935357eca920c5da4c5",
            "grant_type": "password",
            "username": username,
            "password": password,
            "scope": "userinfo,email,manage_subscriptions,manage_history,manage_likes,manage_playlists,manage_videos",
            "version": "2",
            "traffic_segment": "962042",
            "visitor_id": "359703fb-66c2-43d2-bd0d-b1cac9c7ae8a"
        }
        response = http_client.post(url, headers=headers, data=form)

        # The credentials come from the same variables as the request form,
        # so the cached record can no longer drift from what was sent.
        record = {
            "update_time": int(time.time()),
            "username": username,
            "password": password,
            "token": response.json()
        }
        save_report_token('token', record)
        return record
    except Exception as e:
        print(f"登录失败: {str(e)}")
        raise
|
||||
|
||||
def get_cookies(access_token: str, refresh_token: str):
    """POST the two tokens as cookies to the ``/cookie/refresh_token`` URL.

    The response is not used and nothing is returned; the call goes through
    the shared ``http_client`` (presumably so that cookies set by the server
    end up in its session — confirm).

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    token_cookies = {
        "access_token": access_token,
        "refresh_token": refresh_token,
    }
    try:
        http_client.post(
            "https://www.dailymotion.com/cookie/refresh_token",
            cookies=token_cookies,
            allow_redirects=True,
        )
    except Exception as e:
        print(f"刷新 cookie 失败: {str(e)}")
        raise
|
||||
|
||||
def get_cookies1(access_token: str, refresh_token: str):
    """Follow the ``/zendesk`` redirect and cache the session cookies.

    Requests ``https://www.dailymotion.com/zendesk`` with the tokens as
    cookies and a ``return_to`` pointing at the new-request form, then stores
    the cookies currently held by the shared session under the Redis key
    ``cookies``.

    Returns:
        dict with ``update_time`` (epoch seconds) and ``cookies``.

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    try:
        token_cookies = {
            "access_token": access_token,
            "refresh_token": refresh_token,
        }
        query = {
            "return_to": "https://faq.dailymotion.com/hc/en-us/requests/new",
            "timestamp": str(int(time.time())),
        }
        http_client.get(
            "https://www.dailymotion.com/zendesk",
            cookies=token_cookies,
            params=query,
            allow_redirects=True,
        )
        snapshot = {
            "update_time": int(time.time()),
            "cookies": dict(http_client.session.cookies),
        }
        save_report_token('cookies', snapshot)
        return snapshot
    except Exception as e:
        print(f"获取 cookies 失败: {str(e)}")
        raise
|
||||
|
||||
def get_csrftoken():
    """Fetch the help-center CSRF token document and cache it in Redis.

    Returns:
        dict with ``update_time`` (epoch seconds) and ``csrf_token`` (the
        decoded JSON reply); the same dict is stored under the Redis key
        ``csrf_token``.

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    try:
        reply = http_client.get("https://faq.dailymotion.com/hc/api/internal/csrf_token.json")
        record = {"update_time": int(time.time()), "csrf_token": reply.json()}
        save_report_token('csrf_token', record)
        return record
    except Exception as e:
        print(f"获取 CSRF token 失败: {str(e)}")
        raise
|
||||
|
||||
def report(csrf_token:str, cookies:dict):
    """Submit the request form on faq.dailymotion.com.

    Args:
        csrf_token: Value sent in the ``X-CSRF-Token`` header.
        cookies: Cookies sent with the request.

    Returns:
        ``True`` when the final response status is exactly 200.

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    request_headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://faq.dailymotion.com",
        "Pragma": "no-cache",
        "Referer": "https://faq.dailymotion.com/hc/en-us/requests/new",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        "sec-ch-ua": "\"Chromium\";v=\"136\", \"Microsoft Edge\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "X-CSRF-Token": csrf_token
    }
    form = {
        "request[subject]": "版权投诉",
        "request[description]": "请删除侵权视频",
        "request[email]": "copyright@qiyi.com",
        "request[ticket_form_id]": "360000717219"
    }
    try:
        reply = http_client.post(
            'https://faq.dailymotion.com/hc/en-us/requests',
            cookies=cookies,
            headers=request_headers,
            data=form,
        )
        return reply.status_code == 200
    except Exception as e:
        print(f"提交报告失败: {str(e)}")
        raise
|
||||
|
||||
def prepare_data():
    """Make sure fresh credentials exist, then submit the report.

    Loads the cached ``token``, ``cookies`` and ``csrf_token`` records from
    Redis. When any of them is missing, or the oldest ``update_time`` is more
    than 24 hours old, logs in again and rebuilds all three. Finally calls
    ``report`` with the CSRF token and cookies.

    Raises:
        Exception: Any failure is printed and re-raised.
    """
    try:
        token = get_report_token('token')
        cookies = get_report_token('cookies')
        csrf_token = get_report_token('csrf_token')

        cached = (token, cookies, csrf_token)
        # ``not all(cached)`` is evaluated first, so min() only ever sees
        # complete records. Previously min() ran over just the records that
        # were present: it raised ValueError on an empty cache, and a
        # partially missing cache never triggered a refresh.
        needs_refresh = (
            not all(cached)
            or min(d.get('update_time', 0) for d in cached) + (24 * 60 * 60) < time.time()
        )

        if needs_refresh:
            token = login()
            if not token:
                raise Exception("登录失败")

            access_token = token['token']['access_token']
            refresh_token = token['token']['refresh_token']

            get_cookies(access_token, refresh_token)
            cookies = get_cookies1(access_token, refresh_token)
            csrf_token = get_csrftoken()

            if not all([cookies, csrf_token]):
                raise Exception("获取 cookies 或 csrf_token 失败")

        if not all([token, cookies, csrf_token]):
            raise Exception("获取令牌失败")

        success = report(csrf_token['csrf_token']['current_session']['csrf_token'], cookies['cookies'])
        if not success:
            raise Exception("提交投诉失败")

    except Exception as e:
        print(f"处理数据失败: {str(e)}")
        raise
|
||||
|
66
web/dailymotion_com/utils/http_client.py
Normal file
@ -0,0 +1,66 @@
|
||||
import time
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
from typing import Optional, Dict, Any, Union
|
||||
|
||||
class HttpClient:
    """``requests.Session`` wrapper with a retry policy for both URL schemes."""

    def __init__(self, max_retries: int = 3, backoff_factor: float = 0.5):
        """Build the session and attach the retrying adapter.

        Args:
            max_retries: Passed to ``Retry`` as ``total``.
            backoff_factor: Passed to ``Retry`` as ``backoff_factor``.
        """
        self.session = requests.Session()

        # Retry policy: these status codes trigger a retry.
        policy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=[500, 502, 503, 504, 429],
        )

        # Apply the policy to the session for http and https URLs.
        adapter = HTTPAdapter(max_retries=policy)
        for prefix in ("http://", "https://"):
            self.session.mount(prefix, adapter)

    def request(self,
                method: str,
                url: str,
                headers: Optional[Dict] = None,
                params: Optional[Dict] = None,
                data: Optional[Union[Dict, str]] = None,
                cookies: Optional[Dict] = None,
                allow_redirects: bool = True,
                timeout: int = 30,
                **kwargs) -> requests.Response:
        """Generic request method.

        Sends the request through the session and returns the response once
        ``raise_for_status()`` has passed.

        Raises:
            requests.exceptions.RequestException: Printed, then re-raised.
        """
        call_args = dict(
            method=method,
            url=url,
            headers=headers,
            params=params,
            data=data,
            cookies=cookies,
            allow_redirects=allow_redirects,
            timeout=timeout,
        )
        try:
            response = self.session.request(**call_args, **kwargs)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {url}, 错误: {str(e)}")
            raise
        return response

    def get(self, url: str, **kwargs) -> requests.Response:
        """GET request wrapper."""
        return self.request("GET", url, **kwargs)

    def post(self, url: str, **kwargs) -> requests.Response:
        """POST request wrapper."""
        return self.request("POST", url, **kwargs)
|
||||
|
||||
# Module-level HttpClient instance.
http_client = HttpClient()
|
116
web/doordash/main.py
Normal file
@ -0,0 +1,116 @@
|
||||
import requests
|
||||
import json
|
||||
|
||||
class DoorDash:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def parse(self):
|
||||
|
||||
cookies = {
|
||||
'__cf_bm': 'DeiqM0wBQUTk.n88hE77uEZ.WfLdmDCCAjcUeUD895Y-1741189317-1.0.1.1-A.cPG7yAL2gQ9ErX9iHLETCB16qpewadnG4ad5tE1vwGMbt1GJ1eYZvmeQVGHdwH1lfpC4_OG2_Zqbv4.pE53jLMuzQNib75mRrYI50e52IXcW8a5_UcRHI5CYACnLLf',
|
||||
'__cfwaitingroom': 'Chg0U0ZnUjFhbXBOQjJrMWFXdzUrNEJBPT0SgAJySXhTYlJublhNTEhJejhpbVJkV01vd0YrblBDMGVMdDdXSGhIYkprYjVUR2E2bytPcGgzeWZKZE9kUHdISDJIWTdjR01jZ3NqcEJycUxqVHB2REtKVlN3aEx5c09yU0ltbkVWMHFjcGd3RVBULzhuWFR6RDVNZlVXeHp3clgrS0NVaWZ4eGN1QVRpUWI0aENWUDV6Z2g0Uk1jbUFDUVA5NEV0amt4K1Y4bkNSNWdDc2hSTkdHL2lzbnpiV0RndXZxd0Y2emFoejlIOGcwVmNxVTdQVktwdm8wOXN0TXdDRWJlZmhIa0xmeDZLODFhTFdSOVhiR2U2TThUWlV6bDV1',
|
||||
'_cfuvid': 'OhLu8QiDvU1kFzSW0Bc3jnsJCdKW4bX88JsDj6HA9MY-1741189318649-0.0.1.1-604800000',
|
||||
}
|
||||
|
||||
headers = {
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'accept-language': 'zh-CN,zh;q=0.9',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/x-www-form-urlencoded',
|
||||
'origin': 'https://www.doordash.com',
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=0, i',
|
||||
'referer': 'https://www.doordash.com/store/orlando-china-ocean-orlando-24579150/?srsltid=AfmBOopVpfmVadkxGCuL8OFMk-I2G54QsfI0akAcPzxvLYbz8Wzxp0P0&__cf_chl_tk=Jjhqq2WTVqoit5J4FyAS1SUt2xz6wZ8gwtgaPrUg1_4-1741189317-1.0.1.1-HyUzlYWauczcJmr1NXUGCOr5zGRO2UI2RgzN8ufTLQA',
|
||||
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
|
||||
'sec-ch-ua-arch': '"x86"',
|
||||
'sec-ch-ua-bitness': '"64"',
|
||||
'sec-ch-ua-full-version': '"133.0.6943.142"',
|
||||
'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.142", "Chromium";v="133.0.6943.142"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-model': '""',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-ch-ua-platform-version': '"10.0.0"',
|
||||
'sec-fetch-dest': 'document',
|
||||
'sec-fetch-mode': 'navigate',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'sec-fetch-user': '?1',
|
||||
'upgrade-insecure-requests': '1',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||
# 'cookie': '__cf_bm=DeiqM0wBQUTk.n88hE77uEZ.WfLdmDCCAjcUeUD895Y-1741189317-1.0.1.1-A.cPG7yAL2gQ9ErX9iHLETCB16qpewadnG4ad5tE1vwGMbt1GJ1eYZvmeQVGHdwH1lfpC4_OG2_Zqbv4.pE53jLMuzQNib75mRrYI50e52IXcW8a5_UcRHI5CYACnLLf; __cfwaitingroom=Chg0U0ZnUjFhbXBOQjJrMWFXdzUrNEJBPT0SgAJySXhTYlJublhNTEhJejhpbVJkV01vd0YrblBDMGVMdDdXSGhIYkprYjVUR2E2bytPcGgzeWZKZE9kUHdISDJIWTdjR01jZ3NqcEJycUxqVHB2REtKVlN3aEx5c09yU0ltbkVWMHFjcGd3RVBULzhuWFR6RDVNZlVXeHp3clgrS0NVaWZ4eGN1QVRpUWI0aENWUDV6Z2g0Uk1jbUFDUVA5NEV0amt4K1Y4bkNSNWdDc2hSTkdHL2lzbnpiV0RndXZxd0Y2emFoejlIOGcwVmNxVTdQVktwdm8wOXN0TXdDRWJlZmhIa0xmeDZLODFhTFdSOVhiR2U2TThUWlV6bDV1; _cfuvid=OhLu8QiDvU1kFzSW0Bc3jnsJCdKW4bX88JsDj6HA9MY-1741189318649-0.0.1.1-604800000',
|
||||
}
|
||||
|
||||
params = {
|
||||
'srsltid': 'AfmBOopVpfmVadkxGCuL8OFMk-I2G54QsfI0akAcPzxvLYbz8Wzxp0P0',
|
||||
}
|
||||
|
||||
data = {
|
||||
'62f575be50a6447af9fe0d9959952ee6dec9bf2ffbdf0558f021763cb249bcab': '9eHmWM7NMmZh1C8FtgbjrStbitYveFM7mthG7xnA2Ns-1741189317-1.2.1.1-__PYXC7JoGCXoo_25K_6TXEnUckVOlewmPlh3Y9YhIpS2wnTMq2mpxVkW4nY1lxE0xGbln5UrRh.lH7Vl57Vnh3tLJsRT5lOGJwSY6bKqIHEUPvb5HU0Bf2RxTATmyzEQN19zSlu6kx4zUkQ14UN21iHUqCqJ6LXyofEZYnEVWu0kgabM9lKZhSOL4SoLGJ2v5V_63Gyu6fJyZmJxzXvnlQE0ehDGsCLI_2JPF6XUKsTMtuMZRAkZSL02gjJkZYNJubeLuM83O2VOtkjn88pB1VhsdTuEVKbZBCyxAfW2CbxFRwRK5gMhqdMdR1qK0DBRqqnbSUvrr7gtKiV_pKVUxFeVBROqS9Nugx9MK3xDopBodE4oUAiK37mD.0PwlVn_pnFc0uQQNxWBOqqcWgo4OWurEZ3qJPjsuP.wL6sndYK45j0pI6Zm5PJ5RoT.ZkAhAijwV6MbtodqizwJOv8vpMNNiPq_ybnj6VcdWrbi7NhGUVYXrqpfoFypR8SaUaYg7rTCp3Xc1sTRMQw2f_ab8yb8wLVe19EnNG_SZVm79CnXgaV.qA9gfMUw7dncyKbqn9l_h8jXUrLezS0OucgcJ20MZGyiZHY0eLVp.ZS2xY2olJ44lWACoiN0H54hyxpUGnQxuZhPlCFN3DJFNYGfP.IshJw5T1cDieUwkMFfL3w5JcdPh0cES6scydGWlROZ3tnKxHUHT2brsZoe9h7jmYYTLaJbG5PcDO8e5dh3WIglBDYrXNWfrL8QCIXwtgEK88pOO60_WtvzthrdM0x_.4CNXLHPLHv2eJ4rI1dWt9nOeTy4WNdXLktSN5l2_THBh6xloHpIeKcyf6j2GGoQO2bTvuYABkOdGtSORiiRjSJ1i1kWF5QJcIWpFRemKOpKF0vXrzRRJAYRf4Lt8fbh2dooiImKqRdiXncHXMHBysBXcTyVH5iBoX37DKrpsZkXP2uiU.VF5oG6LOn2TOd38Jgoi6l37NY15vaXeEg6EFZw5KHEyn2PLOd9kvy4FbhzhfjHbVvuof.ggB6LoqNMKtVtoXTKKfx9a0wARNK4HfsfPF06VZOjvd1LX8Wzlrr5UtUWosMoEPmM9NbJ_E2sB8k5DwlTOsfHUks_lmUxsQNr3xDs7C363MK1mt.dUWJ9XGm0h5cvSMha3i1SQQhTXLv6RAk1BD7t_K4IuoPzLE8oE2UaQ..QQrxtpVsAkYKTWCkWcOdbL1FF1mMF7rh6w0Gsh7ETIVWTRzcM4kFn9Dlzxh_tpzlozsXMg1XgiKWRlcWRjTLbY7sIJqTt5E9x4fpdPd06zUa56g6Chw7QqElPn3VgRvPvmM5lj6owPiAW1n_K_aeQbZCNjn3xlwGrsB7EykuUJg98W2EJHghnH.RppgG1Zokr6Bzp4KQPnGL4BMqr4gHK1J7zjuC0VXnHehzSlrxEeGn761wBsfI70k',
|
||||
'6e598580991b23da60b50de353ecd7be82cbaa04a283acff81a7abbaebc64e10': 'ZhR68_OeLGIT1blglp0v39UpIkfYfAaEC8878.6tAY0-1741189317-1.2.1.1-.4GefhDsbhTEryFpybpNySSL2VtJ5cRzwxpdMd7Z5oU8qlspFIi_0vQ5jooMr7GgP_mP6wRFJDW6yUkOdBQRvKEZnyep_P9u4h6GwppGJhMTniFv3I9RoAq8PYZALkFGiwZCsCsB3OLplBdkPegTjwJUHCCif1ZlgXAP1ZtG61pCNWT2DSLn_p3fW.Q6prUM1VNPLBy69zw4MrBxgZZPK.qrqVX85dvlLy03W2UA7DJmXwyMilgPWs3Qc43zIsn6CXXlifliNv_3VUI003jsgZubzKop1TDZ_dm4afIIndaInD_iDtrO.01Mw.Ir7WYs.pkF3YBXM81SHZ4Nn2r0Um_ClyN643qbpA_poblAXix2XjAb1rtTp8DRXRciHe1P8nww7yPWa1W8E4PJy7rzj1klcGS6ccx7mDm056rhc_rt6WHPVe3labuxjGtJUTboRyqvnSrdqZs0rhv21vD2RJk1BtolGmVOY7CPei2VapMfXcpv60ROZtpNJMVCtnrMdVvgflniaNMSiwvEXBKcizTr61w6ZM39lYjfc.DA7RfKSEpJi6itprKtJd3FMoYlHqtZQEmbq7WMw93CzV.TDEe8gB5jx3jwo2bFiO5aNuL5NswFgZTbZeQ2_9bAEpLy3iIS7yBFFUBQ0DxMCkCMEDEkDmm1.BjT9attJunefBqg8ElRN27_iPhez.MD1SqedrDX3_azxaSlckzZT222e2WGkQwaaQW7oW.xH_WXBpe6_.de9YD5LRTA5fv1y0fX2yl8CpQwrV1n8N9N5KO8m8xBlCBysto9FlTwuZipUw9SdNX7ufuOhYTP7me3dgCLQv.0TNHwJSa2yegQSuDl2hRKtMqCbApP1x7T_MBM9i4RMUmYwPsDNt5_LRG2ApszmqID8xrX55b1LaEANR.QjPo2ZhrbtQNBK.hUHNI7x7CdHs40ls_Qlmchly3CCdsBkcz1QNaiWuVbOMBNJEEB1H00MIEDXepGDk1OT5d05wnVIDjdZYCavCye0AvjTwyqNqHU8pELaAeosJWSqtWEVBpntr7lMqXTBauh.eSc_90bMwmwVdeCR0aUAu_Ny2j6z2jXlexrkVQpmV4HWjmfJni_qkJvGlE5Gu_tMCJjddsdMr8ulPQWCCHxN11g3vI_tLmBoO5hh8sR5G5glXaIAejdLR.bdEw1sffTZyKJwgQ.nKbIz228aqEOraJC4AxTQs_AWI.OKR7zGHmUlmrgtkYOr8Rz3gdOjlkinJqJhACjIe1AGOUH4qK5n.QEdsDCCrlrc3FkqzLfhOPtCDiGYuAzBNRpFPomk8csFBz9ql6I5VJ0bJxwDF18GBM2irBsNqFSm5Jf1kQ3N93M4ZUpcCJ1712ziuqS1USLhY1oyfR.8Mz6uKgniepMha3IyiU0ayBaziu7PMV4MAJqard78mOBLTQKCIVlP58FuXmrwBR3wx1s99anrcSD1Z06Ha3YmhptPaotgB84oZiiOC49hLxJD_sdDDineeoU0WigNErVkylMJ_IdHcjhvPWsJnv_ohJbxYhK9TPPhOCpMSNsLQK5ehwbzlW4jUDuh8hwvVFsqAyjIMhtoBSE9.SzbywNqEQ7N4z4WwlcbLaxQS9FHHp4TBuNdIEWxyfudz5dNCo.XSqFQU2zXC9h1_NxxyGYKWHBfX_RImvAwbePtGSKuKxFGmfTsmPI6PyWjo139_zOGaNneKbxiqyD1UBde26aqui9LgZEWvtTVDL49k8WkxNPlO_R6rbhe6EwTj32F_6sY8dniHDF3vUHlGKdhfxf.XfWNLSzwtlPa3sWe2gj5iq_X8JNZgEeKD1k94PF4PECSCTfNTNcVbSKyO9nXh6mddvyGkbgK1gb
NDlcQHtjJNdSNtcEyOpWLPyPmjImbox7JaNn.ldukMwJ3C68rIBDqHVUSI8_5XdsWsJHRn4Ia_nSvO42POpI0OAC0gdGyC32UYoFrMY2ZhQJIrWeL5Icp5o62TOpJ8zgLbND953iqOdxMGfHVxqSoaHwqx5GSCF_n_7ew9T0VyJlg6AszpnkNranJqESFILA3cbOJQ3HJ_Ls8OWjfSYfPe1p4Ywzeer7tu_i4.804TH.OT2G6oiyzyI73T0hcXp.ryslhejUF1vjy1Y6bNpYZLCrYqz92PvuC.BvkzafaX_CFS7F8oq3qq5W._MSlXgg7tmtEh2M0xsEvTVyaabvDbgE.6mJUxS2PXTSV5gJ_XFpKLG14MQOXbtnGe2yy7gFWp7uBs62Y8jeMrAm_O6EMqF1BlveTca5okKjQsRcqLqP1eww4.ofbDWiOf15277FIYuTWnvEROsnxkv4pKI_d99SORab6HBs4_rohdrWaOVNiiTAx.7AXj3jwsDS0zuENXVqkicM1N9zP2uRfi7UIFuivX1P9LdgPUYgeGPcWGOtMpSXYEUS03rnPcKgVhxNk98OqszVH7.HeD5l0_eFsNYmUjnEhsjn9Ou9aldxpe044k8PY7at.li1iUF0RI0Ejmtr8BL.B6ue5OPR9NldfKz6SVImcK.3WncZBt.pFNh8.UFwcWWi5n7wNCBXHIJfif_dIljeQTQVercJglnxMKanBoz0rSjo4ep1YmcvWcoSffHW1XNdZ8UhfQjmKHyefod7WJo3dUjNoA6Hyu8S_OdklHBqmHERyTy8zUvxntmiPcd2xaYf',
|
||||
'36ff6248f65188bf84765b4f9f16789df2a522c5d132bb8d275f15d273c986bf': 'gOy7UhEi6LyMVNz2.e5jiWbKGW4U1aeHXOKpnKAPwlE-1741189358-1.1.1.1-.75nzfIDuPdP7tbAuIKQI1PQOs0.fe.cbtjneFBMEpalt3jp9UYUxyIXbqMr1ayt1o.p1FsHsj.zmuKTu8ilGNMGpbvZ9AkGraZ8v2FjSUrBs..RhBs45pS761RhZ_6ULj0C_WGwgPFc9gw4Mo08c6e0zwmtvHYXeA6s992jnypGOULp1IhGGRe4tdAQo00B',
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
'https://www.doordash.com/store/orlando-china-ocean-orlando-24579150/',
|
||||
params=params,
|
||||
cookies=cookies,
|
||||
headers=headers,
|
||||
data=data,
|
||||
)
|
||||
print(response.text)
|
||||
|
||||
def store(self):
|
||||
cookies = {
|
||||
'cf_clearance': '4nu56rwYVjgmEhmhzLzWE2lUkGSif3bRcIS7rZEFO_8-1741189789-1.2.1.1-39bCLNa.HimyPNWE5OX3KNIGsre3Uyz0HgwU7WfEbCbSP2wWU.WzmwReED0WfQW8oUrtTH7Mmdr3mq7cZCXbsyI8zn0hii2mVdC0QvdKxfUv_liW6QZZy.PUCOI.GCT4JAiP4s8afKZiK2Hu2AGuzEJnxpdu85yuMeajDZstihieiDwYf5HC2M2R74SciEqCUtb7GS8VJAuY_dQl6Xulqw0ywKPtmZabX.kJJxMm41DV71fSNBrw4AAX3E0q7c3G4TeuKrcyvkKdy8LMpOjilNXaOzMOliLijJa7VrJkgcVJUSXf9MtacV0O3klUFcxAAdm5hQy0Tw8Ms0QflQorTTJ514OzGbu9k2YQsn7uyibz_H5k3kLPvmRuoQzIwh532xZMKFIt8MizE6XMcZf.njOXhbmqQ3mNFkPW7Y9SwhAsrH_G6whURaWrmQjNYe3TeFDr0IQN6H3YsIGvX_0bEiBZ7aDW6c8KChgS7v12mRM',
|
||||
'lastRskxRun': '1741189790792',
|
||||
'_ga_BXB2XKP8LL': 'GS1.1.1741189794.1.0.1741189794.0.0.0',
|
||||
'_ga_J4BQM7M3T2': 'GS1.1.1741189368.1.1.1741189794.25.0.1702931391',
|
||||
'_ga': 'GA1.2.1107195518.1741189794',
|
||||
'_uetvid': '7e0568d0f9d811ef8225f7d345309425|1ne431i|1741189796909|4|1|bat.bing.com/p/insights/c/o',
|
||||
'__cfwaitingroom': 'ChgvSGh1RkpBc3FiZ1hpZ0g5dG5MY1pRPT0SjAJyUytBQUovajE1Z2hWTkE2OW5kRzhhUTdKQkM0c1NIY0VWNkhZdndJK1FZVko4eS8rbUxYU1I5UHFtYW5tZitzMDBMWFNwUVllK1gySnRoLzdjUC9LOFUvai90aStHU3QyZWh2U3pNbHF5TFludGlKb3ZDaVZKUjlPbE9maDZsWXo1Z0I1YlJxTFBmd0JTVUVQbTR3bTlCc0dUL2RxWGE1dGt2NHRhTElMK3VRTWVkdWpEOTQ2RnpRKzMyNHdoZDdHOTd1bVNmZHBSUk9Fdk1LbDJRWGp5UlBPM0JkdUVOZThRekJlUnRlRTVBb3FDcFZQeEF3MUhBUXRuMUhtTzdzS05ubWpQalNqdz09',
|
||||
'__cf_bm': 'IgYmFAKo0m17Bd9zrC3TxmYCcY7A8Z3T1VA9RlC5r2U-1741189802-1.0.1.1-wTjgUnO70QxQRNK497JzLMMf_GlQH6hXzhyN_BgWpYRXtIsKmlHyJiIXRJV_OnZhFaoUF2dptNWFOQhso08EF7nowNmbk8uc.OzZwBY6hGYIaKwuM6ejWPKJ3_RY2c4u',
|
||||
'_cfuvid': 'vHXGbiJ8z27CvWUMoLxxUy7t_Owf3MgJL0Lmk0sGK.8-1741189802499-0.0.1.1-604800000',
|
||||
'amplitude_id_8a4cf5f3981e8b7827bab3968fb1ad2bdoordash.com': 'eyJkZXZpY2VJZCI6IjUyMTRkZTg3LTIwNzMtNDJjNS1hMWY1LWUyM2U1NzQ4OTFkY1IiLCJ1c2VySWQiOm51bGwsIm9wdE91dCI6ZmFsc2UsInNlc3Npb25JZCI6MTc0MTE4OTc5MzY5OSwibGFzdEV2ZW50VGltZSI6MTc0MTE4OTgyMDE4MSwiZXZlbnRJZCI6NSwiaWRlbnRpZnlJZCI6Mywic2VxdWVuY2VOdW1iZXIiOjh9',
|
||||
}
|
||||
|
||||
headers = {
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'accept-language': 'zh-CN,zh;q=0.9',
|
||||
'cache-control': 'no-cache',
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=0, i',
|
||||
'referer': 'https://www.doordash.com/store/orlando-china-ocean-orlando-24579150/?srsltid=AfmBOopVpfmVadkxGCuL8OFMk-I2G54QsfI0akAcPzxvLYbz8Wzxp0P0&__cf_chl_tk=Id3Y.U2lJHpLzA5FmcHBoe6BfOodJN1PBWNO4H5qEuA-1741189781-1.0.1.1-0eEqpSdGuZHhOfy.CS9y3jQptQo.ajcHXN0ot.A5QVc',
|
||||
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
|
||||
'sec-ch-ua-arch': '"x86"',
|
||||
'sec-ch-ua-bitness': '"64"',
|
||||
'sec-ch-ua-full-version': '"133.0.6943.142"',
|
||||
'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.142", "Chromium";v="133.0.6943.142"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-model': '""',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-ch-ua-platform-version': '"10.0.0"',
|
||||
'sec-fetch-dest': 'document',
|
||||
'sec-fetch-mode': 'navigate',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'sec-fetch-user': '?1',
|
||||
'upgrade-insecure-requests': '1',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36'
|
||||
}
|
||||
|
||||
params = {
|
||||
'srsltid': 'AfmBOopVpfmVadkxGCuL8OFMk-I2G54QsfI0akAcPzxvLYbz8Wzxp0P0',
|
||||
}
|
||||
|
||||
response = requests.get(
|
||||
'https://www.doordash.com/store/orlando-china-ocean-orlando-24579150/',
|
||||
params=params,
|
||||
cookies=cookies,
|
||||
headers=headers,
|
||||
)
|
||||
print(response.text)
|
||||
print(response.status_code)
|
||||
|
||||
if __name__ == '__main__':
|
||||
dd = DoorDash()
|
||||
# dd.parse()
|
||||
dd.store()
|
200
web/fnrc_vip/2db.py
Normal file
@ -0,0 +1,200 @@
|
||||
import requests
|
||||
import pymysql
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
# ==== 数据库配置 ====
|
||||
MYSQL_CONFIG = {
|
||||
'host': '39.101.135.56',
|
||||
'user': 'tsreshub_prod',
|
||||
'password': 'Tr5h$Prod!92@TsRH',
|
||||
'database': 'tsreshub_db',
|
||||
'port': 3306,
|
||||
'charset': 'utf8mb4'
|
||||
}
|
||||
|
||||
# ==== 请求配置 ====
|
||||
HEADERS = {
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/json;charset=UTF-8',
|
||||
'origin': 'https://www.fnrc.vip',
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=1, i',
|
||||
'referer': 'https://www.fnrc.vip/enterprise/resume_store/list',
|
||||
'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
|
||||
}
|
||||
COOKIES = {
|
||||
'PHPSESSID': 'ca613ae99706037e356a247500acb97b',
|
||||
'auth-token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDczNzA1ODUsImp0aSI6IjBlZDI0NTM0LWE0NjEtNDkxNC1iNDU1LWQxZGEzYzQ5N2U0NiIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.HoaWksDiMxtkbBJ8jVPlKLKzd1UqNHo4KfecS2uVUaM',
|
||||
'company_sign': '',
|
||||
'company_nonce': '',
|
||||
'cuid': '',
|
||||
}
|
||||
|
||||
# ==== 字段清洗函数 ====
|
||||
def extract_int(s):
    """Return the first run of digits found in ``s`` as an int.

    ``s`` is converted with ``str()`` before searching, so numbers and
    ``None`` are accepted as input.

    Returns:
        The integer value of the first digit run, or ``None`` when the text
        contains no digits.
    """
    match = re.search(r'\d+', str(s))
    # An explicit no-match check replaces the former bare ``except:``, which
    # also swallowed unrelated errors such as KeyboardInterrupt.
    if match is None:
        return None
    return int(match.group())
|
||||
|
||||
def parse_datetime(s):
    """Parse an ISO-8601 string into a ``datetime``.

    Returns:
        The parsed ``datetime``, or the placeholder ``datetime(2019, 12, 12)``
        when ``s`` is not a string or is not valid ISO format.
    """
    try:
        return datetime.fromisoformat(s)
    except (TypeError, ValueError):
        # TypeError: ``s`` is not a str (e.g. None); ValueError: bad format.
        # Both fall back to the same placeholder date the original returned.
        return datetime(2019, 12, 12)
|
||||
|
||||
def clean_item(item):
    """Map one raw resume record onto the column names used for the insert.

    The first four entries of ``item["experience"]`` are flattened into
    ``work_N_experience`` / ``work_N_time`` / ``work_N_description`` fields
    (empty strings when there are fewer than four entries). Only keys listed
    in the field map are kept, renamed to their mapped names. ``age`` is
    reduced to an int and ``update_time`` is parsed into a ``datetime`` when
    present. ``source_id`` is always set to 3.

    Args:
        item: dict for one resume as returned by the listing request. It is
            not modified.

    Returns:
        A new dict keyed by the mapped column names.
    """
    reverse_field_map = {
        'resume_id': 'resume_id',
        'user_name': 'name',
        'sex_show': 'gender',
        'user_age': 'age',
        'area_show': 'job_location',
        'birthday': 'birthday',
        'education_level_msg': 'education',
        'expect_job': 'expected_position',
        'last_edit_time': 'update_time',
        'marry_status_show': 'marital_status',
        'residence': 'current_location',
        'phone_encrypt': 'phone',
        'work_type_show': 'job_property',
        'work_status_show': 'job_status',
        'work_1_description': 'work_1_description',
        'work_1_time': 'work_1_time',
        'work_1_experience': 'work_1_experience',
        'work_2_description': 'work_2_description',
        'work_2_time': 'work_2_time',
        'work_2_experience': 'work_2_experience',
        'work_3_description': 'work_3_description',
        'work_3_time': 'work_3_time',
        'work_3_experience': 'work_3_experience',
        'work_4_description': 'work_4_description',
        'work_4_time': 'work_4_time',
        'work_4_experience': 'work_4_experience',
    }

    # Work on a shallow copy so the flattened work_* keys are not written
    # back into the caller's dict.
    item = dict(item)

    # ``or []`` also covers an explicit null "experience" value, which
    # previously crashed on ``len(None)``.
    experience = item.get("experience") or []
    for j in range(4):
        entry = experience[j] if j < len(experience) else {}
        item[f"work_{j + 1}_experience"] = entry.get("company", "")
        item[f"work_{j + 1}_time"] = entry.get("time_line", "")
        item[f"work_{j + 1}_description"] = entry.get("content", "")

    cleaned = {
        reverse_field_map[k]: v
        for k, v in item.items()
        if k in reverse_field_map
    }

    # The former "height"/"weight" branches were removed: no entry of the
    # field map produces those keys, so they could never run.
    if "age" in cleaned:
        cleaned["age"] = extract_int(cleaned["age"])

    if "update_time" in cleaned:
        cleaned["update_time"] = parse_datetime(cleaned["update_time"])

    cleaned["source_id"] = 3
    return cleaned
|
||||
|
||||
# ==== 主逻辑 ====
|
||||
def main(start_page=6, end_page=8, step=1000):
    """Fetch resume listing pages and upsert them into ``resumes_resumebasic``.

    Each page is requested with the module-level HEADERS and COOKIES, every
    returned record is passed through ``clean_item``, and all cleaned rows are
    written in one ``INSERT ... ON DUPLICATE KEY UPDATE`` batch.

    Args:
        start_page: first page number to request (inclusive). Defaults to the
            previously hard-coded 6.
        end_page: page number to stop before (exclusive). Defaults to the
            previously hard-coded 8.
        step: page size sent in the ``step`` field. Defaults to 1000.
    """
    session = requests.Session()
    session.headers.update(HEADERS)
    session.cookies.update(COOKIES)

    url = "https://www.fnrc.vip/job/company/v1/resume/page"
    all_items = []

    for page in range(start_page, end_page):
        payload = {
            'step': step,
            'page': page,
            'education_level': [],
            'arrival_time': [],
            'work_time': [],
            'area_id': [],
            'keywords': '',
            'work_status': '',
            'work_status_show': '求职状态',
            'category_id': '',
            'work_type': '',
            'work_type_show': '是否兼职',
            'sex': '',
            'sex_show': '性别',
            'is_head': '',
            'is_head_show': '有无照片',
            'job_id': '',
            'age': [],
            'age_show': '年龄',
            'refresh_time': 0,
            'site_id': '',
            'site_id2': '',
            'province': '',
            'city': '',
            'county': '',
            'provinceArr': [],
            'cityArr': [],
            'countyArr': [],
            'only_job_category': 0,
        }

        # A failed page is reported and skipped so the remaining pages are
        # still fetched.
        try:
            resp = session.post(url, json=payload, timeout=10)
            resp.raise_for_status()
            # ``or []`` covers a response whose "data" is null.
            data = resp.json().get('data') or []
            print(f"📖 第{page}页拿到 {len(data)} 条数据")
            all_items.extend(clean_item(item) for item in data)
        except Exception as e:
            print(f"❌ 请求第{page}页失败: {e}")

    if not all_items:
        # Nothing to write, so the database is never contacted.
        return

    # Column list = union of keys over all rows, in first-seen order. Values
    # are then looked up per column, so a row with missing or reordered keys
    # can no longer shift values into the wrong column.
    keys = list(dict.fromkeys(key for item in all_items for key in item))
    columns = ', '.join(keys)
    placeholders = ', '.join(['%s'] * len(keys))
    update_clause = ', '.join(f"{key}=VALUES({key})" for key in keys if key != 'resume_id')

    sql = f"""
        INSERT INTO resumes_resumebasic ({columns})
        VALUES ({placeholders})
        ON DUPLICATE KEY UPDATE {update_clause}
    """
    values = [tuple(item.get(key) for key in keys) for item in all_items]

    # The connection is opened only once there is data, and is closed on
    # every path.
    connection = pymysql.connect(**MYSQL_CONFIG)
    try:
        with connection.cursor() as cursor:
            cursor.executemany(sql, values)
        connection.commit()
        print(f"✅ 成功插入 {len(all_items)} 条数据")
    except Exception as e:
        print(f"❌ 批量插入失败: {e}")
        connection.rollback()
    finally:
        connection.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
94
web/fnrc_vip/dow.py
Normal file
@ -0,0 +1,94 @@
|
||||
from web.Requests_Except import *
|
||||
import pandas as pd
|
||||
|
||||
r_id_list = [30113,37407,23330,44513,36089,3456,7916]
|
||||
|
||||
|
||||
pd_data = {
|
||||
"resume_id": [],
|
||||
"姓名": [],
|
||||
"电话": [],
|
||||
}
|
||||
|
||||
cookies = {
|
||||
'auth-token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDczNzA1ODUsImp0aSI6IjBlZDI0NTM0LWE0NjEtNDkxNC1iNDU1LWQxZGEzYzQ5N2U0NiIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.HoaWksDiMxtkbBJ8jVPlKLKzd1UqNHo4KfecS2uVUaM',
|
||||
'PHPSESSID': '04cdc37cc18bb148fec8e276a6796eed',
|
||||
}
|
||||
|
||||
headers = {
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/json;charset=UTF-8',
|
||||
'origin': 'https://www.fnrc.vip',
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=1, i',
|
||||
'referer': 'https://www.fnrc.vip/enterprise/resume_store/list',
|
||||
'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
base_url = 'www.fnrc.vip'
|
||||
protocol = 'https'
|
||||
|
||||
Request = MR(base_url, protocol)
|
||||
Request.set_default_cookies(cookies)
|
||||
Request.set_default_headers(headers)
|
||||
|
||||
|
||||
def get_resume_info(resume_id):
    """POST ``resume_id`` to the loadResume endpoint.

    Returns:
        The response converted with its ``to_Dict()`` method.
    """
    payload = {'resume_id': resume_id}
    return Request.post('/job/company/v1/resume/loadResume', json=payload).to_Dict()
|
||||
|
||||
def get_phone_encrypt(resume_id):
    """POST ``resume_id`` to the getResumeUserPhone endpoint.

    Returns:
        The response converted with its ``to_Dict()`` method.
    """
    endpoint = '/job/company/v1/company/getResumeUserPhone'
    payload = dict(resume_id=resume_id, delivery_id='', is_pc=1)
    reply = Request.post(endpoint, json=payload)
    return reply.to_Dict()
|
||||
|
||||
def buy_resume(resume_id):
    """POST ``resume_id`` to the buyResumeUserPhone endpoint.

    Returns:
        The response converted with its ``to_Dict()`` method.
    """
    endpoint = '/job/company/v1/company/buyResumeUserPhone'
    payload = dict(resume_id=resume_id, from_type='')
    reply = Request.post(endpoint, json=payload)
    return reply.to_Dict()
|
||||
|
||||
def integrate(output_path='服务_Phone.xlsx'):
    """Collect name and phone for every id in ``r_id_list`` and export to Excel.

    For each resume id the phone is first requested with
    ``get_phone_encrypt``. When that response has no ``phone`` attribute,
    ``buy_resume`` is called and, if it reports ``buy_success``, the phone is
    requested again. Rows are appended to the module-level ``pd_data`` and
    written to ``output_path``.

    Args:
        output_path: Excel file to write. Defaults to the previously
            hard-coded file name.
    """
    for resume_id in r_id_list:
        resume_info = get_resume_info(resume_id)
        lookup = get_phone_encrypt(resume_id)
        if hasattr(lookup, 'phone'):
            phone = lookup.phone
        else:
            purchase = buy_resume(resume_id)
            if getattr(purchase, 'buy_success', False):
                # The phone may still be absent right after a purchase; fall
                # back to None instead of raising and losing earlier rows.
                phone = getattr(get_phone_encrypt(resume_id), 'phone', None)
            else:
                phone = None
        # NOTE(review): an earlier export in this repository contains phone
        # values rendered as "namespace(phone='...')", which suggests
        # ``.phone`` can itself be a nested object -- confirm the response shape.

        pd_data['resume_id'].append(resume_id)
        # A failed loadResume response must not abort the run after phones
        # may already have been purchased.
        pd_data['姓名'].append(getattr(resume_info, 'user_name', None))
        pd_data['电话'].append(phone)

    df = pd.DataFrame(pd_data)
    df.to_excel(output_path, index=False)
    # The message previously named an unrelated file ("厨师.csv").
    print(f"数据已保存到 {output_path}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
integrate()
|
||||
|
BIN
web/fnrc_vip/fnrc_vip.zip
Normal file
147
web/fnrc_vip/main.py
Normal file
@ -0,0 +1,147 @@
|
||||
from web.Requests_Except import *
|
||||
import datetime
|
||||
import pandas as pd
|
||||
|
||||
headers = {
|
||||
"accept": "application/json, text/plain, */*",
|
||||
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
|
||||
"cache-control": "no-cache",
|
||||
"content-type": "application/json;charset=UTF-8",
|
||||
"origin": "https://www.fnrc.vip",
|
||||
"pragma": "no-cache",
|
||||
"priority": "u=1, i",
|
||||
"referer": "https://www.fnrc.vip/enterprise/resume_store/list",
|
||||
"sec-ch-ua": "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\"",
|
||||
"sec-ch-ua-mobile": "?0",
|
||||
"sec-ch-ua-platform": "\"Windows\"",
|
||||
"sec-fetch-dest": "empty",
|
||||
"sec-fetch-mode": "cors",
|
||||
"sec-fetch-site": "same-origin",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
|
||||
}
|
||||
cookies = {
|
||||
"PHPSESSID": "7e50a60cd4544448634f6f2a77c2e17d",
|
||||
"auth-token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NTMwMDMyNTIsImp0aSI6IjAxNDU1NjA1LTlhZDUtNDFlNS1iYzk5LWQwZGUyZTZkMWZjOCIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.ZCRc25o9J4DVykGriAXpEG5sQuJBwTrd-FpUKjnaq6Q",
|
||||
"company_sign": "",
|
||||
"company_nonce": "",
|
||||
"cuid": ""
|
||||
}
|
||||
base_url = 'www.fnrc.vip'
|
||||
protocol = 'https'
|
||||
Requests = MR(base_url, protocol)
|
||||
Requests.set_default_headers(headers)
|
||||
Requests.set_default_cookies(cookies)
|
||||
keyword = ""
|
||||
pd_data = {
|
||||
'resume_id': [],
|
||||
'姓名': [], # user_name
|
||||
'求职区域': [], # area_show
|
||||
'生日': [], # birthday
|
||||
'学历': [], # education_level_msg
|
||||
'学校': [], # education.school
|
||||
'期望职务': [], # expect_job
|
||||
'最后活跃时间': [], # last_edit_time
|
||||
'婚姻': [], # marry_status_show
|
||||
'现居地': [], # residence
|
||||
'年龄': [], # user_age
|
||||
'电话': [], # phone_encrypt
|
||||
'性别': [], # sex_show
|
||||
'求职类型': [], # work_type_show
|
||||
'求职状态': [], # work_status_show
|
||||
'工作1经历': [],
|
||||
'工作1时间': [],
|
||||
'工作1内容': [],
|
||||
'工作2经历': [],
|
||||
'工作2时间': [],
|
||||
'工作2内容': [],
|
||||
'工作3经历': [],
|
||||
'工作3时间': [],
|
||||
'工作3内容': [],
|
||||
'工作4经历': [],
|
||||
'工作4时间': [],
|
||||
'工作4内容': [],
|
||||
}
|
||||
resume_list = []
|
||||
|
||||
|
||||
def getpageforkeyword(keyword: str, step: int = 100, page: int = 1):
    """Request one page of the resume listing filtered by ``keyword``.

    Args:
        keyword: search text sent in the ``keywords`` field.
        step: page size sent in the ``step`` field.
        page: page number sent in the ``page`` field. Defaults to 1, the
            value that was previously hard-coded.

    Returns:
        The response converted with its ``to_Dict()`` method.
    """
    json_data = {
        "step": step,
        "page": page,
        "education_level": [],
        "arrival_time": [],
        "work_time": [],
        "area_id": [],
        "keywords": keyword,
        "work_status": "",
        "work_status_show": "求职状态",
        "category_id": "",
        "work_type": "",
        "work_type_show": "是否兼职",
        "sex": "",
        "sex_show": "性别",
        "is_head": "",
        "is_head_show": "有无照片",
        "job_id": "",
        "age": [],
        "age_show": "年龄",
        "refresh_time": 0,
        "site_id": "",
        "site_id2": "",
        "province": "",
        "city": "",
        "county": "",
        "provinceArr": [],
        "cityArr": [],
        "countyArr": [],
        "only_job_category": 0,
    }
    url = "/job/company/v1/resume/page"
    res = Requests.post(url, json=json_data)
    return res.to_Dict()
|
||||
|
||||
|
||||
def organize_information_into_to_pandas():
    """Append every record of one 1000-row listing page to ``pd_data``.

    The page is fetched with ``getpageforkeyword`` using the module-level
    ``keyword``. Scalar attributes are copied column by column, school names
    are joined with ';', and the first four experience entries fill the
    numbered work columns (empty strings where an entry is missing or has
    no company).
    """
    # (column name, record attribute) pairs, kept in the original append
    # order; the school column sits between the two groups.
    before_school = (
        ('resume_id', 'resume_id'),
        ('姓名', 'user_name'),
        ('求职区域', 'area_show'),
        ('生日', 'birthday'),
        ('学历', 'education_level_msg'),
    )
    after_school = (
        ('期望职务', 'expect_job'),
        ('最后活跃时间', 'last_edit_time'),
        ('婚姻', 'marry_status_show'),
        ('现居地', 'residence'),
        ('年龄', 'user_age'),
        ('电话', 'phone_encrypt'),
        ('性别', 'sex_show'),
        ('求职类型', 'work_type_show'),
        ('求职状态', 'work_status_show'),
    )

    listing = getpageforkeyword(keyword, 1000)
    for record in listing.data:
        for column, attr in before_school:
            pd_data[column].append(getattr(record, attr))
        pd_data['学校'].append(';'.join([edu.school for edu in record.education]))
        for column, attr in after_school:
            pd_data[column].append(getattr(record, attr))

        jobs = record.experience
        for slot in range(4):
            if slot < len(jobs) and jobs[slot].company:
                job = jobs[slot]
                company, time_line, content = job.company, job.time_line, job.content
            else:
                company = time_line = content = ''
            pd_data[f'工作{slot + 1}经历'].append(company)
            pd_data[f'工作{slot + 1}时间'].append(time_line)
            pd_data[f'工作{slot + 1}内容'].append(content)
|
||||
|
||||
|
||||
def main(keywords):
    """Collect the listing for ``keywords`` and export it to a dated Excel file.

    The search text is stored in the module-level ``keyword`` because
    ``organize_information_into_to_pandas`` reads it from there.
    """
    global keyword
    keyword = keywords

    organize_information_into_to_pandas()

    output_name = f'{datetime.datetime.now().strftime("%Y%m%d")}_丰南_{keyword}.xlsx'
    frame = pd.DataFrame(pd_data)
    frame.to_excel(output_name, index=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main("维修工")
|
37
web/fnrc_vip/test.py
Normal file
@ -0,0 +1,37 @@
|
||||
import requests
|
||||
|
||||
cookies = {
|
||||
'auth-token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDczNzA1ODUsImp0aSI6IjBlZDI0NTM0LWE0NjEtNDkxNC1iNDU1LWQxZGEzYzQ5N2U0NiIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.HoaWksDiMxtkbBJ8jVPlKLKzd1UqNHo4KfecS2uVUaM',
|
||||
'PHPSESSID': '04cdc37cc18bb148fec8e276a6796eed',
|
||||
}
|
||||
|
||||
headers = {
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/json;charset=UTF-8',
|
||||
'origin': 'https://www.fnrc.vip',
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=1, i',
|
||||
'referer': 'https://www.fnrc.vip/enterprise/resume_store/list',
|
||||
'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
|
||||
# 'cookie': 'auth-token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDczNzA1ODUsImp0aSI6IjBlZDI0NTM0LWE0NjEtNDkxNC1iNDU1LWQxZGEzYzQ5N2U0NiIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.HoaWksDiMxtkbBJ8jVPlKLKzd1UqNHo4KfecS2uVUaM; PHPSESSID=04cdc37cc18bb148fec8e276a6796eed',
|
||||
}
|
||||
|
||||
json_data = {
|
||||
'resume_id': 45144,
|
||||
}
|
||||
|
||||
response = requests.post('https://www.fnrc.vip/job/company/v1/resume/loadResume', cookies=cookies, headers=headers, json=json_data)
|
||||
|
||||
# Note: json_data will not be serialized by requests
|
||||
# exactly as it was in the original request.
|
||||
#data = '{"resume_id":45144}'
|
||||
#response = requests.post('https://www.fnrc.vip/job/company/v1/resume/loadResume', cookies=cookies, headers=headers, data=data)
|
||||
print(response.json())
|
7
web/fnrc_vip/www_fnrc_vip_cookies.json
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"PHPSESSID": "7e50a60cd4544448634f6f2a77c2e17d",
|
||||
"auth-token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NTMwMDMyNTIsImp0aSI6IjAxNDU1NjA1LTlhZDUtNDFlNS1iYzk5LWQwZGUyZTZkMWZjOCIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.ZCRc25o9J4DVykGriAXpEG5sQuJBwTrd-FpUKjnaq6Q",
|
||||
"company_nonce": "",
|
||||
"company_sign": "",
|
||||
"cuid": ""
|
||||
}
|
4
web/fnrc_vip/www_qj050_com_cookies.json
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"PHPSESSID": "04cdc37cc18bb148fec8e276a6796eed",
|
||||
"auth-token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDczNzA1ODUsImp0aSI6IjBlZDI0NTM0LWE0NjEtNDkxNC1iNDU1LWQxZGEzYzQ5N2U0NiIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIxYTJkODFjMTFkM2MzMmVhYmVlNWFkM2E3NGFmYWViNyIsInRlbmFudF90b2tlbiI6ImQzNWVjMmEzNjAxODM1NWE4MTg3ZTEyODI3MzE3ZGRjIn0.HoaWksDiMxtkbBJ8jVPlKLKzd1UqNHo4KfecS2uVUaM"
|
||||
}
|
43
web/fnrc_vip/厨师.csv
Normal file
@ -0,0 +1,43 @@
|
||||
resume_id,姓名,电话
|
||||
1241,李子,13513045060
|
||||
13448,刘聪,15081530150
|
||||
11640,于雪,15832560082
|
||||
5773,李雪,namespace(phone='15031877698')
|
||||
778,王玉山,namespace(phone='13903151823')
|
||||
16506,何立彬,namespace(phone='13831501533')
|
||||
41328,孙朕堃,namespace(phone='13623331632')
|
||||
3896,董振军,namespace(phone='13663370955')
|
||||
45268,陈靖尧,namespace(phone='18812784911')
|
||||
45300,葛逸恒,namespace(phone='15210096813')
|
||||
23014,周德辉,namespace(phone='15102530538')
|
||||
3498,王冀昆,namespace(phone='18732530825')
|
||||
27397,徐桂新,namespace(phone='13933349702')
|
||||
44810,吕雷,namespace(phone='18254857385')
|
||||
4247,董女士,namespace(phone='18633448279')
|
||||
3092,孙建勇,namespace(phone='15795905186')
|
||||
16714,张立李,namespace(phone='18445528091')
|
||||
43180,张宏双,namespace(phone='15613803918')
|
||||
35849,么双路,namespace(phone='18942692728')
|
||||
14441,王,namespace(phone='13323256632')
|
||||
43669,徐,namespace(phone='18432756682')
|
||||
43910,卢立军,namespace(phone='13230893072')
|
||||
7889,江山,namespace(phone='19930029993')
|
||||
9874,吴俊亮,namespace(phone='13803308204')
|
||||
43962,梁远超,namespace(phone='13131538630')
|
||||
26282,徐涛,namespace(phone='15232544412')
|
||||
43872,董,namespace(phone='17736561036')
|
||||
42954,闫文红,namespace(phone='15102575879')
|
||||
42296,毕嘉乐,namespace(phone='15032511297')
|
||||
43613,张娟,namespace(phone='17367651586')
|
||||
43528,岳会强,namespace(phone='18622746933')
|
||||
15160,何启圣,namespace(phone='17746155775')
|
||||
20697,王一辉,namespace(phone='13031529634')
|
||||
42091,王梦楠,namespace(phone='13473605736')
|
||||
41277,高志良,namespace(phone='18903388242')
|
||||
20258,刘宝良,namespace(phone='15075511071')
|
||||
35638,宣冰,namespace(phone='15733793991')
|
||||
41216,王志浩,namespace(phone='13703155523')
|
||||
41525,杜双存,namespace(phone='13933319327')
|
||||
40197,杨朝,namespace(phone='13111432186')
|
||||
42071,文开,namespace(phone='15032533517')
|
||||
24304,刘金国,namespace(phone='13582871130')
|
|
35
web/grubhub/1.py
Normal file
@ -0,0 +1,35 @@
|
||||
import requests
|
||||
|
||||
headers = {
|
||||
'accept': 'application/json',
|
||||
'accept-language': 'zh-CN,zh;q=0.9',
|
||||
'authorization': 'Bearer a8e6801d-0060-4288-a1a8-7a8003996871',
|
||||
'cache-control': 'no-cache',
|
||||
'if-modified-since': '0',
|
||||
'origin': 'https://www.grubhub.com',
|
||||
'perimeter-x': 'eyJ1IjoiOTIwNWMzOTAtZmVlMi0xMWVmLTljZTEtYzU0ODE2NWJhZGM5IiwidiI6IjhmNWZmZmMxLWZlZTItMTFlZi04OGY4LTNlNmE5YjdjNmY1NSIsInQiOjE3NDE3NDM5NTEwNzcsImgiOiI2Nzk1NDMzM2IwZTJmODk1NjJjOTM2ZDdmYjczYzM0MmM5NjViYzhlY2Y0MDg1ZDljOTY5NDA0N2Y5ODFkOWFiIn0=',
|
||||
'pragma': 'no-cache',
|
||||
'priority': 'u=1, i',
|
||||
'referer': 'https://www.grubhub.com/',
|
||||
'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-site',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
params = {
|
||||
'time': '1741743544922',
|
||||
'hideUnavailableMenuItems': 'true',
|
||||
'orderType': 'standard',
|
||||
'version': '4',
|
||||
}
|
||||
|
||||
response = requests.get(
|
||||
'https://api-gtm.grubhub.com/restaurants/10316176/menu_items/295249416128',
|
||||
params=params,
|
||||
headers=headers,
|
||||
)
|
||||
print(response.json())
|
475
web/grubhub/main.py
Normal file
@ -0,0 +1,475 @@
|
||||
import json
|
||||
import time
|
||||
import requests
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
|
||||
|
||||
|
||||
class Grubhub:
|
||||
def __init__(self):
    """Set up proxy settings and load the 'Menu.xlsx' template workbook."""
    # Both schemes go through the same local proxy.
    self.proxies = dict.fromkeys(("http", "https"), "http://127.0.0.1:7890")
    self.token = None
    self.get_menuid_lit = {}
    # The workbook must be loaded before the header snapshot below reads it.
    self.wb = load_workbook('Menu.xlsx')
    # NOTE: this replaces the bound method with its return value on the
    # instance, so afterwards ``self.modify_first_row`` is the snapshot dict.
    header_snapshot = self.modify_first_row()
    self.modify_first_row = header_snapshot
|
||||
|
||||
def clear_sheet(self, sheet):
    """Blank every cell value below the first row of ``sheet`` and save.

    Args:
        sheet: name of the worksheet in ``self.wb``.

    The workbook is saved to 'grubhubMenu.xlsx'.
    """
    worksheet = self.wb[sheet]
    # Start at row 2 so the header row is kept.
    body_cells = (cell for row in worksheet.iter_rows(min_row=2) for cell in row)
    for cell in body_cells:
        if cell.value is None:
            continue
        cell.value = None
    self.wb.save('grubhubMenu.xlsx')
|
||||
|
||||
def clear_except_first_row(self, sheet):
    """Remove all content and formatting below the first row of ``sheet``.

    Steps, in order: unmerge every merged range, reset value and cell styles
    from row 2 down, delete those rows, reset row-level styles for the same
    row numbers, then save the workbook to 'grubhubMenu.xlsx'.

    Args:
        sheet: name of the worksheet in ``self.wb``.
    """
    ws = self.wb[sheet]

    # Snapshot the ranges first: unmerging changes the collection.
    for merged in list(ws.merged_cells.ranges):
        ws.unmerge_cells(str(merged))

    # Sheet extent, read once before anything is deleted.
    last_row = ws.max_row
    last_col = ws.max_column
    # Empty when the sheet has only the header row.
    body_rows = range(2, last_row + 1)

    # Reset data and cell-level formatting from the second row down.
    for row_no in body_rows:
        for col_no in range(1, last_col + 1):
            cell = ws.cell(row=row_no, column=col_no)
            cell.value = None
            cell.fill = PatternFill(fill_type=None)
            cell.font = Font()
            cell.alignment = Alignment()
            cell.border = Border()

    # Delete the second row and everything after it.
    ws.delete_rows(2, last_row - 1 if last_row > 2 else 1)

    # Reset row-level formatting for rows that have a dimension entry.
    for row_no in body_rows:
        if row_no not in ws.row_dimensions:
            continue
        dimension = ws.row_dimensions[row_no]
        dimension.fill = PatternFill(fill_type=None)
        dimension.font = Font()
        dimension.alignment = Alignment()

    self.wb.save('grubhubMenu.xlsx')
|
||||
|
||||
def modify_first_row(self):
    """Snapshot the values and styles of row 1 of the "Modifier" sheet.

    Returns:
        A dict mapping each 1-based column index to a dict with the keys
        "value", "font", "alignment", "fill" and "border", plus a
        "row_height" entry holding the row's height. "fill" is None when the
        cell has no pattern type; "border" is None when the cell has no
        border object.
    """
    ws = self.wb["Modifier"]
    header_row = 1
    snapshot = {}

    for col in range(1, ws.max_column + 1):
        cell = ws.cell(row=header_row, column=col)

        font_copy = Font(
            name=cell.font.name,
            size=cell.font.size,
            bold=cell.font.bold,
            italic=cell.font.italic,
            underline=cell.font.underline,
            color=cell.font.color.rgb if cell.font.color else None,
        )
        alignment_copy = Alignment(
            horizontal=cell.alignment.horizontal,
            vertical=cell.alignment.vertical,
            wrap_text=cell.alignment.wrap_text,
        )

        # A fill is copied only when the cell actually has a pattern type.
        if cell.fill and cell.fill.patternType:
            fill_copy = PatternFill(
                fill_type=cell.fill.patternType,
                fgColor=cell.fill.fgColor.rgb if cell.fill.fgColor else None,
                bgColor=cell.fill.bgColor.rgb if cell.fill.bgColor else None,
            )
        else:
            fill_copy = None

        if cell.border:
            border_copy = Border(
                left=Side(style=cell.border.left.style, color=cell.border.left.color),
                right=Side(style=cell.border.right.style, color=cell.border.right.color),
                top=Side(style=cell.border.top.style, color=cell.border.top.color),
                bottom=Side(style=cell.border.bottom.style, color=cell.border.bottom.color),
            )
        else:
            border_copy = None

        snapshot[col] = {
            "value": cell.value,
            "font": font_copy,
            "alignment": alignment_copy,
            "fill": fill_copy,
            "border": border_copy,
        }

    snapshot["row_height"] = ws.row_dimensions[header_row].height
    return snapshot
|
||||
|
||||
def get_token(self, timeout=30):
    """Request an anonymous session from the Grubhub auth endpoint.

    Args:
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error. Without one the call could block indefinitely.

    Returns:
        The ``access_token`` from the response's ``session_handle`` object,
        or ``None`` when the response does not contain one.
    """
    headers = {
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9',
        'authorization': 'Bearer',
        'cache-control': 'no-cache',
        'content-type': 'application/json;charset=UTF-8',
        'origin': 'https://www.grubhub.com',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://www.grubhub.com/',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }

    json_data = {
        'brand': 'GRUBHUB',
        'client_id': 'beta_UmWlpstzQSFmocLy3h1UieYcVST',
        'device_id': 1277616243,
        'scope': 'anonymous',
    }

    # Uses the instance-wide proxy settings, as get_menu_items does, instead
    # of a second hard-coded copy of the same dict.
    response = requests.post(
        'https://api-gtm.grubhub.com/auth',
        headers=headers,
        json=json_data,
        proxies=self.proxies,
        timeout=timeout,
    )
    return response.json().get("session_handle", {}).get('access_token')
|
||||
|
||||
|
||||
def get_menu_items(self):
    """Download the menu items of restaurant 10316176 into 'menu.json'.

    The request is authorised with ``self.token`` and sent through
    ``self.proxies``; the JSON response is re-serialised with a 4-space
    indent before being written.
    """
    endpoint = 'https://api-gtm.grubhub.com/restaurants/10316176/menu_items/'
    query = {'orderType': 'standard', 'version': '4'}
    request_headers = {
        'accept': 'application/json',
        'accept-language': 'zh-CN,zh;q=0.9',
        'authorization': 'Bearer {}'.format(self.token),
        'cache-control': 'no-cache',
        'if-modified-since': '0',
        'origin': 'https://www.grubhub.com',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://www.grubhub.com/',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }

    reply = requests.get(
        endpoint,
        params=query,
        headers=request_headers,
        proxies=self.proxies,
    )

    # Serialise before opening the file so a bad response leaves any existing
    # menu.json untouched.
    pretty = json.dumps(reply.json(), indent=4)
    with open('menu.json', 'w', encoding='utf-8') as out_file:
        out_file.write(pretty)
|
||||
|
||||
def get_menuid(self):
    """Rebuild the "Categories" sheet from the cached ``menu_id.json`` file.

    Reads ``menu_id.json`` from the current working directory, walks
    ``object -> data -> enhanced_feed`` and, for every entry whose ``id`` is
    not the literal string ``"None"``, writes one row (starting at row 2) to
    the "Categories" sheet: fixed menu name, category name, empty
    translation column.  The resulting ``{category name: category id}``
    mapping is stored on ``self.get_menuid_lit`` and the workbook is saved
    as ``grubhubMenu.xlsx``.

    NOTE: the code that used to download ``menu_id.json`` (a GET on
    ``https://api-gtm.grubhub.com/restaurant_gateway/info/volatile/10316176``
    with params ``orderType=STANDARD``, ``platform=WEB``,
    ``enhancedFeed=true``) was commented out in the original, so the file
    must already exist.
    """
    self.clear_sheet("Categories")
    sheet = self.wb["Categories"]

    with open('menu_id.json', 'r', encoding='utf-8') as fh:
        feed = json.load(fh)
    categories = feed.get("object", {}).get("data", {}).get("enhanced_feed", [])

    # Entries carrying the string id "None" are skipped and consume no row.
    usable = (entry for entry in categories if entry.get("id") != "None")

    name_to_id = {}
    for row, entry in enumerate(usable, start=2):
        category_name = entry.get("name")
        name_to_id[category_name] = entry.get("id")
        sheet.cell(row=row, column=1, value="Online All Day Menu")
        sheet.cell(row=row, column=2, value=category_name)
        sheet.cell(row=row, column=3, value="")  # translation

    self.get_menuid_lit = name_to_id
    self.wb.save('grubhubMenu.xlsx')
|
||||
|
||||
def get_itme(self):
    """Fetch every item of every known category and fill the "Item" sheet.

    For each ``name -> id`` pair in ``self.get_menuid_lit`` the category feed
    is requested, then the detail endpoint of every item in it.  Each item
    becomes one row of the "Item" sheet (rows start at 2):

    * column 1: fixed menu name, column 2: category name,
      column 3: item name, column 4: empty, column 7: description;
    * column 5: the item's base price, or ``"<total>/<size>;..."`` when the
      item has size options (total = base price + size surcharge);
    * column 6: newline-joined ``"<size>: <first addon group>"`` labels when
      the item has both sizes and addons, or the addon group names when it
      only has addons.

    ``ism`` encodes the option layout: 0 none, 1 sizes only, 2 addons only,
    3 sizes and addons.  Addon groups are accumulated across all items
    (merged by group name, options de-duplicated by option name) and written
    to ``menu_item.json``; the workbook is saved as ``grubhubMenu.xlsx``.

    Fixes relative to the previous version:

    * the option loop used to reassign ``price``, so the base price written
      to column 5 (and used for size totals) was really the surcharge of the
      last option seen;
    * the item-detail request sent a ``sec-ch-ua`` header containing a stray
      backtick (``"N`ot:A-Brand"``).
    """
    self.clear_except_first_row("Item")
    self.clear_except_first_row("Modifier")
    ws = self.wb["Item"]
    session = requests.session()
    row = 2

    data_info = []
    # str.startswith accepts a tuple, so one call tests every size prefix.
    size_prefixes = ("(S)", "(L)", "(小)", "(大)", "(Half Gallon)", "(One Gallon)", "1.4pcs", "8pcs", "4pcs")

    # Headers and params do not change between iterations: build them once.
    item_headers = {
        'accept': 'application/json',
        'accept-language': 'zh-CN,zh;q=0.9',
        'authorization': 'Bearer {}'.format(self.token),
        'cache-control': 'no-cache',
        'if-modified-since': '0',
        'origin': 'https://www.grubhub.com',
        'perimeter-x': 'eyJ1IjoiY2M3YWQyNDAtZmVlMi0xMWVmLTljZTEtYzU0ODE2NWJhZGM5IiwidiI6IjhmNWZmZmMxLWZlZTItMTFlZi04OGY4LTNlNmE5YjdjNmY1NSIsInQiOjE3NDE3NDQzNTI1NjAsImgiOiI5YWFjOTBkZDBmZTc1N2EzOTJlYmMwM2ViMTNiZGU1YzhhMWY4MDljYzNmOTZlZjdhNDAwZWJlZGVmMDkxOTljIn0=',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://www.grubhub.com/',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }
    # The category feed request differs only in its cache-control value.
    feed_headers = {**item_headers, 'cache-control': 'max-age=0'}
    feed_params = {
        'time': '1741743447761',
        'operationId': '8e6c2210-fee2-11ef-a211-03a08d96f471',
        'isFutureOrder': 'false',
        'restaurantStatus': 'ORDERABLE',
        'isNonRestaurantMerchant': 'false',
        'merchantTypes': '',
        'orderType': 'STANDARD',
        'agent': 'false',
        'task': 'CATEGORY',
        'platform': 'WEB',
    }
    item_params = {
        'time': '1741743544922',
        'hideUnavailableMenuItems': 'true',
        'orderType': 'standard',
        'version': '4',
    }

    for category_name, category_id in self.get_menuid_lit.items():
        print(category_name, category_id)
        feed_response = session.get(
            'https://api-gtm.grubhub.com/restaurant_gateway/feed/10316176/{}'.format(category_id),
            params=feed_params,
            headers=feed_headers,
            proxies=self.proxies
        )

        for menu in feed_response.json()["object"]["data"]["content"]:
            entity = menu.get("entity")
            item_id = entity.get("item_id")
            item_name = entity.get("item_name")
            # Prices arrive in cents.  Keep the base price under its own name
            # so option prices below can never overwrite it.
            base_price = entity.get("item_price").get("pickup").get("value") / 100.0
            description = entity.get("item_description")

            item_response = session.get(
                'https://api-gtm.grubhub.com/restaurants/10316176/menu_items/{}'.format(item_id),
                params=item_params,
                headers=item_headers,
                proxies=self.proxies
            )

            data = {"ism": 0, "sizes": [], "addons": [], "nameList": []}
            for group in item_response.json().get('choice_category_list', []):
                quantity = group.get('quantity_settings', {})
                minimum = quantity.get('minimum_units', 0)
                addon_group = {
                    "name": group.get('name', ''),
                    "list": [],
                    "required": minimum >= 1,
                    "min": minimum,
                    "max": quantity.get('maximum_units', 0),
                }

                for option in group.get('choice_option_list', []):
                    option_title = option.get('description', '')
                    option_price = option.get('price', {}).get('amount', 0) / 100
                    choice = {"name": option_title, "price": option_price}
                    # Options whose label starts with a size marker are size
                    # variants; everything else is an addon of this group.
                    if option_title.startswith(size_prefixes):
                        data['sizes'].append(choice)
                    else:
                        addon_group["list"].append(choice)

                # Only groups that kept at least one addon option are recorded.
                if addon_group["list"]:
                    data["addons"].append(addon_group)

            has_sizes = bool(data['sizes'])
            has_addons = bool(data['addons'])
            if has_sizes and has_addons:
                data['ism'] = 3  # sizes + addons
                first_group_name = data["addons"][0]["name"]
                data['nameList'] = [f"{size['name']}: {first_group_name}" for size in data["sizes"]]
            elif has_sizes:
                data['ism'] = 1  # sizes only
            elif has_addons:
                data['ism'] = 2  # addons only

            ws.cell(row=row, column=1, value="Online All Day Menu")
            ws.cell(row=row, column=2, value=category_name)
            ws.cell(row=row, column=3, value=item_name)
            ws.cell(row=row, column=4, value="")
            ws.cell(row=row, column=5, value=base_price)
            ws.cell(row=row, column=7, value=description)

            if data['ism'] in (1, 3):
                # A size surcharge of 0 simply yields the base price.
                size_prices = ";".join(
                    f"{base_price + size['price']:.2f}/{size['name']}" for size in data['sizes']
                )
                ws.cell(row=row, column=5, value=size_prices)
            if data['ism'] == 3:
                ws.cell(row=row, column=6, value="\n".join(data['nameList']))
            if data['ism'] == 2:
                ws.cell(row=row, column=6, value="\n".join(group["name"] for group in data['addons']))

            # Merge this item's addon groups into the global list by name.
            for addon_group in data['addons']:
                known = next((g for g in data_info if g["name"] == addon_group["name"]), None)
                if known is None:
                    data_info.append(addon_group)
                    continue
                known_names = {choice["name"] for choice in known["list"]}
                known["list"].extend(
                    [choice for choice in addon_group["list"] if choice["name"] not in known_names]
                )

            row += 1

    self.wb.save('grubhubMenu.xlsx')
    with open('menu_item.json', 'w', encoding='utf-8') as f:
        f.write(json.dumps(data_info, indent=4))
|
||||
|
||||
def write_xlsx(self):
    """Fill the "Modifier" sheet from ``menu_item.json`` and save the workbook.

    ``menu_item.json`` is read as a list of groups, each with ``name``,
    ``required``, ``min``, ``max`` and a ``list`` of ``{name, price}``
    options.  Groups are written from row 2 downwards; one row is left
    unused after each group.  Before every group except the first, a copy of
    the header row (value, font, alignment, fill, border and row height taken
    from ``self.modify_first_row``) is written.  ``self.modify_first_row``
    is read as ``{column index: {"value", "font", "alignment", "fill",
    "border"}}`` plus a ``"row_height"`` entry.  When a group has at least
    one option, its group-level columns (1, 2, 7, 8, 9, 10) are merged
    vertically across the option rows and centred.  The workbook is saved as
    ``grubhubMenu.xlsx``.
    """
    sheet = self.wb["Modifier"]
    self.clear_except_first_row("Modifier")
    with open('menu_item.json', 'r', encoding='utf-8') as fh:
        groups = json.load(fh)

    # Columns holding one value per group; merged over the group's rows.
    group_columns = (1, 2, 7, 8, 9, 10)

    row = 2
    for group in groups:
        # Every group after the first gets its own copy of the header row.
        if row > 2:
            sheet.row_dimensions[row].height = self.modify_first_row["row_height"]

            for col, template in self.modify_first_row.items():
                if col == "row_height":
                    continue

                header_cell = sheet.cell(row=row, column=col)
                header_cell.value = template["value"]

                # Rebuild each style object from the template's attributes.
                font = template["font"]
                if font:
                    header_cell.font = Font(
                        name=font.name,
                        size=font.size,
                        bold=font.bold,
                        italic=font.italic,
                        underline=font.underline,
                        color=font.color
                    )
                alignment = template["alignment"]
                if alignment:
                    header_cell.alignment = Alignment(
                        horizontal=alignment.horizontal,
                        vertical=alignment.vertical,
                        wrap_text=alignment.wrap_text
                    )
                fill = template["fill"]
                if fill and fill.patternType:
                    header_cell.fill = PatternFill(
                        fill_type=fill.patternType,
                        fgColor=fill.fgColor.rgb,
                        bgColor=fill.bgColor.rgb
                    )
                border = template["border"]
                if border:
                    header_cell.border = Border(
                        left=Side(style=border.left.style, color=border.left.color),
                        right=Side(style=border.right.style, color=border.right.color),
                        top=Side(style=border.top.style, color=border.top.color),
                        bottom=Side(style=border.bottom.style, color=border.bottom.color),
                    )
            row += 1

        # Group-level values go on the group's first row.
        sheet.cell(row=row, column=1, value=group['name'])
        sheet.cell(row=row, column=2, value="")
        sheet.cell(row=row, column=7, value="Required" if group['required'] else "Not Required")
        sheet.cell(row=row, column=8, value=group['min'])
        sheet.cell(row=row, column=9, value=group['max'])
        sheet.cell(row=row, column=10, value="NO")

        first_row = row
        for option in group['list']:
            sheet.cell(row=row, column=3, value=option['name'])
            sheet.cell(row=row, column=6, value=option['price'])
            row += 1
        row += 1  # leave one unused row after the group

        # ``row`` is now two past the last option row; merge only when the
        # group wrote at least one option.
        if row - first_row > 1:
            last_row = row - 2
            centered = Alignment(horizontal="center", vertical="center")
            for col in group_columns:
                sheet.merge_cells(start_row=first_row, start_column=col, end_row=last_row, end_column=col)
                sheet.cell(row=first_row, column=col).alignment = centered

    self.wb.save('grubhubMenu.xlsx')
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: only the "Modifier" sheet export runs.
    # The other pipeline steps were left disabled in the original script:
    #   scraper.token = scraper.get_token()
    #   scraper.get_menuid()
    #   scraper.get_itme()
    # as were the debugging calls get_jsondata(scraper.token),
    # get_menu_items() and get_request_id().
    scraper = Grubhub()
    scraper.write_xlsx()
|
834
web/grubhub/menu.json
Normal file
@ -0,0 +1,834 @@
|
||||
{
|
||||
"restaurant_data": {
|
||||
"restaurant_availability": {
|
||||
"restaurant_id": "10316176",
|
||||
"delivery_fee": {
|
||||
"amount": 149,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
},
|
||||
"delivery_fee_without_discounts": {
|
||||
"amount": 149,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
},
|
||||
"delivery_fee_estimate": {
|
||||
"amount": 149,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
},
|
||||
"delivery_fee_as_percentage": 0,
|
||||
"delivery_fee_taxable": false,
|
||||
"delivery_fee_allocation_info": {
|
||||
"attribution": "GRUBHUB"
|
||||
},
|
||||
"order_minimum": {
|
||||
"amount": 0,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
},
|
||||
"sales_tax": 6.5,
|
||||
"delivery_offered_to_diner_location": false,
|
||||
"open": false,
|
||||
"open_delivery": false,
|
||||
"open_pickup": false,
|
||||
"available_for_delivery": true,
|
||||
"available_for_pickup": true,
|
||||
"delivery_estimate": 30,
|
||||
"delivery_estimate_range": "{\"maximum\":40,\"minimum\":30}",
|
||||
"delivery_estimate_range_v2": {
|
||||
"maximum": 40,
|
||||
"minimum": 30
|
||||
},
|
||||
"pickup_estimate": 20,
|
||||
"pickup_estimate_range": "{\"maximum\":30,\"minimum\":20}",
|
||||
"pickup_estimate_range_v2": {
|
||||
"maximum": 30,
|
||||
"minimum": 20
|
||||
},
|
||||
"cash_accepted": false,
|
||||
"credit_card_accepted": true,
|
||||
"paypal_accepted": false,
|
||||
"time_zone_id": "America/New_York",
|
||||
"time_zone_offset": -14400000,
|
||||
"tracker": true,
|
||||
"delivery_cutoff": 15,
|
||||
"pickup_cutoff": 30,
|
||||
"delivery_type": "UNKNOWN",
|
||||
"available_hours": [
|
||||
{
|
||||
"day_of_week": 1,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 2,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 3,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 4,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 5,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 6,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 7,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
}
|
||||
],
|
||||
"available_hours_pickup": [
|
||||
{
|
||||
"day_of_week": 1,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 2,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 3,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 4,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 5,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 6,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 7,
|
||||
"time_ranges": [
|
||||
"17:00-03:59"
|
||||
]
|
||||
}
|
||||
],
|
||||
"future_order_available_hours_delivery": [
|
||||
{
|
||||
"day_of_week": 1,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 2,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 3,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 4,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 5,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 6,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 7,
|
||||
"time_ranges": [
|
||||
"17:30-04:29"
|
||||
]
|
||||
}
|
||||
],
|
||||
"future_order_available_hours_pickup": [
|
||||
{
|
||||
"day_of_week": 1,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 2,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 3,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 4,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 5,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 6,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
},
|
||||
{
|
||||
"day_of_week": 7,
|
||||
"time_ranges": [
|
||||
"17:20-04:19"
|
||||
]
|
||||
}
|
||||
],
|
||||
"min_delivery_fee": {
|
||||
"amount": 0,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
},
|
||||
"catering_info": {
|
||||
"catering_sibling_id": null,
|
||||
"order_thresholds": {
|
||||
"currency": "USD",
|
||||
"order_price_bucket_list": [
|
||||
{
|
||||
"amount": 250,
|
||||
"buffer_minutes": 30
|
||||
}
|
||||
]
|
||||
},
|
||||
"catering": false
|
||||
},
|
||||
"next_open_closed_info": {
|
||||
"next_order_send_time_delivery": "2025-03-11T17:00:00.000Z",
|
||||
"next_order_send_time_pickup": "2025-03-11T17:00:00.000Z",
|
||||
"next_delivery_time": "2025-03-11T17:30:00.000Z",
|
||||
"next_pickup_time": "2025-03-11T17:20:00.000Z",
|
||||
"order_tiers": [
|
||||
{
|
||||
"threshold_type": "ORDER_AMOUNT_CENTS",
|
||||
"threshold": 15000,
|
||||
"additional_prep_time_minutes": 20,
|
||||
"next_delivery_time": "2025-03-11T17:50:00.000Z",
|
||||
"next_order_send_time_delivery": "2025-03-11T17:00:00.000Z",
|
||||
"next_pickup_time": "2025-03-11T17:40:00.000Z",
|
||||
"next_order_send_time_pickup": "2025-03-11T17:00:00.000Z"
|
||||
}
|
||||
]
|
||||
},
|
||||
"allowable_order_types": [
|
||||
"standard"
|
||||
],
|
||||
"service_fee": {
|
||||
"name": "Service fee",
|
||||
"description": "A service fee of 15.0% for delivery orders will be charged with Orlando China Ocean.",
|
||||
"delivery_fee": {
|
||||
"fee_type": "PERCENT",
|
||||
"percent_value": 15,
|
||||
"maximum_amount_for_percent": {
|
||||
"amount": 1400,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
}
|
||||
},
|
||||
"delivery_service_fee_allocation_info": {
|
||||
"attribution": "GRUBHUB"
|
||||
},
|
||||
"fee_allocation_info": {
|
||||
"fee_allocation_type": "GRUBHUB"
|
||||
}
|
||||
},
|
||||
"small_order_fee": {
|
||||
"name": "Small order delivery fee",
|
||||
"description": "A small order delivery fee applies to your Orlando China Ocean subtotal of $8.69 or less.",
|
||||
"minimum_order_value_cents": 1000,
|
||||
"fee": {
|
||||
"fee_type": "FLAT",
|
||||
"flat_cents_value": {
|
||||
"amount": 200,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
}
|
||||
},
|
||||
"fee_allocation_info": {
|
||||
"attribution": "GRUBHUB"
|
||||
}
|
||||
},
|
||||
"fee_display_setting": {
|
||||
"search_display_setting": null,
|
||||
"menu_display_setting": {
|
||||
"id": "UniversalControl",
|
||||
"subtitle_fee_styled_text": {
|
||||
"text": "$1.49 delivery",
|
||||
"text_style": "DEFAULT"
|
||||
},
|
||||
"disclaimer": null
|
||||
}
|
||||
},
|
||||
"grouped_overrides_availability": {
|
||||
"delivery_blacked_out": false,
|
||||
"pickup_blacked_out": false,
|
||||
"delivery_soft_blacked_out": false,
|
||||
"pickup_soft_blacked_out": false,
|
||||
"delivery_estimate_in_minutes": null,
|
||||
"order_minimum_increase_in_cents": 0,
|
||||
"inundated": false
|
||||
},
|
||||
"contact_free_required": false,
|
||||
"delivery_fees": [
|
||||
{
|
||||
"fee_name": "DELIVERY",
|
||||
"type": "DELIVERY",
|
||||
"operand": "FLAT",
|
||||
"amount": 149,
|
||||
"non_adjusted_amount": null,
|
||||
"adjustment": null,
|
||||
"threshold": null,
|
||||
"potential_adjustment": null,
|
||||
"non_adjusted_values": null,
|
||||
"potential_adjustment_values": null,
|
||||
"applicable_hours": null,
|
||||
"applicable_days_of_the_week": null,
|
||||
"attribution": "GRUBHUB",
|
||||
"fee_adjustment_category": "NONE",
|
||||
"has_campus_subscription_specific_requirements": false,
|
||||
"amount_benefits": null,
|
||||
"non_adjusted_amount_benefits": null,
|
||||
"potential_adjustment_values_benefits": null,
|
||||
"non_adjusted_values_benefits": null,
|
||||
"potential_adjustment_benefits": null
|
||||
},
|
||||
{
|
||||
"fee_name": "DRIVER_BENEFITS_FEE",
|
||||
"type": "DRIVER_BENEFITS_FEE",
|
||||
"operand": "FLAT",
|
||||
"amount": 0,
|
||||
"non_adjusted_amount": null,
|
||||
"adjustment": null,
|
||||
"threshold": null,
|
||||
"potential_adjustment": null,
|
||||
"non_adjusted_values": null,
|
||||
"potential_adjustment_values": null,
|
||||
"applicable_hours": null,
|
||||
"applicable_days_of_the_week": null,
|
||||
"attribution": "GRUBHUB",
|
||||
"fee_adjustment_category": "NONE",
|
||||
"has_campus_subscription_specific_requirements": false,
|
||||
"amount_benefits": null,
|
||||
"non_adjusted_amount_benefits": null,
|
||||
"potential_adjustment_values_benefits": null,
|
||||
"non_adjusted_values_benefits": null,
|
||||
"potential_adjustment_benefits": null
|
||||
},
|
||||
{
|
||||
"fee_name": "PRIORITY_DELIVERY",
|
||||
"type": "PRIORITY_DELIVERY",
|
||||
"operand": "FLAT",
|
||||
"amount": 249,
|
||||
"non_adjusted_amount": null,
|
||||
"adjustment": null,
|
||||
"threshold": {},
|
||||
"potential_adjustment": null,
|
||||
"non_adjusted_values": null,
|
||||
"potential_adjustment_values": null,
|
||||
"applicable_hours": null,
|
||||
"applicable_days_of_the_week": null,
|
||||
"attribution": "GRUBHUB",
|
||||
"fee_adjustment_category": "NONE",
|
||||
"has_campus_subscription_specific_requirements": false,
|
||||
"amount_benefits": null,
|
||||
"non_adjusted_amount_benefits": null,
|
||||
"potential_adjustment_values_benefits": null,
|
||||
"non_adjusted_values_benefits": null,
|
||||
"potential_adjustment_benefits": null
|
||||
},
|
||||
{
|
||||
"fee_name": "SERVICE",
|
||||
"type": "SERVICE",
|
||||
"operand": "PERCENT",
|
||||
"amount": 1500,
|
||||
"non_adjusted_amount": null,
|
||||
"adjustment": null,
|
||||
"threshold": {
|
||||
"maximum_amount_for_percentage": 1400
|
||||
},
|
||||
"potential_adjustment": null,
|
||||
"non_adjusted_values": null,
|
||||
"potential_adjustment_values": null,
|
||||
"applicable_hours": null,
|
||||
"applicable_days_of_the_week": null,
|
||||
"attribution": "GRUBHUB",
|
||||
"fee_adjustment_category": "NONE",
|
||||
"has_campus_subscription_specific_requirements": false,
|
||||
"amount_benefits": null,
|
||||
"non_adjusted_amount_benefits": null,
|
||||
"potential_adjustment_values_benefits": null,
|
||||
"non_adjusted_values_benefits": null,
|
||||
"potential_adjustment_benefits": null
|
||||
},
|
||||
{
|
||||
"fee_name": "SMALL",
|
||||
"type": "SMALL",
|
||||
"operand": "FLAT",
|
||||
"amount": 200,
|
||||
"non_adjusted_amount": null,
|
||||
"adjustment": null,
|
||||
"threshold": {
|
||||
"threshold": 1000
|
||||
},
|
||||
"potential_adjustment": null,
|
||||
"non_adjusted_values": null,
|
||||
"potential_adjustment_values": null,
|
||||
"applicable_hours": null,
|
||||
"applicable_days_of_the_week": null,
|
||||
"attribution": "GRUBHUB",
|
||||
"fee_adjustment_category": "NONE",
|
||||
"has_campus_subscription_specific_requirements": false,
|
||||
"amount_benefits": null,
|
||||
"non_adjusted_amount_benefits": null,
|
||||
"potential_adjustment_values_benefits": null,
|
||||
"non_adjusted_values_benefits": null,
|
||||
"potential_adjustment_benefits": null
|
||||
}
|
||||
],
|
||||
"pickup_fees": [],
|
||||
"cutoff_for_delivery": true,
|
||||
"cutoff_for_pickup": true,
|
||||
"white_in": false,
|
||||
"inundated": false,
|
||||
"blacked_out": false
|
||||
},
|
||||
"restaurant": {
|
||||
"id": "10316176",
|
||||
"restaurant_hash": "30ceb46e88ede69efa34081e6b076e8b",
|
||||
"merchant_uuid": "GLfVcNN1Ee-VcetVHBgGHQ",
|
||||
"package_state_type_id": 2,
|
||||
"available_restaurant_features": [],
|
||||
"disclaimers": [],
|
||||
"duplicate": false,
|
||||
"name": "Orlando China Ocean",
|
||||
"address": {
|
||||
"locality": "Orlando",
|
||||
"region": "FL",
|
||||
"postal_code": "32822-2710",
|
||||
"zip": "32822",
|
||||
"street_address": "2508 S Semoran Blvd",
|
||||
"country": "USA",
|
||||
"time_zone_id": "America/New_York",
|
||||
"phone_number_for_delivery": null,
|
||||
"latitude": null,
|
||||
"longitude": null
|
||||
},
|
||||
"cross_street_required": false,
|
||||
"premium": true,
|
||||
"online_ordering_available": true,
|
||||
"pickup_offered": true,
|
||||
"phone_ordering_available": true,
|
||||
"phone_number_for_delivery": "4078237799",
|
||||
"suppress_diner_phone_contact": false,
|
||||
"latitude": "28.51548767",
|
||||
"longitude": "-81.31121064",
|
||||
"city_id": 283,
|
||||
"managed_delivery": true,
|
||||
"delivery_mode": "FULL_GHD",
|
||||
"minimum_tip_percent": 10.0,
|
||||
"default_tip_percent": 20.0,
|
||||
"minimum_tip": {
|
||||
"amount": 200,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
},
|
||||
"logo": "https://media-cdn.grubhub.com/image/upload/v1737040004/iztzardvszedgbisytr1.png",
|
||||
"group_ids": [
|
||||
"pauseByPartner:fd317fe0-2215-11ec-8fa6-f705ecbb3af7-partner-grp:GENERIC_GROUP_IDS",
|
||||
"pauseByPartner:3aefd0b0-2217-11ec-8fe1-8bb6431a0c16-partner-grp:GENERIC_GROUP_IDS",
|
||||
"1898fe81-0b40-4c7d-bf88-76b74f92a896:polygon:GENERIC_GROUP_IDS",
|
||||
"pauseByPartner:63efd650-2216-11ec-8715-1b87ad3ba198-partner-grp:GENERIC_GROUP_IDS"
|
||||
],
|
||||
"has_coupons": false,
|
||||
"is_new": true,
|
||||
"routing_number": "4078237799",
|
||||
"cuisines": [
|
||||
"Chicken",
|
||||
"Chinese",
|
||||
"Noodles",
|
||||
"Seafood",
|
||||
"Soup"
|
||||
],
|
||||
"restaurant_coupons": [],
|
||||
"just_in_time_orders": false,
|
||||
"restaurant_cdn_image_url": "http://s1.seamless.com/-/ri/gh/10316176",
|
||||
"media_image": {
|
||||
"base_url": "https://media-cdn.grubhub.com/image/upload/",
|
||||
"public_id": "iztzardvszedgbisytr1",
|
||||
"format": "png",
|
||||
"tag": "logo",
|
||||
"tags": [
|
||||
"primary_logo"
|
||||
],
|
||||
"image_source": null,
|
||||
"uploader_details": null,
|
||||
"image_description": null,
|
||||
"image_title": null,
|
||||
"scale_mode": "FILL"
|
||||
},
|
||||
"rating": {
|
||||
"rating_count": "1",
|
||||
"rating_value": "4"
|
||||
},
|
||||
"price_rating": "2",
|
||||
"rating_bayesian_half_point": {
|
||||
"rating_count": "1",
|
||||
"rating_value": "3.5"
|
||||
},
|
||||
"rating_bayesian10_point": {
|
||||
"rating_count": "1",
|
||||
"rating_value": "3.5"
|
||||
},
|
||||
"restaurant_managed_hours_list_v2": [
|
||||
{
|
||||
"day": "Monday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Tuesday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Wednesday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Thursday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Friday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Saturday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Sunday",
|
||||
"hours": "6:00 AM - 11:59 PM",
|
||||
"start_time": "06:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
}
|
||||
],
|
||||
"restaurant_intersected_managed_hours_list": [
|
||||
{
|
||||
"day": "Monday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Tuesday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Wednesday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Thursday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Friday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Saturday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
},
|
||||
{
|
||||
"day": "Sunday",
|
||||
"hours": "1:00 PM - 11:59 PM",
|
||||
"start_time": "13:00:00.000",
|
||||
"end_time": "23:58:59.999"
|
||||
}
|
||||
],
|
||||
"managed_delivery_settings": {
|
||||
"order_minimum": {
|
||||
"amount": 0,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
}
|
||||
},
|
||||
"order_fulfillment_methods": {
|
||||
"SELF_DELIVERY": {
|
||||
"allowable_order_types": null,
|
||||
"fulfillment_tags": []
|
||||
},
|
||||
"PICKUP": {
|
||||
"allowable_order_types": [
|
||||
"standard"
|
||||
],
|
||||
"fulfillment_tags": []
|
||||
},
|
||||
"ORDER_BASED_ON_DEMAND_DELIVERY": {
|
||||
"allowable_order_types": null,
|
||||
"fulfillment_tags": []
|
||||
},
|
||||
"MANAGED_DELIVERY": {
|
||||
"allowable_order_types": [
|
||||
"standard"
|
||||
],
|
||||
"fulfillment_tags": []
|
||||
}
|
||||
},
|
||||
"alcohol_enabled": false,
|
||||
"alcohol_market_status": "DISABLED",
|
||||
"faceted_rating_data": {
|
||||
"review_data": {
|
||||
"total_count": 0,
|
||||
"valid_count": 0,
|
||||
"top_review": {
|
||||
"rating_value": null,
|
||||
"review_text": null,
|
||||
"reviewer_display_name": null,
|
||||
"time_created": null
|
||||
}
|
||||
},
|
||||
"faceted_rating_list": [],
|
||||
"faceted_rating_too_few": true
|
||||
},
|
||||
"additional_media_images": {
|
||||
"LOGO_HOME_PAGE": {
|
||||
"base_url": "https://media-cdn.grubhub.com/image/upload/",
|
||||
"public_id": "iztzardvszedgbisytr1",
|
||||
"format": "png",
|
||||
"tag": "logo",
|
||||
"tags": [
|
||||
"primary_logo"
|
||||
],
|
||||
"image_source": null,
|
||||
"uploader_details": null,
|
||||
"image_description": null,
|
||||
"image_title": null,
|
||||
"scale_mode": "FILL"
|
||||
},
|
||||
"MENU_PAGE": {
|
||||
"base_url": "https://media-cdn.grubhub.com/image/upload/",
|
||||
"public_id": "iztzardvszedgbisytr1",
|
||||
"format": "png",
|
||||
"tag": "logo",
|
||||
"tags": [
|
||||
"primary_logo"
|
||||
],
|
||||
"image_source": null,
|
||||
"uploader_details": null,
|
||||
"image_description": null,
|
||||
"image_title": null,
|
||||
"scale_mode": "FILL"
|
||||
},
|
||||
"SEARCH_HOME_PAGE": {
|
||||
"base_url": "https://media-cdn.grubhub.com/image/upload/",
|
||||
"public_id": "u5ffhbgs258fspctbfgh",
|
||||
"format": "jpg",
|
||||
"tag": "search",
|
||||
"tags": [
|
||||
"primary_search"
|
||||
],
|
||||
"image_source": null,
|
||||
"uploader_details": null,
|
||||
"image_description": null,
|
||||
"image_title": null,
|
||||
"scale_mode": "FILL"
|
||||
},
|
||||
"SEARCH_RESULTS_PAGE": {
|
||||
"base_url": "https://media-cdn.grubhub.com/image/upload/",
|
||||
"public_id": "u5ffhbgs258fspctbfgh",
|
||||
"format": "jpg",
|
||||
"tag": "search",
|
||||
"tags": [
|
||||
"primary_search"
|
||||
],
|
||||
"image_source": null,
|
||||
"uploader_details": null,
|
||||
"image_description": null,
|
||||
"image_title": null,
|
||||
"scale_mode": "FILL"
|
||||
},
|
||||
"HEADER_BACKGROUND": {
|
||||
"base_url": "https://media-cdn.grubhub.com/image/upload/",
|
||||
"public_id": "lhd8t0tiktjp1bif9tp8",
|
||||
"format": "jpg",
|
||||
"tag": "header_background",
|
||||
"tags": [
|
||||
"primary_header"
|
||||
],
|
||||
"image_source": null,
|
||||
"uploader_details": null,
|
||||
"image_description": null,
|
||||
"image_title": null,
|
||||
"scale_mode": "FILL"
|
||||
}
|
||||
},
|
||||
"tag_list": [],
|
||||
"available_promo_codes": [],
|
||||
"available_offers": [],
|
||||
"available_progress_campaigns": [],
|
||||
"merchant_url_path": "orlando-china-ocean-2508-s-semoran-blvd-orlando",
|
||||
"order_type_settings": {
|
||||
"service_fee": {
|
||||
"name": "Service fee",
|
||||
"description": "A service fee of 15.0% for delivery orders will be charged with Orlando China Ocean.",
|
||||
"delivery_fee": {
|
||||
"fee_type": "PERCENT",
|
||||
"percent_value": 15,
|
||||
"maximum_amount_for_percent": {
|
||||
"amount": 1400,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
}
|
||||
},
|
||||
"delivery_service_fee_allocation_info": {
|
||||
"attribution": "GRUBHUB"
|
||||
},
|
||||
"fee_allocation_info": {
|
||||
"fee_allocation_type": "GRUBHUB"
|
||||
}
|
||||
},
|
||||
"small_order_fee": {
|
||||
"name": "Small order delivery fee",
|
||||
"description": "A small order delivery fee applies to your Orlando China Ocean subtotal of $8.69 or less.",
|
||||
"minimum_order_value_cents": 1000,
|
||||
"fee": {
|
||||
"fee_type": "FLAT",
|
||||
"flat_cents_value": {
|
||||
"amount": 200,
|
||||
"currency": "USD",
|
||||
"styled_text": null
|
||||
}
|
||||
},
|
||||
"fee_allocation_info": {
|
||||
"attribution": "GRUBHUB"
|
||||
}
|
||||
},
|
||||
"merchant_status": {
|
||||
"category": "ACTIVE",
|
||||
"category_description": "Active",
|
||||
"description": "Premium",
|
||||
"requires_attrition_reason": false,
|
||||
"status": "PREMIUM"
|
||||
},
|
||||
"delivery_estimate_minutes": 30,
|
||||
"pickup_estimate_minutes": 20
|
||||
},
|
||||
"pickup_tips_disabled": false,
|
||||
"special_instructions_disabled": true,
|
||||
"utensils_selection_disabled": false,
|
||||
"restaurant_tags": [],
|
||||
"version_ids": {
|
||||
"catalog": "v1:wrma6JfYXP4jQrYOidEWeIaIl041p7w4"
|
||||
},
|
||||
"shared_cart": true,
|
||||
"subscription_information": {
|
||||
"order_minimum": null
|
||||
},
|
||||
"excluded_menu_item_count": 0,
|
||||
"aggregations": {
|
||||
"menu_item_tags": {
|
||||
"values": {}
|
||||
}
|
||||
},
|
||||
"menu_item_features": [
|
||||
"CHOICE_OPTION_MEDIA",
|
||||
"CHOICE_CATEGORY_QUANTITIES",
|
||||
"CHOICE_OPTION_QUANTITIES",
|
||||
"SUBCATEGORIES"
|
||||
],
|
||||
"pos_integrated": true,
|
||||
"data_bytes": 0,
|
||||
"menu_data_bytes": 0,
|
||||
"template_type": "STANDARD",
|
||||
"tax_engine_information": {
|
||||
"tax_engine_calculation_enabled": false,
|
||||
"tax_type_code": null
|
||||
},
|
||||
"service_fee_taxable": false,
|
||||
"service_fee_taxable_when_delivery_only": true,
|
||||
"delivery_fee_taxable_when_delivery_only": true,
|
||||
"cash_tip_allowed": false,
|
||||
"is_too_few": true,
|
||||
"too_few": true
|
||||
}
|
||||
},
|
||||
"menu_items": []
|
||||
}
|
4288
web/grubhub/menu_id.json
Normal file
414
web/grubhub/menu_item.json
Normal file
@ -0,0 +1,414 @@
|
||||
[
|
||||
{
|
||||
"name": "Add Or No Add Or No (3rd)",
|
||||
"list": [
|
||||
{
|
||||
"name": "No Change",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "No Egg",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "No Onions",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "No Pork",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Add Pork",
|
||||
"price": 1.5
|
||||
},
|
||||
{
|
||||
"name": "Add Shrimp",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "Add Onions",
|
||||
"price": 1.0
|
||||
},
|
||||
{
|
||||
"name": "Add Broccoli",
|
||||
"price": 1.0
|
||||
},
|
||||
{
|
||||
"name": "Add Chicken",
|
||||
"price": 1.5
|
||||
},
|
||||
{
|
||||
"name": "Add Beef",
|
||||
"price": 2.0
|
||||
}
|
||||
],
|
||||
"required": false,
|
||||
"min": 0,
|
||||
"max": 100
|
||||
},
|
||||
{
|
||||
"name": "Rice Choice (3rd)",
|
||||
"list": [
|
||||
{
|
||||
"name": "w. White Rice",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Plain Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Veg Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Chicken Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Pork Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Shrimp Fried Rice",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. Beef Fried Rice",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. Ham Fried Rice",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. House Special Fried Rice",
|
||||
"price": 3.0
|
||||
},
|
||||
{
|
||||
"name": "w. Plain Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Veg Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Chicken Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Pork Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Beef Lo Mein",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. Shrimp Lo Mein",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. House Lo Mein",
|
||||
"price": 3.0
|
||||
},
|
||||
{
|
||||
"name": "No Rice",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"min": 1,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "Buffalo Wings",
|
||||
"list": [
|
||||
{
|
||||
"name": "(Plain)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "(w. Fries)",
|
||||
"price": 0.96
|
||||
},
|
||||
{
|
||||
"name": "(w. Pork Fried Rice)",
|
||||
"price": 0.96
|
||||
},
|
||||
{
|
||||
"name": "(w. Chicken Fried Rice)",
|
||||
"price": 0.96
|
||||
},
|
||||
{
|
||||
"name": "(w. Beef Fried Rice)",
|
||||
"price": 2.04
|
||||
},
|
||||
{
|
||||
"name": "(w. Shrimp Fried Rice)",
|
||||
"price": 2.04
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"min": 1,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "Chicken Nuggets (15)",
|
||||
"list": [
|
||||
{
|
||||
"name": "(Plain)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "(w. Fries)",
|
||||
"price": 2.35
|
||||
},
|
||||
{
|
||||
"name": "(w. Pork Fried Rice)",
|
||||
"price": 2.35
|
||||
},
|
||||
{
|
||||
"name": "(w. Chicken Fried Rice)",
|
||||
"price": 2.59
|
||||
},
|
||||
{
|
||||
"name": "(w. Beef Fried Rice)",
|
||||
"price": 2.59
|
||||
},
|
||||
{
|
||||
"name": "(w. Shrimp Fried Rice)",
|
||||
"price": 2.59
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"min": 1,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "Fried Chicken Wings (3pcs wholewings)",
|
||||
"list": [
|
||||
{
|
||||
"name": "(Plain)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "(w. Fries)",
|
||||
"price": 1.32
|
||||
},
|
||||
{
|
||||
"name": "(w. Pork Fried Rice)",
|
||||
"price": 1.32
|
||||
},
|
||||
{
|
||||
"name": "(w. Chicken Fried Rice)",
|
||||
"price": 2.4
|
||||
},
|
||||
{
|
||||
"name": "(w. Beef Fried Rice)",
|
||||
"price": 2.4
|
||||
},
|
||||
{
|
||||
"name": "(w. Shrimp Fried Rice)",
|
||||
"price": 2.4
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"min": 1,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "Chicken Wings w. Garlic Sauce",
|
||||
"list": [
|
||||
{
|
||||
"name": "(Plain)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "(w. Fries)",
|
||||
"price": 0.96
|
||||
},
|
||||
{
|
||||
"name": "(w. Pork Fried Rice)",
|
||||
"price": 0.96
|
||||
},
|
||||
{
|
||||
"name": "(w. Chicken Fried Rice)",
|
||||
"price": 0.96
|
||||
},
|
||||
{
|
||||
"name": "(w. Beef Fried Rice)",
|
||||
"price": 2.04
|
||||
},
|
||||
{
|
||||
"name": "(w. Shrimp Fried Rice)",
|
||||
"price": 2.04
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"min": 1,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "LC Rice Choice (3rd)",
|
||||
"list": [
|
||||
{
|
||||
"name": "w. White Rice",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Plain Fried Rice",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Egg Fried Rice",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Veg Fried Rice",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Chicken Fried Rice",
|
||||
"price": 1.0
|
||||
},
|
||||
{
|
||||
"name": "w. Pork Fried Rice",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Shrimp Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Beef Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Ham Fried Rice",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. House Special Fried Rice",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. Plain Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Veg Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Chicken Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Pork Lo Mein",
|
||||
"price": 2.0
|
||||
},
|
||||
{
|
||||
"name": "w. Beef Lo Mein",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. Shrimp Lo Mein",
|
||||
"price": 2.5
|
||||
},
|
||||
{
|
||||
"name": "w. House Lo Mein",
|
||||
"price": 3.0
|
||||
},
|
||||
{
|
||||
"name": "No Rice",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": false,
|
||||
"min": 0,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "Side Choice (3rd)",
|
||||
"list": [
|
||||
{
|
||||
"name": "w. Pork Egg Roll",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Veg Roll",
|
||||
"price": 0.5
|
||||
},
|
||||
{
|
||||
"name": "w. Coca Cola (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Sprite (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Brisk (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. MtnDew (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Diet Coke (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Pepsi (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Orange (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Ginger Ale (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Dr Pepper (can)",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Water",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Wonton Soup",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Egg Drop Soup",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "w. Hot Sour Soup",
|
||||
"price": 0.5
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"min": 1,
|
||||
"max": 1
|
||||
},
|
||||
{
|
||||
"name": "Extra Sauce (3rd)",
|
||||
"list": [
|
||||
{
|
||||
"name": "Extra Sauce",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": false,
|
||||
"min": 0,
|
||||
"max": 1
|
||||
}
|
||||
]
|
47
web/kwaixiaodian/main.py
Normal file
@ -0,0 +1,47 @@
|
||||
import json

import requests

# Browser-like headers captured from a Chrome session on app.kwaixiaodian.com.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "content-type": "application/json",
    "kpf": "PC_WEB",
    "kpn": "unknown",
    "ktrace-str": "3|My40NTgzNjk4Mjg2NzM2NzY5LjgzNjE3OTE4LjE3NDkxMzM2MjIxNzcuMTAwMQ==|My40NTgzNjk4Mjg2NzM2NzY5LjMzMzYzNjg2LjE3NDkxMzM2MjIxNzcuMTAwMA==|0|plateco-kfx-service|plateco|true|src:Js,seqn:2087,rsi:2c7d8430-cd6d-4c8b-80dd-c6a1518eebf0,path:/merchant/shop/detail,rpi:343deb66b6",
    "origin": "https://app.kwaixiaodian.com",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://app.kwaixiaodian.com/merchant/shop/detail?id=21778354852592",
    "sec-ch-ua": "\"Google Chrome\";v=\"137\", \"Chromium\";v=\"137\", \"Not/A)Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
}

# NOTE(review): these look like session credentials for a real account;
# they are hard-coded here and will presumably expire - confirm and move
# them out of version control.
cookies = {
    "userId": "174759615",
    "kuaishou.kwaishop.product.c_st": "Ch5rdWFpc2hvdS5rd2Fpc2hvcC5wcm9kdWN0LmMuc3QSkAEWKm0SWSgQUszOw5_W9GReLVtJ__7lCB6xQ1Qlfbfd81dAEnnOSq5gU0Ijh3QqenODyNcWdIuobvCbDR83ug0eq4cFFwm9ScqTIPSAA4Zq99J6rHi6PCh3Mm38K1NltvM5YrnOXydEbJ2zdRsT0g2Klq656NcKnyOwuniqMh3-S6AfoMog67Hoite3jms9QIEaEigBlj4JBnRnpU1V3Jl5YNDGwyIgdsFFzGOIZ7g29WWC8DIdtvG0cmtpyVe5oOhDcK0CpS8oBTAB",
    "kuaishou.kwaishop.product.c_ph": "d36c1ef6d691f60e3d921de2af9ab9da0179",
    "_did": "web_630470819D8BDA"
}

url = "https://app.kwaixiaodian.com/rest/app/kwaishop/product/c/detail/h5/componentized"

# Query-string parameters sent alongside the JSON body.
params = {
    "caver": "2",
    "__NS_hxfalcon": "HUDR_sFnX-DtsEEFXsbDPT3TMP-sk0is6Segc_xRclq7WsRCxYt9QLz_qrd3MALzfkMhkshbENDI1Bul8CEN_rqrzzbU23pRTmhJ9YA-aV0esm8P3lx6tKbF_LW9rzYhnVnQQWedDJlWPn72Ha1xT2QbETHvN6EJViN1L-bUlhQif$HE_3d22bbdfddef8d0a36e177f44a5fd0c28a7776767677a9eedacc6dfb50c5b55a40fee177ed2048ac0d20489e76"
}

# Request payload; ``cashierParam`` is itself a JSON document embedded as a string.
data = {
    "sourceType": "web",
    "id": "21778354852592",
    "itemId": "21778354852592",
    "cashierParam": "{\"installWechat\":false,\"installAlipay\":false,\"installWechatSdk\":false,\"installAlipaySdk\":false,\"installUnionPaySdk\":false,\"installUnionPay\":false}",
    "fromSource": "",
    "_refer": "https://passport.kuaishou.com/"
}
# Serialize by hand with compact separators so the body bytes match the
# original script exactly (``json=`` would use default separators).
data = json.dumps(data, separators=(',', ':'))

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(
    url,
    headers=headers,
    cookies=cookies,
    params=params,
    data=data,
    timeout=10,
)
# Fail with a clear HTTPError instead of a JSON decode error on 4xx/5xx pages.
response.raise_for_status()

print(response.json())
|
208
web/qj050_com/Requests_Except.py
Normal file
@ -0,0 +1,208 @@
|
||||
import json
|
||||
import re
|
||||
import requests
|
||||
import logging
|
||||
import time
|
||||
from lxml import etree
|
||||
from types import SimpleNamespace
|
||||
from http.cookies import SimpleCookie
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
|
||||
class ExtendedResponse(requests.Response):
    """A ``requests.Response`` with extra parsing and cookie-export helpers.

    Every helper wraps any failure in ``ValueError`` and chains the original
    exception as ``__cause__`` so the root error stays visible in tracebacks.
    """

    def xpath(self):
        """Parse the response text as HTML and return the lxml tree.

        Returns:
            The object produced by ``etree.HTML(self.text)``; callers run
            their own XPath queries on it.

        Raises:
            ValueError: if parsing fails.
        """
        try:
            return etree.HTML(self.text)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e)) from e

    def to_Dict(self):
        """Decode the JSON body into nested attribute-style objects.

        Returns:
            The result of ``dict_to_obj`` applied to ``self.json()``.

        Raises:
            ValueError: if the body is not valid JSON or conversion fails.
        """
        try:
            return self.dict_to_obj(self.json())
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e)) from e

    def to_Re_findall(self, regex):
        """Return ``re.findall(regex, self.text)``.

        Args:
            regex: pattern passed straight to ``re.findall``.

        Raises:
            ValueError: if the search fails (for example, an invalid pattern).
        """
        try:
            return re.findall(regex, self.text)
        except Exception as e:
            raise ValueError("Re搜索错误: " + str(e)) from e

    def cookies_dict(self):
        """Return this response's cookies as a plain ``dict``.

        Starts from ``self.cookies.get_dict()`` and then overlays whatever
        ``SimpleCookie`` parses out of the ``Set-Cookie`` header, if present.

        Raises:
            ValueError: if the cookies cannot be read or parsed.
        """
        try:
            cookie_dict = self.cookies.get_dict()
            if 'Set-Cookie' in self.headers:
                sc = SimpleCookie()
                sc.load(self.headers['Set-Cookie'])
                for key, morsel in sc.items():
                    cookie_dict[key] = morsel.value
            return cookie_dict
        except Exception as e:
            raise ValueError("Cookies转换错误: " + str(e)) from e

    def save_cookies(self, filepath, format='json'):
        """Write the cookies of this response to a file.

        Args:
            filepath (str): destination path.
            format (str): ``'json'``, ``'pickle'`` or ``'txt'``
                (case-insensitive); defaults to ``'json'``.

        Raises:
            ValueError: on an unsupported ``format`` or any I/O or
                serialization failure.
        """
        try:
            cookie_dict = self.cookies_dict()
            fmt = format.lower()
            if fmt == 'json':
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(cookie_dict, f, ensure_ascii=False, indent=4)
            elif fmt == 'pickle':
                import pickle
                with open(filepath, 'wb') as f:
                    pickle.dump(cookie_dict, f)
            elif fmt == 'txt':
                # One "name: value" pair per line.
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.writelines(f"{key}: {value}\n" for key, value in cookie_dict.items())
            else:
                raise ValueError("不支持的格式,请选择 'json'、'pickle' 或 'txt'")
        except Exception as e:
            raise ValueError("保存cookies出错: " + str(e)) from e

    @staticmethod
    def dict_to_obj(d):
        """Recursively convert dicts to ``SimpleNamespace`` objects.

        Lists are converted element by element; any other value is returned
        unchanged.
        """
        if isinstance(d, dict):
            return SimpleNamespace(**{k: ExtendedResponse.dict_to_obj(v) for k, v in d.items()})
        if isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(item) for item in d]
        return d
|
||||
|
||||
|
||||
class MyRequests:
    """Thin wrapper around ``requests.Session`` with retries and cookie persistence."""

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10,
                 default_cookies=None):
        """Create the session and load any cookies saved for ``base_url``.

        The cookie file name is ``base_url`` with every '.' replaced by '_'
        plus ``_cookies.json`` (for example ``www_zhrczp_com_cookies.json``).

        Args:
            base_url (str): host used to build request URLs; a trailing '/'
                is stripped.
            protocol (str): URL scheme, ``'http'`` by default.
            retries (int): how many times a failed request is retried.
            proxy_options (bool): when truthy, route both http and https
                through ``http://127.0.0.1:7890``.
            default_timeout (int): timeout used when a call passes none.
            default_cookies (dict): cookies to seed the session with.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()

        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}

        # Seed the session first; cookies loaded from the file below are
        # applied afterwards and therefore override same-named defaults.
        if default_cookies:
            self.session.cookies.update(default_cookies)

        self.cookie_file = f"{self.base_url.replace('.', '_')}_cookies.json"
        try:
            with open(self.cookie_file, 'r', encoding='utf-8') as f:
                loaded_cookies = json.load(f)
            self.session.cookies.update(loaded_cookies)
            logging.info("成功加载本地 cookies")
        except FileNotFoundError:
            logging.info("本地 cookies 文件不存在,将在请求后自动保存")
        except Exception as e:
            # A corrupt cookie file must not prevent the client from starting.
            logging.error("加载本地 cookies 失败:" + str(e))

    def _save_cookies(self):
        """Write the session cookies to ``self.cookie_file`` as JSON.

        Failures are logged and not raised.
        """
        try:
            with open(self.cookie_file, 'w', encoding='utf-8') as f:
                json.dump(self.session.cookies.get_dict(), f, ensure_ascii=False, indent=4)
            logging.info("cookies 已保存到本地文件:" + self.cookie_file)
        except Exception as e:
            logging.error("保存 cookies 文件失败:" + str(e))

    def _build_url(self, url):
        """Return ``url`` unchanged if absolute, else join it onto the base URL."""
        if url.startswith(("http://", "https://")):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge ``headers`` into the session-wide default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge ``cookies`` into the session and persist them to disk."""
        self.session.cookies.update(cookies)
        self._save_cookies()

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; extra keyword arguments go to ``_request``."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _request(self, method, url, retries=None, autosave=False, **kwargs):
        """Send one request, retrying on request failures with back-off.

        Args:
            method (str): HTTP verb.
            url (str): absolute URL.
            retries (int | None): retries left; ``None`` means ``self.retries``.
            autosave (bool): persist session cookies after a successful call.
            **kwargs: passed to ``Session.request``; ``timeout`` defaults to
                ``self.default_timeout``.

        Returns:
            The response, re-tagged as ``ExtendedResponse``.

        Raises:
            requests.RequestException: the last failure once retries run out
                (this includes ``HTTPError`` from ``raise_for_status``).
        """
        if retries is None:
            retries = self.retries
        kwargs.setdefault('timeout', self.default_timeout)

        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
                break
            except requests.RequestException as e:
                # Only request-level failures are retried; unrelated errors
                # propagate immediately instead of re-sending the request.
                if retries <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {retries},错误: {e}")
                # Wait 1s, 2s, 4s, ... as the remaining budget shrinks.
                time.sleep(2 ** (self.retries - retries))
                retries -= 1

        self.session.cookies.update(response.cookies)

        if 'Set-Cookie' in response.headers:
            sc = SimpleCookie()
            sc.load(response.headers['Set-Cookie'])
            for key, morsel in sc.items():
                # Skip cookies whose value is the literal "deleted".
                if morsel.value.lower() != 'deleted':
                    self.session.cookies.set(key, morsel.value)

        if autosave:
            self._save_cookies()

        # Re-tag in place so callers get the helper methods without a copy.
        response.__class__ = ExtendedResponse
        return response

    def get_cookies(self):
        """Return the session cookies as a plain ``dict``.

        Raises:
            ValueError: if the cookie jar cannot be converted.
        """
        try:
            return self.session.cookies.get_dict()
        except Exception as e:
            raise ValueError("获取 cookies 失败:" + str(e)) from e
|
||||
|
||||
|
||||
class MR(MyRequests):
    """Short alias for ``MyRequests``; adds no behavior of its own."""
|
130
web/qj050_com/main.py
Normal file
@ -0,0 +1,130 @@
|
||||
import datetime
|
||||
|
||||
from Requests_Except import *
|
||||
import pandas as pd
|
||||
|
||||
# Host and scheme that the shared client joins with request paths.
base_url = 'www.qj050.com'
protocol = 'https'

# Browser-like headers applied to every request made through ``Requests``.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'cache-control': 'no-cache',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://www.qj050.com',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'referer': 'https://www.qj050.com/account/quick?login_type=1&ref=/?from=h5',
    'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'iframe',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
}

# The third positional parameter of MR is ``retries``, not headers, so the
# headers are installed through set_default_headers and retries keeps its
# integer default.
Requests = MR(base_url, protocol)
Requests.set_default_headers(headers)

# Keyword of the most recent search; reused in the export file name.
_keyword = ""

# Column-oriented accumulator: one list per spreadsheet column.
pd_data = {
    'resume_id': [],
    '姓名': [],
    '年龄': [],
    '生日': [],
    '工作经验': [],
    '最高学历': [],
    '婚姻状态': [],
    '电话': [],
    '意向岗位': [],
    '期望薪资': [],
    '工作性质': [],
    '求职状态': [],
    '工作地点': [],
    '工作经历1': [],
    '工作经历2': [],
    '工作经历3': [],
    '工作经历4': [],
}
|
||||
|
||||
|
||||
def login():
    """Log in through the quick-login form and return the resulting cookies.

    The request is sent with ``autosave=True`` so the shared client writes
    the session cookies to its cookie file after a successful call.

    Returns:
        dict: cookies reported by the login response.
    """
    # NOTE(review): account and password are hard-coded in source; consider
    # loading them from configuration that is not committed.
    response = Requests.post(
        '/account/login',
        params={'ref': '/?from=h5'},
        data={
            '_type': '1',
            '_from': 'quick',
            'account': '真贤8888',
            'password': 'zhenxian8888',
        },
        autosave=True,
    )
    # Previously this value was computed and thrown away.
    return response.cookies_dict()
|
||||
|
||||
|
||||
def get_page_for_keyword(keyword):
    """Fetch page 1 (page size 100) of the resume search for ``keyword``.

    Also stores ``keyword`` in the module-level ``_keyword``.

    Returns:
        The decoded JSON response as nested attribute-style objects.
    """
    global _keyword
    _keyword = keyword

    # Two millisecond timestamps: "_" is taken ten seconds in the past,
    # "t" is the current time.
    earlier_ms = str(int(time.time() * 1000 - 10000))
    query = {
        '_': earlier_ms,
        'tab': 'resume',
        'keyword': keyword,
        't': str(int(time.time() * 1000)),
        'pageSize': '100',
        'pageIndex': '1',
        'showStatus': 'true',
    }
    return Requests.get('/api/v1/resumes', params=query).to_Dict()
|
||||
|
||||
|
||||
def get_resumes_info(resumes_id):
    """Fetch one resume and flatten it into a row for the export table.

    Args:
        resumes_id: identifier placed into the ``/api/v1/resume/<id>`` path.

    Returns:
        dict: one value per column of ``pd_data``; the four work-history
        columns are filled in order and padded with ``''``.
    """
    detail_path = f'/api/v1/resume/{resumes_id}'
    query = {
        '_': str(int(time.time() * 1000)),
        'view_type': 'resumeLibrary',
        'privacy_description': '1',
    }
    info = Requests.get(detail_path, params=query).to_Dict().data

    row = {
        'resume_id': resumes_id,
        '姓名': info.name,
        '年龄': info.age,
        '生日': info.birthday,
        '工作经验': info.work_exp_value,
        '最高学历': info.edu_value,
        '婚姻状态': info.marriage_value,
        '电话': info.phone,
        '意向岗位': ','.join([category.name for category in info.infoCateforyArrObj]),
        '期望薪资': info.salaryDesc,
        '工作性质': info.work_type_value,
        '求职状态': info.job_instant_value,
        '工作地点': info.job_region_value,
    }

    # Columns are numbered 1-4; only the first four jobs are exported.
    for slot in range(1, 5):
        column = f'工作经历{slot}'
        if slot <= len(info.works):
            job = info.works[slot - 1]
            row[column] = f"{job.company}:{job.content}"
        else:
            row[column] = ''

    return row
|
||||
|
||||
|
||||
def integration(keyword):
    """Search by ``keyword``, fetch every listed resume, and export to Excel.

    Rows are appended to the module-level ``pd_data``, which this function
    never clears, so repeated calls in one process export accumulated rows.
    The output file is named ``<YYYYmmdd_HHMMSS>_<keyword>.xlsx``.
    """
    global _keyword
    _keyword = keyword

    listing = get_page_for_keyword(keyword)
    for entry in listing.data.items:
        for column, cell in get_resumes_info(entry.id).items():
            pd_data[column].append(cell)

    frame = pd.DataFrame(pd_data)
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    frame.to_excel(f'{stamp}_{_keyword}.xlsx', index=False)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: export the resumes found for this keyword.
    integration("财务")
    # Single-resume debugging example: get_resumes_info('36859')
|
6
web/qj050_com/www_qj050_com_cookies.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6NDgxNTMsInVzZXJuYW1lIjoi55yf6LSkODg4OCIsInB3ZCI6IjFiYmJjNzc5OGRkMTFiNTI2YWQ4ZTVmYTYyNWY5MjVkIiwiaWF0IjoxNzQ1NzMxNjEzLCJleHAiOjE3NzcyNjc2MTN9.2NS8XbpDB8kv4zJiwDUqz5259WQ8tLLfqIGy_xmiAnI",
|
||||
"token.sig": "5VDLks18QUImA1K0NklDMuC28TBBTM44s646GwPrY-A",
|
||||
"has_login_log": "yes",
|
||||
"x-trace-id": "5c4888e20dc24d739a4490feccf0c291"
|
||||
}
|
852
web/tsrcw/idlist.json
Normal file
@ -0,0 +1,852 @@
|
||||
[
|
||||
{
|
||||
"id": "3560",
|
||||
"name": "计算机类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3561",
|
||||
"cname": "系统分析员"
|
||||
},
|
||||
{
|
||||
"cid": "3562",
|
||||
"cname": "软件开发与测试"
|
||||
},
|
||||
{
|
||||
"cid": "3563",
|
||||
"cname": "系统维护/网络管理"
|
||||
},
|
||||
{
|
||||
"cid": "3564",
|
||||
"cname": "网络工程"
|
||||
},
|
||||
{
|
||||
"cid": "3565",
|
||||
"cname": "网站信息管理/内容编辑"
|
||||
},
|
||||
{
|
||||
"cid": "3566",
|
||||
"cname": "网站策划"
|
||||
},
|
||||
{
|
||||
"cid": "3567",
|
||||
"cname": "网页设计制作/网页美工"
|
||||
},
|
||||
{
|
||||
"cid": "3568",
|
||||
"cname": "多媒体设计与开发"
|
||||
},
|
||||
{
|
||||
"cid": "3569",
|
||||
"cname": "计算机辅助设计与绘图"
|
||||
},
|
||||
{
|
||||
"cid": "3570",
|
||||
"cname": "数据库开发与管理"
|
||||
},
|
||||
{
|
||||
"cid": "3571",
|
||||
"cname": "系统集成/技术支持"
|
||||
},
|
||||
{
|
||||
"cid": "3572",
|
||||
"cname": "系统安全管理"
|
||||
},
|
||||
{
|
||||
"cid": "3573",
|
||||
"cname": "计算机类"
|
||||
},
|
||||
{
|
||||
"cid": "3574",
|
||||
"cname": "信息安全工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3575",
|
||||
"cname": "ERP技术/开发应用工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3576",
|
||||
"cname": "系统管理员/网络管理员"
|
||||
},
|
||||
{
|
||||
"cid": "3577",
|
||||
"cname": "硬件测试工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3578",
|
||||
"cname": "信息系统分析员"
|
||||
},
|
||||
{
|
||||
"cid": "3579",
|
||||
"cname": "工程师/软件测试工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3580",
|
||||
"cname": "硬件工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3581",
|
||||
"cname": "数据库工程师/管理员"
|
||||
},
|
||||
{
|
||||
"cid": "3582",
|
||||
"cname": "系统集成工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3583",
|
||||
"cname": "系统架构师"
|
||||
},
|
||||
{
|
||||
"cid": "3584",
|
||||
"cname": "ERP实施顾问"
|
||||
},
|
||||
{
|
||||
"cid": "3585",
|
||||
"cname": "软件UI设计师/工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3586",
|
||||
"cname": "研发工程师 需求工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3587",
|
||||
"cname": "网站运营管理/运营专员"
|
||||
},
|
||||
{
|
||||
"cid": "3588",
|
||||
"cname": "游戏设计/开发"
|
||||
},
|
||||
{
|
||||
"cid": "3589",
|
||||
"cname": "游戏策划"
|
||||
},
|
||||
{
|
||||
"cid": "3590",
|
||||
"cname": "游戏界面设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3591",
|
||||
"cname": "特效设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3592",
|
||||
"cname": "视觉设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3593",
|
||||
"cname": "语音/视频/图形开发工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3594",
|
||||
"cname": "Flash设计/开发"
|
||||
},
|
||||
{
|
||||
"cid": "3595",
|
||||
"cname": "UI/UE设计师/顾问"
|
||||
},
|
||||
{
|
||||
"cid": "3596",
|
||||
"cname": "三维/3D设计/制作"
|
||||
},
|
||||
{
|
||||
"cid": "3597",
|
||||
"cname": "网络优化师/SEO"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3726",
|
||||
"name": "财务类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3727",
|
||||
"cname": "财务总监/经理/主任"
|
||||
},
|
||||
{
|
||||
"cid": "3728",
|
||||
"cname": "财务"
|
||||
},
|
||||
{
|
||||
"cid": "3729",
|
||||
"cname": "出纳/收银"
|
||||
},
|
||||
{
|
||||
"cid": "3730",
|
||||
"cname": "统计"
|
||||
},
|
||||
{
|
||||
"cid": "3731",
|
||||
"cname": "审计"
|
||||
},
|
||||
{
|
||||
"cid": "3732",
|
||||
"cname": "税务师/税务专员"
|
||||
},
|
||||
{
|
||||
"cid": "3733",
|
||||
"cname": "财务总监"
|
||||
},
|
||||
{
|
||||
"cid": "3734",
|
||||
"cname": "融资经理"
|
||||
},
|
||||
{
|
||||
"cid": "3735",
|
||||
"cname": "会计师/会计"
|
||||
},
|
||||
{
|
||||
"cid": "3736",
|
||||
"cname": "总帐主管"
|
||||
},
|
||||
{
|
||||
"cid": "3737",
|
||||
"cname": "财务分析经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3738",
|
||||
"cname": "财务顾问"
|
||||
},
|
||||
{
|
||||
"cid": "3739",
|
||||
"cname": "成本会计"
|
||||
},
|
||||
{
|
||||
"cid": "3740",
|
||||
"cname": "会计文员"
|
||||
},
|
||||
{
|
||||
"cid": "3741",
|
||||
"cname": "资金专员"
|
||||
},
|
||||
{
|
||||
"cid": "3742",
|
||||
"cname": "财务类"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3743",
|
||||
"name": "工业/工厂类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3744",
|
||||
"cname": "工业/工厂类"
|
||||
},
|
||||
{
|
||||
"cid": "3745",
|
||||
"cname": "生产管理"
|
||||
},
|
||||
{
|
||||
"cid": "3746",
|
||||
"cname": "工程管理"
|
||||
},
|
||||
{
|
||||
"cid": "3747",
|
||||
"cname": "品质管理"
|
||||
},
|
||||
{
|
||||
"cid": "3748",
|
||||
"cname": "物料管理"
|
||||
},
|
||||
{
|
||||
"cid": "3749",
|
||||
"cname": "设备管理"
|
||||
},
|
||||
{
|
||||
"cid": "3750",
|
||||
"cname": "仓库管理"
|
||||
},
|
||||
{
|
||||
"cid": "3751",
|
||||
"cname": "计划员/调度"
|
||||
},
|
||||
{
|
||||
"cid": "3752",
|
||||
"cname": "化验员"
|
||||
},
|
||||
{
|
||||
"cid": "3753",
|
||||
"cname": "跟单员"
|
||||
},
|
||||
{
|
||||
"cid": "3754",
|
||||
"cname": "生产经理/车间主任"
|
||||
},
|
||||
{
|
||||
"cid": "3755",
|
||||
"cname": "工程师/副总工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3756",
|
||||
"cname": "质量管理/测试经理(QA/QC经理)"
|
||||
},
|
||||
{
|
||||
"cid": "3757",
|
||||
"cname": "物料管理/物控"
|
||||
},
|
||||
{
|
||||
"cid": "3758",
|
||||
"cname": "设备管理工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3759",
|
||||
"cname": "调度/生产计划协调员"
|
||||
},
|
||||
{
|
||||
"cid": "3760",
|
||||
"cname": "工厂跟单员"
|
||||
},
|
||||
{
|
||||
"cid": "3761",
|
||||
"cname": "采购专员"
|
||||
},
|
||||
{
|
||||
"cid": "3762",
|
||||
"cname": "质量检验员/测试员"
|
||||
},
|
||||
{
|
||||
"cid": "3763",
|
||||
"cname": "认证体系工程师/审核员"
|
||||
},
|
||||
{
|
||||
"cid": "3764",
|
||||
"cname": "采购经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3765",
|
||||
"cname": "质量管理/测试主管(QA/QC主管)"
|
||||
},
|
||||
{
|
||||
"cid": "3766",
|
||||
"cname": "质量管理/测试工程师(QA/QC工程师)"
|
||||
},
|
||||
{
|
||||
"cid": "3767",
|
||||
"cname": "仓库经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3768",
|
||||
"cname": "生产助理"
|
||||
},
|
||||
{
|
||||
"cid": "3769",
|
||||
"cname": "安全工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3770",
|
||||
"cname": "包装工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3771",
|
||||
"cname": "产品开发/技术/工艺"
|
||||
},
|
||||
{
|
||||
"cid": "3772",
|
||||
"cname": "环境/健康/安全工程师(EHS)"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3793",
|
||||
"name": "机械/设备维修类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3794",
|
||||
"cname": "铸造/锻造工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3795",
|
||||
"cname": "机械工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3796",
|
||||
"cname": "注塑工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3797",
|
||||
"cname": "机械/设备维修类"
|
||||
},
|
||||
{
|
||||
"cid": "3798",
|
||||
"cname": "塑性加工/铸造/焊接/切割"
|
||||
},
|
||||
{
|
||||
"cid": "3799",
|
||||
"cname": "精密机械/精密仪器/仪器仪表"
|
||||
},
|
||||
{
|
||||
"cid": "3800",
|
||||
"cname": "机械设计/制造/制图"
|
||||
},
|
||||
{
|
||||
"cid": "3801",
|
||||
"cname": "机电一体化工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3802",
|
||||
"cname": "机床工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3803",
|
||||
"cname": "液压传动工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3804",
|
||||
"cname": "机械自动化/工业自动化"
|
||||
},
|
||||
{
|
||||
"cid": "3805",
|
||||
"cname": "船舶工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3806",
|
||||
"cname": "压力容器/锅炉工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3807",
|
||||
"cname": "工程/设备工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3808",
|
||||
"cname": "金属制品"
|
||||
},
|
||||
{
|
||||
"cid": "3809",
|
||||
"cname": "模具工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3810",
|
||||
"cname": "传感器工程师/光电"
|
||||
},
|
||||
{
|
||||
"cid": "3811",
|
||||
"cname": "检测技术及仪器/计量测试工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3812",
|
||||
"cname": "机械设备与汽车、摩托维修工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3813",
|
||||
"cname": "焊接工程师"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3870",
|
||||
"name": "设计/广告类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3871",
|
||||
"cname": "设计/广告类"
|
||||
},
|
||||
{
|
||||
"cid": "3872",
|
||||
"cname": "广告设计/策划"
|
||||
},
|
||||
{
|
||||
"cid": "3873",
|
||||
"cname": "广告制作/平面设计与制作"
|
||||
},
|
||||
{
|
||||
"cid": "3874",
|
||||
"cname": "美术/图形设计"
|
||||
},
|
||||
{
|
||||
"cid": "3875",
|
||||
"cname": "工业设计/产品设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3876",
|
||||
"cname": "服装设计"
|
||||
},
|
||||
{
|
||||
"cid": "3877",
|
||||
"cname": "家具设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3878",
|
||||
"cname": "珠宝设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3879",
|
||||
"cname": "玩具设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3880",
|
||||
"cname": "电脑绘图员"
|
||||
},
|
||||
{
|
||||
"cid": "3881",
|
||||
"cname": "产品包装设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3882",
|
||||
"cname": "形象设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3883",
|
||||
"cname": "策划总监/总经理"
|
||||
},
|
||||
{
|
||||
"cid": "3884",
|
||||
"cname": "陈列/橱窗设计"
|
||||
},
|
||||
{
|
||||
"cid": "3885",
|
||||
"cname": "展览设计"
|
||||
},
|
||||
{
|
||||
"cid": "3886",
|
||||
"cname": "工业品设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3887",
|
||||
"cname": "动画/3D设计"
|
||||
},
|
||||
{
|
||||
"cid": "3888",
|
||||
"cname": "平面设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3889",
|
||||
"cname": "排版设计员"
|
||||
},
|
||||
{
|
||||
"cid": "3890",
|
||||
"cname": "电话采编"
|
||||
},
|
||||
{
|
||||
"cid": "3891",
|
||||
"cname": "广告创意总监"
|
||||
},
|
||||
{
|
||||
"cid": "3892",
|
||||
"cname": "媒介策划/管理"
|
||||
},
|
||||
{
|
||||
"cid": "3893",
|
||||
"cname": "活动策划"
|
||||
},
|
||||
{
|
||||
"cid": "3894",
|
||||
"cname": "活动执行"
|
||||
},
|
||||
{
|
||||
"cid": "3895",
|
||||
"cname": "舞美设计"
|
||||
},
|
||||
{
|
||||
"cid": "3896",
|
||||
"cname": "后期制作/音效师"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3897",
|
||||
"name": "行政/人事类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3898",
|
||||
"cname": "行政/人事类"
|
||||
},
|
||||
{
|
||||
"cid": "3899",
|
||||
"cname": "人力资源经理"
|
||||
},
|
||||
{
|
||||
"cid": "3900",
|
||||
"cname": "行政经理/主任/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3901",
|
||||
"cname": "招聘经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3902",
|
||||
"cname": "培训经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3903",
|
||||
"cname": "薪酬福利经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3904",
|
||||
"cname": "绩效考核经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3905",
|
||||
"cname": "人力资源总监"
|
||||
},
|
||||
{
|
||||
"cid": "3906",
|
||||
"cname": "行政总监"
|
||||
},
|
||||
{
|
||||
"cid": "3907",
|
||||
"cname": "人事专员"
|
||||
},
|
||||
{
|
||||
"cid": "3908",
|
||||
"cname": "行政专员/助理"
|
||||
},
|
||||
{
|
||||
"cid": "3909",
|
||||
"cname": "前台接待/总机/接待生"
|
||||
},
|
||||
{
|
||||
"cid": "3910",
|
||||
"cname": "高级秘书"
|
||||
},
|
||||
{
|
||||
"cid": "3911",
|
||||
"cname": "ISO专员"
|
||||
},
|
||||
{
|
||||
"cid": "3912",
|
||||
"cname": "文案策划/资料编写"
|
||||
},
|
||||
{
|
||||
"cid": "3913",
|
||||
"cname": "人力资源主管"
|
||||
},
|
||||
{
|
||||
"cid": "3914",
|
||||
"cname": "招聘专员/助理"
|
||||
},
|
||||
{
|
||||
"cid": "3915",
|
||||
"cname": "猎头顾问/助理"
|
||||
},
|
||||
{
|
||||
"cid": "3916",
|
||||
"cname": "人力资源信息系统专员"
|
||||
},
|
||||
{
|
||||
"cid": "3917",
|
||||
"cname": "后勤人员"
|
||||
},
|
||||
{
|
||||
"cid": "3918",
|
||||
"cname": "合同管理"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3919",
|
||||
"name": "房地产/建筑/物业管理",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3920",
|
||||
"cname": "房地产/建筑/物业管理"
|
||||
},
|
||||
{
|
||||
"cid": "3921",
|
||||
"cname": "结构土木/土建工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3922",
|
||||
"cname": "水电管理"
|
||||
},
|
||||
{
|
||||
"cid": "3923",
|
||||
"cname": "工程预决算/造价师"
|
||||
},
|
||||
{
|
||||
"cid": "3924",
|
||||
"cname": "房地产开发/策划经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3925",
|
||||
"cname": "给排水工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3926",
|
||||
"cname": "暖通工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3927",
|
||||
"cname": "物业管理经理/主管"
|
||||
},
|
||||
{
|
||||
"cid": "3928",
|
||||
"cname": "路桥工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3929",
|
||||
"cname": "房地产中介/交易"
|
||||
},
|
||||
{
|
||||
"cid": "3930",
|
||||
"cname": "室内外装潢设计"
|
||||
},
|
||||
{
|
||||
"cid": "3931",
|
||||
"cname": "绘图/建筑制图员"
|
||||
},
|
||||
{
|
||||
"cid": "3932",
|
||||
"cname": "工程监理"
|
||||
},
|
||||
{
|
||||
"cid": "3933",
|
||||
"cname": "物业顾问"
|
||||
},
|
||||
{
|
||||
"cid": "3934",
|
||||
"cname": "管道"
|
||||
},
|
||||
{
|
||||
"cid": "3935",
|
||||
"cname": "建筑施工管理"
|
||||
},
|
||||
{
|
||||
"cid": "3936",
|
||||
"cname": "基础地下工程/岩土工程"
|
||||
},
|
||||
{
|
||||
"cid": "3937",
|
||||
"cname": "港口与航道工程"
|
||||
},
|
||||
{
|
||||
"cid": "3938",
|
||||
"cname": "城镇规划/土地规划"
|
||||
},
|
||||
{
|
||||
"cid": "3939",
|
||||
"cname": "物业管理专员/助理"
|
||||
},
|
||||
{
|
||||
"cid": "3940",
|
||||
"cname": "物业维修人员"
|
||||
},
|
||||
{
|
||||
"cid": "3941",
|
||||
"cname": "物业设施管理人员"
|
||||
},
|
||||
{
|
||||
"cid": "3942",
|
||||
"cname": "房产项目配套工程师"
|
||||
},
|
||||
{
|
||||
"cid": "3943",
|
||||
"cname": "房地产销售人员"
|
||||
},
|
||||
{
|
||||
"cid": "3944",
|
||||
"cname": "房地产评估"
|
||||
},
|
||||
{
|
||||
"cid": "3945",
|
||||
"cname": "施工人员"
|
||||
},
|
||||
{
|
||||
"cid": "3946",
|
||||
"cname": "规划设计师"
|
||||
},
|
||||
{
|
||||
"cid": "3947",
|
||||
"cname": "测绘员/测量员"
|
||||
},
|
||||
{
|
||||
"cid": "3948",
|
||||
"cname": "资料员"
|
||||
},
|
||||
{
|
||||
"cid": "3949",
|
||||
"cname": "安全主任"
|
||||
},
|
||||
{
|
||||
"cid": "3950",
|
||||
"cname": "铁路工程"
|
||||
},
|
||||
{
|
||||
"cid": "3951",
|
||||
"cname": "智能大厦/布线/弱电/安防"
|
||||
},
|
||||
{
|
||||
"cid": "3952",
|
||||
"cname": "房地产项目/开发/策划经理"
|
||||
},
|
||||
{
|
||||
"cid": "3953",
|
||||
"cname": "高级物业顾问"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "3983",
|
||||
"name": "交通运输(海陆空)类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "3984",
|
||||
"cname": "交通运输(海陆空)类"
|
||||
},
|
||||
{
|
||||
"cid": "3985",
|
||||
"cname": "调度员"
|
||||
},
|
||||
{
|
||||
"cid": "3986",
|
||||
"cname": "船员"
|
||||
},
|
||||
{
|
||||
"cid": "3987",
|
||||
"cname": "乘务员"
|
||||
},
|
||||
{
|
||||
"cid": "3988",
|
||||
"cname": "司机"
|
||||
},
|
||||
{
|
||||
"cid": "3989",
|
||||
"cname": "航空/列车/船舶操作维修"
|
||||
},
|
||||
{
|
||||
"cid": "3990",
|
||||
"cname": "公交/地铁"
|
||||
},
|
||||
{
|
||||
"cid": "3991",
|
||||
"cname": "空乘人员"
|
||||
},
|
||||
{
|
||||
"cid": "3992",
|
||||
"cname": "船务/空运陆运操作"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "4088",
|
||||
"name": "地矿冶金类",
|
||||
"child": [
|
||||
{
|
||||
"cid": "4089",
|
||||
"cname": "地矿冶金类"
|
||||
},
|
||||
{
|
||||
"cid": "4090",
|
||||
"cname": "采矿工程师"
|
||||
},
|
||||
{
|
||||
"cid": "4091",
|
||||
"cname": "选矿工程师"
|
||||
},
|
||||
{
|
||||
"cid": "4092",
|
||||
"cname": "矿物加工工程师"
|
||||
},
|
||||
{
|
||||
"cid": "4093",
|
||||
"cname": "矿山管理/采矿管理"
|
||||
},
|
||||
{
|
||||
"cid": "4094",
|
||||
"cname": "矿山/采矿安全管理"
|
||||
},
|
||||
{
|
||||
"cid": "4095",
|
||||
"cname": "爆破技术"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
2011
web/tsrcw/index.html
Normal file
16479
web/tsrcw/job_info.json
Normal file
175
web/tsrcw/main.py
Normal file
@ -0,0 +1,175 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
|
||||
class Tsrcw:
    """Scraper for job postings on www.tsrcw.com.

    The crawl is split into stages that hand data to each other through JSON
    files in the current working directory:

    * ``get_index2html``   -> writes ``index.html``
    * ``get_idlist``       -> reads ``idlist.json``
    * ``get_detaillist``   -> writes ``plist.json``
    * ``get_poition_info`` -> reads ``plist.json``, writes ``job_info.json``
      and ``position_info_back.json``
    """

    BASE_URL = "https://www.tsrcw.com"
    LISTING_URL = "https://www.tsrcw.com/persondh/latest.aspx"
    # A listing page with at least this many rows triggers a fetch of page 2.
    PAGE_SIZE = 20

    def __init__(self, headers=None, cookies=None, timeout=30):
        """Store the request headers, cookies and timeout used by every call.

        Args:
            headers: Optional replacement for the built-in browser-like
                request headers.
            cookies: Optional replacement for the built-in session cookies.
            timeout: Seconds passed to ``requests.get`` so a stalled
                connection cannot hang the crawl forever.
        """
        self.timeout = timeout
        self.headers = headers if headers is not None else {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
            "cache-control": "no-cache",
            "pragma": "no-cache",
            "priority": "u=0, i",
            "referer": "https://www.tsrcw.com/persondh/latest.aspx",
            "sec-ch-ua": "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
        }
        # NOTE(review): these look like credentials of a real logged-in
        # session committed to source control -- consider loading them from
        # the environment or passing them in via ``cookies``.
        self.cookies = cookies if cookies is not None else {
            "ASP.NET_SessionId": "1hroesd0og5cqszyv42jkf30",
            "yzmCookiestr": "ImgCode=1132&ExpireDate=2025/3/19 13:22:40&HaveUsed=1",
            "PersonUser": "name=wxfkali222&key=0A1AD61BFD75D12B25A946E01AA2E894"
        }

    # ------------------------------------------------------------------ #
    # private helpers
    # ------------------------------------------------------------------ #
    def _get(self, url, params=None):
        """Issue a GET with the stored headers, cookies and timeout."""
        return requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            params=params,
            timeout=self.timeout,
        )

    def _fetch_tree(self, url, params=None):
        """Fetch *url* and return the response body parsed by ``etree.HTML``."""
        return etree.HTML(self._get(url, params).text)

    @staticmethod
    def _parse_listing(tree):
        """Return one dict per row of a listing page (empty list if none)."""
        if tree is None:
            return []
        position_name = tree.xpath("//td[@class='text-left']/p/a/text()")
        position_url = tree.xpath("//td[@class='text-left']/p/a/@href")
        company_name = tree.xpath("//td[@class='w400']/div/span/a/text()")
        company_url = tree.xpath("//td[@class='w400']/div/span/a/@href")
        # zip() stops at the shortest column, so a row with a missing link
        # truncates the result instead of raising IndexError.
        return [
            {
                "position_name": p_name,
                "position_url": p_url,
                "company_name": c_name,
                "company_url": c_url,
            }
            for p_name, p_url, c_name, c_url in zip(
                position_name, position_url, company_name, company_url
            )
        ]

    @staticmethod
    def _image_value(src):
        """Return the text after ``value=`` in an image URL, or '' if absent."""
        parts = src.split('value=')
        return parts[1] if len(parts) > 1 else ''

    def _parse_job_info(self, tree):
        """Build the ``job_info`` dict for one posting page.

        Returns ``None`` when the page yields neither requirement text nor
        contact labels; the caller treats such a page as skipped.
        """
        if tree is None:
            return None

        job_info = {}
        for row in tree.xpath("//div[@class='baseinfo']/table/tr"):
            keys = [key.strip() for key in row.xpath("th/text()") if key.strip()]
            values = [''.join(value.xpath(".//text()")).strip() for value in row.xpath("td")]
            # Pad with empty strings so every header gets a value.
            values.extend([''] * (len(keys) - len(values)))
            for key, value in zip(keys, values):
                value = value.replace(":", "")
                key = key.replace("\u3000\u3000", "")
                value = value.replace("\r\n ", "")
                job_info[key] = value

        job_info["福利"] = tree.xpath("//div[@class='s12_div']/text()")

        yq = tree.xpath("//div[@class='requirement']/div[@class='content']/text()")
        yq = [i.replace('\r\n ', '').replace('\r', '').strip() for i in yq if i.strip()]
        job_info["要求"] = yq

        lxk = tree.xpath("//div[@class='contactus']/div[@class='content']/ul/li/span/text()")
        lxk = [i.replace(' ', '').strip() for i in lxk if i.strip()]
        lxv = tree.xpath("//div[@class='contactus']/div[@class='content']/ul/li/text()")
        lxv = [i.replace(':', '').strip() for i in lxv if i.strip()]
        lximg = tree.xpath("//div[@class='contactus']/div[@class='content']/ul/li/img/@src")

        if not yq and not lxk:
            return None

        # When the 2nd and 3rd contact values are blank, the original code
        # takes them from the ``value=`` part of the two <img> URLs instead.
        # The length guards keep a page with fewer entries from raising
        # IndexError and aborting the whole crawl.
        if len(lxv) > 2 and len(lximg) > 1 and lxv[1] == '' and lxv[2] == '':
            lxv[1] = self._image_value(lximg[0])
            lxv[2] = self._image_value(lximg[1])

        job_info["联系"] = dict(zip(lxk, lxv))
        return job_info

    # ------------------------------------------------------------------ #
    # public stages
    # ------------------------------------------------------------------ #
    def get_index2html(self):
        """Save the site home page to ``index.html`` and list its AJAX actions.

        Returns:
            list[str]: every ``action`` value found in
            ``url: '/html/ashx/globla.ashx?action=...'`` snippets of the page.
        """
        html = self._get("https://www.tsrcw.com/default.aspx").text
        with open("index.html", "w", encoding="utf-8") as f:
            f.write(html)
        return re.findall(r'url: \'/html/ashx/globla\.ashx\?action=(.*?)\'', html)

    def get_idlist(self):
        """Return the raw text of ``idlist.json`` from the working directory.

        The file must already exist. The original source carried a
        commented-out snippet that produced it from the ``msg`` field of
        ``/html/ashx/globla.ashx?action=zwlistEight``.
        """
        with open("idlist.json", "r", encoding="utf-8") as f:
            return f.read()

    def get_detaillist(self, jsonf):
        """Attach a ``position_list`` to each sub-category and save the result.

        Args:
            jsonf: JSON text as returned by ``get_idlist`` -- a list of
                ``{"name", "child": [{"cid", "cname"}, ...]}`` objects.

        Children whose ``cname`` equals the parent ``name`` are skipped and
        left untouched. The enriched structure is written to ``plist.json``.
        """
        idlist = json.loads(jsonf)
        for item in idlist:
            for c in item.get('child') or []:
                if c.get("cname") == item.get("name"):
                    continue
                params = {"job": "{}".format(c.get('cid'))}
                position_list = self._parse_listing(
                    self._fetch_tree(self.LISTING_URL, params)
                )
                if len(position_list) >= self.PAGE_SIZE:
                    # Original author's note: the site never has a third page.
                    params2 = params.copy()
                    params2["page"] = "2"
                    position_list.extend(
                        self._parse_listing(self._fetch_tree(self.LISTING_URL, params2))
                    )
                c["position_list"] = position_list

        with open("plist.json", "w", encoding="utf-8") as f:
            f.write(json.dumps(idlist, ensure_ascii=False))

    def get_poition_info(self):
        """Fetch every posting listed in ``plist.json`` and store its details.

        Each position dict gains a ``job_info`` entry. The whole structure is
        written to ``job_info.json``, and the last visited child is written
        to ``position_info_back.json`` (nothing is written there when
        ``plist.json`` has no children at all).

        Returns:
            list[str]: URLs of postings skipped because they had neither
            requirement text nor contact labels.
        """
        skipped = []
        done = 0
        last_child = None

        with open("plist.json", "r", encoding="utf-8") as f:
            plist = json.loads(f.read())

        for item in plist:
            for c in item.get('child') or []:
                last_child = c
                if c.get("cname") == item.get("name"):
                    continue
                position_list = c.get("position_list") or []
                for position in position_list:
                    url = self.BASE_URL + position.get("position_url")
                    print(url)
                    job_info = self._parse_job_info(self._fetch_tree(url))
                    if job_info is None:
                        skipped.append(url)
                        continue
                    position["job_info"] = job_info
                    print("=====", done, "=====")
                    done += 1

        with open("job_info.json", "w", encoding="utf-8") as f:
            f.write(json.dumps(plist, ensure_ascii=False))
        if last_child is not None:
            with open("position_info_back.json", "w", encoding="utf-8") as f:
                f.write(json.dumps(last_child, ensure_ascii=False))
        return skipped
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run only the detail-scraping stage, which reads the
    # plist.json file already present in the working directory.
    Tsrcw().get_poition_info()
|
3344
web/tsrcw/plist.json
Normal file
1
web/tsrcw/position_info_back.json
Normal file
@ -0,0 +1 @@
|
||||
{"cid": "4095", "cname": "爆破技术", "position_list": []}
|
105
web/tstczpw_dtangshan_com/main.py
Normal file
@ -0,0 +1,105 @@
|
||||
import sys, os
|
||||
import time
|
||||
import pandas as pd
|
||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
if project_root not in sys.path:
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
from web.Requests_Except import MR
|
||||
|
||||
# ---- target site ----------------------------------------------------------
base_url = 'tstczpw.dtangshan.com'
protocol = 'https'

# The same JWT is sent twice: as the Authorization bearer and as the `token`
# cookie. It is kept in one place so the two copies cannot drift apart.
# NOTE(review): this looks like a live credential committed to source control.
_SESSION_TOKEN = (
    'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
    '.eyJpZCI6MTE5NDA2LCJ1c2VybmFtZSI6IuS5iOW9pua4hSIsInB3ZCI6IjI5YmE3OTA3ZDUxNTE4MGNlNGU5ZmY0Mzk4ZmI5OGNiIiwiaWF0IjoxNzQ4NDgxNDQxLCJleHAiOjE3ODAwMTc0NDF9'
    '.EaF6zHc8TE-OsmUW_no3S9g-Ch7Af5xxoB1FtN0cY2U'
)

# ---- default request headers ----------------------------------------------
default_headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'authorization': 'Bearer ' + _SESSION_TOKEN,
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://tstczpw.dtangshan.com/uc/enterprise/resume-library?tab=resume',
    'sec-ch-ua': '"Chromium";v="136", "Microsoft Edge";v="136", "Not.A/Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
    'x-platform': '1',
    'x-site-id': 'undefined',
}

# ---- default cookies ------------------------------------------------------
default_cookies = {
    'x-trace-id': 'b1ed266a69b84acfb41a789948b17cf2',
    'logged': '1',
    '__csrf': 'd89b15be-c143-47e1-b6b2-f367e03e54f1',
    'Hm_lvt_f4456766547e6691e07b5e2eced1f70d': '1748435541,1748481435',
    'Hm_lpvt_f4456766547e6691e07b5e2eced1f70d': '1748481435',
    'HMACCOUNT': '212FF4B3AD499E5B',
    'token': _SESSION_TOKEN,
}

# ---- shared HTTP client ---------------------------------------------------
# MR comes from web.Requests_Except; it is configured once here and used by
# page_list() below.
Requests = MR(base_url, protocol)
Requests.set_default_headers(default_headers)
Requests.set_default_cookies(default_cookies)

# ---- spreadsheet accumulator ----------------------------------------------
# One list per output column, in the order the columns appear in the sheet.
# info() appends exactly one value to every list per resume.
pd_data = {
    column: []
    for column in (
        'resume_id',
        '姓名',       # name
        '求职区域',   # desired work region
        '学历',       # education level
        '婚姻',       # marital status
        '年龄',       # age
        '求职状态',   # job-seeking status
        '工作1经历',  # first work category
        '工作2经历',  # second work category
        '工作年限',   # years of experience
        '薪资',       # expected salary
    )
}
|
||||
|
||||
def page_list(page: int = 1, keyword: str = '财务', page_size: int = 100):
    """Fetch one page of resume search results from ``/api/v1/resumes``.

    Args:
        page: 1-based page index, sent as ``pageIndex``.
        keyword: Search keyword; defaults to the previously hard-coded value.
        page_size: Results per page, sent as ``pageSize``; defaults to the
            previously hard-coded 100.

    Returns:
        Whatever ``response.to_Dict()`` produces. ``info()`` reads it with
        attribute access (``data.data.items``), so it is presumably an
        attribute-style mapping rather than the ``str`` the old annotation
        claimed -- TODO confirm against ``web.Requests_Except``.
    """
    # Read the clock once so `t` is exactly 200 ms behind `_`; the original
    # took two separate readings.
    now_ms = int(time.time() * 1000)
    params = {
        '_': str(now_ms),
        'tab': 'resume',
        'keyword': keyword,
        't': str(now_ms - 200),
        'pageSize': str(page_size),
        'pageIndex': str(page),
        'showStatus': 'true',
    }
    return Requests.get('/api/v1/resumes', params=params).to_Dict()
|
||||
|
||||
def info(data):
    """Append one row per resume in *data* to the module-level ``pd_data``.

    Args:
        data: Result of ``page_list()``; the resumes are read from
            ``data.data.items`` and each field is read as an attribute.

    Every column list in ``pd_data`` receives exactly one value per resume,
    so the lists stay the same length for ``pd.DataFrame``.
    """
    # The loop variable is named `resume` rather than `info` so it no longer
    # shadows this function's own name.
    for resume in data.data.items:
        pd_data['resume_id'].append(resume.id)
        pd_data['姓名'].append(resume.name_value)
        pd_data['求职状态'].append(resume.job_instant_value)
        pd_data['工作年限'].append(resume.work_exp_value)
        pd_data['婚姻'].append(resume.marriage_value)
        pd_data['年龄'].append(resume.age)

        # Both bounds missing or zero (in any combination) means "negotiable".
        # The original matched only 0/0 and None/None, so a mixed pair was
        # rendered as "0~None".
        if not resume.job_salary_from and not resume.job_salary_to:
            salary = "面议"
        else:
            salary = f"{resume.job_salary_from}~{resume.job_salary_to}"
        pd_data['薪资'].append(salary)

        pd_data['学历'].append(resume.edu_value)
        pd_data['求职区域'].append(resume.job_region_value)

        # Keep at most the first two category names, padding with '' so both
        # columns always get a value. A missing list is treated as empty.
        categories = resume.infoCateforyArrObj or []
        names = [category.name for category in categories[:2]]
        names.extend([''] * (2 - len(names)))
        pd_data['工作1经历'].append(names[0])
        pd_data['工作2经历'].append(names[1])
|
||||
|
||||
# Script body: collect the first result page into pd_data, then dump the
# accumulated columns to an Excel file named after the target host.
first_page = page_list(1)
info(first_page)

df = pd.DataFrame(pd_data)
output_path = f'{base_url}_财务.xlsx'
df.to_excel(output_path, index=False)
|
@ -0,0 +1,9 @@
|
||||
{
|
||||
"HMACCOUNT": "212FF4B3AD499E5B",
|
||||
"Hm_lpvt_f4456766547e6691e07b5e2eced1f70d": "1748481435",
|
||||
"Hm_lvt_f4456766547e6691e07b5e2eced1f70d": "1748435541,1748481435",
|
||||
"__csrf": "d89b15be-c143-47e1-b6b2-f367e03e54f1",
|
||||
"logged": "1",
|
||||
"token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6MTE5NDA2LCJ1c2VybmFtZSI6IuS5iOW9pua4hSIsInB3ZCI6IjI5YmE3OTA3ZDUxNTE4MGNlNGU5ZmY0Mzk4ZmI5OGNiIiwiaWF0IjoxNzQ4NDgxNDQxLCJleHAiOjE3ODAwMTc0NDF9.EaF6zHc8TE-OsmUW_no3S9g-Ch7Af5xxoB1FtN0cY2U",
|
||||
"x-trace-id": "b1ed266a69b84acfb41a789948b17cf2"
|
||||
}
|
10680
web/ubereats/Adata.json
Normal file
158
web/ubereats/II/Adata.json
Normal file
@ -0,0 +1,158 @@
|
||||
[
|
||||
{
|
||||
"name": "Braised Whole Fish",
|
||||
"list": [
|
||||
{
|
||||
"name": "For One",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "For Two",
|
||||
"price": 3.82
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 1
|
||||
},
|
||||
{
|
||||
"name": "Mala Tang",
|
||||
"list": [
|
||||
{
|
||||
"name": "For One",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "For Two",
|
||||
"price": 4.04
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 1
|
||||
},
|
||||
{
|
||||
"name": "3PP. TOGO Grilled Fish",
|
||||
"list": [
|
||||
{
|
||||
"name": "TOGO Grilled Fish Tray",
|
||||
"price": 2.0
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 99
|
||||
},
|
||||
{
|
||||
"name": "Protein Choice(Doordash)",
|
||||
"list": [
|
||||
{
|
||||
"name": "Basil Chicken",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Beef",
|
||||
"price": 0.3
|
||||
},
|
||||
{
|
||||
"name": "Chicken",
|
||||
"price": 0.3
|
||||
},
|
||||
{
|
||||
"name": "Seafood",
|
||||
"price": 0.3
|
||||
},
|
||||
{
|
||||
"name": "Shrimp",
|
||||
"price": 0.3
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 1
|
||||
},
|
||||
{
|
||||
"name": "Spicy Level",
|
||||
"list": [
|
||||
{
|
||||
"name": "Spicy",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "No Spicy",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "mild spicy",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "medium spicy",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "extra spicy",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "little spicy",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": false,
|
||||
"maxPermitted": 1
|
||||
},
|
||||
{
|
||||
"name": "Preparation Choice(Doordash)",
|
||||
"list": [
|
||||
{
|
||||
"name": "No Spicy",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Spicy",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 1
|
||||
},
|
||||
{
|
||||
"name": "Meat Temperature(Doordash)",
|
||||
"list": [
|
||||
{
|
||||
"name": "Medium",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Medium Rare",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Medium Well",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Rare",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Well Done",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 1
|
||||
},
|
||||
{
|
||||
"name": "Sauce Choice(Doordash)",
|
||||
"list": [
|
||||
{
|
||||
"name": "Brown Sauce",
|
||||
"price": 0.0
|
||||
},
|
||||
{
|
||||
"name": "Spicy Sauce",
|
||||
"price": 0.0
|
||||
}
|
||||
],
|
||||
"required": true,
|
||||
"maxPermitted": 1
|
||||
}
|
||||
]
|
24065
web/ubereats/II/data.json
Normal file
710
web/ubereats/II/html.html
Normal file
504
web/ubereats/II/main.py
Normal file
@ -0,0 +1,504 @@
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import unquote
|
||||
import requests
|
||||
import json
|
||||
from lxml import etree
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
|
||||
|
||||
class ubereats:
|
||||
def __init__(self):
    """Set up the request state, open ``Menu.xlsx`` and fetch the store page.

    Construction has side effects: it loads the workbook from the working
    directory, calls ``get_Html()`` and calls ``modify_first_row()``.
    """
    # Browser-like request headers.
    self.headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'priority': 'u=0, i',
        'sec-ch-prefers-color-scheme': 'dark',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }

    # Query-string parameters sent with the store-page request.
    self.params = {
        'diningMode': 'PICKUP',
        'pl': 'JTdCJTIyYWRkcmVzcyUyMiUzQSUyMkhlbGx1JTIwQ29mZmVlJTIyJTJDJTIycmVmZXJlbmNlJTIyJTNBJTIyQ2hJSjdjWHV4cFlaMmpFUlBtd2dfeGR4TXNFJTIyJTJDJTIycmVmZXJlbmNlVHlwZSUyMiUzQSUyMmdvb2dsZV9wbGFjZXMlMjIlMkMlMjJsYXRpdHVkZSUyMiUzQTEuMjgzMzU3MyUyQyUyMmxvbmdpdHVkZSUyMiUzQTEwMy44NDg0NzMzJTdE',
        'ps': '1',
        'utm_campaign': 'CM2508147-search-free-nonbrand-google-pas_e_all_acq_Global',
        'utm_medium': 'search-free-nonbrand',
        'utm_source': 'google-pas',
    }

    # Session cookies, sent as a dict (the headers carry no raw cookie line).
    self.cookies = {
        'dId': '201f1380-db90-4fce-b53c-9e5f1a1797db',
        'uev2.diningMode': 'PICKUP',
        'marketing_vistor_id': 'b2b08d27-61c5-49a5-8960-3bf57d7dc740',
        'u-cookie-prefs': 'eyJ2ZXJzaW9uIjoxMDAsImRhdGUiOjE3NDExNzE2MjgyODAsImNvb2tpZUNhdGVnb3JpZXMiOlsiYWxsIl0sImltcGxpY2l0Ijp0cnVlfQ%3D%3D',
        'uev2.gg': 'true',
        '_gcl_au': '1.1.639637700.1741171631',
        '_scid': 'PKHcwyQACjnCw9d1hw_hGGK2mECWKPAw',
        '_fbp': 'fb.1.1741171631251.75638408989351243',
        '_ga': 'GA1.1.1953756175.1741171632',
        '_ScCbts': '%5B%5D',
        '_yjsu_yjad': '1741171631.b78cd8ba-9e38-46b9-b413-15deb0d5a676',
        '_sctr': '1%7C1741104000000',
        '_tt_enable_cookie': '1',
        '_ttp': '01JNJYND745JBKHC19JF1B3ENF_.tt.1',
        '_clck': 'oo6f3j%7C2%7Cfu1%7C0%7C1890',
        'uev2.loc': '%7B%22address%22%3A%7B%22address1%22%3A%22Hellu%20Coffee%22%2C%22address2%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22aptOrSuite%22%3A%22%22%2C%22eaterFormattedAddress%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%2C%20Singapore%20049965%22%2C%22subtitle%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22title%22%3A%22Hellu%20Coffee%22%2C%22uuid%22%3A%22%22%7D%2C%22latitude%22%3A1.2833573%2C%22longitude%22%3A103.8484733%2C%22reference%22%3A%22ChIJ7cXuxpYZ2jERPmwg_xdxMsE%22%2C%22referenceType%22%3A%22google_places%22%2C%22type%22%3A%22google_places%22%2C%22addressComponents%22%3A%7B%22city%22%3A%22%22%2C%22countryCode%22%3A%22SG%22%2C%22firstLevelSubdivisionCode%22%3A%22%22%2C%22postalCode%22%3A%22%22%7D%2C%22categories%22%3A%5B%22CAFE%22%2C%22FOOD_AND_BEVERAGE%22%2C%22RESTAURANT%22%2C%22SHOPS_AND_SERVICES%22%2C%22place%22%5D%2C%22originType%22%3A%22user_autocomplete%22%2C%22source%22%3A%22manual_auto_complete%22%2C%22userState%22%3A%22Unknown%22%7D',
        '_uetvid': '32f691f0f9af11efad4d6dc246fea42a',
        'uev2.embed_theme_preference': 'dark',
        'uev2.id.xp': '662c2f60-f06a-4035-ac3e-d95125b67b55',
        'uev2.id.session': '4df841ca-6232-462b-a703-922e6b339076',
        'uev2.ts.session': '1741695951647',
        '_ua': '{"session_id":"43b8b8f0-a29e-4afc-b413-1eb51d71fa4f","session_time_ms":1741695952168}',
        'jwt-session': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE3NDE2OTU5NTEsImRhdGEiOnsic2xhdGUtZXhwaXJlcy1hdCI6MTc0MTY5Nzc1MjE2OH0sImV4cCI6MTc0MTc4MjM1MX0.0A-JUI9H8p9GbF8psH4IUB5UYUBOdKxwt0hWuxNBs5k',
        'utag_main__sn': '6',
        'utag_main_ses_id': '1741695961152%3Bexp-session',
        'utag_main__pn': '1%3Bexp-session',
        'utm_medium': 'search-free-nonbrand',
        'utm_source': 'google-pas',
        '_scid_r': 'SSHcwyQACjnCw9d1hw_hGGK2mECWKPAwQvf-Ww',
        'utag_main__se': '2%3Bexp-session',
        'utag_main__ss': '0%3Bexp-session',
        'utag_main__st': '1741697763074%3Bexp-session',
        '_userUuid': '',
        '_ga_P1RM71MPFP': 'GS1.1.1741695965.7.1.1741695965.60.0.0',
    }

    # Filled in by get_Html().
    self.html = ""
    # The workbook is opened once and shared by the sheet helpers.
    self.wb = load_workbook('Menu.xlsx')
    self.get_Html()
    # NOTE(review): this rebinds the instance attribute to the *result* of the
    # method, so `self.modify_first_row` is no longer callable on this
    # instance afterwards -- confirm that is intended.
    self.modify_first_row = self.modify_first_row()
|
||||
|
||||
def clear_sheet(self, sheet):
|
||||
ws = self.wb[sheet]
|
||||
for row in ws.iter_rows(min_row=2): # 首行不清空
|
||||
for cell in row:
|
||||
if cell.value is not None:
|
||||
cell.value = None
|
||||
self.wb.save('Menu.xlsx')
|
||||
|
||||
def clear_except_first_row(self, sheet):
    """Wipe data and styling from every row below the header of ``sheet``.

    Steps, in order: unmerge all merged ranges, reset value/fill/font/
    alignment/border of each cell from row 2 down, delete those rows,
    reset row-level fill/font/alignment for the same row numbers, and
    save the workbook to 'Menu.xlsx'.

    Args:
        sheet: Name of the worksheet inside ``self.wb``.
    """
    worksheet = self.wb[sheet]

    # Loop over a copy of the ranges because unmerge_cells is called while
    # walking them.
    for span in list(worksheet.merged_cells.ranges):
        worksheet.unmerge_cells(str(span))

    last_row = worksheet.max_row
    last_col = worksheet.max_column
    body_rows = range(2, last_row + 1)  # empty when only the header exists

    for row_idx in body_rows:
        for col_idx in range(1, last_col + 1):
            target = worksheet.cell(row=row_idx, column=col_idx)
            target.value = None
            target.fill = PatternFill(fill_type=None)
            target.font = Font()
            target.alignment = Alignment()
            target.border = Border()

    # At least one row is always requested, even for a header-only sheet.
    if last_row > 2:
        rows_to_drop = last_row - 1
    else:
        rows_to_drop = 1
    worksheet.delete_rows(2, rows_to_drop)

    # Row-level styling is reset separately for the same row numbers.
    for row_idx in body_rows:
        if row_idx not in worksheet.row_dimensions:
            continue
        dimension = worksheet.row_dimensions[row_idx]
        dimension.fill = PatternFill(fill_type=None)
        dimension.font = Font()
        dimension.alignment = Alignment()

    self.wb.save('Menu.xlsx')
|
||||
|
||||
def get_Html(self):
    """Download the store page and cache its text in ``self.html``.

    The page is requested with the instance's ``params``, ``cookies`` and
    ``headers``. The decoded text is stored on the instance and also
    written to 'html_11.html'.

    Exactly one request is sent: the response that is actually consumed.
    """
    response = requests.get(
        'https://www.ubereats.com/store/china-kitchen/AhLIMBhFQdKqBmx4v7lzFQ',
        params=self.params,
        cookies=self.cookies,
        headers=self.headers,
    )
    # unicode_escape expands backslash escape sequences embedded in the page.
    # It reads the UTF-8 bytes as Latin-1, so non-ASCII characters come out
    # as mojibake and have to be repaired by whoever consumes self.html.
    self.html = response.text.encode('utf-8').decode('unicode_escape')
    with open('html_11.html', 'w', encoding="utf-8") as f:
        f.write(self.html)
|
||||
# with open('html.html', 'r', encoding='utf-8') as f:
|
||||
# self.html = f.read()
|
||||
|
||||
def get_Menu(self):
    """Write the menu name and the category list into the workbook.

    Category names are taken from the menu tab buttons in ``self.html``.
    The second ``typo-paragraphxsmall`` paragraph, with all whitespace
    removed, is used as the description written next to every category.
    Sheet "Menu" gets a single entry in A2; sheet "Categories" gets one
    row per category starting at row 2. The workbook is saved to
    'Menu.xlsx'.
    """
    tree = etree.HTML(self.html)
    menu_list = tree.xpath("//button[starts-with(@id, 'tabs-desktop-ofd-menu-tab-')]/span/text()")
    time_nodes = tree.xpath("//p[@data-baseweb='typo-paragraphxsmall']/text()")

    # Guard on the list that is actually indexed: a page with category tabs
    # but fewer than two matching paragraphs must not raise IndexError.
    if menu_list and len(time_nodes) > 1:
        # Undo the Latin-1 mojibake present in self.html.
        menu_time = time_nodes[1].encode('latin-1').decode('utf-8')
    else:
        menu_time = ""
    menu_time = re.sub(r'\s+', '', menu_time)  # Menu Description

    self.clear_sheet("Menu")
    ws = self.wb["Menu"]
    ws["A2"] = "Third Party Menu"

    self.clear_sheet("Categories")
    ws = self.wb["Categories"]
    for idx, item in enumerate(menu_list, start=2):
        ws.cell(row=idx, column=1, value="Third Party Menu")
        ws.cell(row=idx, column=2, value=item)
        ws.cell(row=idx, column=3, value="")  # translation, left empty
        ws.cell(row=idx, column=4, value=menu_time)
        ws.cell(row=idx, column=5, value="")

    self.wb.save('Menu.xlsx')
|
||||
|
||||
def modify_first_row(self):
    """Snapshot the values and styling of row 1 of the "Modifier" sheet.

    Returns:
        A dict keyed by 1-based column index. Each entry holds "value",
        "font", "alignment", "fill" and "border", rebuilt as fresh style
        objects. "fill" is None when the cell has no pattern type and
        "border" is None when the cell's border is falsy. The extra key
        "row_height" holds the height of row 1.
    """
    sheet = self.wb["Modifier"]
    header_row = 1
    snapshot = {}

    for column in range(1, sheet.max_column + 1):
        cell = sheet.cell(row=header_row, column=column)

        font = Font(
            name=cell.font.name,
            size=cell.font.size,
            bold=cell.font.bold,
            italic=cell.font.italic,
            underline=cell.font.underline,
            color=cell.font.color.rgb if cell.font.color else None,
        )
        alignment = Alignment(
            horizontal=cell.alignment.horizontal,
            vertical=cell.alignment.vertical,
            wrap_text=cell.alignment.wrap_text,
        )

        if cell.fill and cell.fill.patternType:
            fill = PatternFill(
                fill_type=cell.fill.patternType,
                fgColor=cell.fill.fgColor.rgb if cell.fill.fgColor else None,
                bgColor=cell.fill.bgColor.rgb if cell.fill.bgColor else None,
            )
        else:
            fill = None

        if cell.border:
            edges = cell.border
            border = Border(
                left=Side(style=edges.left.style, color=edges.left.color),
                right=Side(style=edges.right.style, color=edges.right.color),
                top=Side(style=edges.top.style, color=edges.top.color),
                bottom=Side(style=edges.bottom.style, color=edges.bottom.color),
            )
        else:
            border = None

        snapshot[column] = {
            "value": cell.value,
            "font": font,
            "alignment": alignment,
            "fill": fill,
            "border": border,
        }

    snapshot["row_height"] = sheet.row_dimensions[header_row].height
    return snapshot
|
||||
def get_item(self):
    """Populate the "Item" sheet from 'data.json' and export add-on groups.

    The "Item" and "Modifier" sheets are first cleared below their header
    row. Every catalog item of the first section in ``catalogSectionsMap``
    becomes one row, starting at row 2. For items with customizations,
    ``get_itemV1`` is queried and its add-on groups are merged by group
    name into one list, which is finally written to 'Adata.json'. The
    workbook is saved to 'Menu.xlsx'.
    """
    self.clear_except_first_row("Item")
    self.clear_except_first_row("Modifier")
    sheet = self.wb["Item"]
    addon_groups = []

    with open('data.json', 'r', encoding='utf-8') as source:
        page_state = json.load(source)

    store = page_state['queries'][0]['state']['data']
    storeUuid = store['uuid']
    sectionUuid = list(store["catalogSectionsMap"].keys())[0]

    row = 2
    for section in store["catalogSectionsMap"][sectionUuid]:
        items_payload = section['payload']['standardItemsPayload']
        category = items_payload['title']['text']
        for entry in items_payload['catalogItems']:
            menuItemUuid = entry['uuid']
            title = entry['title']
            price = entry['price'] / 100
            description = entry['itemDescription']
            if "/ ." in description:
                description = description.replace("/ .", "")
            # NOTE(review): as written this swaps "é" for itself; presumably
            # the needle was a mojibake sequence at some point - confirm.
            if "é" in description:
                description = description.replace("é", "é")
            customizable = entry['hasCustomizations']
            subsectionUuid = entry['subsectionUuid']

            if customizable:
                modifier = self.get_itemV1(storeUuid, sectionUuid, subsectionUuid, menuItemUuid)
                if modifier['ism'] != 1:
                    for group in modifier['addons']:
                        known = next((g for g in addon_groups if g["name"] == group["name"]), None)
                        if not known:
                            addon_groups.append(group)
                            continue
                        # Same group seen before: append only unseen options.
                        seen = {option["name"] for option in known["list"]}
                        fresh = [option for option in group["list"] if option["name"] not in seen]
                        known["list"].extend(fresh)

            fixed_cells = (
                (1, "Online Lunch Menu"),
                (2, category),
                (3, title),
                (4, ""),
                (5, price),
                (7, description),
                (8, "Sales Tax"),
            )
            for column, value in fixed_cells:
                sheet.cell(row=row, column=column, value=value)

            if not customizable:
                sheet.cell(row=row, column=6, value="")
            else:
                mode = modifier['ism']
                if mode in (3, 1):
                    # Size variants replace the plain price with
                    # "<price>/<size>" pairs joined by ';'.
                    priced_sizes = []
                    for size in modifier['sizes']:
                        amount = price if size['price'] == 0.0 else size['price'] + price
                        priced_sizes.append(f"{format(amount, '.2f')}/{size['name']}")
                    sheet.cell(row=row, column=5, value=";".join(priced_sizes))
                if mode == 3:
                    sheet.cell(row=row, column=6, value="\n".join(list(modifier['nameList'])))
                elif mode == 2:
                    group_names = [group['name'] for group in modifier['addons']]
                    sheet.cell(row=row, column=6, value="\n".join(group_names))
            row += 1

    self.wb.save('Menu.xlsx')

    with open('Adata.json', 'w', encoding='utf-8') as sink:
        json.dump(addon_groups, sink, ensure_ascii=False, indent=4)
|
||||
|
||||
def write_xlsx(self):
    """Rebuild the "Modifier" sheet from the add-on groups in 'Adata.json'.

    The sheet is cleared below its header first. The first group starts
    at row 2; every later group is preceded by a copy of the header row
    (values, styling and height) taken from the snapshot stored in
    ``self.modify_first_row``. A group's options occupy one row each, and
    the group-level columns (1, 2, 7, 8, 9, 10) are merged across those
    rows and centred. The workbook is saved to 'Menu.xlsx'.
    """
    sheet = self.wb["Modifier"]
    self.clear_except_first_row("Modifier")  # keeps row 1 only

    with open('Adata.json', 'r', encoding='utf-8') as source:
        groups = json.load(source)

    header = self.modify_first_row  # snapshot dict stored on the instance
    row = 2  # data starts directly below the header

    for group in groups:
        # Every group except the first gets its own copy of the header row.
        if row > 2:
            sheet.row_dimensions[row].height = header["row_height"]

            for column, style in header.items():
                if column == "row_height":
                    continue

                cell = sheet.cell(row=row, column=column)
                cell.value = style["value"]

                font = style["font"]
                if font:
                    cell.font = Font(
                        name=font.name,
                        size=font.size,
                        bold=font.bold,
                        italic=font.italic,
                        underline=font.underline,
                        color=font.color,
                    )
                alignment = style["alignment"]
                if alignment:
                    cell.alignment = Alignment(
                        horizontal=alignment.horizontal,
                        vertical=alignment.vertical,
                        wrap_text=alignment.wrap_text,
                    )
                fill = style["fill"]
                if fill and fill.patternType:
                    cell.fill = PatternFill(
                        fill_type=fill.patternType,
                        fgColor=fill.fgColor.rgb,
                        bgColor=fill.bgColor.rgb,
                    )
                border = style["border"]
                if border:
                    cell.border = Border(
                        left=Side(style=border.left.style, color=border.left.color),
                        right=Side(style=border.right.style, color=border.right.color),
                        top=Side(style=border.top.style, color=border.top.color),
                        bottom=Side(style=border.bottom.style, color=border.bottom.color),
                    )
            row += 1

        # Group-level columns go on the group's first row.
        first_row = row
        sheet.cell(row=row, column=1, value=group['name'])
        sheet.cell(row=row, column=2, value="")
        sheet.cell(row=row, column=7, value="Required" if group['required'] else "Not Required")
        sheet.cell(row=row, column=8, value="1")
        sheet.cell(row=row, column=9, value=group['maxPermitted'])
        sheet.cell(row=row, column=10, value="NO")

        for option in group['list']:
            sheet.cell(row=row, column=3, value=option['name'])
            sheet.cell(row=row, column=6, value=option['price'])
            row += 1
        row += 1  # reserves the row that the next group's header copy fills

        if row - first_row > 1:
            last_row = row - 2  # last row holding an option of this group
            for column in (1, 2, 7, 8, 9, 10):
                sheet.merge_cells(start_row=first_row, start_column=column, end_row=last_row, end_column=column)
                sheet.cell(row=first_row, column=column).alignment = Alignment(horizontal="center", vertical="center")

    self.wb.save('Menu.xlsx')
|
||||
|
||||
def get_itemV1(self, storeUuid, sectionUuid, subsectionUuid, menuItemUuid):
    """Fetch one menu item's customizations and classify them.

    Posts the four identifiers to the ``getMenuItemV1`` endpoint. Each
    returned option is treated as a size variant when its title starts
    with one of the known size markers, otherwise as an add-on. Options
    found in an option's ``childCustomizationList`` are always add-ons of
    the enclosing group. Prices are the API values divided by 100.

    Args:
        storeUuid: Store identifier sent as ``storeUuid``.
        sectionUuid: Section identifier sent as ``sectionUuid``.
        subsectionUuid: Subsection identifier sent as ``subsectionUuid``.
        menuItemUuid: Item identifier sent as ``menuItemUuid``.

    Returns:
        A dict with the keys:
        "ism": 0 = nothing found, 1 = sizes only, 2 = add-ons only,
            3 = sizes and add-ons.
        "sizes": list of {"name", "price"} size variants.
        "addons": list of {"name", "list", "required", "maxPermitted"}
            groups; only groups with at least one add-on entry.
        "nameList": for ism 3, one "<size name>: <first add-on group
            name>" string per size; otherwise empty.
    """
    # NOTE(review): a captured browser session (including a JWT) is
    # hard-coded here; it presumably expires and arguably should not live
    # in source control - confirm how it is meant to be refreshed.
    cookies = {
        'dId': '201f1380-db90-4fce-b53c-9e5f1a1797db',
        'uev2.diningMode': 'PICKUP',
        'marketing_vistor_id': 'b2b08d27-61c5-49a5-8960-3bf57d7dc740',
        'u-cookie-prefs': 'eyJ2ZXJzaW9uIjoxMDAsImRhdGUiOjE3NDExNzE2MjgyODAsImNvb2tpZUNhdGVnb3JpZXMiOlsiYWxsIl0sImltcGxpY2l0Ijp0cnVlfQ%3D%3D',
        'uev2.gg': 'true',
        '_gcl_au': '1.1.639637700.1741171631',
        '_scid': 'PKHcwyQACjnCw9d1hw_hGGK2mECWKPAw',
        '_fbp': 'fb.1.1741171631251.75638408989351243',
        '_ga': 'GA1.1.1953756175.1741171632',
        '_ScCbts': '%5B%5D',
        '_yjsu_yjad': '1741171631.b78cd8ba-9e38-46b9-b413-15deb0d5a676',
        '_sctr': '1%7C1741104000000',
        '_tt_enable_cookie': '1',
        '_ttp': '01JNJYND745JBKHC19JF1B3ENF_.tt.1',
        'uev2.embed_theme_preference': 'dark',
        'uev2.id.xp': 'bd485f5e-f8f1-4dce-bf88-c1e92a3cd4c0',
        '_ua': '{"session_id":"f08842b9-416c-4e82-bed0-25250e9abe14","session_time_ms":1741398128598}',
        'utm_medium': 'undefined',
        'utm_source': 'undefined',
        '_clck': 'oo6f3j%7C2%7Cfu1%7C0%7C1890',
        'uev2.loc': '%7B%22address%22%3A%7B%22address1%22%3A%22Hellu%20Coffee%22%2C%22address2%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22aptOrSuite%22%3A%22%22%2C%22eaterFormattedAddress%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%2C%20Singapore%20049965%22%2C%22subtitle%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22title%22%3A%22Hellu%20Coffee%22%2C%22uuid%22%3A%22%22%7D%2C%22latitude%22%3A1.2833573%2C%22longitude%22%3A103.8484733%2C%22reference%22%3A%22ChIJ7cXuxpYZ2jERPmwg_xdxMsE%22%2C%22referenceType%22%3A%22google_places%22%2C%22type%22%3A%22google_places%22%2C%22addressComponents%22%3A%7B%22city%22%3A%22%22%2C%22countryCode%22%3A%22SG%22%2C%22firstLevelSubdivisionCode%22%3A%22%22%2C%22postalCode%22%3A%22%22%7D%2C%22categories%22%3A%5B%22CAFE%22%2C%22FOOD_AND_BEVERAGE%22%2C%22RESTAURANT%22%2C%22SHOPS_AND_SERVICES%22%2C%22place%22%5D%2C%22originType%22%3A%22user_autocomplete%22%2C%22source%22%3A%22manual_auto_complete%22%2C%22userState%22%3A%22Unknown%22%7D',
        'uev2.id.session': '92a4abd6-d9c4-4a11-97ee-ae4a7083eedd',
        'uev2.ts.session': '1741414539843',
        'jwt-session': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJkYXRhIjp7InNsYXRlLWV4cGlyZXMtYXQiOjE3NDE0MTYzNDAyOTl9LCJpYXQiOjE3NDEzOTgxMjksImV4cCI6MTc0MTQ4NDUyOX0.hpvIQEo4HoKsyOfRlVGrxuXN1dO_R_9k_tHCbMe_q3s',
        'utag_main__sn': '5',
        'utag_main_ses_id': '1741414545690%3Bexp-session',
        'utag_main__pn': '1%3Bexp-session',
        '_scid_r': 'SCHcwyQACjnCw9d1hw_hGGK2mECWKPAwQvf-Wg',
        'utag_main__se': '2%3Bexp-session',
        'utag_main__ss': '0%3Bexp-session',
        'utag_main__st': '1741416357381%3Bexp-session',
        '_userUuid': '',
        '_ga_P1RM71MPFP': 'GS1.1.1741414542.6.1.1741414563.39.0.0',
        '_uetsid': '8fabeee0fbbe11ef9b636786bd2c1b62',
        '_uetvid': '32f691f0f9af11efad4d6dc246fea42a',
    }
    headers = {
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'content-type': 'application/json',
        'origin': 'https://www.ubereats.com',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'sec-ch-prefers-color-scheme': 'dark',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'x-csrf-token': 'x',
        'x-uber-client-gitref': 'a7f1b446e212f9e1ae1ee1c6541dbf565c7c6293'
    }

    json_data = {
        'itemRequestType': 'ITEM',
        'storeUuid': storeUuid,
        'sectionUuid': sectionUuid,
        'subsectionUuid': subsectionUuid,
        'menuItemUuid': menuItemUuid,
        'cbType': 'EATER_ENDORSED',
        'contextReferences': [
            {
                'type': 'GROUP_ITEMS',
                'payload': {
                    'type': 'groupItemsContextReferencePayload',
                    'groupItemsContextReferencePayload': {},
                },
                'pageContext': 'UNKNOWN',
            },
        ],
    }

    # No explicit proxy is configured; requests' default proxy handling applies.
    response = requests.post(
        'https://www.ubereats.com/_p/api/getMenuItemV1',
        cookies=cookies,
        headers=headers,
        json=json_data,
    ).json()

    # Title prefixes that mark an option as a size variant. A tuple lets
    # str.startswith test them all in one call.
    size_identifiers = ("(S)", "(L)", "(小)", "(大)", "(Half Gallon)", "(One Gallon)", "1.4pcs", "8pcs", "4pcs")
    data = {"ism": 0, "sizes": [], "addons": [], "nameList": []}
    has_size_option = False
    has_addon_option = False

    # A null customizationsList is treated like an empty one.
    customizationsList = response['data']['customizationsList'] or []

    for customizations in customizationsList:
        options = customizations['options']
        customization_entry = {"name": customizations['title'], "list": []}
        if options:
            # Group-level attributes are set once per group.
            customization_entry["required"] = customizations['minPermitted'] > 0
            customization_entry["maxPermitted"] = customizations['maxPermitted']

        for item in options:
            option_title = item['title']
            price = item['price'] / 100
            if option_title.startswith(size_identifiers):
                data['sizes'].append({"name": option_title, "price": price})
                has_size_option = True
            else:
                customization_entry["list"].append({"name": option_title, "price": price})
                has_addon_option = True

            # Nested choices are flattened into the enclosing group's add-ons.
            for child_customization in item.get("childCustomizationList") or []:
                for child_option in child_customization["options"]:
                    customization_entry["list"].append(
                        {"name": child_option["title"], "price": child_option["price"] / 100}
                    )
                    has_addon_option = True

        if customization_entry["list"]:
            data["addons"].append(customization_entry)

    if has_size_option and has_addon_option:
        data['ism'] = 3  # sizes + add-ons
        # has_addon_option implies at least one group was appended above.
        rename = data["addons"][0]["name"]
        data['nameList'] = [f"{size['name']}: {rename}" for size in data["sizes"]]
    elif has_size_option:
        data['ism'] = 1  # sizes only
    elif has_addon_option:
        data['ism'] = 2  # add-ons only

    print(data)  # progress / sanity output
    return data
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Keep the instance under its own name so the class `ubereats` stays
    # reachable after construction.
    scraper = ubereats()
    scraper.get_Menu()
    # Further pipeline steps, currently not run:
    #   scraper.get_item()
    #   scraper.write_xlsx()
|
710
web/ubereats/html.html
Normal file
710
web/ubereats/html_11.html
Normal file
253
web/ubereats/html_postman.html
Normal file
710
web/ubereats/html_postman2utf8.html
Normal file
451
web/ubereats/main.py
Normal file
@ -0,0 +1,451 @@
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import unquote
|
||||
import requests
|
||||
import json
|
||||
from lxml import etree
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||
|
||||
|
||||
class ubereats:
|
||||
def __init__(self):
    """Load the workbook, download the store page and snapshot the header.

    Reads 'Menu to 11.05PM.xlsx', fetches the page via ``get_Html`` and
    stores the result of ``modify_first_row()`` on the instance.
    """
    self.html = ""
    self.wb = load_workbook('Menu to 11.05PM.xlsx')
    self.get_Html()
    # The snapshot is stored under the method's own name, so from here on
    # ``self.modify_first_row`` is the returned dict, not the bound method.
    header_snapshot = self.modify_first_row()
    self.modify_first_row = header_snapshot
|
||||
|
||||
def clear_sheet(self, sheet):
    """Reset to None every non-empty cell value of ``sheet`` from row 2 down.

    Row 1 is not visited. The workbook is then written to 'Menu.xlsx'.

    Args:
        sheet: Name of the worksheet inside ``self.wb``.
    """
    worksheet = self.wb[sheet]
    # min_row=2 skips the header row.
    for cells in worksheet.iter_rows(min_row=2):
        for filled in (cell for cell in cells if cell.value is not None):
            filled.value = None
    self.wb.save('Menu.xlsx')
|
||||
|
||||
def clear_except_first_row(self, sheet):
    """Remove everything below the header row of ``sheet``.

    Unmerges all merged ranges, resets value and cell styling from row 2
    down, deletes those rows, resets row-level styling for the same row
    numbers, then saves the workbook to 'Menu.xlsx'.

    Args:
        sheet: Name of the worksheet inside ``self.wb``.
    """
    worksheet = self.wb[sheet]

    # Work on a copy of the ranges; unmerge_cells is called inside the loop.
    pending = list(worksheet.merged_cells.ranges)
    for span in pending:
        worksheet.unmerge_cells(str(span))

    last_row = worksheet.max_row
    last_col = worksheet.max_column
    columns = range(1, last_col + 1)
    body_rows = range(2, last_row + 1)  # empty for a header-only sheet

    for row_idx in body_rows:
        for col_idx in columns:
            target = worksheet.cell(row=row_idx, column=col_idx)
            target.value = None
            target.fill = PatternFill(fill_type=None)
            target.font = Font()
            target.alignment = Alignment()
            target.border = Border()

    # One row is requested even when nothing lies below the header.
    rows_to_drop = last_row - 1 if last_row > 2 else 1
    worksheet.delete_rows(2, rows_to_drop)

    # Row-level styling is reset separately for the same row numbers.
    for row_idx in body_rows:
        if row_idx in worksheet.row_dimensions:
            dimension = worksheet.row_dimensions[row_idx]
            dimension.fill = PatternFill(fill_type=None)
            dimension.font = Font()
            dimension.alignment = Alignment()

    self.wb.save('Menu.xlsx')
|
||||
|
||||
def get_Html(self):
    """Download the store page and cache its text in ``self.html``.

    A single GET with browser-like headers (including a captured Cookie
    header) is sent to a fixed store URL. The body is passed through
    ``unicode_escape`` decoding, stored on the instance and written to
    'html_postman2utf8.html'.
    """
    page_url = "https://www.ubereats.com/store/orlando-china-ocean/xFyut_WfRn6gd83QgBGLoA?diningMode=PICKUP&mod=storeDeliveryTime&modctx=%257B%2522entryPoint%2522%253A%2522store-auto-surface%2522%252C%2522encodedStoreUuid%2522%253A%2522xFyut_WfRn6gd83QgBGLoA%2522%257D&pl=JTdCJTIyYWRkcmVzcyUyMiUzQSUyMkhlbGx1JTIwQ29mZmVlJTIyJTJDJTIycmVmZXJlbmNlJTIyJTNBJTIyQ2hJSjdjWHV4cFlaMmpFUlBtd2dfeGR4TXNFJTIyJTJDJTIycmVmZXJlbmNlVHlwZSUyMiUzQSUyMmdvb2dsZV9wbGFjZXMlMjIlMkMlMjJsYXRpdHVkZSUyMiUzQTEuMjgzMzU3MyUyQyUyMmxvbmdpdHVkZSUyMiUzQTEwMy44NDg0NzMzJTdE&ps=1&sc=SEARCH_SUGGESTION"

    # NOTE(review): 'br' and 'zstd' are advertised below; presumably the
    # matching decoders are installed wherever this runs - confirm.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Alt-Used': 'www.ubereats.com',
        'Connection': 'keep-alive',
        'Cookie': 'uev2.id.xp=13f67607-d8f3-4ef4-ac9c-fae732d3a38c; dId=99b6b840-9ad5-4458-af7e-b832fa6602cb; uev2.id.session=a6af5007-9946-4ab1-a1bc-4d2b69736607; uev2.ts.session=1741796659278; uev2.diningMode=PICKUP; uev2.loc=%7B%22address%22%3A%7B%22address1%22%3A%22Hellu%20Coffee%22%2C%22address2%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22aptOrSuite%22%3A%22%22%2C%22eaterFormattedAddress%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%2C%20Singapore%20049965%22%2C%22subtitle%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22title%22%3A%22Hellu%20Coffee%22%2C%22uuid%22%3A%22%22%7D%2C%22latitude%22%3A1.2833573%2C%22longitude%22%3A103.8484733%2C%22reference%22%3A%22ChIJ7cXuxpYZ2jERPmwg_xdxMsE%22%2C%22referenceType%22%3A%22google_places%22%2C%22type%22%3A%22google_places%22%2C%22addressComponents%22%3A%7B%22city%22%3A%22%22%2C%22countryCode%22%3A%22SG%22%2C%22firstLevelSubdivisionCode%22%3A%22%22%2C%22postalCode%22%3A%22%22%7D%2C%22categories%22%3A%5B%22CAFE%22%2C%22FOOD_AND_BEVERAGE%22%2C%22RESTAURANT%22%2C%22SHOPS_AND_SERVICES%22%2C%22place%22%5D%2C%22originType%22%3A%22user_autocomplete%22%2C%22source%22%3A%22rev_geo_reference%22%2C%22userState%22%3A%22Unknown%22%7D; _ua={"session_id":"bdbf8384-0ffe-4501-90f8-6daa0eea2379","session_time_ms":1741796659351}; marketing_vistor_id=e17c0968-ef3d-4331-8211-c79c2ac7357e; jwt-session=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJkYXRhIjp7InNsYXRlLWV4cGlyZXMtYXQiOjE3NDE3OTg0NTkzNTB9LCJpYXQiOjE3NDE3OTY2NjAsImV4cCI6MTc0MTg4MzA2MH0.v-uUZO3RxqF6M29LFRNxNE_LpRMLWx7ApE7b7kPlQMQ; marketing_vistor_id=e17c0968-ef3d-4331-8211-c79c2ac7357e; uev2.diningMode=PICKUP; uev2.id.session=a6af5007-9946-4ab1-a1bc-4d2b69736607; uev2.ts.session=1741796659278',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Priority': 'u=0, i'
    }

    # The request carries an empty form payload, i.e. no body.
    response = requests.request("GET", page_url, headers=request_headers, data={})

    # unicode_escape expands backslash escape sequences embedded in the page.
    raw_bytes = response.text.encode('utf-8')
    self.html = raw_bytes.decode('unicode_escape')

    with open('html_postman2utf8.html', 'w', encoding="utf-8") as dump:
        dump.write(self.html)
|
||||
# with open('html_11.html', 'r', encoding='utf-8') as f:
|
||||
# self.html = f.read()
|
||||
|
||||
def get_Menu(self):
    """Write the menu name and the category list into the workbook.

    Category names are taken from the menu tab buttons in ``self.html``.
    The second ``typo-paragraphxsmall`` paragraph, with all whitespace
    removed, is used as the description written next to every category.
    Sheet "Menu" gets a single entry in A2; sheet "Categories" gets one
    row per category starting at row 2. The workbook is saved to
    'Menu.xlsx'.
    """
    tree = etree.HTML(self.html)
    menu_list = tree.xpath("//button[starts-with(@id, 'tabs-desktop-ofd-menu-tab-')]/span/text()")
    time_nodes = tree.xpath("//p[@data-baseweb='typo-paragraphxsmall']/text()")

    # Guard on the list that is actually indexed: a page with category tabs
    # but fewer than two matching paragraphs must not raise IndexError.
    if menu_list and len(time_nodes) > 1:
        # Undo the Latin-1 mojibake present in self.html.
        menu_time = time_nodes[1].encode('latin-1').decode('utf-8')
    else:
        menu_time = ""
    menu_time = re.sub(r'\s+', '', menu_time)  # Menu Description

    self.clear_sheet("Menu")
    ws = self.wb["Menu"]
    ws["A2"] = "Third Party Menu"

    self.clear_sheet("Categories")
    ws = self.wb["Categories"]
    for idx, item in enumerate(menu_list, start=2):
        ws.cell(row=idx, column=1, value="Third Party Menu")
        ws.cell(row=idx, column=2, value=item)
        ws.cell(row=idx, column=3, value="")  # translation, left empty
        ws.cell(row=idx, column=4, value=menu_time)
        ws.cell(row=idx, column=5, value="")

    self.wb.save('Menu.xlsx')
|
||||
|
||||
def modify_first_row(self):
    """Capture row 1 of the "Modifier" sheet as a reusable template.

    Returns:
        A dict mapping each 1-based column index to a dict with "value",
        "font", "alignment", "fill" and "border". The style objects are
        newly constructed from the source cell's attributes. "fill" is
        None for cells without a pattern type and "border" is None when
        the cell's border is falsy. The key "row_height" carries the
        height of row 1.
    """
    sheet = self.wb["Modifier"]
    template_row = 1
    template = {}

    for column in range(1, sheet.max_column + 1):
        origin = sheet.cell(row=template_row, column=column)
        src_font = origin.font
        src_align = origin.alignment

        entry = {"value": origin.value}
        entry["font"] = Font(
            name=src_font.name,
            size=src_font.size,
            bold=src_font.bold,
            italic=src_font.italic,
            underline=src_font.underline,
            color=src_font.color.rgb if src_font.color else None,
        )
        entry["alignment"] = Alignment(
            horizontal=src_align.horizontal,
            vertical=src_align.vertical,
            wrap_text=src_align.wrap_text,
        )

        entry["fill"] = None
        if origin.fill and origin.fill.patternType:
            entry["fill"] = PatternFill(
                fill_type=origin.fill.patternType,
                fgColor=origin.fill.fgColor.rgb if origin.fill.fgColor else None,
                bgColor=origin.fill.bgColor.rgb if origin.fill.bgColor else None,
            )

        entry["border"] = None
        if origin.border:
            entry["border"] = Border(
                left=Side(style=origin.border.left.style, color=origin.border.left.color),
                right=Side(style=origin.border.right.style, color=origin.border.right.color),
                top=Side(style=origin.border.top.style, color=origin.border.top.color),
                bottom=Side(style=origin.border.bottom.style, color=origin.border.bottom.color),
            )

        template[column] = entry

    template["row_height"] = sheet.row_dimensions[template_row].height
    return template
|
||||
|
||||
def get_item(self):
    """Populate the "Item" sheet from the page state and export add-on groups.

    The page state is the sixth ``application/json`` script block found in
    ``self.html``, URL-unquoted and parsed as JSON. The "Item" and
    "Modifier" sheets are cleared below their header row. Every catalog
    item of the first section in ``catalogSectionsMap`` becomes one row,
    starting at row 2. For items with customizations, ``get_itemV1`` is
    queried and its add-on groups are merged by group name into one list,
    which is finally written to 'Adata.json'. The workbook is saved to
    'Menu.xlsx'.
    """
    script_bodies = re.findall(r'<script type="application/json" id=".*?">(.*?)</script>', self.html, re.S)
    # The state of interest sits at a fixed position among the script blocks.
    page_state = json.loads(unquote(script_bodies[5]))

    self.clear_except_first_row("Item")
    self.clear_except_first_row("Modifier")
    sheet = self.wb["Item"]
    addon_groups = []

    store = page_state['queries'][0]['state']['data']
    storeUuid = store['uuid']
    sectionUuid = list(store["catalogSectionsMap"].keys())[0]

    row = 2
    for section in store["catalogSectionsMap"][sectionUuid]:
        items_payload = section['payload']['standardItemsPayload']
        category = items_payload['title']['text']
        for entry in items_payload['catalogItems']:
            menuItemUuid = entry['uuid']
            title = entry['title']
            price = entry['price'] / 100
            description = entry['itemDescription']
            if "/ ." in description:
                description = description.replace("/ .", "")
            # NOTE(review): as written this swaps "é" for itself; presumably
            # the needle was a mojibake sequence at some point - confirm.
            if "é" in description:
                description = description.replace("é", "é")
            customizable = entry['hasCustomizations']
            subsectionUuid = entry['subsectionUuid']

            if customizable:
                modifier = self.get_itemV1(storeUuid, sectionUuid, subsectionUuid, menuItemUuid)
                if modifier['ism'] != 1:
                    for group in modifier['addons']:
                        known = next((g for g in addon_groups if g["name"] == group["name"]), None)
                        if not known:
                            addon_groups.append(group)
                            continue
                        # Same group seen before: append only unseen options.
                        seen = {option["name"] for option in known["list"]}
                        fresh = [option for option in group["list"] if option["name"] not in seen]
                        known["list"].extend(fresh)

            fixed_cells = (
                (1, "Online Lunch Menu"),
                (2, category),
                (3, title),
                (4, ""),
                (5, price),
                (7, description),
                (8, "Sales Tax"),
            )
            for column, value in fixed_cells:
                sheet.cell(row=row, column=column, value=value)

            if not customizable:
                sheet.cell(row=row, column=6, value="")
            else:
                mode = modifier['ism']
                if mode in (3, 1):
                    # Size variants replace the plain price with
                    # "<price>/<size>" pairs joined by ';'.
                    priced_sizes = []
                    for size in modifier['sizes']:
                        amount = price if size['price'] == 0.0 else size['price'] + price
                        priced_sizes.append(f"{format(amount, '.2f')}/{size['name']}")
                    sheet.cell(row=row, column=5, value=";".join(priced_sizes))
                if mode == 3:
                    sheet.cell(row=row, column=6, value="\n".join(list(modifier['nameList'])))
                elif mode == 2:
                    group_names = [group['name'] for group in modifier['addons']]
                    sheet.cell(row=row, column=6, value="\n".join(group_names))
            row += 1

    self.wb.save('Menu.xlsx')

    with open('Adata.json', 'w', encoding='utf-8') as sink:
        json.dump(addon_groups, sink, ensure_ascii=False, indent=4)
|
||||
|
||||
def write_xlsx(self):
    """Rebuild the "Modifier" sheet from ``Adata.json`` and save ``Menu.xlsx``.

    Each add-on group loaded from the JSON file becomes a block of rows: the
    group-level columns (1, 2, 7, 8, 9, 10) are written on the block's first
    row and merged down over all option rows, while every option gets its own
    row in columns 3 (name) and 6 (price).  From the second block on, a copy
    of the cached first row (``self.modify_first_row``: values and formatting)
    is written above the block, and one empty row follows every block.
    """
    sheet = self.wb["Modifier"]
    self.clear_except_first_row("Modifier")  # wipe old data but keep the first row

    with open('Adata.json', 'r', encoding='utf-8') as fh:
        groups = json.load(fh)

    merged_columns = (1, 2, 7, 8, 9, 10)
    row = 2  # data starts directly below the existing first row

    for group in groups:
        # Only blocks after the first need their own copy of the header row.
        if row > 2:
            template = self.modify_first_row
            sheet.row_dimensions[row].height = template["row_height"]

            for col, spec in template.items():
                if col == "row_height":
                    continue

                cell = sheet.cell(row=row, column=col)
                cell.value = spec["value"]

                # Re-create each style object from the cached one.
                font = spec["font"]
                if font:
                    cell.font = Font(
                        name=font.name,
                        size=font.size,
                        bold=font.bold,
                        italic=font.italic,
                        underline=font.underline,
                        color=font.color
                    )

                alignment = spec["alignment"]
                if alignment:
                    cell.alignment = Alignment(
                        horizontal=alignment.horizontal,
                        vertical=alignment.vertical,
                        wrap_text=alignment.wrap_text
                    )

                fill = spec["fill"]
                if fill and fill.patternType:
                    cell.fill = PatternFill(
                        fill_type=fill.patternType,
                        fgColor=fill.fgColor.rgb,
                        bgColor=fill.bgColor.rgb
                    )

                border = spec["border"]
                if border:
                    cell.border = Border(
                        left=Side(style=border.left.style, color=border.left.color),
                        right=Side(style=border.right.style, color=border.right.color),
                        top=Side(style=border.top.style, color=border.top.color),
                        bottom=Side(style=border.bottom.style, color=border.bottom.color),
                    )
            row += 1

        # Group-level values live on the first row of the block.
        first_row = row
        sheet.cell(row=first_row, column=1, value=group['name'])
        sheet.cell(row=first_row, column=2, value="")
        sheet.cell(row=first_row, column=7, value="Required" if group['required'] else "Not Required")
        sheet.cell(row=first_row, column=8, value="1")
        sheet.cell(row=first_row, column=9, value=group['maxPermitted'])
        sheet.cell(row=first_row, column=10, value="NO")

        # One row per option of the group.
        for option in group['list']:
            sheet.cell(row=row, column=3, value=option['name'])
            sheet.cell(row=row, column=6, value=option['price'])
            row += 1

        last_row = row - 1
        row += 1  # leave one empty row after the block

        # Merge the group-level columns over the option rows (needs >= 1 option).
        if last_row >= first_row:
            for col in merged_columns:
                sheet.merge_cells(start_row=first_row, start_column=col, end_row=last_row, end_column=col)
                sheet.cell(row=first_row, column=col).alignment = Alignment(horizontal="center", vertical="center")

    self.wb.save('Menu.xlsx')
|
||||
|
||||
def get_itemV1(self, storeUuid, sectionUuid, subsectionUuid, menuItemUuid):
    """Fetch one menu item's customizations and classify them as sizes / add-ons.

    Posts to the ``getMenuItemV1`` endpoint and walks the returned
    ``customizationsList``.  An option whose title starts with one of the
    known size markers is collected under ``sizes``; every other option (and
    every child option) is collected in the add-on group of its customization.

    Args:
        storeUuid, sectionUuid, subsectionUuid, menuItemUuid: identifiers that
            are forwarded unchanged in the request payload.

    Returns:
        dict with the keys
        ``ism``      -- 0: nothing found, 1: sizes only, 2: add-ons only,
                        3: sizes and add-ons,
        ``sizes``    -- list of ``{"name", "price"}``,
        ``addons``   -- list of ``{"name", "list", "required", "maxPermitted"}``,
        ``nameList`` -- for ``ism == 3`` one ``"<size>: <first add-on group>"``
                        string per size, otherwise empty.

    Raises:
        requests.RequestException: on connection problems or timeout.
    """
    # NOTE(review): session cookies (including a JWT) are hard-coded below;
    # they look like a captured browser session and will presumably expire.
    cookies = {
        'dId': '201f1380-db90-4fce-b53c-9e5f1a1797db',
        'uev2.diningMode': 'PICKUP',
        'marketing_vistor_id': 'b2b08d27-61c5-49a5-8960-3bf57d7dc740',
        'u-cookie-prefs': 'eyJ2ZXJzaW9uIjoxMDAsImRhdGUiOjE3NDExNzE2MjgyODAsImNvb2tpZUNhdGVnb3JpZXMiOlsiYWxsIl0sImltcGxpY2l0Ijp0cnVlfQ%3D%3D',
        'uev2.gg': 'true',
        '_gcl_au': '1.1.639637700.1741171631',
        '_scid': 'PKHcwyQACjnCw9d1hw_hGGK2mECWKPAw',
        '_fbp': 'fb.1.1741171631251.75638408989351243',
        '_ga': 'GA1.1.1953756175.1741171632',
        '_ScCbts': '%5B%5D',
        '_yjsu_yjad': '1741171631.b78cd8ba-9e38-46b9-b413-15deb0d5a676',
        '_sctr': '1%7C1741104000000',
        '_tt_enable_cookie': '1',
        '_ttp': '01JNJYND745JBKHC19JF1B3ENF_.tt.1',
        'uev2.embed_theme_preference': 'dark',
        'uev2.id.xp': 'bd485f5e-f8f1-4dce-bf88-c1e92a3cd4c0',
        '_ua': '{"session_id":"f08842b9-416c-4e82-bed0-25250e9abe14","session_time_ms":1741398128598}',
        'utm_medium': 'undefined',
        'utm_source': 'undefined',
        '_clck': 'oo6f3j%7C2%7Cfu1%7C0%7C1890',
        'uev2.loc': '%7B%22address%22%3A%7B%22address1%22%3A%22Hellu%20Coffee%22%2C%22address2%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22aptOrSuite%22%3A%22%22%2C%22eaterFormattedAddress%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%2C%20Singapore%20049965%22%2C%22subtitle%22%3A%22137%20Amoy%20St%2C%20%2301-05%20Far%20East%20Square%22%2C%22title%22%3A%22Hellu%20Coffee%22%2C%22uuid%22%3A%22%22%7D%2C%22latitude%22%3A1.2833573%2C%22longitude%22%3A103.8484733%2C%22reference%22%3A%22ChIJ7cXuxpYZ2jERPmwg_xdxMsE%22%2C%22referenceType%22%3A%22google_places%22%2C%22type%22%3A%22google_places%22%2C%22addressComponents%22%3A%7B%22city%22%3A%22%22%2C%22countryCode%22%3A%22SG%22%2C%22firstLevelSubdivisionCode%22%3A%22%22%2C%22postalCode%22%3A%22%22%7D%2C%22categories%22%3A%5B%22CAFE%22%2C%22FOOD_AND_BEVERAGE%22%2C%22RESTAURANT%22%2C%22SHOPS_AND_SERVICES%22%2C%22place%22%5D%2C%22originType%22%3A%22user_autocomplete%22%2C%22source%22%3A%22manual_auto_complete%22%2C%22userState%22%3A%22Unknown%22%7D',
        'uev2.id.session': '92a4abd6-d9c4-4a11-97ee-ae4a7083eedd',
        'uev2.ts.session': '1741414539843',
        'jwt-session': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJkYXRhIjp7InNsYXRlLWV4cGlyZXMtYXQiOjE3NDE0MTYzNDAyOTl9LCJpYXQiOjE3NDEzOTgxMjksImV4cCI6MTc0MTQ4NDUyOX0.hpvIQEo4HoKsyOfRlVGrxuXN1dO_R_9k_tHCbMe_q3s',
        'utag_main__sn': '5',
        'utag_main_ses_id': '1741414545690%3Bexp-session',
        'utag_main__pn': '1%3Bexp-session',
        '_scid_r': 'SCHcwyQACjnCw9d1hw_hGGK2mECWKPAwQvf-Wg',
        'utag_main__se': '2%3Bexp-session',
        'utag_main__ss': '0%3Bexp-session',
        'utag_main__st': '1741416357381%3Bexp-session',
        '_userUuid': '',
        '_ga_P1RM71MPFP': 'GS1.1.1741414542.6.1.1741414563.39.0.0',
        '_uetsid': '8fabeee0fbbe11ef9b636786bd2c1b62',
        '_uetvid': '32f691f0f9af11efad4d6dc246fea42a',
    }
    headers = {
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'no-cache',
        'content-type': 'application/json',
        'origin': 'https://www.ubereats.com',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'sec-ch-prefers-color-scheme': 'dark',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
        'x-csrf-token': 'x',
        'x-uber-client-gitref': 'a7f1b446e212f9e1ae1ee1c6541dbf565c7c6293'
    }

    json_data = {
        'itemRequestType': 'ITEM',
        'storeUuid': storeUuid,
        'sectionUuid': sectionUuid,
        'subsectionUuid': subsectionUuid,
        'menuItemUuid': menuItemUuid,
        'cbType': 'EATER_ENDORSED',
        'contextReferences': [
            {
                'type': 'GROUP_ITEMS',
                'payload': {
                    'type': 'groupItemsContextReferencePayload',
                    'groupItemsContextReferencePayload': {},
                },
                'pageContext': 'UNKNOWN',
            },
        ],
    }

    # The request is deliberately sent without a proxy (proxies=None, as
    # before); a timeout keeps a stalled connection from hanging the scrape.
    response = requests.post('https://www.ubereats.com/_p/api/getMenuItemV1', cookies=cookies, headers=headers,
                             json=json_data, proxies=None, timeout=30).json()

    # Option titles starting with one of these markers denote a size variant.
    size_identifiers = ("(S)", "(L)", "(小)", "(大)", "(Half Gallon)", "(One Gallon)", "1.4pcs", "8pcs", "4pcs")
    data = {"ism": 0, "sizes": [], "addons": [], "nameList": []}
    has_size_option = False
    has_addon_option = False

    # Treat a missing or null list as "no customizations".
    customizationsList = response['data'].get('customizationsList') or []

    for customizations in customizationsList:
        # Group-level attributes do not depend on the individual option.
        customization_entry = {
            "name": customizations['title'],
            "list": [],
            "required": customizations['minPermitted'] > 0,
            "maxPermitted": customizations['maxPermitted'],
        }
        for item in customizations['options']:
            option_title = item['title']
            price = item['price'] / 100  # API price is divided by 100 (presumably cents)
            if option_title.startswith(size_identifiers):
                data['sizes'].append({"name": option_title, "price": price})
                has_size_option = True
            else:
                customization_entry["list"].append({"name": option_title, "price": price})
                has_addon_option = True

            # Child options are flattened into the parent customization's list.
            for child_customization in item.get("childCustomizationList") or []:
                for child_option in child_customization["options"]:
                    customization_entry["list"].append(
                        {"name": child_option["title"], "price": child_option["price"] / 100})
                    has_addon_option = True  # child options count as add-ons too

        # Groups that only contained size options are not add-on groups.
        if customization_entry["list"]:
            data["addons"].append(customization_entry)

    if has_size_option and has_addon_option:
        data['ism'] = 3  # sizes + add-ons
        # Pair every size with the name of the first add-on group.
        rename = data["addons"][0]["name"]
        data['nameList'] = [f"{size['name']}: {rename}" for size in data["sizes"]]
    elif has_size_option:
        data['ism'] = 1  # sizes only
    elif has_addon_option:
        data['ism'] = 2  # add-ons only

    print(data)  # debug output of the parsed structure
    return data
|
||||
|
||||
if __name__ == '__main__':
    # Keep the instance under its own name: ``ubereats = ubereats()`` rebound
    # the class name to the instance and made the class unreachable afterwards.
    scraper = ubereats()
    scraper.get_Menu()
    # NOTE(review): get_item() is called twice in a row. Kept as-is because its
    # body is not visible here; confirm whether the second call is intentional.
    scraper.get_item()
    scraper.get_item()
    scraper.write_xlsx()
|
||||
# ubereats.get_itemV1("","","","")
|
||||
# ubereats.get_itemV1("0212c830-1845-41d2-aa06-6c78bfb97315", "516658e0-667f-5063-a3e9-d9d3e13a2e53", "017a5d2c-88c7-5f1e-8c5e-d8bf76ac5d12", "28eaf6a2-f83b-5d67-b20a-1cd59b4ed42c")
|
||||
# ubereats.write_xlsx()
|
116
web/yutian_top/Download_.py
Normal file
@ -0,0 +1,116 @@
|
||||
import logging
|
||||
|
||||
from Requests_Except import MR
|
||||
from web.yutian_top.main import default_cookies, Requests
|
||||
|
||||
# Host and scheme handed to the MR request wrapper below.
base_url = 'www.yutian.top'
protocol = 'https'

# Headers installed as session defaults for every request.
default_headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'content-type': 'application/json;charset=UTF-8',
    'origin': 'https://www.yutian.top',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://www.yutian.top/enterprise/resume_store/list',
    'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
}

# Cookies installed as session defaults.  This assignment replaces the
# ``default_cookies`` imported from web.yutian_top.main above.
# NOTE(review): the session id and auth token are hard-coded credentials.
default_cookies = {
    'PHPSESSID': 'cac0b0c651b27ad30642869a4304c098',
    'auth-token': 'eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJleHAiOjE3NDU4NDg3MTUsImp0aSI6IjcwNjc4MWQ3LTJjMWYtNGZiZS04ZDM4LWRhMDRmYjdiMjljOSIsIm5hbWUiOiIxODYxNzg3MjE4NSIsInVzZXJfaWQiOiIwM2M2MmI5ODM4Yjk3Y2UzYmQxZTQwNDllZGVlNmI0OCIsInRlbmFudF90b2tlbiI6IjY1OTAxM2RlNjAxZmJmNjg1MzZmYTU0OTc4ODVkMTA2In0.1FH4SBYQu0CYhEzCzMlYBYZ2YsSM9kgkXWmgXcZ88Bs',
    'company_sign': '',
    'company_nonce': '',
    'cuid': '',
}

# Shared request client used by every helper in this module.  This assignment
# replaces the ``Requests`` imported from web.yutian_top.main above.
Requests = MR(base_url, protocol)
Requests.set_default_headers(default_headers)
Requests.set_default_cookies(default_cookies)

# Column-oriented result store filled by integrate() and written out by
# write_to_excel(); the three lists grow in lockstep.
psdata = {
    "resume_id": [],
    "姓名": [],  # user_name
    "电话": [],  # phone_encrypt
}

# Resume ids to process.
# NOTE(review): 1770 sits between 17890 and 17078 in an otherwise descending
# list - possibly a typo; confirm.
Down = [20891, 19784, 19715, 19280, 18130, 17890, 1770, 17078, 15460, 15424, 14868, 13687, 13517, 11724, 9513, 9454,
        8161, 3372, 3065, 993, 988]
|
||||
|
||||
|
||||
def buyResumeUserPhone(resume_id):
    """Call the ``buyResumeUserPhone`` endpoint for *resume_id*.

    Returns the JSON response converted by ``to_Dict()``.
    """
    payload = dict(resume_id=resume_id, from_type='')
    return Requests.post('/job/company/v1/company/buyResumeUserPhone', json=payload).to_Dict()
|
||||
|
||||
|
||||
def getResumeDownloadLink(resume_id):
    """Call the ``getResumeDownloadLink`` endpoint for *resume_id*.

    Returns the JSON response converted by ``to_Dict()``.
    """
    payload = dict(resume_id=resume_id, delivery_id='')
    return Requests.post('/job/company/v1/company/getResumeDownloadLink', json=payload).to_Dict()
|
||||
|
||||
|
||||
def getResumeUserPhone(resume_id):
    """Call the ``getResumeUserPhone`` endpoint for *resume_id*.

    Returns the JSON response converted by ``to_Dict()``.
    """
    payload = dict(resume_id=resume_id, delivery_id='', is_pc=1)
    return Requests.post('/job/company/v1/company/getResumeUserPhone', json=payload).to_Dict()
|
||||
|
||||
|
||||
def get_resume_info(resume_id):
    """Call the ``loadResume`` endpoint for *resume_id*.

    Returns the JSON response converted by ``to_Dict()``.
    """
    payload = dict(resume_id=resume_id)
    return Requests.post('/job/company/v1/resume/loadResume', json=payload).to_Dict()
|
||||
|
||||
|
||||
def integrate():
    """Collect name and phone for every resume id in ``Down`` into ``psdata``.

    For each id the resume is loaded and its phone is requested.  When the
    phone response has no ``phone`` attribute, the phone is bought via
    ``buyResumeUserPhone`` and requested again if the purchase reports
    success.  One entry per id is appended to ``psdata``; the phone is
    ``None`` when it could not be obtained.
    """
    for r_id in Down:
        user_info = get_resume_info(r_id)
        u_name = user_info.user_name

        # A response without ``phone`` means the number is not available yet.
        phone = getattr(getResumeUserPhone(r_id), 'phone', None)

        if phone is None:
            res = buyResumeUserPhone(r_id)
            print(res, r_id)
            # A response lacking ``buy_success`` is treated as a failed
            # purchase instead of aborting the whole batch.
            if getattr(res, 'buy_success', False):
                print("购买成功!")
                phone = getattr(getResumeUserPhone(r_id), 'phone', None)

        psdata['resume_id'].append(r_id)
        psdata['姓名'].append(u_name)
        psdata['电话'].append(phone)
|
||||
|
||||
def write_to_excel():
    """Write the collected ``psdata`` table to ``resume_data.xlsx``."""
    import pandas as pd

    output_path = 'resume_data.xlsx'
    df = pd.DataFrame(psdata)
    df.to_excel(output_path, index=True)
    # The message used to name a .csv file although an .xlsx file is written.
    print(f"数据已写入 {output_path}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: gather the data first, then export it.
    integrate()
    write_to_excel()
|
111
web/yutian_top/MRequest.py
Normal file
@ -0,0 +1,111 @@
|
||||
import requests
|
||||
import logging
|
||||
import time
|
||||
from lxml import etree
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Configure root logging at import time: DEBUG level, timestamped records.
# NOTE(review): this runs on every import of the module, not only when it is
# executed as a script.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
class ExtendedResponse(requests.Response):
    """``requests.Response`` extended with XPath and attribute-style JSON helpers."""

    def xpath(self, xpath_expr):
        """Parse the response text as HTML and evaluate *xpath_expr* on it.

        Raises:
            ValueError: if parsing or evaluation fails; the original error is
                attached as ``__cause__``.
        """
        try:
            tree = etree.HTML(self.text)
            return tree.xpath(xpath_expr)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e)) from e

    def to_Dict(self):
        """Decode the JSON body and convert it with :meth:`dict_to_obj`.

        Raises:
            ValueError: if decoding or conversion fails; the original error is
                attached as ``__cause__``.
        """
        try:
            data = self.json()
            return self.dict_to_obj(data)
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e)) from e

    @staticmethod
    def dict_to_obj(d):
        """Recursively turn dicts into ``SimpleNamespace`` objects.

        Lists are converted element-wise; any other value is returned as is.
        """
        if isinstance(d, dict):
            return SimpleNamespace(**{k: ExtendedResponse.dict_to_obj(v) for k, v in d.items()})
        elif isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(item) for item in d]
        else:
            return d
|
||||
|
||||
class MyRequests:
    """Small wrapper around ``requests.Session`` with base URL, timeout and retries."""

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10, default_cookies=None):
        """Create the session.

        Args:
            base_url: host (and optional path prefix); a trailing ``/`` is stripped.
            protocol: scheme used when building URLs from relative paths.
            retries: number of retries after a failed request.
            proxy_options: when truthy, route traffic through the local proxy
                at ``http://127.0.0.1:7890``.
            default_timeout: timeout applied when a call passes none.
            default_cookies: optional cookies added to the session.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()
        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
        if default_cookies:
            self.session.cookies.update(default_cookies)

    def _build_url(self, url):
        """Return *url* unchanged if absolute, else prefix it with protocol and base URL."""
        if url.startswith("http://") or url.startswith("https://"):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge *headers* into the session's default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge *cookies* into the session's cookie jar."""
        self.session.cookies.update(cookies)

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _request(self, method, url, retries=None, **kwargs):
        """Send the request, retrying with exponential back-off on any failure.

        A relative ``referer`` header is expanded to an absolute URL.  Any
        exception, including the ``HTTPError`` raised by ``raise_for_status()``,
        triggers a retry until *retries* is used up; the last exception is then
        re-raised.  The returned response is re-typed as ``ExtendedResponse``.

        NOTE(review): retries apply to every method, so a failed POST/PUT is
        re-sent as well - confirm this is acceptable for non-idempotent calls.
        """
        if retries is None:
            retries = self.retries
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.default_timeout

        # Expand the referer on a copy so the caller's dict is never mutated.
        headers = kwargs.get('headers')
        if headers and 'referer' in headers:
            kwargs['headers'] = {**headers, 'referer': self._build_url(headers['referer'])}

        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
            except Exception as e:
                if retries <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise  # bare raise keeps the original traceback
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {retries},错误: {e}")
                # Back-off grows with every retry already spent: 1s, 2s, 4s, ...
                time.sleep(2 ** (self.retries - retries))
                retries -= 1
            else:
                response.__class__ = ExtendedResponse
                return response

    def close(self):
        """Close the underlying session."""
        self.session.close()
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test against httpbin.org (goes through the local proxy
    # because proxy_options=True).
    req = MyRequests("httpbin.org", protocol="https", retries=3, proxy_options=True, default_timeout=5, default_cookies={"session": "abc"})
    req.set_default_headers({"User-Agent": "MyRequests/1.0"})
    try:
        # The relative referer is expanded to an absolute URL by _request().
        resp = req.get("/get", headers={"referer": "/page"})
        logging.info("状态码: %s", resp.status_code)
        logging.info("JSON: %s", resp.json())
        logging.info("XPath: %s", resp.xpath('//title/text()'))
        obj = resp.to_Dict()
        logging.info("转换对象: %s", obj)
    except Exception as ex:
        logging.error("请求失败: %s", ex)
    finally:
        # Always release the session, even when the request failed.
        req.close()
|
115
web/yutian_top/Requests_Except.py
Normal file
@ -0,0 +1,115 @@
|
||||
import requests
|
||||
import logging
|
||||
import time
|
||||
import json
|
||||
from lxml import etree
|
||||
from types import SimpleNamespace
|
||||
|
||||
# Configure root logging at import time: DEBUG level, timestamped records.
# NOTE(review): this runs on every import of the module, not only when it is
# executed as a script.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
class ExtendedResponse(requests.Response):
    """``requests.Response`` extended with XPath and attribute-style JSON helpers."""

    def xpath(self, xpath_expr):
        """Parse the response text as HTML and evaluate *xpath_expr* on it.

        Raises:
            ValueError: if parsing or evaluation fails; the original error is
                attached as ``__cause__``.
        """
        try:
            tree = etree.HTML(self.text)
            return tree.xpath(xpath_expr)
        except Exception as e:
            raise ValueError("XPath解析错误: " + str(e)) from e

    def to_Dict(self):
        """Decode the JSON body and convert it with :meth:`dict_to_obj`.

        Raises:
            ValueError: if decoding or conversion fails; the original error is
                attached as ``__cause__``.
        """
        try:
            data = self.json()
            return self.dict_to_obj(data)
        except Exception as e:
            raise ValueError("JSON转换错误: " + str(e)) from e

    @staticmethod
    def dict_to_obj(d):
        """Recursively turn dicts into ``SimpleNamespace`` objects.

        Lists are converted element-wise; any other value is returned as is.
        """
        if isinstance(d, dict):
            return SimpleNamespace(**{k: ExtendedResponse.dict_to_obj(v) for k, v in d.items()})
        elif isinstance(d, list):
            return [ExtendedResponse.dict_to_obj(item) for item in d]
        else:
            return d
|
||||
|
||||
class MyRequests:
    """Small wrapper around ``requests.Session`` with base URL, timeout and retries."""

    def __init__(self, base_url, protocol='http', retries=3, proxy_options=True, default_timeout=10, default_cookies=None):
        """Create the session.

        Args:
            base_url: host (and optional path prefix); a trailing ``/`` is stripped.
            protocol: scheme used when building URLs from relative paths.
            retries: number of retries after a failed request.
            proxy_options: when truthy, route traffic through the local proxy
                at ``http://127.0.0.1:7890``.
            default_timeout: timeout applied when a call passes none.
            default_cookies: optional cookies added to the session.
        """
        self.base_url = base_url.rstrip('/')
        self.protocol = protocol
        self.retries = retries
        self.default_timeout = default_timeout
        self.session = requests.Session()
        if proxy_options:
            self.session.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
        if default_cookies:
            self.session.cookies.update(default_cookies)

    def _build_url(self, url):
        """Return *url* unchanged if absolute, else prefix it with protocol and base URL."""
        if url.startswith("http://") or url.startswith("https://"):
            return url
        return f"{self.protocol}://{self.base_url}/{url.lstrip('/')}"

    def set_default_headers(self, headers):
        """Merge *headers* into the session's default headers."""
        self.session.headers.update(headers)

    def set_default_cookies(self, cookies):
        """Merge *cookies* into the session's cookie jar."""
        self.session.cookies.update(cookies)

    def get(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a GET request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("GET", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def post(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a POST request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("POST", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def update(self, url, data=None, json=None, headers=None, cookies=None, **kwargs):
        """Send a PUT request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("PUT", full_url, data=data, json=json, headers=headers, cookies=cookies, **kwargs)

    def delete(self, url, params=None, headers=None, cookies=None, **kwargs):
        """Send a DELETE request; *url* may be relative to the base URL."""
        full_url = self._build_url(url)
        return self._request("DELETE", full_url, params=params, headers=headers, cookies=cookies, **kwargs)

    def _request(self, method, url, retries=None, **kwargs):
        """Send the request, retrying with exponential back-off on any failure.

        A relative ``referer`` header is expanded to an absolute URL.  Any
        exception, including the ``HTTPError`` raised by ``raise_for_status()``,
        triggers a retry until *retries* is used up; the last exception is then
        re-raised.  The returned response is re-typed as ``ExtendedResponse``.

        NOTE(review): retries apply to every method, so a failed POST/PUT is
        re-sent as well - confirm this is acceptable for non-idempotent calls.
        """
        if retries is None:
            retries = self.retries
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.default_timeout

        # Expand the referer on a copy so the caller's dict is never mutated.
        headers = kwargs.get('headers')
        if headers and 'referer' in headers:
            kwargs['headers'] = {**headers, 'referer': self._build_url(headers['referer'])}

        while True:
            try:
                response = self.session.request(method, url, **kwargs)
                response.raise_for_status()
            except Exception as e:
                if retries <= 0:
                    logging.error(f"请求 {method} {url} 重试次数用尽")
                    raise  # bare raise keeps the original traceback
                logging.warning(f"请求 {method} {url} 失败,剩余重试次数 {retries},错误: {e}")
                # Back-off grows with every retry already spent: 1s, 2s, 4s, ...
                time.sleep(2 ** (self.retries - retries))
                retries -= 1
            else:
                response.__class__ = ExtendedResponse
                return response

    def close(self):
        """Close the underlying session."""
        self.session.close()
|
||||
|
||||
class MR(MyRequests):
    """Short alias of :class:`MyRequests`; adds no behaviour of its own."""
    pass
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test against httpbin.org (goes through the local proxy
    # because proxy_options=True).
    req = MyRequests("httpbin.org", protocol="https", retries=3, proxy_options=True, default_timeout=5, default_cookies={"session": "abc"})
    req.set_default_headers({"User-Agent": "MyRequests/1.0"})
    try:
        # The relative referer is expanded to an absolute URL by _request().
        resp = req.get("/get", headers={"referer": "/page"})
        logging.info("状态码: %s", resp.status_code)
        logging.info("JSON: %s", resp.json())
        logging.info("XPath: %s", resp.xpath('//title/text()'))
        obj = resp.to_Dict()
        # Attribute access on the converted object: logs its ``url`` field.
        logging.info("转换对象: %s", obj.url)
    except Exception as ex:
        logging.error("请求失败: %s", ex)
    finally:
        # Always release the session, even when the request failed.
        req.close()
|
BIN
web/yutian_top/captcha.png
Normal file
After Width: | Height: | Size: 1.5 KiB |
BIN
web/yutian_top/captcha_pic/0.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
web/yutian_top/captcha_pic/1.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
BIN
web/yutian_top/captcha_pic/10.png
Normal file
After Width: | Height: | Size: 1.9 KiB |
BIN
web/yutian_top/captcha_pic/100.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
web/yutian_top/captcha_pic/101.png
Normal file
After Width: | Height: | Size: 1.5 KiB |
BIN
web/yutian_top/captcha_pic/102.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
web/yutian_top/captcha_pic/103.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
web/yutian_top/captcha_pic/104.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
web/yutian_top/captcha_pic/105.png
Normal file
After Width: | Height: | Size: 1.5 KiB |
BIN
web/yutian_top/captcha_pic/106.png
Normal file
After Width: | Height: | Size: 2.0 KiB |
BIN
web/yutian_top/captcha_pic/107.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
web/yutian_top/captcha_pic/108.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
web/yutian_top/captcha_pic/109.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
web/yutian_top/captcha_pic/11.png
Normal file
After Width: | Height: | Size: 1.7 KiB |
BIN
web/yutian_top/captcha_pic/110.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
web/yutian_top/captcha_pic/111.png
Normal file
After Width: | Height: | Size: 1.5 KiB |
BIN
web/yutian_top/captcha_pic/112.png
Normal file
After Width: | Height: | Size: 1.6 KiB |
BIN
web/yutian_top/captcha_pic/113.png
Normal file
After Width: | Height: | Size: 1.7 KiB |
BIN
web/yutian_top/captcha_pic/114.png
Normal file
After Width: | Height: | Size: 1.8 KiB |
BIN
web/yutian_top/captcha_pic/115.png
Normal file
After Width: | Height: | Size: 1.7 KiB |