# Crawler/web/grubhub/main.py
#
# File-viewer metadata carried over from the original listing:
# 476 lines, 23 KiB, Python

import json
import time
import requests
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
class Grubhub:
    """Export one Grubhub restaurant's menu into an Excel workbook.

    The public methods are pipeline stages meant to be run in this order:
    ``get_token`` -> ``get_menuid`` -> ``get_itme`` -> ``write_xlsx``.
    The template workbook is loaded once in ``__init__``; every stage edits
    that in-memory workbook and saves it to the output path.
    """

    # Value written to column 1 of every "Categories" and "Item" row.
    MENU_NAME = "Online All Day Menu"
    API_ROOT = 'https://api-gtm.grubhub.com'
    # Seconds before an HTTP call is abandoned; without it a stalled proxy
    # would block the run forever.
    REQUEST_TIMEOUT = 30
    # An option whose description starts with one of these prefixes is treated
    # as a portion size rather than an add-on.
    SIZE_IDENTIFIERS = ("(S)", "(L)", "(小)", "(大)", "(Half Gallon)", "(One Gallon)", "1.4pcs", "8pcs", "4pcs")
    # "Modifier" sheet columns that hold per-group values and get merged
    # vertically across all option rows of a group.
    _GROUP_COLUMNS = (1, 2, 7, 8, 9, 10)
    # Hard-coded value sent as the ``perimeter-x`` header on feed and item
    # requests. NOTE(review): presumably captured from a browser session and
    # liable to expire -- confirm before relying on it.
    _PERIMETER_X = 'eyJ1IjoiY2M3YWQyNDAtZmVlMi0xMWVmLTljZTEtYzU0ODE2NWJhZGM5IiwidiI6IjhmNWZmZmMxLWZlZTItMTFlZi04OGY4LTNlNmE5YjdjNmY1NSIsInQiOjE3NDE3NDQzNTI1NjAsImgiOiI5YWFjOTBkZDBmZTc1N2EzOTJlYmMwM2ViMTNiZGU1YzhhMWY4MDljYzNmOTZlZjdhNDAwZWJlZGVmMDkxOTljIn0='
    # Headers shared by every request.
    _BROWSER_HEADERS = {
        'accept-language': 'zh-CN,zh;q=0.9',
        'origin': 'https://www.grubhub.com',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://www.grubhub.com/',
        'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    }

    def __init__(self, restaurant_id=10316176, template_path='Menu.xlsx', output_path='grubhubMenu.xlsx'):
        """Load the template workbook and snapshot the "Modifier" header row.

        Args:
            restaurant_id: Restaurant id inserted into the API URLs.
            template_path: Workbook to load; it must contain the sheets
                "Categories", "Item" and "Modifier".
            output_path: File every stage saves the workbook to.
        """
        self.proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
        self.token = None
        # Category name -> category id, filled in by get_menuid().
        self.get_menuid_lit = {}
        self.restaurant_id = restaurant_id
        self.template_path = template_path
        self.output_path = output_path
        self.wb = load_workbook(template_path)
        # NOTE: from here on the instance attribute shadows the method of the
        # same name and holds the captured header-row dict. The name is kept
        # because write_xlsx() (and possibly outside callers) read it.
        self.modify_first_row = self.modify_first_row()

    # ------------------------------------------------------------------
    # Workbook helpers
    # ------------------------------------------------------------------
    def clear_sheet(self, sheet):
        """Blank every cell value below the first row of ``sheet`` and save.

        Only values are removed; formatting and merged ranges are untouched.
        """
        ws = self.wb[sheet]
        for row in ws.iter_rows(min_row=2):  # the first row is left alone
            for cell in row:
                if cell.value is not None:
                    cell.value = None
        self.wb.save(self.output_path)

    def clear_except_first_row(self, sheet):
        """Remove data, styling and merges below the first row, then save."""
        ws = self.wb[sheet]
        # Iterate over a copy: unmerging mutates ws.merged_cells.ranges.
        for merged_range in list(ws.merged_cells.ranges):
            ws.unmerge_cells(str(merged_range))
        max_row = ws.max_row
        max_col = ws.max_column
        # Reset value and cell-level styling from row 2 downwards.
        for row in range(2, max_row + 1):
            for col in range(1, max_col + 1):
                cell = ws.cell(row=row, column=col)
                cell.value = None
                cell.fill = PatternFill(fill_type=None)
                cell.font = Font()
                cell.alignment = Alignment()
                cell.border = Border()
        # Delete rows 2..max_row; at least one row is always requested, as before.
        ws.delete_rows(2, max(max_row - 1, 1))
        # Reset row-level styling for rows that carry a dimension record.
        for row in range(2, max_row + 1):
            if row in ws.row_dimensions:
                dimension = ws.row_dimensions[row]
                dimension.fill = PatternFill(fill_type=None)
                dimension.font = Font()
                dimension.alignment = Alignment()
        self.wb.save(self.output_path)

    def modify_first_row(self):
        """Capture value and styling of row 1 of the "Modifier" sheet.

        Returns:
            dict: column index -> ``{"value", "font", "alignment", "fill",
            "border"}``, plus the key ``"row_height"`` holding the row height.
            ``"fill"`` is ``None`` when the cell has no pattern type.
        """
        ws = self.wb["Modifier"]
        source_row = 1
        row_data = {}

        def clone_side(side):
            return Side(style=side.style, color=side.color)

        for col in range(1, ws.max_column + 1):
            source_cell = ws.cell(row=source_row, column=col)
            font = source_cell.font
            alignment = source_cell.alignment
            fill = source_cell.fill
            border = source_cell.border
            row_data[col] = {
                "value": source_cell.value,
                "font": Font(
                    name=font.name,
                    size=font.size,
                    bold=font.bold,
                    italic=font.italic,
                    underline=font.underline,
                    color=font.color.rgb if font.color else None,
                ),
                "alignment": Alignment(
                    horizontal=alignment.horizontal,
                    vertical=alignment.vertical,
                    wrap_text=alignment.wrap_text,
                ),
                "fill": PatternFill(
                    fill_type=fill.patternType,
                    fgColor=fill.fgColor.rgb if fill.fgColor else None,
                    bgColor=fill.bgColor.rgb if fill.bgColor else None,
                ) if fill and fill.patternType else None,
                "border": Border(
                    left=clone_side(border.left),
                    right=clone_side(border.right),
                    top=clone_side(border.top),
                    bottom=clone_side(border.bottom),
                ) if border else None,
            }
        row_data["row_height"] = ws.row_dimensions[source_row].height
        return row_data

    @staticmethod
    def _stamp_header_row(ws, row, template):
        """Write the captured header row (values and styling) into ``row``."""
        ws.row_dimensions[row].height = template["row_height"]
        for column, captured in template.items():
            if column == "row_height":
                continue
            cell = ws.cell(row=row, column=column)
            cell.value = captured["value"]
            # The captured style objects are assigned directly; openpyxl
            # allows one style object to be shared by many cells.
            if captured["font"]:
                cell.font = captured["font"]
            if captured["alignment"]:
                cell.alignment = captured["alignment"]
            if captured["fill"] and captured["fill"].patternType:
                cell.fill = captured["fill"]
            if captured["border"]:
                cell.border = captured["border"]

    # ------------------------------------------------------------------
    # HTTP helpers
    # ------------------------------------------------------------------
    def _api_headers(self, cache_control='no-cache', perimeter_x=False):
        """Build the headers for an authenticated JSON GET request."""
        headers = {
            'accept': 'application/json',
            'authorization': 'Bearer {}'.format(self.token),
            'cache-control': cache_control,
            'if-modified-since': '0',
        }
        headers.update(self._BROWSER_HEADERS)
        if perimeter_x:
            headers['perimeter-x'] = self._PERIMETER_X
        return headers

    def _get_json(self, client, url, headers, params):
        """GET ``url`` through the proxy and return the decoded JSON body.

        ``client`` is either the ``requests`` module or a ``requests.Session``.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        response = client.get(url, params=params, headers=headers,
                              proxies=self.proxies, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def get_token(self):
        """Request an anonymous session and return its access token.

        Returns:
            The ``session_handle.access_token`` value of the response, or
            ``None`` when the response does not contain one.

        Raises:
            requests.HTTPError: if the auth endpoint answers with 4xx/5xx.
        """
        headers = dict(self._BROWSER_HEADERS)
        headers.update({
            'accept': '*/*',
            'authorization': 'Bearer',
            'cache-control': 'no-cache',
            'content-type': 'application/json;charset=UTF-8',
        })
        json_data = {
            'brand': 'GRUBHUB',
            'client_id': 'beta_UmWlpstzQSFmocLy3h1UieYcVST',
            'device_id': 1277616243,
            'scope': 'anonymous',
        }
        response = requests.post('{}/auth'.format(self.API_ROOT), headers=headers, json=json_data,
                                 proxies=self.proxies, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json().get("session_handle", {}).get('access_token')

    def get_menu_items(self):
        """Download the restaurant's ``menu_items`` listing into ``menu.json``."""
        params = {
            'orderType': 'standard',
            'version': '4',
        }
        payload = self._get_json(
            requests,
            '{}/restaurants/{}/menu_items/'.format(self.API_ROOT, self.restaurant_id),
            self._api_headers(),
            params,
        )
        with open('menu.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(payload, indent=4))

    # ------------------------------------------------------------------
    # Stage: categories
    # ------------------------------------------------------------------
    def get_menuid(self):
        """Fill the "Categories" sheet and ``self.get_menuid_lit``.

        Reads ``menu_id.json`` from the working directory; this method does
        not download it. The file is expected to be a saved response of
        ``restaurant_gateway/info/volatile/<restaurant id>`` requested with
        ``orderType=STANDARD``, ``platform=WEB`` and ``enhancedFeed=true``.
        Feed entries whose id is missing or is the string ``"None"`` are
        skipped.
        """
        self.clear_sheet("Categories")
        ws = self.wb["Categories"]
        with open('menu_id.json', 'r', encoding='utf-8') as f:
            menu_id = json.load(f)
        feed = menu_id.get("object", {}).get("data", {}).get("enhanced_feed", [])
        category_ids = {}
        row = 2
        for menu in feed:
            # A missing id would later be requested as ".../None", so it is
            # skipped together with the literal "None" marker.
            if menu.get("id") in (None, "None"):
                continue
            category_ids[menu.get("name")] = menu.get("id")
            ws.cell(row=row, column=1, value=self.MENU_NAME)
            ws.cell(row=row, column=2, value=menu.get("name"))
            ws.cell(row=row, column=3, value="")  # translation, left empty
            row += 1
        self.get_menuid_lit = category_ids
        self.wb.save(self.output_path)

    # ------------------------------------------------------------------
    # Stage: items
    # ------------------------------------------------------------------
    @classmethod
    def _classify_choices(cls, choice_categories):
        """Split an item's choice categories into sizes and add-on groups.

        Returns:
            dict with keys ``"ism"`` (0 = no options, 1 = sizes only,
            2 = add-ons only, 3 = both), ``"sizes"``, ``"addons"`` and
            ``"nameList"`` (only filled when ``ism`` is 3). Prices are the
            API amounts divided by 100.
        """
        summary = {"ism": 0, "sizes": [], "addons": [], "nameList": []}
        for category in choice_categories or []:
            quantity = category.get('quantity_settings', {})
            minimum = quantity.get('minimum_units', 0)
            group = {
                "name": category.get('name', ''),
                "list": [],
                "required": minimum >= 1,
                "min": minimum,
                "max": quantity.get('maximum_units', 0),
            }
            for option in category.get('choice_option_list', []):
                # A null description or price is read as empty / zero.
                label = option.get('description') or ''
                amount = (option.get('price') or {}).get('amount') or 0
                choice = {"name": label, "price": amount / 100}
                if label.startswith(cls.SIZE_IDENTIFIERS):
                    summary['sizes'].append(choice)
                else:
                    group["list"].append(choice)
            # A category made up only of size options yields no add-on group.
            if group["list"]:
                summary["addons"].append(group)
        has_sizes = bool(summary['sizes'])
        has_addons = bool(summary['addons'])
        if has_sizes and has_addons:
            summary['ism'] = 3
            first_group = summary["addons"][0]["name"]
            summary['nameList'] = [f"{size['name']}: {first_group}" for size in summary["sizes"]]
        elif has_sizes:
            summary['ism'] = 1
        elif has_addons:
            summary['ism'] = 2
        return summary

    @classmethod
    def _item_cells(cls, category_name, entity, detail):
        """Compute the "Item" sheet cells for one menu item.

        Args:
            category_name: Category the item was listed under.
            entity: The ``entity`` object of a category-feed entry.
            detail: Decoded ``menu_items/<item id>`` response.

        Returns:
            tuple: ``(cells, addons)`` where ``cells`` maps column index to
            value and ``addons`` is the list of add-on groups of the item.
        """
        # Kept in its own name: the per-option prices must never overwrite it.
        base_price = entity.get("item_price").get("pickup").get("value") / 100.0
        options = cls._classify_choices(detail.get('choice_category_list', []))
        cells = {
            1: cls.MENU_NAME,
            2: category_name,
            3: entity.get("item_name"),
            4: "",
            5: base_price,
            7: entity.get("item_description"),
        }
        mode = options['ism']
        if mode in (1, 3):
            # One "<price>/<size>" entry per size; a size's price is its
            # surcharge added to the item's base price.
            cells[5] = ";".join(
                f"{base_price + size['price']:.2f}/{size['name']}" for size in options['sizes']
            )
        if mode == 3:
            cells[6] = "\n".join(options['nameList'])
        elif mode == 2:
            cells[6] = "\n".join(group['name'] for group in options['addons'])
        return cells, options['addons']

    @staticmethod
    def _merge_addon_groups(groups, addons):
        """Fold ``addons`` into ``groups`` (group name -> group), in place.

        A group seen for the first time is stored as is. For a known name only
        options whose name is not present yet are appended; ``required``,
        ``min`` and ``max`` of the first occurrence are kept.
        """
        for group in addons:
            known = groups.get(group["name"])
            if known is None:
                groups[group["name"]] = group
                continue
            seen = {option["name"] for option in known["list"]}
            known["list"].extend(option for option in group["list"] if option["name"] not in seen)

    def _fetch_category(self, session, category_id):
        """Return the ``content`` list of one category feed."""
        # 'time' and 'operationId' are fixed values carried over from the
        # original source, not generated per request.
        params = {
            'time': '1741743447761',
            'operationId': '8e6c2210-fee2-11ef-a211-03a08d96f471',
            'isFutureOrder': 'false',
            'restaurantStatus': 'ORDERABLE',
            'isNonRestaurantMerchant': 'false',
            'merchantTypes': '',
            'orderType': 'STANDARD',
            'agent': 'false',
            'task': 'CATEGORY',
            'platform': 'WEB',
        }
        payload = self._get_json(
            session,
            '{}/restaurant_gateway/feed/{}/{}'.format(self.API_ROOT, self.restaurant_id, category_id),
            self._api_headers(cache_control='max-age=0', perimeter_x=True),
            params,
        )
        return payload["object"]["data"]["content"]

    def _fetch_item_detail(self, session, item_id):
        """Return the decoded ``menu_items/<item_id>`` response."""
        params = {
            'time': '1741743544922',
            'hideUnavailableMenuItems': 'true',
            'orderType': 'standard',
            'version': '4',
        }
        return self._get_json(
            session,
            '{}/restaurants/{}/menu_items/{}'.format(self.API_ROOT, self.restaurant_id, item_id),
            self._api_headers(perimeter_x=True),
            params,
        )

    def get_itme(self):
        """Fill the "Item" sheet and write the add-on groups to ``menu_item.json``.

        For every category in ``self.get_menuid_lit`` the category feed is
        fetched, then the detail of each listed item. One row is written per
        item; add-on groups of all items are merged by name and dumped as JSON
        for ``write_xlsx``. The "Modifier" sheet is cleared as a side effect.

        Raises:
            requests.HTTPError: if any request answers with 4xx/5xx.
        """
        self.clear_except_first_row("Item")
        self.clear_except_first_row("Modifier")
        ws = self.wb["Item"]
        row = 2
        groups = {}  # insertion-ordered: first appearance decides the order
        with requests.Session() as session:
            for category_name, category_id in self.get_menuid_lit.items():
                print(category_name, category_id)
                for entry in self._fetch_category(session, category_id):
                    entity = entry.get("entity")
                    detail = self._fetch_item_detail(session, entity.get("item_id"))
                    cells, addons = self._item_cells(category_name, entity, detail)
                    for column, value in cells.items():
                        ws.cell(row=row, column=column, value=value)
                    self._merge_addon_groups(groups, addons)
                    row += 1
        self.wb.save(self.output_path)
        with open('menu_item.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(list(groups.values()), indent=4))

    # ------------------------------------------------------------------
    # Stage: modifiers
    # ------------------------------------------------------------------
    def write_xlsx(self):
        """Rebuild the "Modifier" sheet from ``menu_item.json`` and save.

        Layout: the first group starts at row 2. Every later group is preceded
        by one empty row and a copy of the header row. Within a group each
        option takes one row (name in column 3, price in column 6); the group
        columns are centred and, for groups with more than one option, merged
        vertically.
        """
        ws = self.wb["Modifier"]
        self.clear_except_first_row("Modifier")
        with open('menu_item.json', 'r', encoding='utf-8') as f:
            groups = json.load(f)
        template = self.modify_first_row  # dict captured in __init__
        row = 2
        for group in groups:
            if row > 2:
                self._stamp_header_row(ws, row, template)
                row += 1
            first_row = row
            ws.cell(row=row, column=1, value=group['name'])
            ws.cell(row=row, column=2, value="")
            ws.cell(row=row, column=7, value="Required" if group['required'] else "Not Required")
            ws.cell(row=row, column=8, value=group['min'])
            ws.cell(row=row, column=9, value=group['max'])
            ws.cell(row=row, column=10, value="NO")
            for option in group['list']:
                ws.cell(row=row, column=3, value=option['name'])
                ws.cell(row=row, column=6, value=option['price'])
                row += 1
            last_row = row - 1
            row += 1  # leave one empty row after the group
            if last_row < first_row:
                continue  # group without options: nothing to merge or centre
            for column in self._GROUP_COLUMNS:
                # Merge only real multi-row spans; a one-cell "merge" adds
                # nothing and may be rejected by spreadsheet applications.
                if last_row > first_row:
                    ws.merge_cells(start_row=first_row, start_column=column,
                                   end_row=last_row, end_column=column)
                ws.cell(row=first_row, column=column).alignment = Alignment(
                    horizontal="center", vertical="center")
        self.wb.save(self.output_path)
if __name__ == '__main__':
    # Stage order when run from scratch:
    #   get_token() (assign the result to .token) -> get_menuid() -> get_itme() -> write_xlsx()
    # Only the last stage runs here: it rebuilds the "Modifier" sheet from the
    # menu_item.json file left behind by an earlier get_itme() run.
    exporter = Grubhub()
    exporter.write_xlsx()