"""Dump keyword/title rows, JSON-encoded, through ``DBVidcon.push_web``."""
import argparse
import json
import time

from DB import DBVidcon

# Upper bound on the number of serialized rows handed to one push call.
_PUSH_CHUNK_SIZE = 10000


def parse_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace: has a single integer attribute ``level``
        (``-l`` / ``--level``, default 99).

    NOTE(review): nothing in this file calls this function, and ``main``
    fetches with a fixed level of 0 rather than the 99 default declared
    here -- confirm which value is intended before wiring the option in.
    """
    parser = argparse.ArgumentParser(
        description="Dump keyword/title rows into Redis list."
    )
    parser.add_argument(
        "-l", "--level", type=int, default=99,
        help="value for t.level (default: 99)",
    )
    return parser.parse_args()


def main():
    """Push all level-0 keyword/title rows in chunks and record the batch.

    The batch id is the current Unix time in whole seconds. Work is only
    done when ``db.web_empty()`` returns a truthy value (presumably meaning
    the target queue has been drained -- confirm against ``DBVidcon``).
    Each row is merged with the batch id, serialized to JSON and pushed in
    chunks of at most ``_PUSH_CHUNK_SIZE`` items; afterwards the batch is
    registered via ``db.log_batch_start`` and a summary line is printed.

    The database handle is closed on every exit path, including failures
    while fetching, serializing or pushing.
    """
    batch = int(time.time())
    level = 0  # used for both the fetch and the batch log record
    db = DBVidcon()
    try:
        push = db.push_web

        if db.web_empty():
            rows = db.fetch_keyword_title(level=level)
            payload_list = []
            for row in rows:
                payload_list.append(
                    json.dumps({**row, "batch": batch}, ensure_ascii=False)
                )
                if len(payload_list) >= _PUSH_CHUNK_SIZE:
                    push(payload_list)
                    payload_list.clear()
            if payload_list:  # flush the final, partially filled chunk
                push(payload_list)

            # Everything that reads ``rows`` stays inside this branch so a
            # skipped run cannot hit an unbound name.
            data = {
                "level": level,
                "batch": batch,
                "count": len(rows),
            }
            db.log_batch_start(data)
            print(f"✔ 推送 {len(rows)} 行(batch={batch})到 {push.__name__}队列完毕")
    finally:
        # Release the connection even when a step above raised.
        db.close()


if __name__ == "__main__":
    main()