chore: initial commit — existing lottoData codebase
Files: - lottery.py (1189 lines) — DoubleColorBallGenerator core engine - fetch_data.py (131 lines) — history data fetcher from 55128.cn - web_executor.py (216 lines) — data fetch Web console (Flask :5000) - app.py (505 lines) — number generation Web service (Flask :8085) - index.html (1171 lines) — frontend SPA - web_console.html (323 lines) — fetch console frontend - deploy/ — systemd service + cron script + logs BIZ-74 architecture review baseline
This commit is contained in:
+132
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
双色球历史数据抓取脚本
|
||||
从 https://www.55128.cn/kjh/fcssq-history-120.htm 抓取数据
|
||||
更新「双色球历史数据.xlsx」文件
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import os
|
||||
import re
|
||||
|
||||
# Data source: the history page this script scrapes.
URL = "https://www.55128.cn/kjh/fcssq-history-120.htm"

# The output workbook is written next to this script, regardless of the
# current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
OUTPUT_FILE = os.path.join(SCRIPT_DIR, "双色球历史数据.xlsx")

# Request headers that imitate a desktop browser.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
|
||||
|
||||
|
||||
def fetch_lottery_data():
    """Download the history page at ``URL`` and return its table rows as text.

    Sends a GET request with ``HEADERS`` and a 30-second timeout, decodes the
    body as UTF-8, parses it with ``html.parser`` and reads the first
    ``<table>`` found. Every ``<tr>`` holding at least 8 ``<td>``/``<th>``
    cells contributes one list of whitespace-stripped cell texts.

    Returns:
        A list of rows (each a list of ``str``), or ``None`` when the request
        fails, no table is found, no row qualifies, or any other error occurs
        while parsing. Failures are reported on stdout; nothing is raised.
    """
    print(f"[{datetime.now()}] 开始抓取数据...")

    try:
        response = requests.get(URL, headers=HEADERS, timeout=30)
        response.raise_for_status()
        # Force UTF-8 instead of relying on the encoding guessed from headers.
        response.encoding = "utf-8"

        soup = BeautifulSoup(response.text, "html.parser")

        # Locate the data table.
        table = soup.find("table")
        if not table:
            print("错误:未找到数据表格")
            return None

        # Parse the table.
        # NOTE(review): <th> cells are matched as well, so a header row with
        # 8+ cells is returned as a data row — confirm consumers expect that.
        data_rows = []
        skipped = 0
        for row in table.find_all("tr"):
            cols = row.find_all(["td", "th"])
            if len(cols) < 8:
                continue
            try:
                data_rows.append([col.get_text(strip=True) for col in cols])
            except Exception as e:
                # Stay best-effort (one bad row must not abort the scrape),
                # but report the skip instead of swallowing it silently.
                skipped += 1
                print(f"跳过无法解析的行:{e}")

        if skipped:
            print(f"共跳过 {skipped} 行")

        if not data_rows:
            print("错误:未解析到任何数据")
            return None

        print(f"成功解析 {len(data_rows)} 条数据")
        return data_rows

    except requests.exceptions.RequestException as e:
        print(f"网络请求错误:{e}")
        return None
    except Exception as e:
        print(f"解析错误:{e}")
        return None
|
||||
|
||||
|
||||
def save_to_excel(data_rows):
    """Write the scraped rows to the Excel workbook at ``OUTPUT_FILE``.

    All rows are truncated to the length of the shortest row so the table is
    rectangular. Columns take the predefined names when there are at most 11
    of them, otherwise generic ``列1``, ``列2``, ... names.

    The workbook is first written to a temporary file beside the target and
    then moved into place with ``os.replace``, so a failed write does not
    destroy the existing workbook.

    Args:
        data_rows: List of rows, each a list of cell strings.

    Returns:
        ``True`` when the workbook was written, ``False`` when ``data_rows``
        is empty or any error occurred. Errors are printed, never raised.
    """
    if not data_rows:
        print("无数据可保存")
        return False

    # Keep the original extension on the temp file: pandas validates the
    # file extension against the chosen Excel engine.
    base, ext = os.path.splitext(OUTPUT_FILE)
    tmp_file = f"{base}.tmp{ext}"

    try:
        # Build the DataFrame from rows trimmed to a common width.
        num_cols = min(len(row) for row in data_rows)
        trimmed_rows = [row[:num_cols] for row in data_rows]

        # Column names (at most 11 columns).
        columns = ["期号", "开奖日期", "红球 1", "红球 2", "红球 3", "红球 4", "红球 5", "红球 6", "蓝球", "特别号", "奖池"]

        # Fall back to generic names when there are more columns than names.
        if num_cols <= len(columns):
            actual_columns = columns[:num_cols]
        else:
            actual_columns = [f"列{i+1}" for i in range(num_cols)]

        df = pd.DataFrame(trimmed_rows, columns=actual_columns)

        # Write to the temp file, then swap it in over the real workbook.
        df.to_excel(tmp_file, index=False, engine="openpyxl")
        os.replace(tmp_file, OUTPUT_FILE)

        print(f"[{datetime.now()}] 数据已保存到:{OUTPUT_FILE}")
        print(f"共保存 {len(df)} 条记录")
        return True

    except Exception as e:
        print(f"保存 Excel 错误:{e}")
        return False

    finally:
        # After a successful replace the temp file no longer exists; this
        # only removes a leftover from a failed write.
        if os.path.exists(tmp_file):
            try:
                os.remove(tmp_file)
            except OSError as cleanup_err:
                print(f"清理临时文件失败:{cleanup_err}")
|
||||
|
||||
|
||||
def main():
    """Run the scrape-then-save pipeline and return a process exit code.

    Returns:
        ``0`` when the data was fetched and saved, ``1`` when either the
        fetch or the save step failed.
    """
    banner = "=" * 60

    print(banner)
    print("双色球历史数据抓取工具")
    print(banner)

    # Step 1: fetch the data.
    rows = fetch_lottery_data()
    if not rows:
        print("抓取失败")
        return 1

    # Step 2: save the data.
    if not save_to_excel(rows):
        print("保存失败")
        return 1

    print(banner)
    print("任务完成!")
    print(banner)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use SystemExit directly: the bare `exit()` helper is injected by the
    # `site` module for interactive sessions and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen builds).
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user