feat: BIZ-75 双色球系统改进
1. P1: 合并双 Flask 服务 — web_executor.py 功能整合到 app.py - /fetch → 抓取控制台页面 - /api/fetch/status → 抓取状态查询 - /api/fetch/execute → 触发抓取(后台线程异步) - web_console.html API 路径已更新 2. P1: fetch_data.py 增加重试机制 + 请求间隔 - REQUEST_DELAY=2s, MAX_RETRIES=3, RETRY_DELAY=5s - 修复缩进 bug(try/except 块缩进错误) 3. P0: 修复 Excel 数据格式兼容性 - fetch_data.py: 跳过网页 header 行,使用标准列名保存 - app.py: 新增 load_history_dataframe() 智能加载函数 - 兼容新旧两种 Excel 格式(一行/两行 header) - 统一列名: 开奖时间|期数|号码|开机号|和值特征|奇偶比|大小比|奇偶形态|跨度|其他 4. 运维: 创建 lotto-app.service systemd 单元 5. 修复 .gitignore(排除运行时数据文件和备份) 6. 创建 requirements.txt
This commit is contained in:
+94
-50
@@ -7,6 +7,7 @@
|
||||
"""
|
||||
|
||||
import requests
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
@@ -27,77 +28,120 @@ HEADERS = {
|
||||
}
|
||||
|
||||
|
||||
# Pause (seconds) before a request, to avoid getting the IP banned
|
||||
REQUEST_DELAY = 2
|
||||
# Maximum number of retries
|
||||
MAX_RETRIES = 3
|
||||
# Pause (seconds) between retries
|
||||
RETRY_DELAY = 5
|
||||
|
||||
|
||||
def fetch_lottery_data():
    """Download the lottery history page and extract its table rows.

    The page at ``URL`` is requested up to ``MAX_RETRIES`` times. The first
    attempt is preceded by a ``REQUEST_DELAY`` pause and every later attempt
    by a ``RETRY_DELAY`` pause. A network error or an unexpected parsing
    error triggers the next attempt. A page that loads but contains no
    ``<table>`` or no usable rows is reported and is NOT retried.

    Returns:
        A list of rows, each row being a list of stripped cell strings.
        Only ``<tr>`` elements with at least 8 ``<td>``/``<th>`` cells are
        kept, so a header row made of ``<th>`` cells may be included.
        ``None`` when no data could be obtained.
    """
    print(f"[{datetime.now()}] 开始抓取数据...")

    last_error = None
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            # Wait before every request so the site is not hit too often.
            if attempt > 1:
                print(f" 第 {attempt} 次重试,等待 {RETRY_DELAY} 秒...")
                time.sleep(RETRY_DELAY)
            else:
                time.sleep(REQUEST_DELAY)

            response = requests.get(URL, headers=HEADERS, timeout=30)
            response.raise_for_status()
            response.encoding = "utf-8"

            soup = BeautifulSoup(response.text, "html.parser")

            # Locate the data table.
            table = soup.find("table")
            if not table:
                print("错误:未找到数据表格")
                return None

            # Parse the table row by row.
            data_rows = []
            for row in table.find_all("tr"):
                cols = row.find_all(["td", "th"])
                if len(cols) >= 8:
                    try:
                        row_data = [col.get_text(strip=True) for col in cols]
                        data_rows.append(row_data)
                    except Exception:
                        # Best effort: a single malformed row is skipped
                        # instead of failing the whole fetch.
                        continue

            if not data_rows:
                print("错误:未解析到任何数据")
                return None

            print(f"成功解析 {len(data_rows)} 条数据")
            return data_rows

        except requests.exceptions.RequestException as e:
            # Record the error and let the loop move on to the next attempt.
            # Not returning here is what allows the summary below to run
            # after the final attempt.
            last_error = e
            print(f"网络请求错误(第 {attempt} 次):{e}")
        except Exception as e:
            last_error = e
            print(f"解析错误(第 {attempt} 次):{e}")

    # Reached only when every attempt raised.
    print(f"抓取失败,共尝试 {MAX_RETRIES} 次,最后错误:{last_error}")
    return None
|
||||
|
||||
|
||||
def save_to_excel(data_rows):
    """Write the scraped rows to the Excel file at ``OUTPUT_FILE``.

    Output layout (per the original author's note, this is the format that
    lottery.py and app.py expect):
      - Columns: 开奖时间 | 期数 | 号码 | 开机号 | 和值特征 | 奇偶比 | 大小比 |
        奇偶形态 | 跨度 | 其他
      - The 号码 column is expected to hold the 6 red balls and 1 blue ball
        as one concatenated string (e.g. '09101316192108'); this function
        stores the scraped cell text unchanged.
      - The first sheet row holds the column names; data starts on row two.

    Args:
        data_rows: List of rows, each a list of cell strings. If the first
            row looks like the web page's own header, it is dropped.

    Returns:
        True when the file was written. False when there is nothing to save
        or when writing failed (the error is printed).
    """
    if not data_rows:
        print("无数据可保存")
        return False

    try:
        # Drop the web table's header row: the first row is treated as a
        # header when any of its cells contains one of these keywords.
        header_keywords = {'开奖时间', '期数', '号码', '开奖日期'}
        first_row = data_rows[0]
        if any(kw in str(cell) for cell in first_row for kw in header_keywords):
            data_rows = data_rows[1:]
            print(f"跳过 header 行,实际数据 {len(data_rows)} 条")

        # The input may have consisted of the header row only. Without this
        # guard the min() below raises ValueError on an empty sequence and
        # the failure is misreported as an Excel write error.
        if not data_rows:
            print("无数据可保存")
            return False

        # Standard column names shared with the consumers of the file.
        standard_columns = ['开奖时间', '期数', '号码', '开机号', '和值特征', '奇偶比', '大小比', '奇偶形态', '跨度', '其他']

        # Truncate every row to the shortest row's width so the frame is
        # rectangular.
        num_cols = min(len(row) for row in data_rows)
        data_rows = [row[:num_cols] for row in data_rows]

        # Use the standard names, truncated or padded with generic names.
        if num_cols <= len(standard_columns):
            actual_columns = standard_columns[:num_cols]
        else:
            # Generic names restart at 列1 for the columns beyond the
            # standard ones; kept as-is so the output stays unchanged.
            actual_columns = standard_columns + [f'列{i+1}' for i in range(num_cols - len(standard_columns))]

        df = pd.DataFrame(data_rows, columns=actual_columns)

        # Write the Excel file.
        df.to_excel(OUTPUT_FILE, index=False, engine="openpyxl")

        print(f"[{datetime.now()}] 数据已保存到:{OUTPUT_FILE}")
        print(f"共保存 {len(df)} 条记录")
        return True

    except Exception as e:
        print(f"保存 Excel 错误:{e}")
        return False
|
||||
|
||||
Reference in New Issue
Block a user