feat: BIZ-75 双色球系统改进
1. P1: 合并双 Flask 服务 — web_executor.py 功能整合到 app.py - /fetch → 抓取控制台页面 - /api/fetch/status → 抓取状态查询 - /api/fetch/execute → 触发抓取(后台线程异步) - web_console.html API 路径已更新 2. P1: fetch_data.py 增加重试机制 + 请求间隔 - REQUEST_DELAY=2s, MAX_RETRIES=3, RETRY_DELAY=5s - 修复缩进 bug(try/except 块缩进错误) 3. P0: 修复 Excel 数据格式兼容性 - fetch_data.py: 跳过网页 header 行,使用标准列名保存 - app.py: 新增 load_history_dataframe() 智能加载函数 - 兼容新旧两种 Excel 格式(一行/两行 header) - 统一列名: 开奖时间|期数|号码|开机号|和值特征|奇偶比|大小比|奇偶形态|跨度|其他 4. 运维: 创建 lotto-app.service systemd 单元 5. 修复 .gitignore(排除运行时数据文件和备份) 6. 创建 requirements.txt
This commit is contained in:
+12
@@ -5,3 +5,15 @@ venv/
|
|||||||
.venv/
|
.venv/
|
||||||
LottoSpider/
|
LottoSpider/
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
|
# 运行时生成的数据文件
|
||||||
|
.fetch_status.json
|
||||||
|
.generation_records.json
|
||||||
|
lottery/
|
||||||
|
|
||||||
|
# 备份文件
|
||||||
|
*.bak
|
||||||
|
*.bak.*
|
||||||
|
|
||||||
|
# 临时文件
|
||||||
|
*.tmp
|
||||||
|
|||||||
@@ -39,6 +39,10 @@ CONFIG = {
|
|||||||
'auth_enabled': False,
|
'auth_enabled': False,
|
||||||
'max_tickets': 1000,
|
'max_tickets': 1000,
|
||||||
'default_tickets': 10,
|
'default_tickets': 10,
|
||||||
|
# 数据抓取配置(原 web_executor.py 功能)
|
||||||
|
'fetch_script': os.path.join(BASE_DIR, 'fetch_data.py'),
|
||||||
|
'fetch_status_file': os.path.join(BASE_DIR, '.fetch_status.json'),
|
||||||
|
'fetch_timeout': 300, # 抓取超时秒数
|
||||||
}
|
}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -98,6 +102,50 @@ def add_record(strategy, num_tickets, filename):
|
|||||||
os.replace(tmp_path, CONFIG['records_file'])
|
os.replace(tmp_path, CONFIG['records_file'])
|
||||||
return new_record
|
return new_record
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Excel 历史数据读取辅助
|
||||||
|
# ============================================================
|
||||||
|
# Canonical column names for the history workbook (kept compatible with lottery.py).
HISTORY_COLUMNS = [
    '开奖时间',
    '期数',
    '号码',
    '开机号',
    '和值特征',
    '奇偶比',
    '大小比',
    '奇偶形态',
    '跨度',
    '其他',
]
|
||||||
|
|
||||||
|
def load_history_dataframe():
    """Load the history workbook and normalise it to the standard columns.

    The workbook at ``CONFIG['history_file']`` is read without a header so
    that header rows can be recognised by their content:

    * New layout: row 0 already contains the standard column names.
    * Old layout: one header row, or two header rows where the standard
      names only show up in row 1.

    Returns:
        A DataFrame containing only data rows, re-indexed from 0. The first
        ``len(HISTORY_COLUMNS)`` columns are renamed to ``HISTORY_COLUMNS``
        and any surplus columns are named ``col_<position>``. An empty
        workbook yields an empty DataFrame that still carries the standard
        column names, so callers can index columns without a ``KeyError``.
    """
    import pandas as pd

    raw = pd.read_excel(CONFIG['history_file'], header=None)
    if raw.empty:
        # Nothing to probe: row 0 does not exist, so avoid raw.iloc[0].
        return pd.DataFrame(columns=HISTORY_COLUMNS)

    header_markers = ('开奖时间', '期数', '号码')

    def _is_header_row(position):
        # A row counts as a header when any cell equals a standard column name.
        if position >= len(raw):
            return False
        cells = raw.iloc[position].astype(str).tolist()
        return any(marker in cells for marker in header_markers)

    if _is_header_row(0):
        header_rows = 1  # new layout: single standard header
    elif _is_header_row(1):
        header_rows = 2  # old layout: two header rows
    else:
        header_rows = 1  # old layout: single non-standard header

    data_df = raw.iloc[header_rows:].copy()

    # Rename positionally; columns beyond the standard set get generic names.
    total_cols = len(data_df.columns)
    named_cols = min(total_cols, len(HISTORY_COLUMNS))
    data_df.columns = HISTORY_COLUMNS[:named_cols] + [
        f'col_{i}' for i in range(named_cols, total_cols)
    ]

    return data_df.reset_index(drop=True)
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 认证装饰器(可选)
|
# 认证装饰器(可选)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -195,10 +243,8 @@ def get_statistics_data(generator=None):
|
|||||||
if not os.path.exists(CONFIG['history_file']):
|
if not os.path.exists(CONFIG['history_file']):
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
# 直接解析 Excel,跳过描述行
|
# 使用智能加载函数
|
||||||
df = pd.read_excel(CONFIG['history_file'], header=None)
|
data_df = load_history_dataframe()
|
||||||
data_df = df.iloc[1:].copy()
|
|
||||||
data_df.columns = ['开奖日期', '期号', '红球', '开机号', '和值特征', '奇偶形态', '大小比', '奇偶形态2', '跨度', '其他']
|
|
||||||
|
|
||||||
# 解析红球和蓝球
|
# 解析红球和蓝球
|
||||||
red_ball_counts = Counter()
|
red_ball_counts = Counter()
|
||||||
@@ -207,7 +253,7 @@ def get_statistics_data(generator=None):
|
|||||||
span_values = []
|
span_values = []
|
||||||
|
|
||||||
for _, row in data_df.iterrows():
|
for _, row in data_df.iterrows():
|
||||||
s = str(row['红球']).strip()
|
s = str(row['号码']).strip()
|
||||||
if len(s) >= 14:
|
if len(s) >= 14:
|
||||||
reds = [int(s[i:i+2]) for i in range(0, 12, 2)]
|
reds = [int(s[i:i+2]) for i in range(0, 12, 2)]
|
||||||
blue = int(s[12:14])
|
blue = int(s[12:14])
|
||||||
@@ -232,7 +278,7 @@ def get_statistics_data(generator=None):
|
|||||||
odd_even_ratios = Counter()
|
odd_even_ratios = Counter()
|
||||||
size_ratios = Counter()
|
size_ratios = Counter()
|
||||||
for _, row in data_df.iterrows():
|
for _, row in data_df.iterrows():
|
||||||
oe = str(row['奇偶形态']).strip()
|
oe = str(row['奇偶比']).strip()
|
||||||
sz = str(row['大小比']).strip()
|
sz = str(row['大小比']).strip()
|
||||||
if oe and oe != 'nan':
|
if oe and oe != 'nan':
|
||||||
odd_even_ratios[oe] += 1
|
odd_even_ratios[oe] += 1
|
||||||
@@ -400,14 +446,11 @@ def api_history():
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re
|
import re
|
||||||
df = pd.read_excel(CONFIG['history_file'], header=None)
|
|
||||||
|
|
||||||
# 第一行是描述行,跳过
|
# 使用智能加载函数
|
||||||
data_df = df.iloc[1:].copy()
|
data_df = load_history_dataframe()
|
||||||
data_df.columns = ['开奖日期', '期号', '红球', '开机号', '和值特征', '奇偶形态', '大小比', '奇偶形态2', '跨度', '其他']
|
|
||||||
data_df = data_df.reset_index(drop=True)
|
|
||||||
|
|
||||||
# 解析红球(红球列是6个红球+蓝球的拼接字符串,如 '09101316192108')
|
# 解析红球 (号码列是 6 红球+1 蓝球的拼接字符串,如 '09101316192108')
|
||||||
def parse_red_balls(val):
|
def parse_red_balls(val):
|
||||||
s = str(val).strip()
|
s = str(val).strip()
|
||||||
if len(s) >= 12:
|
if len(s) >= 12:
|
||||||
@@ -420,8 +463,8 @@ def api_history():
|
|||||||
return int(s[12:14])
|
return int(s[12:14])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
data_df['红球列表'] = data_df['红球'].apply(parse_red_balls)
|
data_df['红球列表'] = data_df['号码'].apply(parse_red_balls)
|
||||||
data_df['蓝球'] = data_df['红球'].apply(parse_blue_ball)
|
data_df['蓝球'] = data_df['号码'].apply(parse_blue_ball)
|
||||||
|
|
||||||
# 搜索过滤
|
# 搜索过滤
|
||||||
if search:
|
if search:
|
||||||
@@ -442,13 +485,13 @@ def api_history():
|
|||||||
for _, row in page_df.iterrows():
|
for _, row in page_df.iterrows():
|
||||||
reds = row['红球列表']
|
reds = row['红球列表']
|
||||||
record = {
|
record = {
|
||||||
'开奖日期': str(row['开奖日期']),
|
'开奖日期': str(row['开奖时间']),
|
||||||
'期号': str(row['期号']),
|
'期号': str(row['期数']),
|
||||||
'红球': reds if len(reds) == 6 else [],
|
'红球': reds if len(reds) == 6 else [],
|
||||||
'蓝球': row['蓝球'],
|
'蓝球': row['蓝球'],
|
||||||
'开机号': str(row['开机号']),
|
'开机号': str(row['开机号']),
|
||||||
'和值': str(row['和值特征']),
|
'和值': str(row['和值特征']),
|
||||||
'奇偶形态': str(row['奇偶形态']),
|
'奇偶形态': str(row['奇偶比']),
|
||||||
'大小比': str(row['大小比']),
|
'大小比': str(row['大小比']),
|
||||||
'跨度': str(row['跨度']),
|
'跨度': str(row['跨度']),
|
||||||
}
|
}
|
||||||
@@ -524,18 +567,161 @@ def api_config():
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 数据抓取控制台(原 web_executor.py 功能整合)
|
||||||
|
# ============================================================
|
||||||
|
# Process-wide state of the data-fetch job; guarded by fetch_lock.
fetch_status = dict(
    is_running=False,
    last_update=None,
    last_record_count=0,
    last_error=None,
)
fetch_lock = threading.Lock()
|
||||||
|
|
||||||
|
def load_fetch_status():
    """Restore the persisted fetch status from ``CONFIG['fetch_status_file']``.

    Loading is best effort: a missing, unreadable, badly encoded or
    malformed status file leaves the in-memory ``fetch_status`` untouched.
    The current in-memory ``is_running`` flag always wins over the value
    stored on disk, so a running job is never overwritten by stale data.
    """
    global fetch_status

    status_path = CONFIG['fetch_status_file']
    if not os.path.exists(status_path):
        return

    try:
        with open(status_path, 'r', encoding='utf-8') as f:
            saved = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return

    if not isinstance(saved, dict):
        # Valid JSON but not an object (e.g. a list): cannot be used as status.
        return

    with fetch_lock:
        running = fetch_status.get('is_running', False)
        fetch_status = saved
        fetch_status['is_running'] = running
|
||||||
|
|
||||||
|
def save_fetch_status():
    """Persist ``fetch_status`` to ``CONFIG['fetch_status_file']`` as JSON.

    The JSON is written to a sibling ``.tmp`` file and then moved into place
    with ``os.replace``, so a crash mid-write cannot leave a truncated
    status file behind.

    This function acquires ``fetch_lock`` itself. Callers must not already
    hold it: the lock is a plain ``threading.Lock`` and is not reentrant.

    Raises:
        OSError: if the temporary file cannot be written or moved.
    """
    status_path = CONFIG['fetch_status_file']
    tmp_path = status_path + '.tmp'
    with fetch_lock:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(fetch_status, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, status_path)
|
||||||
|
|
||||||
|
@app.route('/fetch')
def fetch_console():
    """Serve the data-fetch console page stored next to the application."""
    console_page = 'web_console.html'
    return send_from_directory(BASE_DIR, console_page)
|
||||||
|
|
||||||
|
@app.route('/api/fetch/status')
def api_fetch_status():
    """Report the current state of the fetch job as JSON."""
    # Take a consistent snapshot under the lock, then serialise it.
    with fetch_lock:
        snapshot = {
            "success": True,
            "isRunning": fetch_status.get("is_running", False),
            "lastUpdate": fetch_status.get("last_update"),
            "recordCount": fetch_status.get("last_record_count", 0),
            "lastError": fetch_status.get("last_error"),
        }
    return jsonify(snapshot)
|
||||||
|
|
||||||
|
def _parse_record_count(stdout):
    """Return the record count announced in the fetch script's stdout.

    Recognises lines of the form "共保存 N 条记录" or "成功解析 N 条数据".
    When several lines match, the last parsable one wins; 0 is returned
    when no line matches.
    """
    record_count = 0
    patterns = (('共保存', '条记录'), ('成功解析', '条数据'))
    for line in stdout.split('\n'):
        for prefix, suffix in patterns:
            if prefix in line and suffix in line:
                try:
                    record_count = int(line.split(prefix)[1].split(suffix)[0].strip())
                except ValueError:
                    pass  # marker text present but no clean integer between
                break
    return record_count


def _finish_fetch(error=None, record_count=0):
    """Record the outcome of a fetch run, clear ``is_running`` and persist it."""
    with fetch_lock:
        if error is None:
            fetch_status["last_update"] = datetime.now().isoformat()
            fetch_status["last_record_count"] = record_count
        else:
            fetch_status["last_error"] = error
        fetch_status["is_running"] = False
    # save_fetch_status() takes fetch_lock itself and the lock is not
    # reentrant, so it must be called only after the lock is released.
    try:
        save_fetch_status()
    except OSError as e:
        # The in-memory state is already correct; only persistence failed.
        print(f"❌ 保存抓取状态失败:{e}")


@app.route('/api/fetch/execute', methods=['POST'])
def api_fetch_execute():
    """Start the fetch script in a background thread.

    Returns:
        HTTP 409 with ``success: False`` when a fetch is already running,
        HTTP 500 when the worker thread cannot be started, otherwise a JSON
        body with ``success: True`` once the worker has been launched.
        Progress and results are exposed through ``fetch_status``.
    """
    import subprocess

    # Check and claim the job in ONE critical section. Setting the flag
    # later inside the worker would let two quick requests both pass.
    with fetch_lock:
        if fetch_status.get("is_running", False):
            return jsonify({
                "success": False,
                "error": "任务正在执行中,请稍后再试"
            }), 409
        fetch_status["is_running"] = True
        fetch_status["last_error"] = None

    def run_fetch_script():
        # Worker body: run the script, then record success or failure.
        error_msg = None
        record_count = 0
        try:
            save_fetch_status()
            print(f"[{datetime.now()}] 开始执行抓取脚本...")

            result = subprocess.run(
                [sys.executable, CONFIG['fetch_script']],
                capture_output=True,
                text=True,
                timeout=CONFIG['fetch_timeout']
            )

            if result.returncode == 0:
                record_count = _parse_record_count(result.stdout)
            else:
                error_msg = result.stderr or f"脚本执行失败,返回码:{result.returncode}"
        except subprocess.TimeoutExpired:
            error_msg = f"脚本执行超时(超过 {CONFIG['fetch_timeout']} 秒)"
        except Exception as e:
            # Thread boundary: nothing above us would report the failure.
            error_msg = f"执行异常:{str(e)}"

        _finish_fetch(error=error_msg, record_count=record_count)
        if error_msg is None:
            print(f"✅ 抓取成功,共 {record_count} 条数据")
        else:
            print(f"❌ {error_msg}")

    try:
        thread = threading.Thread(target=run_fetch_script, daemon=True)
        thread.start()
    except RuntimeError as e:
        # The worker never ran, so release the claim taken above.
        error_msg = f"执行异常:{str(e)}"
        _finish_fetch(error=error_msg)
        return jsonify({"success": False, "error": error_msg}), 500

    return jsonify({
        "success": True,
        "message": "任务已启动,正在执行中..."
    })
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# 启动服务
|
# 启动服务
|
||||||
# ============================================================
|
# ============================================================
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
# 加载抓取状态
|
||||||
|
load_fetch_status()
|
||||||
|
|
||||||
print('=' * 60)
|
print('=' * 60)
|
||||||
print('🎯 双色球 Web UI 服务')
|
print('🎯 双色球 Web UI 服务(统一)')
|
||||||
print('=' * 60)
|
print('=' * 60)
|
||||||
print(f'\n📂 项目路径: {BASE_DIR}')
|
print(f'\n📂 项目路径: {BASE_DIR}')
|
||||||
print(f'📁 历史数据: {CONFIG["history_file"]}')
|
print(f'📁 历史数据: {CONFIG["history_file"]}')
|
||||||
print(f'📁 生成目录: {CONFIG["lottery_output_dir"]}')
|
print(f'📁 生成目录: {CONFIG["lottery_output_dir"]}')
|
||||||
|
print(f'📁 抓取脚本: {CONFIG["fetch_script"]}')
|
||||||
print(f'\n🌐 服务地址: http://{CONFIG["host"]}:{CONFIG["port"]}')
|
print(f'\n🌐 服务地址: http://{CONFIG["host"]}:{CONFIG["port"]}')
|
||||||
print(f' 局域网访问: http://<本机IP>:{CONFIG["port"]}')
|
print(f' 局域网访问: http://<本机IP>:{CONFIG["port"]}')
|
||||||
|
print(f' 抓取控制台: http://<本机IP>:{CONFIG["port"]}/fetch')
|
||||||
print(f'\n✅ 服务就绪!')
|
print(f'\n✅ 服务就绪!')
|
||||||
print('=' * 60)
|
print('=' * 60)
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,17 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=双色球号码生成 Web 服务 (app.py :8085)
|
||||||
|
After=network.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=vincent
|
||||||
|
WorkingDirectory=/home/vincent/Studio/lottoData
|
||||||
|
ExecStart=/home/vincent/Studio/lottoData/venv/bin/python3 /home/vincent/Studio/lottoData/app.py
|
||||||
|
ExecStartPre=/home/vincent/Studio/lottoData/venv/bin/python3 -c "import flask; import pandas; import openpyxl; import numpy"
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
KillMode=control-group
|
||||||
|
Environment=PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
+94
-50
@@ -7,6 +7,7 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -27,77 +28,120 @@ HEADERS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 请求间隔(秒),避免被封 IP
|
||||||
|
REQUEST_DELAY = 2
|
||||||
|
# 最大重试次数
|
||||||
|
MAX_RETRIES = 3
|
||||||
|
# 重试间隔(秒)
|
||||||
|
RETRY_DELAY = 5
|
||||||
|
|
||||||
|
|
||||||
def fetch_lottery_data():
    """Download the results page and return its table rows as cell-text lists.

    The request is attempted up to ``MAX_RETRIES`` times. The first attempt
    waits ``REQUEST_DELAY`` seconds beforehand and each retry waits
    ``RETRY_DELAY`` seconds, to avoid hammering the remote site.

    Returns:
        A list of rows (each a list of stripped cell strings) for every
        ``<tr>`` of the first table that has at least 8 cells, or ``None``
        when the page has no table, no usable rows, or every attempt failed.
    """
    print(f"[{datetime.now()}] 开始抓取数据...")

    last_error = None
    for attempt in range(1, MAX_RETRIES + 1):
        # Throttle before every request.
        if attempt > 1:
            print(f" 第 {attempt} 次重试,等待 {RETRY_DELAY} 秒...")
            time.sleep(RETRY_DELAY)
        else:
            time.sleep(REQUEST_DELAY)

        try:
            response = requests.get(URL, headers=HEADERS, timeout=30)
            response.raise_for_status()
            response.encoding = "utf-8"

            soup = BeautifulSoup(response.text, "html.parser")

            # Locate the data table; a page without one is not retried.
            table = soup.find("table")
            if not table:
                print("错误:未找到数据表格")
                return None

            # Collect the text of every sufficiently wide row.
            data_rows = []
            for row in table.find_all("tr"):
                cols = row.find_all(["td", "th"])
                if len(cols) >= 8:
                    try:
                        row_data = [col.get_text(strip=True) for col in cols]
                        data_rows.append(row_data)
                    except Exception:
                        # Best effort: skip a row that cannot be read.
                        continue

            if not data_rows:
                print("错误:未解析到任何数据")
                return None

            print(f"成功解析 {len(data_rows)} 条数据")
            return data_rows

        except requests.exceptions.RequestException as e:
            last_error = e
            print(f"网络请求错误(第 {attempt} 次):{e}")
        except Exception as e:
            last_error = e
            print(f"解析错误(第 {attempt} 次):{e}")
        # Fall through to the next attempt. After the last one the loop
        # ends, so the summary below is actually reached.

    print(f"抓取失败,共尝试 {MAX_RETRIES} 次,最后错误:{last_error}")
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def save_to_excel(data_rows):
    """Write scraped rows to ``OUTPUT_FILE`` as an Excel sheet.

    The output layout is the one lottery.py and app.py expect:

    * Row 1 holds the standard column names (开奖时间 | 期数 | 号码 | 开机号 |
      和值特征 | 奇偶比 | 大小比 | 奇偶形态 | 跨度 | 其他); data starts on row 2.
    * The 号码 column is the concatenated digit string of the drawn numbers
      (e.g. '09101316192108').

    Args:
        data_rows: list of rows, each a list of cell strings. A leading row
            that looks like the web page's header is dropped.

    Returns:
        ``True`` when the file was written, ``False`` when there was nothing
        to save or writing failed.
    """
    if not data_rows:
        print("无数据可保存")
        return False

    try:
        # Drop the web table's own header row if the first row looks like one.
        first_row = data_rows[0]
        header_keywords = {'开奖时间', '期数', '号码', '开奖日期'}
        if any(kw in str(first_row) for kw in header_keywords):
            data_rows = data_rows[1:]
            print(f"跳过 header 行,实际数据 {len(data_rows)} 条")

        if not data_rows:
            # Only a header row was scraped; min() below would raise on [].
            print("无数据可保存")
            return False

        # Standard column names (must match what lottery.py expects).
        standard_columns = ['开奖时间', '期数', '号码', '开机号', '和值特征', '奇偶比', '大小比', '奇偶形态', '跨度', '其他']

        # Truncate every row to the shortest row so the frame is rectangular.
        num_cols = min(len(row) for row in data_rows)
        data_rows = [row[:num_cols] for row in data_rows]

        # Use the standard names, trimmed or padded with generic ones.
        if num_cols <= len(standard_columns):
            actual_columns = standard_columns[:num_cols]
        else:
            actual_columns = standard_columns + [f'列{i+1}' for i in range(num_cols - len(standard_columns))]

        df = pd.DataFrame(data_rows, columns=actual_columns)

        df.to_excel(OUTPUT_FILE, index=False, engine="openpyxl")

        print(f"[{datetime.now()}] 数据已保存到:{OUTPUT_FILE}")
        print(f"共保存 {len(df)} 条记录")
        return True

    except Exception as e:
        print(f"保存 Excel 错误:{e}")
        return False
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
flask>=3.0
|
||||||
|
pandas>=2.0
|
||||||
|
numpy>=1.24
|
||||||
|
openpyxl>=3.1
|
||||||
|
requests>=2.31
|
||||||
|
beautifulsoup4>=4.12
|
||||||
+5
-5
@@ -192,7 +192,7 @@
|
|||||||
<h3>📋 使用说明</h3>
|
<h3>📋 使用说明</h3>
|
||||||
<ul>
|
<ul>
|
||||||
<li>✅ 点击「立即执行」按钮抓取最新双色球历史数据</li>
|
<li>✅ 点击「立即执行」按钮抓取最新双色球历史数据</li>
|
||||||
<li>✅ 数据将保存到:<code>/Users/vincent/Studio/lottoData/双色球历史数据.xlsx</code></li>
|
<li>✅ 数据将保存到:<code>/home/vincent/Studio/lottoData/双色球历史数据.xlsx</code></li>
|
||||||
<li>✅ 定时任务:每天自动执行一次(通过系统 cron)</li>
|
<li>✅ 定时任务:每天自动执行一次(通过系统 cron)</li>
|
||||||
<li>✅ 实时监控:执行日志在此页面实时显示</li>
|
<li>✅ 实时监控:执行日志在此页面实时显示</li>
|
||||||
</ul>
|
</ul>
|
||||||
@@ -211,7 +211,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="last-update">
|
<div class="last-update">
|
||||||
脚本路径:<code>/Users/vincent/Studio/lottoData/fetch_data.py</code> |
|
脚本路径:<code>/home/vincent/Studio/lottoData/fetch_data.py</code> |
|
||||||
输出文件:<span id="outputFile">双色球历史数据.xlsx</span>
|
输出文件:<span id="outputFile">双色球历史数据.xlsx</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -246,7 +246,7 @@
|
|||||||
updateStatus('running', '执行中');
|
updateStatus('running', '执行中');
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch('/api/execute', {
|
const response = await fetch('/api/fetch/execute', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
});
|
});
|
||||||
@@ -263,7 +263,7 @@
|
|||||||
// 轮询状态直到完成
|
// 轮询状态直到完成
|
||||||
const pollInterval = setInterval(async () => {
|
const pollInterval = setInterval(async () => {
|
||||||
try {
|
try {
|
||||||
const statusResp = await fetch('/api/status');
|
const statusResp = await fetch('/api/fetch/status');
|
||||||
const status = await statusResp.json();
|
const status = await statusResp.json();
|
||||||
|
|
||||||
if (!status.isRunning) {
|
if (!status.isRunning) {
|
||||||
@@ -303,7 +303,7 @@
|
|||||||
|
|
||||||
async function loadStatus() {
|
async function loadStatus() {
|
||||||
try {
|
try {
|
||||||
const response = await fetch('/api/status');
|
const response = await fetch('/api/fetch/status');
|
||||||
if (response.ok) {
|
if (response.ok) {
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
if (data.lastUpdate) {
|
if (data.lastUpdate) {
|
||||||
|
|||||||
Reference in New Issue
Block a user