fix: 修复代码审查反馈全部问题
审查意见修复清单:
P1 列映射语义修复 (lottery.py):
- _normalize_history_format() 不再将红球2-6映射到开机号/和值特征/奇偶比等
格式A不含这些特征字段,缺失列留空,前端做降级显示
- 删除已用于构建号码列的原始分列,避免数据重复
P2 架构优化:
- 提取 Excel 兼容逻辑到公共模块 history_loader.py
lottery.py 和 app.py 共同引用,消除三处重复代码
- web_executor.py 标记为已废弃,功能已整合到 app.py
部署修复:
- 删除 deploy/lotto-web.service (旧服务),仅保留 lotto-app.service
- 更新 deploy/DEPLOY.md: 端口5000→8085, 接口清单更新, 添加迁移说明
安全加固:
- API Token 改为环境变量读取: os.environ.get('LOTTO_API_TOKEN')
- 错误信息不再暴露内部异常,改为通用错误消息+日志记录
- 目录遍历防护改用 os.path.realpath 检查最终路径
其他:
- .gitignore 补充排除 双色球历史数据.xlsx
- app.py 引用公共模块,简化 get_statistics_data 和 load_history_dataframe
测试验证: 全部 API 测试通过,120条历史数据正确解析
Issue: BIZ-75
This commit is contained in:
@@ -0,0 +1,206 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
history_loader.py — 双色球历史数据 Excel 公共加载模块
|
||||
|
||||
统一 Excel 格式检测和列名标准化逻辑,供 lottery.py 和 app.py 共同引用。
|
||||
支持三种格式:
|
||||
格式A: Row0=新列名(期号|开奖日期|红球1~6|蓝球|特别号), Row1=旧列名(开奖时间|期数|号码|...), Row2+=数据
|
||||
格式B: Row0=标准列名(开奖时间|期数|号码|开机号|...), Row1+=数据
|
||||
格式C: 直接含"号码"列的标准 DataFrame
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
from collections import Counter
|
||||
import re
|
||||
|
||||
# 标准列名(lottery.py 和 app.py 期望的列)
|
||||
# Canonical column names, in positional order. load_history_dataframe assigns
# them to the sheet's columns by position, so the order here is significant.
# Kept as a list because it is concatenated with a list of overflow names.
LEGACY_COLUMNS = ['开奖时间', '期数', '号码', '开机号', '和值特征', '奇偶比', '大小比', '奇偶形态', '跨度', '其他']
|
||||
|
||||
|
||||
def _legacy_column_names(num_cols):
    """Return ``num_cols`` positional column names.

    The leading names come from ``LEGACY_COLUMNS``; columns beyond that list
    are named ``col_<index>`` so that no data column is dropped.
    """
    known = LEGACY_COLUMNS[:num_cols]
    return known + [f'col_{i}' for i in range(len(known), num_cols)]


def load_history_dataframe(history_file):
    """Load the history Excel file, tolerating several sheet layouts.

    Layout detection works on the first two rows read without a header:

    * Format A: row 0 holds the new column names and row 1 holds the legacy
      column names; data starts at row 2.
    * Format B: row 0 holds the legacy column names; data starts at row 1.
    * Format C: anything else. The file is re-read with pandas' default
      header handling, and a ``号码`` column is built from the split
      red-ball / blue-ball columns when those are present.

    Args:
        history_file: Path to the Excel file.

    Returns:
        A DataFrame with a fresh 0-based index. For formats A and B the
        columns are renamed positionally to ``LEGACY_COLUMNS`` (extra columns
        become ``col_<i>``). An empty DataFrame is returned when the file
        does not exist or contains no cells.
    """
    if not os.path.exists(history_file):
        return pd.DataFrame()

    raw_df = pd.read_excel(history_file, header=None)
    if raw_df.empty:
        return pd.DataFrame()

    legacy_markers = ('开奖时间', '期数', '号码')
    # Accept the red-ball header both with and without a space, matching the
    # two spellings the format-C branch below already understands.
    new_markers = ('期号', '开奖日期', '红球 1', '红球1')

    # raw_df is non-empty here, so row 0 always exists; row 1 may not.
    row0_vals = set(raw_df.iloc[0].astype(str))
    row1_vals = set(raw_df.iloc[1].astype(str)) if len(raw_df) > 1 else set()

    has_legacy_in_row0 = any(name in row0_vals for name in legacy_markers)
    has_legacy_in_row1 = any(name in row1_vals for name in legacy_markers)
    has_new_cols_in_row0 = any(name in row0_vals for name in new_markers)

    if has_new_cols_in_row0 and has_legacy_in_row1:
        # Format A: skip both header rows and label columns positionally.
        data_df = raw_df.iloc[2:].copy()
        data_df.columns = _legacy_column_names(len(data_df.columns))
    elif has_legacy_in_row0:
        # Format B: skip the single header row and label columns positionally.
        data_df = raw_df.iloc[1:].copy()
        data_df.columns = _legacy_column_names(len(data_df.columns))
    else:
        # Format C: let pandas pick the header row itself.
        data_df = pd.read_excel(history_file)
        if '号码' not in data_df.columns:
            # Possibly a split-column layout: try to assemble the 号码 column.
            red_cols = [f'红球 {i}' for i in range(1, 7)]
            if not all(c in data_df.columns for c in red_cols):
                red_cols = [f'红球{i}' for i in range(1, 7)]
            if all(c in data_df.columns for c in red_cols) and '蓝球' in data_df.columns:
                # Built row by row instead of DataFrame.apply(axis=1): on a
                # header-only sheet apply() yields a DataFrame rather than a
                # Series, which cannot be assigned to a single column.
                data_df['号码'] = [
                    _build_number_string(row, red_cols)
                    for _, row in data_df.iterrows()
                ]

        # NOTE(review): last-resort positional rename when none of the key
        # legacy columns exist; this assumes the sheet follows the legacy
        # column order - confirm against real input files.
        if not any(c in data_df.columns for c in LEGACY_COLUMNS[:3]):
            data_df.columns = _legacy_column_names(len(data_df.columns))

    return data_df.reset_index(drop=True)
|
||||
|
||||
|
||||
def _build_number_string(row, red_cols):
|
||||
"""将分列红球 + 蓝球拼接为 14 位号码字符串。"""
|
||||
parts = []
|
||||
for c in red_cols:
|
||||
val = row.get(c)
|
||||
if pd.isna(val):
|
||||
return None
|
||||
s = str(int(val)) if isinstance(val, (int, float)) else str(val).strip()
|
||||
parts.append(s.zfill(2))
|
||||
blue_val = row.get('蓝球')
|
||||
if pd.isna(blue_val):
|
||||
return None
|
||||
blue_s = str(int(blue_val)) if isinstance(blue_val, (int, float)) else str(blue_val).strip()
|
||||
return ''.join(parts) + blue_s.zfill(2)
|
||||
|
||||
|
||||
def _validated_draw(red_tokens, blue_token):
    """Convert tokens to ints and range-check them.

    Returns ``(reds, blue)`` when every red ball is in 1..33 and the blue
    ball is in 1..16, otherwise ``None`` (also when a token is not an int).
    """
    try:
        reds = [int(tok) for tok in red_tokens]
        blue = int(blue_token)
    except ValueError:
        return None
    if all(1 <= r <= 33 for r in reds) and 1 <= blue <= 16:
        return reds, blue
    return None


def parse_number_string(numbers_str):
    """Parse a draw value into ``(red_balls, blue_ball)``.

    Supported inputs:

    * Concatenated digits: ``'08121821243001'`` (six 2-digit red balls
      followed by one 2-digit blue ball; extra trailing digits are ignored).
    * Plus-separated: ``'03,12,16,22,25,28+10'``.
    * Space/comma separated: ``'08 12 18 21 24 30 01'`` (any text from which
      at least seven digit groups can be extracted).
    * The concatenated form after it was stored as a number, i.e. with the
      leading zero dropped (13 digits) and/or a trailing ``.0``.

    Args:
        numbers_str: The raw cell value (string or number).

    Returns:
        A tuple ``(list_of_six_red_balls, blue_ball)``; ``([], 0)`` when the
        value is empty, missing, or fails the range checks (red 1..33,
        blue 1..16).
    """
    if not numbers_str or pd.isna(numbers_str):
        return [], 0

    s = str(numbers_str).strip()

    # A concatenated draw stored as a numeric cell comes back as an int or a
    # float: undo the float rendering and restore the dropped leading zero.
    float_form = re.fullmatch(r'(\d+)\.0+', s)
    if float_form:
        s = float_form.group(1)
    if re.fullmatch(r'\d{13}', s):
        s = s.zfill(14)

    if re.fullmatch(r'\d{14,}', s):
        # Case 1: pure concatenated digits, two per ball.
        tokens = [s[i:i + 2] for i in range(0, 14, 2)]
    elif '+' in s:
        # Case 2: plus-separated; tokens must be clean integers.
        tokens = s.replace(',', ' ').replace('+', ' ').split()
    else:
        # Case 3: pull every digit group out of the text.
        tokens = re.findall(r'\d+', s)

    if len(tokens) < 7:
        return [], 0

    draw = _validated_draw(tokens[:6], tokens[6])
    if draw is None:
        return [], 0
    return draw
|
||||
|
||||
|
||||
def _summarize_values(values):
    """Return min/max/mean/std of a non-empty list of numbers as a dict."""
    import numpy as np

    arr = np.array(values)
    return {
        'min': int(arr.min()), 'max': int(arr.max()),
        'mean': float(arr.mean()), 'std': float(arr.std())
    }


def compute_statistics(history_file):
    """Compute summary statistics from the history Excel file.

    Args:
        history_file: Path to the Excel file.

    Returns:
        ``{}`` when the file does not exist. Otherwise a dict that always
        contains ``history_count`` (number of loaded rows) and, when the
        corresponding data is available:

        * ``hot_reds`` / ``cold_reds``: up to 15 most / least frequent red
          balls, both listed in descending frequency order. Numbers that
          never appear in the data are absent from the counter and therefore
          from both lists.
        * ``hot_blues``: up to 8 most frequent blue balls.
        * ``common_odd_even`` / ``common_size_ratio``: most frequent value of
          the ``奇偶比`` / ``大小比`` columns.
        * ``sum_range`` / ``span_range``: min, max, mean and std of the red
          ball sum and of (max red - min red) per draw.
    """
    if not os.path.exists(history_file):
        return {}

    data_df = load_history_dataframe(history_file)

    red_ball_counts = Counter()
    blue_ball_counts = Counter()
    odd_even_ratios = Counter()
    size_ratios = Counter()
    sum_values = []
    span_values = []

    for _, row in data_df.iterrows():
        # Use the module's shared parser so that every supported draw format
        # is counted and malformed cells are skipped instead of raising.
        reds, blue = parse_number_string(row.get('号码'))
        if reds:
            red_ball_counts.update(reds)
            blue_ball_counts[blue] += 1
            sum_values.append(sum(reds))
            span_values.append(max(reds) - min(reds))

        for column, counter in (('奇偶比', odd_even_ratios), ('大小比', size_ratios)):
            label = str(row.get(column, '')).strip()
            # 'nan' is how a missing cell looks after str().
            if label and label != 'nan':
                counter[label] += 1

    stats = {}

    if red_ball_counts:
        ranked_reds = [ball for ball, _ in red_ball_counts.most_common()]
        stats['hot_reds'] = ranked_reds[:15]
        stats['cold_reds'] = ranked_reds[-15:]

    if blue_ball_counts:
        stats['hot_blues'] = [ball for ball, _ in blue_ball_counts.most_common()][:8]

    if odd_even_ratios:
        stats['common_odd_even'] = max(odd_even_ratios, key=odd_even_ratios.get)
    if size_ratios:
        stats['common_size_ratio'] = max(size_ratios, key=size_ratios.get)

    if sum_values:
        stats['sum_range'] = _summarize_values(sum_values)
    if span_values:
        stats['span_range'] = _summarize_values(span_values)

    stats['history_count'] = len(data_df)
    return stats
|
||||
Reference in New Issue
Block a user