fix: 修复代码审查反馈全部问题

审查意见修复清单:

P1 列映射语义修复 (lottery.py):
- _normalize_history_format() 不再将红球2-6映射到开机号/和值特征/奇偶比等列；格式A不含这些特征字段，缺失列留空，前端做降级显示
- 删除已用于构建号码列的原始分列，避免数据重复

P2 架构优化:
- 提取 Excel 兼容逻辑到公共模块 history_loader.py，由 lottery.py 和 app.py 共同引用，消除三处重复代码
- web_executor.py 标记为已废弃，功能已整合到 app.py

部署修复:
- 删除 deploy/lotto-web.service (旧服务)，仅保留 lotto-app.service
- 更新 deploy/DEPLOY.md: 端口5000→8085, 接口清单更新, 添加迁移说明

安全加固:
- API Token 改为环境变量读取: os.environ.get('LOTTO_API_TOKEN')（未设置该环境变量时仍回退到默认值 'lotto2026'）
- 错误信息不再暴露内部异常，改为通用错误消息+日志记录
- 目录遍历防护改用 os.path.realpath 检查最终路径

其他:
- .gitignore 补充排除双色球历史数据.xlsx
- app.py 引用公共模块，简化 get_statistics_data 和 load_history_dataframe

测试验证: 全部 API 测试通过，120条历史数据正确解析

Issue: BIZ-75
2026-07-04 01:28:57 +08:00
parent 5d5e77000e
commit 5cebbfa433
7 changed files with 370 additions and 270 deletions
@@ -17,8 +17,11 @@ from datetime import datetime
 from flask import Flask, send_from_directory, jsonify, request, send_file, abort
 from functools import wraps

-# 将项目目录加入路径，以便导入 lottery.py
+# 将项目目录加入路径，以便导入 lottery.py 和 history_loader.py
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# 导入公共历史数据加载模块
+from history_loader import load_history_dataframe as _load_history, parse_number_string, compute_statistics
 sys.path.insert(0, BASE_DIR)

 # 导入号码生成器
@@ -35,7 +38,7 @@ CONFIG = {
    'history_file': os.path.join(BASE_DIR, '双色球历史数据.xlsx'),
    'lottery_output_dir': os.path.join(BASE_DIR, 'lottery'),
    'records_file': os.path.join(BASE_DIR, '.generation_records.json'),
-    'api_token': 'lotto2026',
+    'api_token': os.environ.get('LOTTO_API_TOKEN', 'lotto2026'),
    'auth_enabled': False,
    'max_tickets': 1000,
    'default_tickets': 10,
@@ -105,74 +108,11 @@ def add_record(strategy, num_tickets, filename):
 # ============================================================
 # Excel 历史数据读取辅助
 # ============================================================
-# 标准列名 (与 lottery.py 兼容)
-HISTORY_COLUMNS = ['开奖时间', '期数', '号码', '开机号', '和值特征', '奇偶比', '大小比', '奇偶形态', '跨度', '其他']
-
+# 历史数据加载 — 使用公共模块 history_loader.py
+# ============================================================
 def load_history_dataframe():
-    """智能加载历史数据 Excel，兼容多种格式。
-
-    格式A（fetch_data.py 当前输出）:
-      Row0=新列名(期号|开奖日期|红球1...|蓝球|特别号)
-      Row1=旧列名(开奖时间|期数|号码|开机号|...)
-      Row2+=实际数据
-    格式B（标准格式）:
-      Row0=列名(开奖时间|期数|号码|开机号|...)
-      Row1+=数据
-    """
-    import pandas as pd
-    raw_df = pd.read_excel(CONFIG['history_file'], header=None)
-
-    row0_vals = raw_df.iloc[0].astype(str).tolist() if len(raw_df) > 0 else []
-    row1_vals = raw_df.iloc[1].astype(str).tolist() if len(raw_df) > 1 else []
-
-    has_legacy_in_row0 = any(col in row0_vals for col in ['开奖时间', '期数', '号码'])
-    has_legacy_in_row1 = any(col in row1_vals for col in ['开奖时间', '期数', '号码'])
-    has_new_cols_in_row0 = any(col in row0_vals for col in ['期号', '开奖日期', '红球 1'])
-
-    if has_new_cols_in_row0 and has_legacy_in_row1:
-        # 格式A：跳过 Row0(新列名) 和 Row1(旧列名)，用旧列名，数据从 Row2 开始
-        data_df = raw_df.iloc[2:].copy()
-        num_cols = min(len(data_df.columns), len(HISTORY_COLUMNS))
-        data_df.columns = HISTORY_COLUMNS[:num_cols] + [f'col_{i}' for i in range(num_cols, len(data_df.columns))]
-    elif has_legacy_in_row0:
-        # 格式B：Row0 就是标准列名
-        data_df = raw_df.iloc[1:].copy()
-        num_cols = min(len(data_df.columns), len(HISTORY_COLUMNS))
-        data_df.columns = HISTORY_COLUMNS[:num_cols] + [f'col_{i}' for i in range(num_cols, len(data_df.columns))]
-    else:
-        # 尝试默认读取
-        df = pd.read_excel(CONFIG['history_file'])
-        if '号码' not in df.columns and any(c in df.columns for c in ['红球 1', '红球1']):
-            # 分列格式，需要构建号码列
-            data_df = df.copy()
-            red_cols = [f'红球 {i}' for i in range(1, 7)]
-            if not all(c in data_df.columns for c in red_cols):
-                red_cols = [f'红球{i}' for i in range(1, 7)]
-            if all(c in data_df.columns for c in red_cols) and '蓝球' in data_df.columns:
-                def build_num(row):
-                    parts = []
-                    for c in red_cols:
-                        val = row.get(c)
-                        if pd.isna(val):
-                            return None
-                        s = str(int(val)) if isinstance(val, (int, float)) else str(val).strip()
-                        parts.append(s.zfill(2))
-                    blue_val = row.get('蓝球')
-                    if pd.isna(blue_val):
-                        return None
-                    blue_s = str(int(blue_val)) if isinstance(blue_val, (int, float)) else str(blue_val).strip()
-                    return ''.join(parts) + blue_s.zfill(2)
-                data_df['号码'] = data_df.apply(build_num, axis=1)
-        else:
-            data_df = df
-
-        num_cols = min(len(data_df.columns), len(HISTORY_COLUMNS))
-        # 如果列名不匹配标准，重命名
-        if not any(c in data_df.columns for c in HISTORY_COLUMNS[:3]):
-            data_df.columns = HISTORY_COLUMNS[:num_cols] + [f'col_{i}' for i in range(num_cols, len(data_df.columns))]
-
-    data_df = data_df.reset_index(drop=True)
-    return data_df
+    """加载历史数据 Excel，委托公共模块处理多格式兼容。"""
+    return _load_history(CONFIG['history_file'])

 # ============================================================
 # 认证装饰器（可选）
@@ -259,89 +199,13 @@ def api_generate():
        })

    except Exception as e:
-        return jsonify({'success': False, 'error': f'生成失败: {str(e)}'}), 500
+        traceback.print_exc()
+        return jsonify({'success': False, 'error': '号码生成失败，请检查历史数据文件是否完整'}), 500


 def get_statistics_data(generator=None):
-    """获取统计数据"""
-    import pandas as pd
-    import re
-    from collections import Counter
-
-    if not os.path.exists(CONFIG['history_file']):
-        return {}
-
-    # 使用智能加载函数
-    data_df = load_history_dataframe()
-
-    # 解析红球和蓝球
-    red_ball_counts = Counter()
-    blue_ball_counts = Counter()
-    sum_values = []
-    span_values = []
-
-    for _, row in data_df.iterrows():
-        s = str(row['号码']).strip()
-        if len(s) >= 14:
-            reds = [int(s[i:i+2]) for i in range(0, 12, 2)]
-            blue = int(s[12:14])
-            if all(1 <= r <= 33 for r in reds) and 1 <= blue <= 16:
-                red_ball_counts.update(reds)
-                blue_ball_counts[blue] += 1
-                sum_values.append(sum(reds))
-                span_values.append(max(reds) - min(reds))
-
-    stats = {}
-
-    if red_ball_counts:
-        sorted_reds = sorted(red_ball_counts.items(), key=lambda x: x[1], reverse=True)
-        stats['hot_reds'] = [x[0] for x in sorted_reds[:15]]
-        stats['cold_reds'] = [x[0] for x in sorted_reds[-15:]]
-
-    if blue_ball_counts:
-        sorted_blues = sorted(blue_ball_counts.items(), key=lambda x: x[1], reverse=True)
-        stats['hot_blues'] = [x[0] for x in sorted_blues[:8]]
-
-    # 奇偶比统计
-    odd_even_ratios = Counter()
-    size_ratios = Counter()
-    for _, row in data_df.iterrows():
-        oe = str(row['奇偶比']).strip()
-        sz = str(row['大小比']).strip()
-        if oe and oe != 'nan':
-            odd_even_ratios[oe] += 1
-        if sz and sz != 'nan':
-            size_ratios[sz] += 1
-
-    if odd_even_ratios:
-        stats['common_odd_even'] = max(odd_even_ratios, key=odd_even_ratios.get)
-
-    if size_ratios:
-        stats['common_size_ratio'] = max(size_ratios, key=size_ratios.get)
-
-    # 和值
-    if sum_values:
-        import numpy as np
-        arr = np.array(sum_values)
-        stats['sum_range'] = {
-            'min': int(arr.min()),
-            'max': int(arr.max()),
-            'mean': float(arr.mean()),
-            'std': float(arr.std())
-        }
-
-    # 跨度
-    if span_values:
-        import numpy as np
-        arr = np.array(span_values)
-        stats['span_range'] = {
-            'min': int(arr.min()),
-            'max': int(arr.max()),
-            'mean': float(arr.mean()),
-            'std': float(arr.std())
-        }
-
-    stats['history_count'] = len(data_df)
+    """获取统计数据 — 委托公共模块计算"""
+    return compute_statistics(CONFIG['history_file'])

    return stats

@@ -357,11 +221,8 @@ def api_statistics():
        stats = get_statistics_data()
        return jsonify({'success': True, 'data': stats})
    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)}), 500
-
-
-# ============================================================
-# API：获取生成记录
+        traceback.print_exc()
+        return jsonify({'success': False, 'error': '获取统计数据失败'}), 500
 # ============================================================
@app.route('/api/records')
@require_auth
@@ -387,7 +248,8 @@ def api_records():
            }
        })
    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)}), 500
+        traceback.print_exc()
+        return jsonify({'success': False, 'error': '获取生成记录失败'}), 500


 # ============================================================
@@ -431,11 +293,8 @@ def api_delete_record(record_id):

        return jsonify({'success': True, 'message': '记录已删除'})
    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)}), 500
-
-
-# ============================================================
-# API：文件下载
+        traceback.print_exc()
+        return jsonify({'success': False, 'error': '删除记录失败'}), 500
 # ============================================================
@app.route('/api/download/<path:filepath>')
@require_auth
@@ -444,10 +303,11 @@ def api_download(filepath):
    try:
        # 安全检查：防止目录遍历
        safe_path = os.path.normpath(filepath)
-        if safe_path.startswith('..') or safe_path.startswith('/'):
+        full_path = os.path.realpath(os.path.join(BASE_DIR, safe_path))
+        # 使用 realpath 检查最终路径是否仍在 BASE_DIR 内
+        if not full_path.startswith(os.path.realpath(BASE_DIR)):
            abort(403)

-        full_path = os.path.join(BASE_DIR, safe_path)
        if not os.path.exists(full_path):
            abort(404)

@@ -536,7 +396,8 @@ def api_history():
            }
        })
    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)}), 500
+        traceback.print_exc()
+        return jsonify({'success': False, 'error': '获取历史数据失败'}), 500


 # ============================================================
@@ -570,11 +431,8 @@ def api_status():
            }
        })
    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)}), 500
-
-
-# ============================================================
-# 前端页面
+        traceback.print_exc()
+        return jsonify({'success': False, 'error': '获取系统状态失败'}), 500
 # ============================================================
@app.route('/')
 def index():