fix(BIZ-42): 综合评审优化 — 12项修复

四轮评审反馈全部处理： 🔴 Critical (5): - _stats data race: 新增 _stats_lock (asyncio.Lock) + _increment_stat() helper - Admin API 无认证: 新增 SIDECAR_ADMIN_TOKEN Bearer Token 认证 - API Key 明文暴露: GET config 返回 masked api_key (前4位+****) - queue_max_size hot-reload 假生效: PriorityQueue.set_max_size() + 收缩保护 - SIDECAR_TIMEOUT 6000→60s + 上限截断 300s 🟠 Major (3): - upstream_api_key 启动检查: lifespan 阶段 warning 日志 - Dashboard HTML 无缓存: 300s TTL 内存缓存 - queue_stats 异常日志: logger.warning(queue_stats_unavailable) 🟡 Medium (3): - CORS middleware 配置 - httpx 连接池限制 (max_connections=100, keepalive=20) - SSE retry: 3000 字段 🟢 Minor (1): - _extract_model 类型注解 body: dict→Any - passthrough 硬编码 30s→_config.request_timeout mypy strict: 5 files, zero errors Reviewed-by: 梁思筑, 严维序, 陆怀瑾, 沈路明 Co-authored-by: multica-agent <github@multica.ai>
2026-06-24 13:26:56 +08:00
parent ba5b932f50
commit c50dcc9cb2
4 changed files with 169 additions and 31 deletions
@@ -20,6 +20,7 @@ import httpx
 import structlog
 import uvicorn
 from fastapi import FastAPI, Request, Response
+from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse

 from nvidia_sidecar.config import load_config, SidecarConfig
@@ -76,7 +77,7 @@ _pending_requests: dict[str, tuple[asyncio.Future[httpx.Response], float]]
 """request_id → (response future, enqueued_at) 的映射。"""
 _metrics_task: asyncio.Task[None] | None = None

-# 统计计数器
+# 统计计数器（受 _stats_lock 保护, 修复梁思筑评审 #1: data race）
 _stats: dict[str, int] = {
    "total_requests": 0,
    "nvidia_requests": 0,
@@ -86,13 +87,20 @@ _stats: dict[str, int] = {
    "upstream_errors": 0,
    "start_time": 0,
 }
+_stats_lock: asyncio.Lock = asyncio.Lock()


 # ---------------------------------------------------------------------------
 # 工具函数
 # ---------------------------------------------------------------------------

-def _extract_model(body: dict[str, Any]) -> str | None:
+async def _increment_stat(key: str, delta: int = 1) -> None:
+    """线程安全的 _stats 计数器自增（梁思筑评审 #1 修复：消除 data race）。"""
+    async with _stats_lock:
+        _stats[key] = _stats.get(key, 0) + delta
+
+
+def _extract_model(body: Any) -> str | None:
    """从请求体中提取模型标识符（兼容 OpenAI Chat/Completions 格式）。

    Args:
@@ -213,7 +221,7 @@ async def _worker_loop() -> None:
                )
                if not got_token:
                    log.info("low_priority_timeout", request_id=request_id)
-                    _stats["ratelimited_requests"] += 1
+                    await _increment_stat("ratelimited_requests")
                    _prometheus.record_request(queue_item.priority.name, "ratelimited")
                    if not future.done():
                        future.set_exception(
@@ -241,7 +249,7 @@ async def _worker_loop() -> None:
                            priority=queue_item.priority.name,
                            timeout=_config.request_timeout,
                        )
-                        _stats["ratelimited_requests"] += 1
+                        await _increment_stat("ratelimited_requests")
                        _prometheus.record_request(queue_item.priority.name, "ratelimited")
                        if not future.done():
                            future.set_exception(
@@ -309,7 +317,7 @@ async def _worker_loop() -> None:

            except (httpx.HTTPError, OSError) as exc:
                log.error("upstream_request_failed", request_id=request_id, error=str(exc))
-                _stats["upstream_errors"] += 1
+                await _increment_stat("upstream_errors")
                _prometheus.record_request(queue_item.priority.name, "error")
                _prometheus.set_health(False)
                if not future.done():
@@ -347,7 +355,7 @@ async def _passthrough_with_rate_limit(
    Returns:
        FastAPI Response。
    """
-    _stats["passthrough_requests"] += 1
+    await _increment_stat("passthrough_requests")
    _prometheus.increment_fallback()

    # 低优先级走令牌桶等待
@@ -358,7 +366,7 @@ async def _passthrough_with_rate_limit(
            timeout=_config.low_priority_timeout,
        )
        if not got_token:
-            _stats["ratelimited_requests"] += 1
+            await _increment_stat("ratelimited_requests")
            _prometheus.record_request(priority.name, "ratelimited")
            return JSONResponse(
                status_code=429,
@@ -372,19 +380,20 @@ async def _passthrough_with_rate_limit(
    else:
        got_token = await asyncio.to_thread(_token_bucket.consume, tokens=1)
        if not got_token:
-            # 非低优先级轮询等待
-            deadline = time.monotonic() + 30.0
+            # 非低优先级轮询等待，使用 config.request_timeout 替代硬编码 30s
+            # （严维序评审 minor / 梁思筑评审 #3：hot-reload 假生效修复）
+            deadline = time.monotonic() + _config.request_timeout
            while not got_token:
                await asyncio.sleep(0.1)
                got_token = await asyncio.to_thread(_token_bucket.consume, tokens=1)
                if time.monotonic() > deadline:
-                    _stats["ratelimited_requests"] += 1
+                    await _increment_stat("ratelimited_requests")
                    _prometheus.record_request(priority.name, "ratelimited")
                    return JSONResponse(
                        status_code=429,
                        content={
                            "error": {
-                                "message": "令牌不足（队列满 + passthrough），等待超时 30s",
+                                "message": f"令牌不足（队列满 + passthrough），等待超时 {_config.request_timeout:.0f}s",
                                "type": "RateLimitedError",
                            }
                        },
@@ -464,6 +473,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:

    _http_client = httpx.AsyncClient(
        timeout=httpx.Timeout(_config.request_timeout),
+        limits=httpx.Limits(
+            max_connections=100,
+            max_keepalive_connections=20,
+        ),
    )
    _priority_queue = PriorityRequestQueue(max_size=_config.queue_max_size)
    _token_bucket = AdaptiveTokenBucket(
@@ -489,9 +502,25 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
    metrics_server = uvicorn.Server(metrics_config)
    _metrics_task = asyncio.create_task(metrics_server.serve())

+    # CORS 中间件（严维序评审 #8）
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=False,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
    # 挂载 webui 子路由
    app.include_router(webui_router)

+    # upstream_api_key 启动检查（严维序评审 #5）
+    if not _config.upstream_api_key:
+        logger.warning(
+            "upstream_api_key_empty",
+            message="SIDECAR_API_KEY 未设置，NVIDIA 请求将因 401 认证失败",
+        )
+
    logger.info(
        "sidecar_started",
        host=_config.listen_host,
@@ -541,7 +570,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
    2. 网关识别 → 非 NVIDIA 直通
    3. NVIDIA → 排队 + 令牌限流 + 转发
    """
-    _stats["total_requests"] += 1
+    await _increment_stat("total_requests")

    # 解析请求
    body_bytes: bytes = await request.body()
@@ -561,7 +590,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:

    # 非 NVIDIA → 直接转发
    if not is_nvidia:
-        _stats["passthrough_requests"] += 1
+        await _increment_stat("passthrough_requests")
        try:
            resp = await _forward_to_upstream(
                method=request.method,
@@ -580,7 +609,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
            )

    # NVIDIA → 排队 + 限流 + 转发
-    _stats["nvidia_requests"] += 1
+    await _increment_stat("nvidia_requests")
    priority: Priority = _resolve_priority(raw_headers)

    # 注入内部元数据到 payload
@@ -599,7 +628,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
            },
        )
    except QueueFullError:
-        _stats["queue_full_rejects"] += 1
+        await _increment_stat("queue_full_rejects")
        return JSONResponse(
            status_code=503,
            content={
@@ -611,7 +640,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
        )
    except QueueFullPassthrough:
        # 队列满 + PASSTHROUGH：绕过排队，尝试令牌桶后直接转发
-        _stats["passthrough_requests"] += 1
+        await _increment_stat("passthrough_requests")
        logger.info("queue_full_passthrough", path=path)
        return await _passthrough_with_rate_limit(request, path, body_bytes, raw_headers, priority)