fix(BIZ-42): 综合评审优化 — 12项修复

四轮评审反馈全部处理:

🔴 Critical (5):
- _stats data race: 新增 _stats_lock (asyncio.Lock) + _increment_stat() helper
- Admin API 无认证: 新增 SIDECAR_ADMIN_TOKEN Bearer Token 认证
- API Key 明文暴露: GET config 返回 masked api_key (前4位+****)
- queue_max_size hot-reload 假生效: PriorityQueue.set_max_size() + 收缩保护
- SIDECAR_TIMEOUT 6000→60s + 上限截断 300s

🟠 Major (3):
- upstream_api_key 启动检查: lifespan 阶段 warning 日志
- Dashboard HTML 无缓存: 300s TTL 内存缓存
- queue_stats 异常日志: logger.warning(queue_stats_unavailable)

🟡 Medium (3):
- CORS middleware 配置
- httpx 连接池限制 (max_connections=100, keepalive=20)
- SSE retry: 3000 字段

🟢 Minor (1):
- _extract_model 类型注解 body: dict→Any
- passthrough 硬编码 30s→_config.request_timeout

mypy strict: 5 files, zero errors

Reviewed-by: 梁思筑, 严维序, 陆怀瑾, 沈路明
Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
2026-06-24 13:26:56 +08:00
parent ba5b932f50
commit c50dcc9cb2
4 changed files with 169 additions and 31 deletions
+45 -16
View File
@@ -20,6 +20,7 @@ import httpx
import structlog
import uvicorn
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from nvidia_sidecar.config import load_config, SidecarConfig
@@ -76,7 +77,7 @@ _pending_requests: dict[str, tuple[asyncio.Future[httpx.Response], float]]
"""request_id → (response future, enqueued_at) 的映射。"""
_metrics_task: asyncio.Task[None] | None = None
# 统计计数器
# 统计计数器(受 _stats_lock 保护, 修复梁思筑评审 #1: data race
_stats: dict[str, int] = {
"total_requests": 0,
"nvidia_requests": 0,
@@ -86,13 +87,20 @@ _stats: dict[str, int] = {
"upstream_errors": 0,
"start_time": 0,
}
_stats_lock: asyncio.Lock = asyncio.Lock()
# ---------------------------------------------------------------------------
# 工具函数
# ---------------------------------------------------------------------------
def _extract_model(body: dict[str, Any]) -> str | None:
async def _increment_stat(key: str, delta: int = 1) -> None:
"""线程安全的 _stats 计数器自增(梁思筑评审 #1 修复:消除 data race)。"""
async with _stats_lock:
_stats[key] = _stats.get(key, 0) + delta
def _extract_model(body: Any) -> str | None:
"""从请求体中提取模型标识符(兼容 OpenAI Chat/Completions 格式)。
Args:
@@ -213,7 +221,7 @@ async def _worker_loop() -> None:
)
if not got_token:
log.info("low_priority_timeout", request_id=request_id)
_stats["ratelimited_requests"] += 1
await _increment_stat("ratelimited_requests")
_prometheus.record_request(queue_item.priority.name, "ratelimited")
if not future.done():
future.set_exception(
@@ -241,7 +249,7 @@ async def _worker_loop() -> None:
priority=queue_item.priority.name,
timeout=_config.request_timeout,
)
_stats["ratelimited_requests"] += 1
await _increment_stat("ratelimited_requests")
_prometheus.record_request(queue_item.priority.name, "ratelimited")
if not future.done():
future.set_exception(
@@ -309,7 +317,7 @@ async def _worker_loop() -> None:
except (httpx.HTTPError, OSError) as exc:
log.error("upstream_request_failed", request_id=request_id, error=str(exc))
_stats["upstream_errors"] += 1
await _increment_stat("upstream_errors")
_prometheus.record_request(queue_item.priority.name, "error")
_prometheus.set_health(False)
if not future.done():
@@ -347,7 +355,7 @@ async def _passthrough_with_rate_limit(
Returns:
FastAPI Response。
"""
_stats["passthrough_requests"] += 1
await _increment_stat("passthrough_requests")
_prometheus.increment_fallback()
# 低优先级走令牌桶等待
@@ -358,7 +366,7 @@ async def _passthrough_with_rate_limit(
timeout=_config.low_priority_timeout,
)
if not got_token:
_stats["ratelimited_requests"] += 1
await _increment_stat("ratelimited_requests")
_prometheus.record_request(priority.name, "ratelimited")
return JSONResponse(
status_code=429,
@@ -372,19 +380,20 @@ async def _passthrough_with_rate_limit(
else:
got_token = await asyncio.to_thread(_token_bucket.consume, tokens=1)
if not got_token:
# 非低优先级轮询等待
deadline = time.monotonic() + 30.0
# 非低优先级轮询等待,使用 config.request_timeout 替代硬编码 30s
# (严维序评审 minor / 梁思筑评审 #3hot-reload 假生效修复)
deadline = time.monotonic() + _config.request_timeout
while not got_token:
await asyncio.sleep(0.1)
got_token = await asyncio.to_thread(_token_bucket.consume, tokens=1)
if time.monotonic() > deadline:
_stats["ratelimited_requests"] += 1
await _increment_stat("ratelimited_requests")
_prometheus.record_request(priority.name, "ratelimited")
return JSONResponse(
status_code=429,
content={
"error": {
"message": "令牌不足(队列满 + passthrough),等待超时 30s",
"message": f"令牌不足(队列满 + passthrough),等待超时 {_config.request_timeout:.0f}s",
"type": "RateLimitedError",
}
},
@@ -464,6 +473,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
_http_client = httpx.AsyncClient(
timeout=httpx.Timeout(_config.request_timeout),
limits=httpx.Limits(
max_connections=100,
max_keepalive_connections=20,
),
)
_priority_queue = PriorityRequestQueue(max_size=_config.queue_max_size)
_token_bucket = AdaptiveTokenBucket(
@@ -489,9 +502,25 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
metrics_server = uvicorn.Server(metrics_config)
_metrics_task = asyncio.create_task(metrics_server.serve())
# CORS 中间件(严维序评审 #8)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=False,
allow_methods=["*"],
allow_headers=["*"],
)
# 挂载 webui 子路由
app.include_router(webui_router)
# upstream_api_key 启动检查(严维序评审 #5)
if not _config.upstream_api_key:
logger.warning(
"upstream_api_key_empty",
message="SIDECAR_API_KEY 未设置,NVIDIA 请求将因 401 认证失败",
)
logger.info(
"sidecar_started",
host=_config.listen_host,
@@ -541,7 +570,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
2. 网关识别 → 非 NVIDIA 直通
3. NVIDIA → 排队 + 令牌限流 + 转发
"""
_stats["total_requests"] += 1
await _increment_stat("total_requests")
# 解析请求
body_bytes: bytes = await request.body()
@@ -561,7 +590,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
# 非 NVIDIA → 直接转发
if not is_nvidia:
_stats["passthrough_requests"] += 1
await _increment_stat("passthrough_requests")
try:
resp = await _forward_to_upstream(
method=request.method,
@@ -580,7 +609,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
)
# NVIDIA → 排队 + 限流 + 转发
_stats["nvidia_requests"] += 1
await _increment_stat("nvidia_requests")
priority: Priority = _resolve_priority(raw_headers)
# 注入内部元数据到 payload
@@ -599,7 +628,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
},
)
except QueueFullError:
_stats["queue_full_rejects"] += 1
await _increment_stat("queue_full_rejects")
return JSONResponse(
status_code=503,
content={
@@ -611,7 +640,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
)
except QueueFullPassthrough:
# 队列满 + PASSTHROUGH:绕过排队,尝试令牌桶后直接转发
_stats["passthrough_requests"] += 1
await _increment_stat("passthrough_requests")
logger.info("queue_full_passthrough", path=path)
return await _passthrough_with_rate_limit(request, path, body_bytes, raw_headers, priority)