fix(BIZ-42): 综合评审优化 — 12项修复
四轮评审反馈全部处理: 🔴 Critical (5): - _stats data race: 新增 _stats_lock (asyncio.Lock) + _increment_stat() helper - Admin API 无认证: 新增 SIDECAR_ADMIN_TOKEN Bearer Token 认证 - API Key 明文暴露: GET config 返回 masked api_key (前4位+****) - queue_max_size hot-reload 假生效: PriorityQueue.set_max_size() + 收缩保护 - SIDECAR_TIMEOUT 6000→60s + 上限截断 300s 🟠 Major (3): - upstream_api_key 启动检查: lifespan 阶段 warning 日志 - Dashboard HTML 无缓存: 300s TTL 内存缓存 - queue_stats 异常日志: logger.warning(queue_stats_unavailable) 🟡 Medium (3): - CORS middleware 配置 - httpx 连接池限制 (max_connections=100, keepalive=20) - SSE retry: 3000 字段 🟢 Minor (1): - _extract_model 类型注解 body: dict→Any - passthrough 硬编码 30s→_config.request_timeout mypy strict: 5 files, zero errors Reviewed-by: 梁思筑, 严维序, 陆怀瑾, 沈路明 Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -20,6 +20,7 @@ import httpx
|
||||
import structlog
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, Request, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
|
||||
from nvidia_sidecar.config import load_config, SidecarConfig
|
||||
@@ -76,7 +77,7 @@ _pending_requests: dict[str, tuple[asyncio.Future[httpx.Response], float]]
|
||||
"""request_id → (response future, enqueued_at) 的映射。"""
|
||||
_metrics_task: asyncio.Task[None] | None = None
|
||||
|
||||
# 统计计数器
|
||||
# 统计计数器(受 _stats_lock 保护, 修复梁思筑评审 #1: data race)
|
||||
_stats: dict[str, int] = {
|
||||
"total_requests": 0,
|
||||
"nvidia_requests": 0,
|
||||
@@ -86,13 +87,20 @@ _stats: dict[str, int] = {
|
||||
"upstream_errors": 0,
|
||||
"start_time": 0,
|
||||
}
|
||||
_stats_lock: asyncio.Lock = asyncio.Lock()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 工具函数
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _extract_model(body: dict[str, Any]) -> str | None:
|
||||
async def _increment_stat(key: str, delta: int = 1) -> None:
|
||||
"""线程安全的 _stats 计数器自增(梁思筑评审 #1 修复:消除 data race)。"""
|
||||
async with _stats_lock:
|
||||
_stats[key] = _stats.get(key, 0) + delta
|
||||
|
||||
|
||||
def _extract_model(body: Any) -> str | None:
|
||||
"""从请求体中提取模型标识符(兼容 OpenAI Chat/Completions 格式)。
|
||||
|
||||
Args:
|
||||
@@ -213,7 +221,7 @@ async def _worker_loop() -> None:
|
||||
)
|
||||
if not got_token:
|
||||
log.info("low_priority_timeout", request_id=request_id)
|
||||
_stats["ratelimited_requests"] += 1
|
||||
await _increment_stat("ratelimited_requests")
|
||||
_prometheus.record_request(queue_item.priority.name, "ratelimited")
|
||||
if not future.done():
|
||||
future.set_exception(
|
||||
@@ -241,7 +249,7 @@ async def _worker_loop() -> None:
|
||||
priority=queue_item.priority.name,
|
||||
timeout=_config.request_timeout,
|
||||
)
|
||||
_stats["ratelimited_requests"] += 1
|
||||
await _increment_stat("ratelimited_requests")
|
||||
_prometheus.record_request(queue_item.priority.name, "ratelimited")
|
||||
if not future.done():
|
||||
future.set_exception(
|
||||
@@ -309,7 +317,7 @@ async def _worker_loop() -> None:
|
||||
|
||||
except (httpx.HTTPError, OSError) as exc:
|
||||
log.error("upstream_request_failed", request_id=request_id, error=str(exc))
|
||||
_stats["upstream_errors"] += 1
|
||||
await _increment_stat("upstream_errors")
|
||||
_prometheus.record_request(queue_item.priority.name, "error")
|
||||
_prometheus.set_health(False)
|
||||
if not future.done():
|
||||
@@ -347,7 +355,7 @@ async def _passthrough_with_rate_limit(
|
||||
Returns:
|
||||
FastAPI Response。
|
||||
"""
|
||||
_stats["passthrough_requests"] += 1
|
||||
await _increment_stat("passthrough_requests")
|
||||
_prometheus.increment_fallback()
|
||||
|
||||
# 低优先级走令牌桶等待
|
||||
@@ -358,7 +366,7 @@ async def _passthrough_with_rate_limit(
|
||||
timeout=_config.low_priority_timeout,
|
||||
)
|
||||
if not got_token:
|
||||
_stats["ratelimited_requests"] += 1
|
||||
await _increment_stat("ratelimited_requests")
|
||||
_prometheus.record_request(priority.name, "ratelimited")
|
||||
return JSONResponse(
|
||||
status_code=429,
|
||||
@@ -372,19 +380,20 @@ async def _passthrough_with_rate_limit(
|
||||
else:
|
||||
got_token = await asyncio.to_thread(_token_bucket.consume, tokens=1)
|
||||
if not got_token:
|
||||
# 非低优先级轮询等待
|
||||
deadline = time.monotonic() + 30.0
|
||||
# 非低优先级轮询等待,使用 config.request_timeout 替代硬编码 30s
|
||||
# (严维序评审 minor / 梁思筑评审 #3:hot-reload 假生效修复)
|
||||
deadline = time.monotonic() + _config.request_timeout
|
||||
while not got_token:
|
||||
await asyncio.sleep(0.1)
|
||||
got_token = await asyncio.to_thread(_token_bucket.consume, tokens=1)
|
||||
if time.monotonic() > deadline:
|
||||
_stats["ratelimited_requests"] += 1
|
||||
await _increment_stat("ratelimited_requests")
|
||||
_prometheus.record_request(priority.name, "ratelimited")
|
||||
return JSONResponse(
|
||||
status_code=429,
|
||||
content={
|
||||
"error": {
|
||||
"message": "令牌不足(队列满 + passthrough),等待超时 30s",
|
||||
"message": f"令牌不足(队列满 + passthrough),等待超时 {_config.request_timeout:.0f}s",
|
||||
"type": "RateLimitedError",
|
||||
}
|
||||
},
|
||||
@@ -464,6 +473,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
|
||||
|
||||
_http_client = httpx.AsyncClient(
|
||||
timeout=httpx.Timeout(_config.request_timeout),
|
||||
limits=httpx.Limits(
|
||||
max_connections=100,
|
||||
max_keepalive_connections=20,
|
||||
),
|
||||
)
|
||||
_priority_queue = PriorityRequestQueue(max_size=_config.queue_max_size)
|
||||
_token_bucket = AdaptiveTokenBucket(
|
||||
@@ -489,9 +502,25 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
|
||||
metrics_server = uvicorn.Server(metrics_config)
|
||||
_metrics_task = asyncio.create_task(metrics_server.serve())
|
||||
|
||||
# CORS 中间件(严维序评审 #8)
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=False,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# 挂载 webui 子路由
|
||||
app.include_router(webui_router)
|
||||
|
||||
# upstream_api_key 启动检查(严维序评审 #5)
|
||||
if not _config.upstream_api_key:
|
||||
logger.warning(
|
||||
"upstream_api_key_empty",
|
||||
message="SIDECAR_API_KEY 未设置,NVIDIA 请求将因 401 认证失败",
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"sidecar_started",
|
||||
host=_config.listen_host,
|
||||
@@ -541,7 +570,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
|
||||
2. 网关识别 → 非 NVIDIA 直通
|
||||
3. NVIDIA → 排队 + 令牌限流 + 转发
|
||||
"""
|
||||
_stats["total_requests"] += 1
|
||||
await _increment_stat("total_requests")
|
||||
|
||||
# 解析请求
|
||||
body_bytes: bytes = await request.body()
|
||||
@@ -561,7 +590,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
|
||||
|
||||
# 非 NVIDIA → 直接转发
|
||||
if not is_nvidia:
|
||||
_stats["passthrough_requests"] += 1
|
||||
await _increment_stat("passthrough_requests")
|
||||
try:
|
||||
resp = await _forward_to_upstream(
|
||||
method=request.method,
|
||||
@@ -580,7 +609,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
|
||||
)
|
||||
|
||||
# NVIDIA → 排队 + 限流 + 转发
|
||||
_stats["nvidia_requests"] += 1
|
||||
await _increment_stat("nvidia_requests")
|
||||
priority: Priority = _resolve_priority(raw_headers)
|
||||
|
||||
# 注入内部元数据到 payload
|
||||
@@ -599,7 +628,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
|
||||
},
|
||||
)
|
||||
except QueueFullError:
|
||||
_stats["queue_full_rejects"] += 1
|
||||
await _increment_stat("queue_full_rejects")
|
||||
return JSONResponse(
|
||||
status_code=503,
|
||||
content={
|
||||
@@ -611,7 +640,7 @@ async def _handle_proxy_request(request: Request, path: str) -> Response:
|
||||
)
|
||||
except QueueFullPassthrough:
|
||||
# 队列满 + PASSTHROUGH:绕过排队,尝试令牌桶后直接转发
|
||||
_stats["passthrough_requests"] += 1
|
||||
await _increment_stat("passthrough_requests")
|
||||
logger.info("queue_full_passthrough", path=path)
|
||||
return await _passthrough_with_rate_limit(request, path, body_bytes, raw_headers, priority)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user