8a12ff9693
Co-authored-by: multica-agent <github@multica.ai>
293 lines
11 KiB
Python
293 lines
11 KiB
Python
"""
|
||
NVIDIA Sidecar — WebUI backend API.

Provides the dashboard's real-time SSE push and the configuration hot-reload API.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import json
|
||
import os
|
||
import time
|
||
from pathlib import Path
|
||
from typing import Any, AsyncGenerator
|
||
|
||
import structlog
|
||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
|
||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||
from pydantic import BaseModel
|
||
|
||
# Router carrying every WebUI endpoint; all routes are mounted under the /api prefix.
webui_router: APIRouter = APIRouter(prefix="/api", tags=["webui"])
logger: structlog.stdlib.BoundLogger = structlog.get_logger("nvidia_sidecar.webui")

# Directory (next to this module) that holds the static dashboard assets.
STATIC_DIR: Path = Path(__file__).parent / "static"

# Cache for dashboard.html as (content, time.monotonic() timestamp), added per
# review feedback so the file is not read from disk on every request.
_dashboard_html_cache: tuple[str, float] | None = None
_DASHBOARD_CACHE_TTL: float = 300.0  # seconds (5 minutes)

# Admin API authentication (review feedback): the expected bearer token comes from
# the SIDECAR_ADMIN_TOKEN environment variable and is read once at import time.
_ADMIN_TOKEN: str | None = os.environ.get("SIDECAR_ADMIN_TOKEN")
# auto_error=False: a missing Authorization header yields None instead of an
# automatic error, so _verify_admin_auth decides how to respond.
_admin_auth_scheme: HTTPBearer = HTTPBearer(auto_error=False)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Configuration hot-reload model
# ---------------------------------------------------------------------------
|
||
|
||
class ConfigPatch(BaseModel):
    """Request body describing which settings to change at runtime.

    Every field is optional; a field left as ``None`` means "leave this
    setting unchanged".
    """

    # New base request rate, in requests per minute.
    rate_rpm: int | None = None
    # New maximum size of the priority queue.
    queue_max_size: int | None = None
    # New value for the ``fallback_enabled_passthrough`` configuration flag.
    fallback_enabled_passthrough: bool | None = None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Dashboard SSE push
# ---------------------------------------------------------------------------
|
||
|
||
async def _dashboard_stream(request: Request) -> StreamingResponse:
    """Stream a full Sidecar state snapshot as Server-Sent Events, once per second.

    Intended to be consumed by the ``EventSource`` in dashboard.html.

    Args:
        request: The incoming request; polled each iteration so the stream
            stops once the client has disconnected.

    Returns:
        A ``text/event-stream`` response backed by an endless async generator.
    """

    async def event_generator() -> AsyncGenerator[str, None]:
        # The very first frame also carries the SSE ``retry`` field, telling the
        # client to wait 3 s before reconnecting after a dropped connection.
        retry_pending = True
        while not await request.is_disconnected():
            try:
                state: dict[str, Any] = await _build_snapshot()
                frame = f"data: {json.dumps(state, ensure_ascii=False)}\n\n"
                if retry_pending:
                    frame = f"retry: 3000\n{frame}"
                    retry_pending = False
                yield frame
            except Exception:
                # Keep the stream alive: log the failure and send an error frame.
                logger.exception("dashboard_sse_error")
                yield f"data: {json.dumps({'error': 'internal'})}\n\n"
            await asyncio.sleep(1.0)

    sse_headers = {
        "Cache-Control": "no-cache",
        # Asks a buffering reverse proxy (e.g. nginx) to pass frames through immediately.
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
|
||
|
||
|
||
# NOTE: the SSE ``retry`` field (the client's reconnect delay) is not sent as an
# HTTP header. It is prepended to the first frame yielded by ``event_generator``
# inside ``_dashboard_stream``; the SSE protocol defines ``retry`` as a stream field.
|
||
|
||
|
||
async def _build_snapshot() -> dict[str, Any]:
    """Assemble the state snapshot that is pushed to the dashboard.

    Reads the ``_stats``, ``_token_bucket`` and ``_priority_queue`` globals of
    ``nvidia_sidecar.server``. Queue statistics are best-effort: if they cannot
    be obtained, a warning is logged and a minimal placeholder
    (``current_size`` 0, empty ``per_priority``) is reported instead.

    Returns:
        A dict with the keys ``timestamp``, ``uptime_seconds``, ``token_bucket``,
        ``queue``, ``retreat``, ``requests`` and ``errors``. If building it fails
        for any other reason, the exception is logged and
        ``{"error": "snapshot_unavailable", "timestamp": ...}`` is returned.
    """
    # Imported lazily to avoid a circular dependency.
    from nvidia_sidecar import server

    try:
        counters = server._stats
        bucket = server._token_bucket
        bucket_status = bucket.get_status()
        now = time.time()
        started_at = counters.get("start_time")
        uptime = int(now - started_at) if started_at else 0

        def bucket_metric(method_name: str, fallback: float, ndigits: int) -> float:
            # The bucket may not expose this getter; report the fallback then.
            getter = getattr(bucket, method_name, lambda: fallback)
            return round(getter(), ndigits)

        # Queue statistics, including the per-priority depth.
        queue_info: dict[str, Any] = {"current_size": 0, "per_priority": {}}
        try:
            raw_queue = await server._priority_queue.get_stats()
            # (snapshot key, key in the queue's stats, default when missing)
            queue_fields = (
                ("max_size", "max_size", 0),
                ("current_size", "current_size", 0),
                ("per_priority", "depth_by_priority", {}),
                ("total_enqueued", "total_enqueued", 0),
                ("total_dequeued", "total_dequeued", 0),
                ("total_dropped", "total_dropped", 0),
            )
            queue_info = {
                out_key: raw_queue.get(src_key, default)
                for out_key, src_key, default in queue_fields
            }
        except Exception:
            logger.warning("queue_stats_unavailable", message="队列统计获取失败,仪表盘队列深度可能不准确")

        retreat_info = {
            "state": getattr(bucket, "_retreat_state", "normal"),
            "effective_rpm": bucket_metric("get_effective_rate_rpm", 40.0, 1),
            "base_rpm": bucket_metric("get_base_rate_rpm", 40.0, 1),
            "upstream_429_rate": bucket_metric("get_429_rate", 0.0, 4),
        }
        request_counts = {
            label: counters.get(stat_key, 0)
            for label, stat_key in (
                ("total", "total_requests"),
                ("nvidia", "nvidia_requests"),
                ("passthrough", "passthrough_requests"),
                ("ratelimited", "ratelimited_requests"),
            )
        }
        error_counts = {
            name: counters.get(name, 0)
            for name in ("queue_full_rejects", "upstream_errors")
        }

        return {
            "timestamp": now,
            "uptime_seconds": uptime,
            "token_bucket": bucket_status,
            "queue": queue_info,
            "retreat": retreat_info,
            "requests": request_counts,
            "errors": error_counts,
        }
    except Exception:
        logger.exception("snapshot_build_error")
        return {"error": "snapshot_unavailable", "timestamp": time.time()}
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Configuration hot reload
# ---------------------------------------------------------------------------
|
||
|
||
async def get_config() -> dict[str, Any]:
    """Return the current configuration as a plain dict.

    The upstream API key is masked via ``_mask_api_key`` and ``rate_rpm`` is the
    value reported by ``_get_current_rate`` rather than the stored setting.
    """
    from nvidia_sidecar import server

    cfg = server._config

    # Fields copied verbatim from the config object, in output order.
    view: dict[str, Any] = {
        field: getattr(cfg, field)
        for field in ("listen_host", "listen_port", "metrics_port", "upstream_url")
    }
    view["upstream_api_key"] = _mask_api_key(cfg.upstream_api_key)
    view["rate_rpm"] = _get_current_rate(server)
    for field in (
        "bucket_capacity",
        "request_timeout",
        "queue_max_size",
        "low_priority_timeout",
        "fallback_enabled_passthrough",
        "log_level",
    ):
        view[field] = getattr(cfg, field)
    return view
|
||
|
||
|
||
async def update_config(body: ConfigPatch) -> JSONResponse:
    """Apply the requested configuration changes at runtime.

    Only fields of ``body`` that are not ``None`` are applied. All bounds are
    validated before anything is modified, and the queue resize (the only step
    that can still be rejected afterwards) runs before the other changes, so a
    400 response never leaves the configuration partially updated.

    Args:
        body: The settings to change.

    Returns:
        ``{"status": "ok", "changed": [...]}`` listing the fields that were applied.

    Raises:
        HTTPException: 400 if ``rate_rpm`` or ``queue_max_size`` is not positive,
            or if the priority queue refuses the new maximum size.
    """
    from nvidia_sidecar import server

    # 1) Validate everything up front; nothing has been mutated yet.
    if body.rate_rpm is not None and body.rate_rpm <= 0:
        raise HTTPException(status_code=400, detail="rate_rpm must be > 0")
    if body.queue_max_size is not None and body.queue_max_size <= 0:
        raise HTTPException(status_code=400, detail="queue_max_size must be > 0")

    cfg = server._config

    # 2) Run the fallible step first so a rejection leaves all settings untouched.
    queue_msg: Any = None
    if body.queue_max_size is not None:
        ok, queue_msg = server._priority_queue.set_max_size(body.queue_max_size)
        if not ok:
            raise HTTPException(status_code=400, detail=queue_msg)
        cfg.queue_max_size = body.queue_max_size

    # 3) Apply the remaining changes; ``changed`` keeps the field order callers
    #    have always seen (rate, queue, fallback).
    changed: list[str] = []

    if body.rate_rpm is not None:
        cfg.rate_rpm = body.rate_rpm
        # The token bucket takes a per-second rate.
        server._token_bucket.set_rate(body.rate_rpm / 60.0)
        changed.append("rate_rpm")

    if body.queue_max_size is not None:
        changed.append("queue_max_size")
        logger.info("queue_max_size_updated", detail=queue_msg)

    if body.fallback_enabled_passthrough is not None:
        cfg.fallback_enabled_passthrough = body.fallback_enabled_passthrough
        changed.append("fallback_enabled_passthrough")

    logger.info("config_updated", changed=changed)
    return JSONResponse(
        content={"status": "ok", "changed": changed},
    )
|
||
|
||
|
||
def _mask_api_key(key: str) -> str:
|
||
"""对 API Key 进行脱敏处理,仅保留前 4 位以供识别。
|
||
|
||
严维序评审 #2 / 沈路明评审 #3:防止 API Key 明文泄露。
|
||
"""
|
||
if not key:
|
||
return ""
|
||
if len(key) <= 4:
|
||
return key[:2] + "****"
|
||
return key[:4] + "****"
|
||
|
||
|
||
def _get_current_rate(server_module: Any) -> float:
|
||
"""获取当前实际速率(避退调整后),兼容 AdaptiveTokenBucket。"""
|
||
tb = server_module._token_bucket
|
||
if hasattr(tb, "get_effective_rate_rpm"):
|
||
return float(round(tb.get_effective_rate_rpm(), 1))
|
||
return float(tb.rate * 60.0)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Route registration
# ---------------------------------------------------------------------------
|
||
|
||
@webui_router.get("/dashboard/stream")
async def dashboard_stream(request: Request) -> StreamingResponse:
    """SSE endpoint that pushes live dashboard snapshots to the client."""
    sse_response = await _dashboard_stream(request)
    return sse_response
|
||
|
||
|
||
async def _verify_admin_auth(
    credentials: HTTPAuthorizationCredentials | None = Depends(_admin_auth_scheme),
) -> None:
    """Bearer-token authentication for the Admin API (added per review feedback).

    When the SIDECAR_ADMIN_TOKEN environment variable is set, the request must
    carry a matching Bearer token. When it is not set, authentication is
    skipped (development / test environments).

    Args:
        credentials: Parsed ``Authorization: Bearer ...`` header, or ``None``
            when the header is absent.

    Raises:
        HTTPException: 401 if a token is required but none was supplied;
            403 if the supplied token does not match.
    """
    # Local import keeps the new dependency inside this block.
    import hmac

    if _ADMIN_TOKEN is None:
        return  # no admin token configured: unauthenticated access is allowed

    if credentials is None:
        raise HTTPException(
            status_code=401,
            detail="需要 Bearer Token 认证(Admin API)",
            # Tell the client which authentication scheme is expected.
            headers={"WWW-Authenticate": "Bearer"},
        )

    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compare bytes: compare_digest rejects non-ASCII str.
    supplied = credentials.credentials.encode("utf-8")
    expected = _ADMIN_TOKEN.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=403, detail="Admin Token 无效")
|
||
|
||
|
||
@webui_router.get("/admin/config")
async def admin_get_config(
    _auth: None = Depends(_verify_admin_auth),
) -> JSONResponse:
    """Return the current configuration as JSON (Admin authentication required)."""
    current_config = await get_config()
    return JSONResponse(content=current_config)
|
||
|
||
|
||
@webui_router.post("/admin/config")
async def admin_update_config(
    body: ConfigPatch,
    _auth: None = Depends(_verify_admin_auth),
) -> JSONResponse:
    """Change configuration at runtime, i.e. hot reload (Admin authentication required)."""
    result = await update_config(body)
    return result
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Dashboard static page
# ---------------------------------------------------------------------------
|
||
|
||
def _get_dashboard_html() -> str:
    """Return the dashboard HTML, served from an in-memory cache when fresh.

    Added per review feedback: after a successful load the content is cached
    for ``_DASHBOARD_CACHE_TTL`` seconds (5 minutes) so the file is not read
    from disk on every request. A missing file yields a small placeholder page,
    which is not cached.
    """
    global _dashboard_html_cache

    now = time.monotonic()
    entry = _dashboard_html_cache
    if entry is not None and now - entry[1] < _DASHBOARD_CACHE_TTL:
        return entry[0]

    html_file = STATIC_DIR / "dashboard.html"
    if not html_file.is_file():
        return "<h1>dashboard.html not found</h1>"

    html = html_file.read_text(encoding="utf-8")
    _dashboard_html_cache = (html, now)
    return html
|
||
|
||
|
||
@webui_router.get("/dashboard", include_in_schema=False)
async def dashboard_page() -> HTMLResponse:
    """Serve the dashboard HTML page (content comes from the cached loader)."""
    page_html = _get_dashboard_html()
    return HTMLResponse(content=page_html)