""" NVIDIA Sidecar — WebUI 后端 API 提供仪表盘 SSE 实时推送 + 配置热重载 API。 """ from __future__ import annotations import asyncio import json import os import time from pathlib import Path from typing import Any, AsyncGenerator import structlog from fastapi import APIRouter, Depends, HTTPException, Request from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer from pydantic import BaseModel webui_router: APIRouter = APIRouter(prefix="/api", tags=["webui"]) logger: structlog.stdlib.BoundLogger = structlog.get_logger("nvidia_sidecar.webui") STATIC_DIR: Path = Path(__file__).parent / "static" # dashboard.html 缓存(严维序评审 #6 / 梁思筑评审 #8:避免每次请求读磁盘) _dashboard_html_cache: tuple[str, float] | None = None _DASHBOARD_CACHE_TTL: float = 300.0 # 5 分钟 # Admin API 认证(严维序评审 #1) _ADMIN_TOKEN: str | None = os.environ.get("SIDECAR_ADMIN_TOKEN") _admin_auth_scheme: HTTPBearer = HTTPBearer(auto_error=False) # --------------------------------------------------------------------------- # 配置热重载模型 # --------------------------------------------------------------------------- class ConfigPatch(BaseModel): """可在线修改的配置字段。""" rate_rpm: int | None = None queue_max_size: int | None = None fallback_enabled_passthrough: bool | None = None # --------------------------------------------------------------------------- # 仪表盘 SSE 推送 # --------------------------------------------------------------------------- async def _dashboard_stream(request: Request) -> StreamingResponse: """SSE 实时推送 Sidecar 完整状态快照(每秒一次)。 供 dashboard.html 的 EventSource 消费。 """ async def event_generator() -> AsyncGenerator[str, None]: # 首帧发送 retry 字段(严维序评审 minor):指示客户端断连后等待 3s 重试 first_frame = True while True: if await request.is_disconnected(): break try: snapshot: dict[str, Any] = await _build_snapshot() payload_sse = f"data: {json.dumps(snapshot, ensure_ascii=False)}\n\n" if first_frame: payload_sse = f"retry: 3000\n{payload_sse}" first_frame = False yield payload_sse except Exception: logger.exception("dashboard_sse_error") yield f"data: {json.dumps({'error': 'internal'})}\n\n" await asyncio.sleep(1.0) return StreamingResponse( event_generator(), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "X-Accel-Buffering": "no", }, ) # SSE 首帧写入 retry 字段(严维序评审 minor),在 event_generator 首次 yield 前注入 # 通过在 StreamingResponse 返回前手动发送 retry header 实现 # (SSE 协议支持 retry 字段作为重建连接间隔) # 注:在 event_generator 的首个 yield 中加入 retry 声明 async def _build_snapshot() -> dict[str, Any]: """构建当前状态快照(从全局状态读取,含队列深度)。""" # 延迟导入避免循环依赖 from nvidia_sidecar import server try: _stats = server._stats _token_bucket = server._token_bucket bucket_status = _token_bucket.get_status() now = time.time() uptime = int(now - _stats["start_time"]) if _stats.get("start_time") else 0 # 获取队列统计数据(含 per-priority depth) queue_data: dict[str, Any] = {"current_size": 0, "per_priority": {}} try: queue_stats = await server._priority_queue.get_stats() queue_data = { "max_size": queue_stats.get("max_size", 0), "current_size": queue_stats.get("current_size", 0), "per_priority": queue_stats.get("depth_by_priority", {}), "total_enqueued": queue_stats.get("total_enqueued", 0), "total_dequeued": queue_stats.get("total_dequeued", 0), "total_dropped": queue_stats.get("total_dropped", 0), } except Exception: logger.warning("queue_stats_unavailable", message="队列统计获取失败,仪表盘队列深度可能不准确") return { "timestamp": now, "uptime_seconds": uptime, "token_bucket": bucket_status, "queue": queue_data, "retreat": { "state": getattr(_token_bucket, "_retreat_state", "normal"), "effective_rpm": round(getattr(_token_bucket, "get_effective_rate_rpm", lambda: 40.0)(), 1), "base_rpm": round(getattr(_token_bucket, "get_base_rate_rpm", lambda: 40.0)(), 1), "upstream_429_rate": round(getattr(_token_bucket, "get_429_rate", lambda: 0.0)(), 4), }, "requests": { "total": _stats.get("total_requests", 0), "nvidia": _stats.get("nvidia_requests", 0), "passthrough": _stats.get("passthrough_requests", 0), "ratelimited": _stats.get("ratelimited_requests", 0), }, "errors": { "queue_full_rejects": _stats.get("queue_full_rejects", 0), "upstream_errors": _stats.get("upstream_errors", 0), }, } except Exception: logger.exception("snapshot_build_error") return {"error": "snapshot_unavailable", "timestamp": time.time()} # --------------------------------------------------------------------------- # 配置热重载 # --------------------------------------------------------------------------- async def get_config() -> dict[str, Any]: """获取当前完整配置。""" from nvidia_sidecar import server cfg = server._config return { "listen_host": cfg.listen_host, "listen_port": cfg.listen_port, "metrics_port": cfg.metrics_port, "upstream_url": cfg.upstream_url, "upstream_api_key": _mask_api_key(cfg.upstream_api_key), "rate_rpm": _get_current_rate(server), "bucket_capacity": cfg.bucket_capacity, "request_timeout": cfg.request_timeout, "queue_max_size": cfg.queue_max_size, "low_priority_timeout": cfg.low_priority_timeout, "fallback_enabled_passthrough": cfg.fallback_enabled_passthrough, "log_level": cfg.log_level, } async def update_config(body: ConfigPatch) -> JSONResponse: """在线修改配置项并即时生效。""" from nvidia_sidecar import server cfg = server._config changed: list[str] = [] if body.rate_rpm is not None: if body.rate_rpm <= 0: raise HTTPException(status_code=400, detail="rate_rpm must be > 0") cfg.rate_rpm = body.rate_rpm server._token_bucket.set_rate(body.rate_rpm / 60.0) changed.append("rate_rpm") if body.queue_max_size is not None: if body.queue_max_size <= 0: raise HTTPException(status_code=400, detail="queue_max_size must be > 0") ok, msg = server._priority_queue.set_max_size(body.queue_max_size) if not ok: raise HTTPException(status_code=400, detail=msg) cfg.queue_max_size = body.queue_max_size changed.append("queue_max_size") logger.info("queue_max_size_updated", detail=msg) if body.fallback_enabled_passthrough is not None: cfg.fallback_enabled_passthrough = body.fallback_enabled_passthrough changed.append("fallback_enabled_passthrough") logger.info("config_updated", changed=changed) return JSONResponse( content={"status": "ok", "changed": changed}, ) def _mask_api_key(key: str) -> str: """对 API Key 进行脱敏处理,仅保留前 4 位以供识别。 严维序评审 #2 / 沈路明评审 #3:防止 API Key 明文泄露。 """ if not key: return "" if len(key) <= 4: return key[:2] + "****" return key[:4] + "****" def _get_current_rate(server_module: Any) -> float: """获取当前实际速率(避退调整后),兼容 AdaptiveTokenBucket。""" tb = server_module._token_bucket if hasattr(tb, "get_effective_rate_rpm"): return float(round(tb.get_effective_rate_rpm(), 1)) return float(tb.rate * 60.0) # --------------------------------------------------------------------------- # 路由注册 # --------------------------------------------------------------------------- @webui_router.get("/dashboard/stream") async def dashboard_stream(request: Request) -> StreamingResponse: """SSE 仪表盘实时推送端点。""" return await _dashboard_stream(request) async def _verify_admin_auth( credentials: HTTPAuthorizationCredentials | None = Depends(_admin_auth_scheme), ) -> None: """Admin API Bearer Token 认证(严维序评审 #1)。 若设置了 SIDECAR_ADMIN_TOKEN 环境变量,则要求请求携带匹配的 Bearer Token。 未设置时跳过认证(开发/测试环境)。 """ if _ADMIN_TOKEN is None: return # 未配置认证 token,允许无认证访问 if credentials is None: raise HTTPException(status_code=401, detail="需要 Bearer Token 认证(Admin API)") if credentials.credentials != _ADMIN_TOKEN: raise HTTPException(status_code=403, detail="Admin Token 无效") @webui_router.get("/admin/config") async def admin_get_config( _auth: None = Depends(_verify_admin_auth), ) -> JSONResponse: """获取当前配置(需要 Admin 认证)。""" return JSONResponse(content=await get_config()) @webui_router.post("/admin/config") async def admin_update_config( body: ConfigPatch, _auth: None = Depends(_verify_admin_auth), ) -> JSONResponse: """在线修改配置(热重载,需要 Admin 认证)。""" return await update_config(body) # --------------------------------------------------------------------------- # 仪表盘静态页面 # --------------------------------------------------------------------------- def _get_dashboard_html() -> str: """获取仪表盘 HTML(带缓存,严维序评审 #6 / 梁思筑评审 #8)。 首次加载后缓存 5 分钟,避免每次请求读磁盘。 """ global _dashboard_html_cache now = time.monotonic() if _dashboard_html_cache is not None: cached_content, cached_at = _dashboard_html_cache if now - cached_at < _DASHBOARD_CACHE_TTL: return cached_content dashboard_path = STATIC_DIR / "dashboard.html" if dashboard_path.is_file(): content = dashboard_path.read_text(encoding="utf-8") _dashboard_html_cache = (content, now) return content return "

dashboard.html not found

" @webui_router.get("/dashboard", include_in_schema=False) async def dashboard_page() -> HTMLResponse: """仪表盘 HTML 页面(含缓存策略)。""" return HTMLResponse(content=_get_dashboard_html())