""" NVIDIA Sidecar — SidecarContext 依赖注入容器 (§BIZ-46 Phase3) 将所有模块级全局状态收敛为单一 dataclass,通过 FastAPI app.state 注入, 消除 webui.py → server 的反向导入,支持可测试性和多实例扩展。 设计文档: docs/architecture/BIZ-46_Phase3_Architecture_Design.md §1 """ from __future__ import annotations import asyncio import time from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any import httpx if TYPE_CHECKING: from nvidia_sidecar.config import SidecarConfig from nvidia_sidecar.rate_limiter import AdaptiveTokenBucket from nvidia_sidecar.priority_queue import PriorityRequestQueue from nvidia_sidecar.metrics import PrometheusMetrics from nvidia_sidecar.health import HealthService @dataclass class SidecarContext: """Sidecar 全局运行时上下文 — 所有核心组件的唯一容器。 通过 ``app.state.sidecar`` 注入 FastAPI,路由通过 ``Depends(get_context)`` 获取。 """ # ---- 核心组件 ---- config: SidecarConfig http_client: httpx.AsyncClient token_bucket: AdaptiveTokenBucket priority_queue: PriorityRequestQueue prometheus: PrometheusMetrics health: HealthService # ---- 运行时状态 ---- pending_requests: dict[str, tuple["asyncio.Future[Any]", float]] = field(default_factory=dict) """request_id → (response future, enqueued_at) 的映射。""" stats: dict[str, int] = field(default_factory=lambda: { "total_requests": 0, "nvidia_requests": 0, "passthrough_requests": 0, "ratelimited_requests": 0, "queue_full_rejects": 0, "upstream_errors": 0, "start_time": 0, }) stats_lock: asyncio.Lock = field(default_factory=asyncio.Lock) # ---- 缓存 ---- snapshot_cache: tuple["dict[str, Any]", float] | None = None """SSE 快照共享缓存: (data, timestamp)。""" snapshot_cache_lock: asyncio.Lock = field(default_factory=asyncio.Lock) SNAPSHOT_CACHE_TTL: float = 1.0 # ---- 便捷方法 ---- async def increment_stat(self, key: str, delta: int = 1) -> None: """线程安全的统计计数器自增。""" async with self.stats_lock: self.stats[key] = self.stats.get(key, 0) + delta @property def uptime_seconds(self) -> int: """服务运行时长(秒)。""" st = self.stats.get("start_time", 0) return int(time.time() - st) if st else 0