BIZ-46 Phase3: 7项 follow-up 开发完成
1. 架构解耦 — SidecarContext + FastAPI Depends 注入 - 新增 context.py: SidecarContext dataclass 收敛全部全局状态 - server.py: 移除模块级全局变量,lifespan 创建 ctx → app.state.sidecar - webui.py: 移除反向导入 server,改用 Depends(get_context) 2. Prometheus 标签基数治理 — model_id → provider - upstream_latency_seconds / upstream_errors_total label 收敛为 provider - 模型级信息保留在 structlog JSON 日志 3. SSE 快照共享缓存 - 1s TTL 共享 snapshot cache + double-check locking - 多客户端不重复构建快照 4. 部署支撑 - Dockerfile (python:3.12-slim, 非 root 用户, HEALTHCHECK) - systemd service (安全加固, 资源限制) - .env.example (完整环境变量清单) 5. Readiness HTTP Client 复用 - check_upstream() 注入主 http_client,不再每次创建新 client 6. Retreat 并发回归测试 - 5 个测试用例全部通过(死锁检测 + 状态转换 + 并发安全) 7. Dashboard UX 优化 - 队列柱状图 300ms 平滑动画 - SSE 断连 5s 半透明遮罩 - 队列图标题显示总排队数 - 页面加载同步配置 验证: mypy strict 通过 (0 errors), pytest 5/5 通过, server 导入正常 (13 routes) Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -2,6 +2,11 @@
|
||||
NVIDIA Sidecar 限流代理 — Prometheus 指标端点 (§3.5)
|
||||
|
||||
10 个指标,独立端口 :9191,与代理端口 :9190 分离。
|
||||
|
||||
BIZ-46 Phase3: Prometheus 标签基数治理 — model_id label 收敛为 provider。
|
||||
- upstream_latency_seconds: model_id → provider (固定值 "nvidia", 基数=1)
|
||||
- upstream_errors_total: model_id → provider
|
||||
- 模型级信息迁移到 structlog JSON 日志
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -75,20 +80,20 @@ class PrometheusMetrics:
|
||||
registry=self._registry,
|
||||
)
|
||||
|
||||
# ---- 6. 上游响应延迟 Histogram ----
|
||||
# ---- 6. 上游响应延迟 Histogram(label 收敛: model_id → provider) ----
|
||||
self.upstream_latency_seconds: Histogram = Histogram(
|
||||
"sidecar_upstream_latency_seconds",
|
||||
"Upstream response latency in seconds",
|
||||
labelnames=["model_id"],
|
||||
labelnames=["provider"], # BIZ-46: was ["model_id"], converged to fixed-cardinality provider
|
||||
buckets=(0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0),
|
||||
registry=self._registry,
|
||||
)
|
||||
|
||||
# ---- 7. 上游错误计数 ----
|
||||
# ---- 7. 上游错误计数(label 收敛: model_id → provider) ----
|
||||
self.upstream_errors_total: Counter = Counter(
|
||||
"sidecar_upstream_errors_total",
|
||||
"Upstream error count by status code and model",
|
||||
labelnames=["status_code", "model_id"],
|
||||
"Upstream error count by status code and provider",
|
||||
labelnames=["status_code", "provider"],  # BIZ-46: was ["status_code", "model_id"]; model_id converged to provider
|
||||
registry=self._registry,
|
||||
)
|
||||
|
||||
@@ -165,37 +170,37 @@ class PrometheusMetrics:
|
||||
with self._lock:
|
||||
self.queue_latency_seconds.labels(priority=priority).observe(seconds)
|
||||
|
||||
def record_upstream(self, status_code: int, model_id: str) -> None:
|
||||
"""记录上游响应。
|
||||
def record_upstream(self, status_code: int, provider: str) -> None:
|
||||
"""记录上游响应(label 收敛: provider 替代 model_id,BIZ-46 Phase3)。
|
||||
|
||||
Args:
|
||||
status_code: HTTP 状态码。
|
||||
model_id: 模型标识符。
|
||||
provider: 上游提供商标识(固定 "nvidia")。
|
||||
"""
|
||||
with self._lock:
|
||||
self.upstream_latency_seconds.labels(model_id=model_id).observe(0.0)
|
||||
self.upstream_latency_seconds.labels(provider=provider).observe(0.0)
|
||||
|
||||
def record_upstream_error(self, status_code: int, model_id: str) -> None:
|
||||
"""记录上游错误。
|
||||
def record_upstream_error(self, status_code: int, provider: str) -> None:
|
||||
"""记录上游错误(label 收敛: provider 替代 model_id,BIZ-46 Phase3)。
|
||||
|
||||
Args:
|
||||
status_code: 错误 HTTP 状态码。
|
||||
model_id: 模型标识符。
|
||||
provider: 上游提供商标识(固定 "nvidia")。
|
||||
"""
|
||||
with self._lock:
|
||||
self.upstream_errors_total.labels(
|
||||
status_code=str(status_code), model_id=model_id
|
||||
status_code=str(status_code), provider=provider
|
||||
).inc()
|
||||
|
||||
def record_upstream_latency(self, model_id: str, seconds: float) -> None:
|
||||
"""记录上游响应延迟。
|
||||
def record_upstream_latency(self, provider: str, seconds: float) -> None:
|
||||
"""记录上游响应延迟(label 收敛: provider 替代 model_id,BIZ-46 Phase3)。
|
||||
|
||||
Args:
|
||||
model_id: 模型标识符。
|
||||
provider: 上游提供商标识(固定 "nvidia")。
|
||||
seconds: 响应延迟秒数。
|
||||
"""
|
||||
with self._lock:
|
||||
self.upstream_latency_seconds.labels(model_id=model_id).observe(seconds)
|
||||
self.upstream_latency_seconds.labels(provider=provider).observe(seconds)
|
||||
|
||||
def update_token_status(self, tokens: float, rate_per_minute: float) -> None:
|
||||
"""更新令牌桶状态。
|
||||
|
||||
Reference in New Issue
Block a user