BIZ-46 Phase3: 7项 follow-up 开发完成
1. 架构解耦 — SidecarContext + FastAPI Depends 注入 - 新增 context.py: SidecarContext dataclass 收敛全部全局状态 - server.py: 移除模块级全局变量,lifespan 创建 ctx → app.state.sidecar - webui.py: 移除反向导入 server,改用 Depends(get_context) 2. Prometheus 标签基数治理 — model_id → provider - upstream_latency_seconds / upstream_errors_total label 收敛为 provider - 模型级信息保留在 structlog JSON 日志 3. SSE 快照共享缓存 - 1s TTL 共享 snapshot cache + double-check locking - 多客户端不重复构建快照 4. 部署支撑 - Dockerfile (python:3.12-slim, 非 root 用户, HEALTHCHECK) - systemd service (安全加固, 资源限制) - .env.example (完整环境变量清单) 5. Readiness HTTP Client 复用 - check_upstream() 注入主 http_client,不再每次创建新 client 6. Retreat 并发回归测试 - 5 个测试用例全部通过(死锁检测 + 状态转换 + 并发安全) 7. Dashboard UX 优化 - 队列柱状图 300ms 平滑动画 - SSE 断连 5s 半透明遮罩 - 队列图标题显示总排队数 - 页面加载同步配置 验证: mypy strict 通过 (0 errors), pytest 5/5 通过, server 导入正常 (13 routes) Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -4,11 +4,13 @@ NVIDIA Sidecar 限流代理 — 健康检查端点 (§3.6)
|
||||
提供 Kubernetes / systemd 兼容的健康检查:
|
||||
GET /health — 存活检查
|
||||
GET /health/ready — 就绪检查(含上游连通性)
|
||||
|
||||
BIZ-46 Phase3: Readiness HTTP Client 复用 — 注入主 http_client,
|
||||
不再每次检查创建新 client,降低 K8s/systemd 高频探测的连接开销。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
@@ -38,14 +40,16 @@ class HealthService:
|
||||
async def check_upstream(
|
||||
self,
|
||||
upstream_url: str,
|
||||
http_client: httpx.AsyncClient,
|
||||
timeout: float = 5.0,
|
||||
api_key: str = "",
|
||||
) -> bool:
|
||||
"""检查上游连通性。
|
||||
"""检查上游连通性(复用注入的 http_client,BIZ-46 Phase3)。
|
||||
|
||||
Args:
|
||||
upstream_url: NVIDIA API base URL。
|
||||
timeout: 超时秒数。
|
||||
http_client: 复用的 httpx.AsyncClient(来自 ctx)。
|
||||
timeout: 超时秒数(per-request override)。
|
||||
api_key: 可选的 API Key 用于认证。
|
||||
|
||||
Returns:
|
||||
@@ -56,12 +60,12 @@ class HealthService:
|
||||
if api_key:
|
||||
headers["authorization"] = f"Bearer {api_key}"
|
||||
|
||||
async with httpx.AsyncClient(timeout=timeout) as client:
|
||||
resp = await client.get(
|
||||
f"{upstream_url.rstrip('/')}/v1/models",
|
||||
headers=headers,
|
||||
)
|
||||
return resp.status_code < 500
|
||||
resp = await http_client.get(
|
||||
f"{upstream_url.rstrip('/')}/v1/models",
|
||||
headers=headers,
|
||||
timeout=timeout,
|
||||
)
|
||||
return resp.status_code < 500
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -125,6 +129,7 @@ class HealthService:
|
||||
queue_max_size: int = 500,
|
||||
available_tokens: float = 0.0,
|
||||
bucket_capacity: int = 40,
|
||||
http_client: httpx.AsyncClient | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""就绪检查响应。
|
||||
|
||||
@@ -135,11 +140,22 @@ class HealthService:
|
||||
queue_max_size: 队列最大容量。
|
||||
available_tokens: 当前令牌数。
|
||||
bucket_capacity: 桶容量。
|
||||
http_client: 复用的 httpx.AsyncClient(BIZ-46 Phase3)。
|
||||
为 None 时回退到每次创建新 client(兼容旧调用)。
|
||||
|
||||
Returns:
|
||||
readiness JSON payload。
|
||||
"""
|
||||
upstream_ok = await self.check_upstream(upstream_url, api_key=upstream_api_key)
|
||||
if http_client is not None:
|
||||
upstream_ok = await self.check_upstream(
|
||||
upstream_url, http_client=http_client, api_key=upstream_api_key,
|
||||
)
|
||||
else:
|
||||
# 向后兼容:无 http_client 时沿用旧行为
|
||||
upstream_ok = await self.check_upstream_standalone(
|
||||
upstream_url, api_key=upstream_api_key,
|
||||
)
|
||||
|
||||
queue_ok = self.check_queue_healthy(queue_current_size, queue_max_size)
|
||||
token_ok = self.check_token_bucket_healthy(available_tokens, bucket_capacity)
|
||||
all_ready = upstream_ok and queue_ok and token_ok
|
||||
@@ -149,4 +165,34 @@ class HealthService:
|
||||
"upstream_reachable": upstream_ok,
|
||||
"queue_healthy": queue_ok,
|
||||
"token_bucket_healthy": token_ok,
|
||||
}
|
||||
}
|
||||
|
||||
async def check_upstream_standalone(
    self,
    upstream_url: str,
    timeout: float = 5.0,
    api_key: str = "",
) -> bool:
    """Probe upstream reachability with a throwaway HTTP client.

    Backward-compatible variant: a new ``httpx.AsyncClient`` is created
    (and closed) on every call instead of reusing a shared one.

    Args:
        upstream_url: Base URL of the upstream API; trailing slashes are
            stripped before ``/v1/models`` is appended.
        timeout: Timeout in seconds handed to the temporary client.
        api_key: Optional API key; when non-empty it is sent as a
            ``Bearer`` token in the ``authorization`` header.

    Returns:
        True when the upstream answers with a status code below 500;
        False for 5xx answers or when any exception is raised while
        building or sending the request.
    """
    try:
        # Only attach the authorization header when a key was supplied.
        auth_headers: dict[str, str] = (
            {"authorization": f"Bearer {api_key}"} if api_key else {}
        )

        async with httpx.AsyncClient(timeout=timeout) as probe_client:
            models_url = f"{upstream_url.rstrip('/')}/v1/models"
            response = await probe_client.get(models_url, headers=auth_headers)
            # Any non-5xx answer (including 4xx) counts as "reachable".
            return response.status_code < 500
    except Exception:
        # Best-effort probe: every failure is reported as "not reachable".
        return False
|
||||
Reference in New Issue
Block a user