feat: dashboard UX optimization + real-time backend stats + health probe fix + pool shuffle

- dashboard.html: major UX overhaul (+657/-308 lines)
- server.py: /api/admin/backends now returns real-time RPM and model_count
- pool_manager.py: random.shuffle backends for load distribution
- config.py: health probe endpoint /v1/models → /models
- docker-compose.yml: add SIDECAR_PRIMARY_WAIT_MAX_RETRIES=6

BIZ-52 post-review optimizations
2026-07-03 16:32:42 +08:00
parent 18dfb2901b
commit 8531a3b595
5 changed files with 659 additions and 310 deletions
@@ -479,9 +479,24 @@ async def dashboard_sse() -> StreamingResponse:

@app.get("/api/admin/backends")
 async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
-    """List all backends with masked keys (public read)."""
+    """List backends with masked keys (public read), adding a "stats" dict (rpm_current, model_count) to each."""
    backends = list_backends(pool=pool, decrypt_key=True)
-    return [b.to_dict(mask_key=True) for b in backends]
+    result = []
+    for b in backends:
+        d = b.to_dict(mask_key=True)
+        # Estimate current RPM as rpm_limit * limiter utilization; 0 when no limiter or no status for this backend
+        rl_status = rate_limiter.get_status(b.id) if rate_limiter else None
+        if rl_status:
+            utilization = rl_status.get("utilization", 0.0)
+            rpm_current = round(b.rpm_limit * utilization, 1)
+        else:
+            rpm_current = 0
+        d["stats"] = {
+            "rpm_current": rpm_current,
+            "model_count": len(b.model_mappings),
+        }
+        result.append(d)
+    return result


@app.get("/api/admin/backends/{backend_id}")