feat: dashboard UX optimization + real-time backend stats + health probe fix + pool shuffle

- dashboard.html: major UX overhaul (+637/-307 lines)
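- server.py: /api/admin/backends now adds a per-backend "stats" object with real-time rpm_current (rpm_limit × rate-limiter utilization, 0 when no limiter status is available) and model_count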
- pool_manager.py: shuffle the returned backend lists with random.shuffle (both the regular and the "any healthy" selection) to distribute load across backends
- config.py: change the default health_probe_endpoint from /v1/models to /models
- docker-compose.yml: add SIDECAR_PRIMARY_WAIT_MAX_RETRIES=6

BIZ-52 post-review optimizations
This commit is contained in:
2026-07-03 16:32:42 +08:00
parent 18dfb2901b
commit 8531a3b595
5 changed files with 659 additions and 310 deletions
+1 -1
View File
@@ -39,7 +39,7 @@ class Config:
# Health check # Health check
health_check_interval_seconds: int = 60 health_check_interval_seconds: int = 60
health_check_timeout_seconds: int = 10 health_check_timeout_seconds: int = 10
health_probe_endpoint: str = "/v1/models" health_probe_endpoint: str = "/models"
# Admin auth # Admin auth
admin_token: str = "" admin_token: str = ""
+637 -307
View File
File diff suppressed because it is too large Load Diff
+1
View File
@@ -15,6 +15,7 @@ services:
- SIDECAR_METRICS_PORT=9191 - SIDECAR_METRICS_PORT=9191
- SIDECAR_DB_PATH=/app/data/sidecar_v2.db - SIDECAR_DB_PATH=/app/data/sidecar_v2.db
- SIDECAR_BACKUP_DIR=/app/data/backups - SIDECAR_BACKUP_DIR=/app/data/backups
- SIDECAR_PRIMARY_WAIT_MAX_RETRIES=6
volumes: volumes:
- sidecar-data:/app/data - sidecar-data:/app/data
+3
View File
@@ -1,5 +1,6 @@
"""Provider pool management: primary / fallback pool routing.""" """Provider pool management: primary / fallback pool routing."""
import random
import structlog import structlog
from typing import Optional from typing import Optional
@@ -42,6 +43,7 @@ class PoolManager:
if pool: if pool:
break break
random.shuffle(backends)
return backends return backends
def get_any_healthy_backends(self, pool: Optional[str] = None) -> list[Backend]: def get_any_healthy_backends(self, pool: Optional[str] = None) -> list[Backend]:
@@ -55,6 +57,7 @@ class PoolManager:
backends.append(b) backends.append(b)
if pool: if pool:
break break
random.shuffle(backends)
return backends return backends
def get_pool_status(self) -> dict: def get_pool_status(self) -> dict:
+17 -2
View File
@@ -479,9 +479,24 @@ async def dashboard_sse() -> StreamingResponse:
@app.get("/api/admin/backends") @app.get("/api/admin/backends")
async def admin_list_backends(pool: Optional[str] = None) -> list[dict]: async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
"""List all backends with masked keys (public read).""" """List all backends with masked keys (public read) and real-time stats."""
backends = list_backends(pool=pool, decrypt_key=True) backends = list_backends(pool=pool, decrypt_key=True)
return [b.to_dict(mask_key=True) for b in backends] result = []
for b in backends:
d = b.to_dict(mask_key=True)
# Inject real-time rate limiter stats
rl_status = rate_limiter.get_status(b.id) if rate_limiter else None
if rl_status:
utilization = rl_status.get("utilization", 0.0)
rpm_current = round(b.rpm_limit * utilization, 1)
else:
rpm_current = 0
d["stats"] = {
"rpm_current": rpm_current,
"model_count": len(b.model_mappings),
}
result.append(d)
return result
@app.get("/api/admin/backends/{backend_id}") @app.get("/api/admin/backends/{backend_id}")