feat: dashboard UX optimization + real-time backend stats + health probe fix + pool shuffle
- dashboard.html: major UX overhaul (+657/-308 lines)
- server.py: /api/admin/backends now returns real-time RPM and model_count
- pool_manager.py: random.shuffle backends for load distribution
- config.py: health probe endpoint /v1/models → /models
- docker-compose.yml: add SIDECAR_PRIMARY_WAIT_MAX_RETRIES=6

BIZ-52 post-review optimizations
This commit is contained in:
@@ -39,7 +39,7 @@ class Config:
|
|||||||
# Health check
|
# Health check
|
||||||
health_check_interval_seconds: int = 60
|
health_check_interval_seconds: int = 60
|
||||||
health_check_timeout_seconds: int = 10
|
health_check_timeout_seconds: int = 10
|
||||||
health_probe_endpoint: str = "/v1/models"
|
health_probe_endpoint: str = "/models"
|
||||||
|
|
||||||
# Admin auth
|
# Admin auth
|
||||||
admin_token: str = ""
|
admin_token: str = ""
|
||||||
|
|||||||
+592
-262
File diff suppressed because it is too large
Load Diff
@@ -15,6 +15,7 @@ services:
|
|||||||
- SIDECAR_METRICS_PORT=9191
|
- SIDECAR_METRICS_PORT=9191
|
||||||
- SIDECAR_DB_PATH=/app/data/sidecar_v2.db
|
- SIDECAR_DB_PATH=/app/data/sidecar_v2.db
|
||||||
- SIDECAR_BACKUP_DIR=/app/data/backups
|
- SIDECAR_BACKUP_DIR=/app/data/backups
|
||||||
|
- SIDECAR_PRIMARY_WAIT_MAX_RETRIES=6
|
||||||
volumes:
|
volumes:
|
||||||
- sidecar-data:/app/data
|
- sidecar-data:/app/data
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""Provider pool management: primary / fallback pool routing."""
|
"""Provider pool management: primary / fallback pool routing."""
|
||||||
|
|
||||||
|
import random
|
||||||
import structlog
|
import structlog
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@@ -42,6 +43,7 @@ class PoolManager:
|
|||||||
if pool:
|
if pool:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
random.shuffle(backends)
|
||||||
return backends
|
return backends
|
||||||
|
|
||||||
def get_any_healthy_backends(self, pool: Optional[str] = None) -> list[Backend]:
|
def get_any_healthy_backends(self, pool: Optional[str] = None) -> list[Backend]:
|
||||||
@@ -55,6 +57,7 @@ class PoolManager:
|
|||||||
backends.append(b)
|
backends.append(b)
|
||||||
if pool:
|
if pool:
|
||||||
break
|
break
|
||||||
|
random.shuffle(backends)
|
||||||
return backends
|
return backends
|
||||||
|
|
||||||
def get_pool_status(self) -> dict:
|
def get_pool_status(self) -> dict:
|
||||||
|
|||||||
@@ -479,9 +479,24 @@ async def dashboard_sse() -> StreamingResponse:
|
|||||||
|
|
||||||
@app.get("/api/admin/backends")
|
@app.get("/api/admin/backends")
|
||||||
async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
|
async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
|
||||||
"""List all backends with masked keys (public read)."""
|
"""List all backends with masked keys (public read) and real-time stats."""
|
||||||
backends = list_backends(pool=pool, decrypt_key=True)
|
backends = list_backends(pool=pool, decrypt_key=True)
|
||||||
return [b.to_dict(mask_key=True) for b in backends]
|
result = []
|
||||||
|
for b in backends:
|
||||||
|
d = b.to_dict(mask_key=True)
|
||||||
|
# Inject real-time rate limiter stats
|
||||||
|
rl_status = rate_limiter.get_status(b.id) if rate_limiter else None
|
||||||
|
if rl_status:
|
||||||
|
utilization = rl_status.get("utilization", 0.0)
|
||||||
|
rpm_current = round(b.rpm_limit * utilization, 1)
|
||||||
|
else:
|
||||||
|
rpm_current = 0
|
||||||
|
d["stats"] = {
|
||||||
|
"rpm_current": rpm_current,
|
||||||
|
"model_count": len(b.model_mappings),
|
||||||
|
}
|
||||||
|
result.append(d)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/admin/backends/{backend_id}")
|
@app.get("/api/admin/backends/{backend_id}")
|
||||||
|
|||||||
Reference in New Issue
Block a user