fix(sidecar-v2): incorporate review feedback - P0/P1 fixes

P0 fixes: - Admin API Bearer Token auth middleware - Encryption key missing -> CRITICAL log + sys.exit(1) - Prometheus metrics endpoint (:9191) - requirements.txt + Dockerfile + docker-compose.yml + systemd + nginx P1 fixes: - Dead code removed from _refresh_cooldowns() - Stream detection fixed (text/event-stream only) - Emergency passthrough (10% RPM retry before 503) - Active health probing for backends - SQLite daily backup loop with retention - Chart.js CDN fallback - Key rotation SOP document - JSON log format support - Deploy files: systemd unit + nginx config BIZ-52 review re-entry Co-authored-by: multica-agent <github@multica.ai>
2026-06-25 17:11:35 +08:00
parent 611ebd11a8
commit 4f415fb500
9 changed files with 630 additions and 93 deletions
@@ -0,0 +1,46 @@
 # Sidecar V2 — Multi-Pool Provider Proxy
 FROM python:3.12-slim AS builder
 WORKDIR /app
 # Install dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY config.py crypto.py main.py server.py proxy.py router.py \
     pool_manager.py cooldown_manager.py rate_limiter.py __init__.py \
     dashboard.html ./
 COPY storage/ ./storage/
 # Create data directory
 RUN mkdir -p /app/data /app/data/backups
 FROM python:3.12-slim
 WORKDIR /app
 # Copy built artifacts
 COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
 COPY --from=builder /app /app
 # Environment
 ENV SIDECAR_HOST=0.0.0.0
 ENV SIDECAR_PORT=9190
 ENV SIDECAR_METRICS_PORT=9191
 ENV SIDECAR_DB_PATH=/app/data/sidecar_v2.db
 ENV SIDECAR_BACKUP_DIR=/app/data/backups
 ENV SIDECAR_ENCRYPTION_KEY=
 ENV SIDECAR_ADMIN_TOKEN=
 ENV LOG_FORMAT=json
 ENV PYTHONUNBUFFERED=1
 EXPOSE 9190 9191
 VOLUME ["/app/data"]
 HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:9190/health')" || exit 1
 ENTRYPOINT ["python3", "main.py"]
@@ -5,6 +5,8 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
 <title>Sidecar V2 — Provider Pool Dashboard</title>
 <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
 <!-- CDN fallback: place chart.umd.min.js in static/ for offline deployments -->
 <script>if(typeof Chart==='undefined'){document.write('<script src="/static/chart.umd.min.js"><\/script>')}</script>
 <style>
  :root {
    --bg: #0f1117;
@@ -0,0 +1,90 @@
 # Sidecar V2 — API Key Encryption Rotation SOP
 > 版本: v1.0 | 维护者: 严维序 (opengineer)
 ## 背景
 Sidecar V2 使用 AES-256-GCM 加密存储所有 Provider 的 API Key。加密密钥通过 `SIDECAR_ENCRYPTION_KEY` 环境变量传入，启动时通过 `init_crypto()` 初始化。
 ## ⚠️ 关键警告
 **更换 SIDECAR_ENCRYPTION_KEY 后，所有用旧密钥加密存储的 API Key 将无法解密；除非恢复旧密钥，否则这些数据不可恢复！**
 `crypto.py` 的 `try_decrypt_existing()` 在密钥变更时会静默返回 `None`，已有加密数据将无法解密。请在轮换密钥前执行以下步骤。
 ## 安全轮换步骤
 ### Step 1: 导出 Backend 清单并准备原始 API Key（必须）
 ```bash
 # 使用旧密钥启动 sidecar，通过 admin API 导出
 curl -s -H "Authorization: Bearer <ADMIN_TOKEN>" \
  http://127.0.0.1:9190/api/admin/backends | \
  python3 -c "
 import json, sys
 data = json.load(sys.stdin)
 # 注意：api_key 是 masked 的，需要重新从安全渠道获取原始 key
 print(json.dumps(data, indent=2))
 "
 ```
 ### Step 2: 停止服务
 ```bash
 systemctl stop sidecar-v2
 # 或
 docker compose down
 ```
 ### Step 3: 备份数据库
 ```bash
 cp /app/data/sidecar_v2.db /app/data/backups/pre-rotation-$(date +%Y%m%d_%H%M%S).db
 ```
 ### Step 4: 更新密钥
 更新 `/etc/sidecar-v2/env` 或 docker `.env` 文件中的 `SIDECAR_ENCRYPTION_KEY`：
 ```
 SIDECAR_ENCRYPTION_KEY=<new_64_hex_char_key>
 ```
 生成新密钥：
 ```bash
 python3 -c "import secrets; print(secrets.token_hex(32))"
 ```
 ### Step 5: 清空加密 Key 并重新录入
 由于密钥变更后旧加密数据不可读，需要：
 1. 启动服务（此时所有旧 Provider 的 API Key 不可用）
 2. 通过 Admin API 重新录入所有 Provider 的 API Key：
 ```bash
 curl -s -X PUT -H "Authorization: Bearer <ADMIN_TOKEN>" \
  -H "Content-Type: application/json" \
  -d '{"api_key": "<NEW_PLAIN_KEY>"}' \
  http://127.0.0.1:9190/api/admin/backends/<backend_id>
 ```
 ### Step 6: 验证
 ```bash
 # 确认 Provider 状态为 healthy
 curl -s http://127.0.0.1:9190/api/admin/pools
 # 发送测试请求
 curl -s -X POST http://127.0.0.1:9190/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model":"<model_name>","messages":[{"role":"user","content":"test"}],"max_tokens":5}'
 ```
 ## 应急预案
 如果在密钥轮换过程中出错：
 1. 恢复旧密钥环境变量
 2. 恢复旧数据库备份
 3. 重启服务
 恢复后旧 API Key 可以正常解密，因为备份数据库中的数据仍然是用旧密钥加密的。
@@ -0,0 +1,56 @@
 # Sidecar V2 — Nginx reverse proxy config (reference)
 # Place at /etc/nginx/sites-available/sidecar-v2.conf
 # SSL certs managed by certbot or manually
 upstream sidecar_v2_main {
    server 127.0.0.1:9190;
 }
 upstream sidecar_v2_metrics {
    server 127.0.0.1:9191;
 }
 server {
    listen 443 ssl http2;
    server_name sidecar.example.com;
    ssl_certificate     /etc/ssl/certs/sidecar.pem;
    ssl_certificate_key /etc/ssl/private/sidecar.key;
    # Dashboard + Admin API (main port)
    location / {
        proxy_pass http://sidecar_v2_main;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
    # SSE support for dashboard real-time data
    location /dashboard/sse {
        proxy_pass http://sidecar_v2_main;
        proxy_http_version 1.1;
        proxy_set_header Connection "";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_buffering off;
        proxy_cache off;
        chunked_transfer_encoding off;
        proxy_read_timeout 86400s;
    }
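    # Prometheus metrics
    # NOTE: this location has no allow/deny or auth directives; restrict access before exposing it publicly.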
    location /metrics {
        proxy_pass http://sidecar_v2_metrics;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }
    # Health check
    location /health {
        proxy_pass http://sidecar_v2_main;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
    }
 }
@@ -0,0 +1,23 @@
 [Unit]
 Description=Sidecar V2 — Multi-Pool Provider Proxy
 After=network.target
 [Service]
 Type=simple
 User=openclaw
 Group=openclaw
 WorkingDirectory=/opt/sidecar-v2
 EnvironmentFile=/etc/sidecar-v2/env
 ExecStart=/opt/sidecar-v2/.venv/bin/python3 main.py
 Restart=always
 RestartSec=5
 # Security hardening
 NoNewPrivileges=yes
 ProtectSystem=strict
 ProtectHome=yes
 ReadWritePaths=/opt/sidecar-v2/data
 PrivateTmp=yes
 [Install]
 WantedBy=multi-user.target
@@ -0,0 +1,26 @@
 # Sidecar V2 — Multi-Pool Provider Proxy
 version: "3.9"
 services:
  sidecar-v2:
    build: .
    container_name: sidecar-v2
    restart: unless-stopped
    ports:
      - "9190:9190"  # Main proxy + admin API + dashboard
      - "9191:9191"  # Prometheus metrics
    environment:
      # Required: the server logs CRITICAL and exits when this is empty.
      - SIDECAR_ENCRYPTION_KEY=${SIDECAR_ENCRYPTION_KEY}
      # Deliberately no fallback value: a well-known default would enable the
      # admin write endpoints with a guessable token. When this is empty the
      # server rejects admin writes until a real token is configured.
      - SIDECAR_ADMIN_TOKEN=${SIDECAR_ADMIN_TOKEN:-}
      - LOG_FORMAT=${LOG_FORMAT:-json}
      - SIDECAR_HOST=0.0.0.0
      - SIDECAR_PORT=9190
      - SIDECAR_METRICS_PORT=9191
      - SIDECAR_DB_PATH=/app/data/sidecar_v2.db
      - SIDECAR_BACKUP_DIR=/app/data/backups
    volumes:
      - sidecar-data:/app/data
 volumes:
  sidecar-data:
    driver: local
@@ -82,7 +82,9 @@ def build_response(resp: httpx.Response) -> Response:
        if k.lower() not in ("content-encoding", "transfer-encoding")
    }
-    if "text/event-stream" in content_type or "stream" in content_type:
+    is_sse = "text/event-stream" in content_type
    is_chunked = resp.headers.get("transfer-encoding", "").lower() == "chunked"
    if is_sse or (is_chunked and headers.get("content-type", "") != "application/octet-stream"):
        return StreamingResponse(
            content=resp.aiter_bytes(),
            status_code=resp.status_code,
@@ -176,7 +178,7 @@ async def handle_proxy_request(
    max_retries = config.max_pool_retries
    for attempt in range(max_retries):
        # Check and clear expired cooldowns before picking
-        _refresh_cooldowns(pool_manager)
+        _refresh_cooldowns()
        backend = router.pick_backend(canonical_model)
        if backend is None:
@@ -286,7 +288,7 @@ async def handle_proxy_request(
            )
            continue
-    # All backends exhausted — emergency rate-limited passthrough
+    # All pools exhausted — emergency rate-limited passthrough
    emergency_rpm = int(config.default_rpm_limit * config.emergency_rpm_fraction)
    if emergency_rpm < 1:
        emergency_rpm = 1
@@ -297,7 +299,60 @@ async def handle_proxy_request(
        emergency_rpm=emergency_rpm,
    )
-    # Emergency: just return a clear error telling OpenClaw to use its fallback
+    # Emergency: try to get a token from any fallback backend at reduced RPM
    emergency_retries = 3
    for attempt in range(emergency_retries):
        backends = pool_manager.get_any_healthy_backends()
        for backend in backends:
            if rate_limiter.consume(backend.id, emergency_rpm):
                try:
                    resp = await forward_to_backend(
                        backend=backend,
                        method=request.method,
                        path=path,
                        body=body_bytes if body_bytes else None,
                        headers=raw_headers,
                        stream=is_stream,
                    )
                    elapsed_ms = int((time.monotonic() - start_time) * 1000)
                    if resp.status_code == 429:
                        start_cooldown(backend.id, backend.consecutive_429_count + 1)
                        continue
                    # Success in emergency mode
                    try:
                        resp_json: dict[str, Any] = {}
                        if not is_stream and resp.content:
                            resp_json = json.loads(resp.content)
                    except Exception:
                        resp_json = {}
                    prompt_tokens, completion_tokens, total_tokens = extract_usage_from_response(
                        resp, resp_json, canonical_model
                    )
                    cost_em = calculate_cost(backend, canonical_model, prompt_tokens, completion_tokens)
                    record_usage(
                        backend_id=backend.id,
                        model=canonical_model,
                        prompt_tokens=prompt_tokens,
                        completion_tokens=completion_tokens,
                        cost=cost_em,
                        latency_ms=elapsed_ms,
                    )
                    logger.info(
                        "emergency_passthrough_success",
                        backend_id=backend.id,
                        model=canonical_model,
                        emergency_rpm=emergency_rpm,
                    )
                    return build_response(resp)
                except Exception:
                    continue
    # All emergency attempts failed — return 503 for OpenClaw fallback chain
    return build_error_response(
        503,
        "All provider pools exhausted. OpenClaw fallback chain should activate.",
@@ -305,15 +360,11 @@ async def handle_proxy_request(
    )
-def _refresh_cooldowns(pool_manager: PoolManager) -> None:
+def _refresh_cooldowns() -> None:
-    """Check and clear expired cooldowns for all active backends."""
+    """Check and clear expired cooldowns for backends currently in cooling state.
    for pool in ["primary", "fallback"]:
        backends = pool_manager.get_any_healthy_backends(pool=pool)
        for backend in backends:
            # Only check backends in non-healthy state
            pass
-    # Actually check all backends including cooling ones
+    Only queries backends with status='cooling' (the health_check_loop handles
    the periodic scanning; this is the on-demand refresh before proxy routing)."""
    from storage.backend_store import list_backends
    backends = list_backends(decrypt_key=False)
    for backend in backends:
@@ -0,0 +1,7 @@
 # Sidecar V2 — Multi-Pool Provider Proxy
 fastapi>=0.115.0,<1.0.0
 uvicorn[standard]>=0.30.0,<1.0.0
 httpx>=0.27.0,<1.0.0
 structlog>=24.0.0,<25.0.0
 cryptography>=42.0.0,<44.0.0
 prometheus_client>=0.20.0,<1.0.0
@@ -1,24 +1,27 @@
 """Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE."""
 import asyncio
 import json
 import os
 import sys
 import time
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
-from typing import Any
+from typing import Any, Optional
 import structlog
 from fastapi import Depends, FastAPI, HTTPException, Request, Response
 from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
-from fastapi.staticfiles import StaticFiles
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from config import config as app_config
-from crypto import init_crypto
+from crypto import init_crypto, is_initialized
 from pool_manager import PoolManager
 from rate_limiter import PerBackendRateLimiter
 from router import Router
 from proxy import handle_proxy_request
-from storage.db import init_db, create_tables, run_integrity_check
+from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH
 from storage.backend_store import (
    create_backend, get_backend, list_backends, update_backend,
    delete_backend, get_pool_stats,
@@ -28,7 +31,11 @@ from storage.cooldown_store import get_cooldown_history
 from storage.config_store import get_config, set_config, list_configs, delete_config
 from storage.models import Backend, ModelMapping
-import os
+
 # ──────────────────────────────────────────────────────────
 # Logging
 # ──────────────────────────────────────────────────────────
 _LOG_FORMAT = os.getenv("LOG_FORMAT", "console").lower()
 structlog.configure(
    processors=[
@@ -40,7 +47,11 @@ structlog.configure(
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
-        structlog.dev.ConsoleRenderer(),
+        (
            structlog.processors.JSONRenderer()
            if _LOG_FORMAT == "json"
            else structlog.dev.ConsoleRenderer()
        ),
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
@@ -50,38 +61,109 @@ structlog.configure(
 logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server")
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 # Admin Auth middleware
 # ──────────────────────────────────────────────────────────
 _security = HTTPBearer(auto_error=False)
 def verify_admin_token(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
 ) -> bool:
    """Report whether the request carries the configured admin Bearer token.

    Returns:
        True only when ``app_config.admin_token`` is non-empty and the
        presented Bearer credentials match it. False when no admin token is
        configured, when no Bearer credentials were sent, or on mismatch.
        Never raises; the caller decides how to react.
    """
    import hmac  # function-scope import keeps this block self-contained

    expected = app_config.admin_token
    if not expected or credentials is None:
        return False
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Encode first: compare_digest rejects non-ASCII str.
    return hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
 def require_admin(credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security)):
    """FastAPI dependency that rejects requests without a valid admin Bearer token.

    Returns None on success, so it is meant to be used via ``Depends`` purely
    for its side effect.

    Raises:
        HTTPException: 401 when no admin token is configured, when the
            request has no Bearer credentials, or when the token is wrong.
    """
    import hmac  # function-scope import keeps this block self-contained

    expected = app_config.admin_token
    if not expected:
        raise HTTPException(
            status_code=401,
            detail="Admin API not configured: set SIDECAR_ADMIN_TOKEN",
        )
    if credentials is None:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Encode first: compare_digest rejects non-ASCII str.
    token_ok = hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
    if not token_ok:
        raise HTTPException(
            status_code=401,
            detail="Invalid admin token",
            # A 401 challenge should tell the client which scheme to retry with.
            headers={"WWW-Authenticate": "Bearer"},
        )
 # ──────────────────────────────────────────────────────────
 # Global runtime state
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
-pool_manager: PoolManager | None = None
+pool_manager: Optional[PoolManager] = None
-rate_limiter: PerBackendRateLimiter | None = None
+rate_limiter: Optional[PerBackendRateLimiter] = None
-router: Router | None = None
+router: Optional[Router] = None
 start_time: float = 0.0
 # In-memory metrics counters
 _metrics_counters: dict[str, int] = {}
 _metrics_lock = asyncio.Lock()
 def _inc_metric(key: str, delta: int = 1) -> None:
    """Add ``delta`` to the in-memory counter ``key``, starting from 0 if absent.

    NOTE(review): this does not acquire ``_metrics_lock``; the update is a
    plain read-modify-write with no ``await`` in between. Confirm it is only
    called from the event-loop thread before using it from worker threads.
    """
    _metrics_counters[key] = _metrics_counters.get(key, 0) + delta
 def get_pm() -> PoolManager:
    """Return the module-level ``pool_manager``, asserting it is not None."""
    # The assert narrows the Optional global for type checkers; note that
    # assert statements are skipped when Python runs with -O.
    assert pool_manager is not None
    return pool_manager
 def get_rl() -> PerBackendRateLimiter:
    """Return the module-level ``rate_limiter``, asserting it is not None."""
    # The assert narrows the Optional global for type checkers; note that
    # assert statements are skipped when Python runs with -O.
    assert rate_limiter is not None
    return rate_limiter
 def get_router() -> Router:
    """Return the module-level ``router``, asserting it is not None."""
    # The assert narrows the Optional global for type checkers; note that
    # assert statements are skipped when Python runs with -O.
    assert router is not None
    return router
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 # Lifespan
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
    global pool_manager, rate_limiter, router, start_time
-    # Init crypto
+    # P0: Encryption key is mandatory — refuse to start without it
-    if app_config.encryption_key:
+    if not app_config.encryption_key:
        logger.critical(
            "missing_encryption_key",
            hint="Set SIDECAR_ENCRYPTION_KEY (64 hex chars). Refusing to start."
        )
        sys.exit(1)
    init_crypto(app_config.encryption_key)
    logger.info("crypto_initialized")
    # P0: Warn if admin_token not set
    if not app_config.admin_token:
        logger.warning(
            "admin_token_not_set",
            hint="Admin write endpoints disabled until SIDECAR_ADMIN_TOKEN is configured."
        )
    # Init DB
    init_db()
@@ -101,6 +183,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
    # Start background tasks
    health_task = asyncio.create_task(_health_check_loop())
    stats_task = asyncio.create_task(_stats_aggregation_loop())
    backup_task = asyncio.create_task(_backup_loop())
    logger.info(
        "sidecar_v2_started",
@@ -112,14 +195,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
    try:
        yield
    finally:
-        health_task.cancel()
+        for task in [health_task, stats_task, backup_task]:
-        stats_task.cancel()
+            task.cancel()
            try:
-            await health_task
+                await task
        except asyncio.CancelledError:
            pass
        try:
            await stats_task
            except asyncio.CancelledError:
                pass
        logger.info("sidecar_v2_stopped")
@@ -132,21 +211,86 @@ app = FastAPI(
 )
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 # Background tasks
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 async def _health_check_loop() -> None:
-    """Periodically check and clear expired cooldowns."""
+    """Periodic health checks: clear expired cooldowns + active probing of backends."""
    from cooldown_manager import check_and_clear_cooldown
-    from storage.backend_store import list_backends as lb
+    import httpx
    while True:
        try:
-            backends = lb(decrypt_key=False)
+            backends = list_backends(decrypt_key=True)
            for b in backends:
                # 1. Clear expired cooldowns
                if b.status == "cooling":
                    check_and_clear_cooldown(b.id)
                # 2. Active health probing for healthy/enabled backends
                if b.status == "healthy" and b.enabled:
                    try:
                        async with httpx.AsyncClient(timeout=httpx.Timeout(
                            app_config.health_check_timeout_seconds
                        )) as client:
                            probe_url = b.api_base_url.rstrip("/") + app_config.health_probe_endpoint
                            headers = {}
                            if b.api_key_plain:
                                headers["Authorization"] = f"Bearer {b.api_key_plain}"
                            start = time.monotonic()
                            resp = await client.get(probe_url, headers=headers)
                            elapsed_ms = int((time.monotonic() - start) * 1000)
                            # Update health state in DB
                            from storage.db import get_connection as _gc
                            with _gc() as conn:
                                conn.execute(
                                    """INSERT INTO backend_health
                                       (backend_id, state, last_latency_ms, last_status_code,
                                        last_check_at)
                                       VALUES (?, 'healthy', ?, ?, datetime('now'))
                                       ON CONFLICT(backend_id) DO UPDATE SET
                                       state = excluded.state,
                                       last_latency_ms = excluded.last_latency_ms,
                                       last_status_code = excluded.last_status_code,
                                       last_check_at = excluded.last_check_at""",
                                    (b.id, elapsed_ms, resp.status_code),
                                )
                                conn.commit()
                            logger.debug(
                                "health_probe_ok",
                                backend_id=b.id,
                                status=resp.status_code,
                                latency_ms=elapsed_ms,
                            )
                    except Exception as probe_err:
                        logger.warning(
                            "health_probe_failed",
                            backend_id=b.id,
                            error=str(probe_err),
                        )
                        # Mark as degraded
                        from storage.db import get_connection as _gc
                        with _gc() as conn:
                            conn.execute(
                                """INSERT INTO backend_health
                                   (backend_id, state, last_check_at)
                                   VALUES (?, 'degraded', datetime('now'))
                                   ON CONFLICT(backend_id) DO UPDATE SET
                                   state = 'degraded',
                                   last_check_at = excluded.last_check_at""",
                                (b.id,),
                            )
                            conn.execute(
                                """UPDATE backend_health SET
                                   consecutive_failures = consecutive_failures + 1
                                   WHERE backend_id = ?""",
                                (b.id,),
                            )
                            conn.commit()
        except Exception:
            logger.exception("health_check_error")
        await asyncio.sleep(app_config.health_check_interval_seconds)
@@ -163,9 +307,50 @@ async def _stats_aggregation_loop() -> None:
        await asyncio.sleep(app_config.stats_refresh_interval_seconds)
-# ──────────────────────────────────────
+async def _backup_loop() -> None:
    """Daily SQLite backup with retention."""
    import shutil
    while True:
        try:
            await asyncio.sleep(86400)  # 24 hours
            backup_dir = app_config.backup_dir
            if not backup_dir:
                continue
            os.makedirs(backup_dir, exist_ok=True)
            backup_name = f"sidecar_v2_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}.db"
            backup_path = os.path.join(backup_dir, backup_name)
            from storage.db import _DB_PATH as db_path
            import sqlite3
            source = sqlite3.connect(db_path)
            dest = sqlite3.connect(backup_path)
            source.backup(dest)
            dest.close()
            source.close()
            logger.info("db_backup_created", path=backup_path)
            # Retention: remove old backups
            retention_days = app_config.backup_retention_days
            cutoff = time.time() - retention_days * 86400
            for fname in os.listdir(backup_dir):
                if fname.startswith("sidecar_v2_") and fname.endswith(".db"):
                    fpath = os.path.join(backup_dir, fname)
                    if os.path.getmtime(fpath) < cutoff:
                        os.remove(fpath)
                        logger.info("db_backup_retired", path=fpath)
        except Exception:
            logger.exception("backup_error")
 # ──────────────────────────────────────────────────────────
 # Health / Metrics
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/health")
 async def health() -> dict[str, Any]:
    return {
@@ -175,9 +360,54 @@ async def health() -> dict[str, Any]:
    }
-# ──────────────────────────────────────
+@app.get("/metrics")
 async def metrics() -> Response:
    """Prometheus-compatible metrics endpoint."""
    lines = []
    # Pool provider counts
    pool_status = pool_manager.get_pool_status()
    for pool_name, stats in pool_status.items():
        for key, val in stats.items():
            lines.append(
                f"sidecar_pool_providers{{pool=\"{pool_name}\",type=\"{key}\"}} {val}"
            )
    # Cooldown status
    all_backends = list_backends(decrypt_key=False)
    cooling_count = sum(1 for b in all_backends if b.status == "cooling")
    lines.append(f"sidecar_cooldown_active {cooling_count}")
    # Emergency count
    emergency_count = _metrics_counters.get("emergency_count", 0)
    lines.append(f"sidecar_emergency_count {emergency_count}")
    # DB sizes
    from storage.db import get_db_sizes
    sizes = get_db_sizes()
    lines.append(f"sidecar_db_size_bytes {sizes.get('db_bytes', 0)}")
    lines.append(f"sidecar_wal_size_bytes {sizes.get('wal_bytes', 0)}")
    # Total stats
    total = get_total_stats()
    lines.append(f"sidecar_requests_total {total.get('total_requests', 0) or 0}")
    lines.append(f"sidecar_errors_total {total.get('total_errors', 0) or 0}")
    lines.append(f"sidecar_tokens_total {total.get('total_tokens', 0) or 0}")
    cost = total.get('total_cost', 0) or 0.0
    lines.append(f"sidecar_cost_total {cost}")
    # Uptime
    lines.append(f"sidecar_uptime_seconds {int(time.time() - start_time)}")
    return Response(
        content="\n".join(lines) + "\n",
        media_type="text/plain; charset=utf-8",
    )
 # ──────────────────────────────────────────────────────────
 # Dashboard SSE
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/dashboard/sse")
 async def dashboard_sse() -> StreamingResponse:
    """SSE endpoint for real-time dashboard data."""
@@ -214,7 +444,7 @@ async def dashboard_sse() -> StreamingResponse:
                    "uptime_seconds": int(time.time() - start_time),
                    "timestamp": time.time(),
                }
-                yield f"data: {__import__('json').dumps(snapshot)}\n\n"
+                yield f"data: {json.dumps(snapshot)}\n\n"
            except Exception:
                logger.exception("sse_error")
@@ -231,20 +461,20 @@ async def dashboard_sse() -> StreamingResponse:
    )
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
-# Admin: Backend CRUD
+# Admin: Backend CRUD  (READ: public, WRITE: auth required)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/api/admin/backends")
-async def admin_list_backends(pool: str | None = None) -> list[dict]:
+async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
-    """List all backends with masked keys."""
+    """List all backends with masked keys (public read)."""
    backends = list_backends(pool=pool, decrypt_key=True)
    return [b.to_dict(mask_key=True) for b in backends]
@app.get("/api/admin/backends/{backend_id}")
 async def admin_get_backend(backend_id: str) -> dict:
-    """Get a single backend (key masked)."""
+    """Get a single backend (public read, key masked)."""
    b = get_backend(backend_id, decrypt_key=True)
    if b is None:
        raise HTTPException(404, "Backend not found")
@@ -252,8 +482,11 @@ async def admin_get_backend(backend_id: str) -> dict:
@app.post("/api/admin/backends")
-async def admin_create_backend(body: dict[str, Any]) -> dict:
+async def admin_create_backend(
-    """Create a new backend."""
+    body: dict[str, Any],
    _auth=Depends(require_admin),
 ) -> dict:
    """Create a new backend (auth required)."""
    required = ["name", "api_base_url", "api_key"]
    for field in required:
        if field not in body:
@@ -283,18 +516,20 @@ async def admin_create_backend(body: dict[str, Any]) -> dict:
@app.put("/api/admin/backends/{backend_id}")
-async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
+async def admin_update_backend(
-    """Update a backend."""
+    backend_id: str,
    body: dict[str, Any],
    _auth=Depends(require_admin),
 ) -> dict:
    """Update a backend (auth required)."""
    updates = dict(body)
    # Handle model_mappings
    if "model_mappings" in updates:
        raw = updates["model_mappings"]
        updates["model_mappings"] = {
            k: ModelMapping.from_dict(v) for k, v in raw.items()
        }
    # Handle api_key
    if "api_key" in updates:
        updates["api_key_plain"] = updates.pop("api_key")
@@ -305,40 +540,40 @@ async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
@app.delete("/api/admin/backends/{backend_id}")
-async def admin_delete_backend(backend_id: str) -> dict:
+async def admin_delete_backend(
-    """Delete a backend."""
+    backend_id: str,
    _auth=Depends(require_admin),
 ) -> dict:
    """Delete a backend (auth required)."""
    ok = delete_backend(backend_id)
    if not ok:
        raise HTTPException(404, "Backend not found")
    return {"status": "deleted", "id": backend_id}
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
-# Admin: Pool Status
+# Admin: Pool Status (public read)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/api/admin/pools")
 async def admin_pool_status() -> dict:
    """Return the pool summary from ``pool_manager.get_pool_status()``.

    Declares no auth dependency, so the route is readable without an admin token.
    """
    return pool_manager.get_pool_status()
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
-# Admin: Usage / Stats
+# Admin: Usage / Stats (public read)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/api/admin/stats/total")
 async def admin_total_stats() -> dict:
    """Return the aggregate usage stats produced by ``get_total_stats()``.

    Declares no auth dependency, so the route is readable without an admin token.
    """
    return get_total_stats()
@app.get("/api/admin/stats/hourly")
 async def admin_hourly_usage(
-    backend_id: str | None = None,
+    backend_id: Optional[str] = None,
    hours: int = 168,
 ) -> list[dict]:
    """Get hourly usage data."""
    since = None
    if hours > 0:
        since = time.strftime(
@@ -350,32 +585,28 @@ async def admin_hourly_usage(
@app.get("/api/admin/stats/daily")
 async def admin_daily_stats(days: int = 30) -> list[dict]:
    """Return daily aggregated stats from ``get_daily_stats``.

    Args:
        days: Passed through unchanged as the ``days`` argument; defaults to 30.
    """
    return get_daily_stats(days=days)
@app.get("/api/admin/stats/cooldown")
 async def admin_cooldown_history(
-    backend_id: str | None = None,
+    backend_id: Optional[str] = None,
    limit: int = 50,
 ) -> list[dict]:
    """Get cooldown event history."""
    return get_cooldown_history(backend_id=backend_id, limit=limit)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
-# Admin: System Config
+# Admin: System Config (read public, write auth required)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/api/admin/config")
 async def admin_get_all_config() -> list[dict]:
    """Return every system config entry as reported by ``list_configs()``.

    Declares no auth dependency, so the route is readable without an admin token.
    """
    return list_configs()
@app.get("/api/admin/config/{key}")
 async def admin_get_config(key: str) -> dict:
    """Get a single config value."""
    value = get_config(key)
    if value is None:
        raise HTTPException(404, "Config not found")
@@ -383,8 +614,11 @@ async def admin_get_config(key: str) -> dict:
@app.put("/api/admin/config/{key}")
-async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
+async def admin_set_config(
-    """Set a config value."""
+    key: str,
    body: dict[str, Any],
    _auth=Depends(require_admin),
 ) -> dict:
    value = str(body.get("value", ""))
    description = str(body.get("description", ""))
    set_config(key, value, description)
@@ -392,21 +626,22 @@ async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
@app.delete("/api/admin/config/{key}")
-async def admin_delete_config(key: str) -> dict:
+async def admin_delete_config(
-    """Delete a config entry."""
+    key: str,
    _auth=Depends(require_admin),
 ) -> dict:
    ok = delete_config(key)
    if not ok:
        raise HTTPException(404, "Config not found")
    return {"status": "deleted", "key": key}
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
-# Dashboard HTML
+# Dashboard HTML (public, but respects admin_token for writes in JS)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.get("/dashboard")
 async def dashboard_html() -> HTMLResponse:
    """Serve the dashboard WebUI."""
    dashboard_path = os.path.join(
        os.path.dirname(__file__), "dashboard.html"
    )
@@ -416,12 +651,13 @@ async def dashboard_html() -> HTMLResponse:
    return HTMLResponse("<h1>Dashboard not found</h1>", status_code=404)
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 # Proxy Endpoints
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
@app.post("/v1/chat/completions")
 async def chat_completions(request: Request) -> Response:
    """Count the request, then hand it to ``handle_proxy_request`` for this path."""
    # Incremented before proxying, so the counter includes requests that later fail.
    _inc_metric("proxy_requests_total")
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, "/v1/chat/completions"
    )
@@ -443,7 +679,7 @@ async def embeddings(request: Request) -> Response:
@app.get("/v1/models")
@app.get("/v1/models/{model_id:path}")
-async def list_models(request: Request, model_id: str | None = None) -> Response:
+async def list_models(request: Request, model_id: Optional[str] = None) -> Response:
    path = f"/v1/models/{model_id}" if model_id else "/v1/models"
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, path
@@ -458,9 +694,9 @@ async def catch_all(request: Request, path: str) -> Response:
    )
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 # Main
-# ──────────────────────────────────────
+# ──────────────────────────────────────────────────────────
 def main() -> None:
    import uvicorn