fix(sidecar-v2): incorporate review feedback - P0/P1 fixes

P0 fixes:
- Admin API Bearer Token auth middleware
- Encryption key missing -> CRITICAL log + sys.exit(1)
- Prometheus metrics endpoint (:9191)
- requirements.txt + Dockerfile + docker-compose.yml + systemd + nginx

P1 fixes:
- Dead code removed from _refresh_cooldowns()
- Stream detection fixed (text/event-stream only)
- Emergency passthrough (10% RPM retry before 503)
- Active health probing for backends
- SQLite daily backup loop with retention
- Chart.js CDN fallback
- Key rotation SOP document
- JSON log format support
- Deploy files: systemd unit + nginx config

BIZ-52 review re-entry

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
2026-06-25 17:11:35 +08:00
parent 611ebd11a8
commit 4f415fb500
9 changed files with 630 additions and 93 deletions
+317 -81
View File
@@ -1,24 +1,27 @@
"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE."""
import asyncio
import json
import os
import sys
import time
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from typing import Any
from typing import Any, Optional
import structlog
from fastapi import Depends, FastAPI, HTTPException, Request, Response
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from config import config as app_config
from crypto import init_crypto
from crypto import init_crypto, is_initialized
from pool_manager import PoolManager
from rate_limiter import PerBackendRateLimiter
from router import Router
from proxy import handle_proxy_request
from storage.db import init_db, create_tables, run_integrity_check
from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH
from storage.backend_store import (
create_backend, get_backend, list_backends, update_backend,
delete_backend, get_pool_stats,
@@ -28,7 +31,11 @@ from storage.cooldown_store import get_cooldown_history
from storage.config_store import get_config, set_config, list_configs, delete_config
from storage.models import Backend, ModelMapping
import os
# ──────────────────────────────────────────────────────────
# Logging
# ──────────────────────────────────────────────────────────
_LOG_FORMAT = os.getenv("LOG_FORMAT", "console").lower()
structlog.configure(
processors=[
@@ -40,7 +47,11 @@ structlog.configure(
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.dev.ConsoleRenderer(),
(
structlog.processors.JSONRenderer()
if _LOG_FORMAT == "json"
else structlog.dev.ConsoleRenderer()
),
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
@@ -50,38 +61,109 @@ structlog.configure(
logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server")
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Admin Auth middleware
# ──────────────────────────────────────────────────────────
# Bearer-token extractor shared by the admin auth dependencies below.
# auto_error=False makes FastAPI hand the dependency ``None`` instead of
# raising on its own when the Authorization header is missing, which is why
# verify_admin_token() and require_admin() test ``credentials is None``.
_security = HTTPBearer(auto_error=False)
def verify_admin_token(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> bool:
    """Report whether the request carries a valid admin Bearer token.

    This dependency never raises; it only answers yes/no so that an endpoint
    can decide for itself what an unauthenticated caller may do.

    Args:
        credentials: Parsed ``Authorization: Bearer ...`` header, or ``None``
            when the header is absent.

    Returns:
        ``True`` only when ``app_config.admin_token`` is configured and the
        presented token matches it. ``False`` when no admin token is
        configured, when no credentials were sent, or when they do not match.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    expected = app_config.admin_token
    if not expected or credentials is None:
        return False
    # Constant-time comparison: a plain ``==`` leaks, through response timing,
    # how many leading characters of a guessed token are correct.
    # Encoding to bytes lets compare_digest accept non-ASCII tokens too.
    return hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
def require_admin(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> None:
    """Require admin auth; raise 401 if the caller is not authorized.

    Intended for use as ``Depends(require_admin)`` on write endpoints.

    Args:
        credentials: Parsed ``Authorization: Bearer ...`` header, or ``None``
            when the header is absent.

    Raises:
        HTTPException: 401 when no admin token is configured, when the
            Authorization header is missing, or when the token is wrong.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    expected = app_config.admin_token
    if not expected:
        # With no token configured there is nothing a client could present,
        # so no Bearer challenge is sent on this response.
        raise HTTPException(
            status_code=401,
            detail="Admin API not configured: set SIDECAR_ADMIN_TOKEN",
        )
    if credentials is None:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # Constant-time comparison so response timing does not reveal how much of
    # a guessed token is correct.
    token_ok = hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
    if not token_ok:
        raise HTTPException(
            status_code=401,
            detail="Invalid admin token",
            # Same challenge as the missing-header case, for consistency.
            headers={"WWW-Authenticate": "Bearer"},
        )
# ──────────────────────────────────────────────────────────
# Global runtime state
# ──────────────────────────────────────
pool_manager: PoolManager | None = None
rate_limiter: PerBackendRateLimiter | None = None
router: Router | None = None
# ──────────────────────────────────────────────────────────
# Process-wide runtime singletons. They start as None; lifespan() declares
# them ``global`` (presumably to assign them at startup; the assignment is
# not visible in this view).
pool_manager: Optional[PoolManager] = None
rate_limiter: Optional[PerBackendRateLimiter] = None
router: Optional[Router] = None
# Subtracted from time.time() to report uptime (see the /metrics endpoint).
start_time: float = 0.0
# In-memory metrics counters, keyed by metric name. Written by _inc_metric()
# and read by the /metrics endpoint; values are lost on restart.
_metrics_counters: dict[str, int] = {}
# NOTE(review): this lock is not acquired anywhere in the visible code;
# confirm whether it is still needed.
_metrics_lock = asyncio.Lock()
def _inc_metric(key: str, delta: int = 1) -> None:
    """Add *delta* to the in-memory counter *key*, starting from 0 if absent.

    The update is a plain read-modify-write on ``_metrics_counters`` and takes
    no lock. It contains no ``await``, so coroutines on one event loop cannot
    interleave inside it, but it is NOT safe to call from other threads.

    Args:
        key: Counter name.
        delta: Amount to add (defaults to 1).
    """
    _metrics_counters[key] = _metrics_counters.get(key, 0) + delta
def get_pm() -> PoolManager:
    """Return the module-level ``PoolManager``.

    Raises:
        RuntimeError: If ``pool_manager`` is still ``None``.
    """
    if pool_manager is None:
        # Explicit raise instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let ``None`` leak out to callers.
        raise RuntimeError("pool_manager is not initialized")
    return pool_manager
def get_rl() -> PerBackendRateLimiter:
    """Return the module-level ``PerBackendRateLimiter``.

    Raises:
        RuntimeError: If ``rate_limiter`` is still ``None``.
    """
    if rate_limiter is None:
        # Explicit raise instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let ``None`` leak out to callers.
        raise RuntimeError("rate_limiter is not initialized")
    return rate_limiter
def get_router() -> Router:
    """Return the module-level ``Router``.

    Raises:
        RuntimeError: If ``router`` is still ``None``.
    """
    if router is None:
        # Explicit raise instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let ``None`` leak out to callers.
        raise RuntimeError("router is not initialized")
    return router
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Lifespan
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
global pool_manager, rate_limiter, router, start_time
# Init crypto
if app_config.encryption_key:
init_crypto(app_config.encryption_key)
# P0: Encryption key is mandatory — refuse to start without it
if not app_config.encryption_key:
logger.critical(
"missing_encryption_key",
hint="Set SIDECAR_ENCRYPTION_KEY (64 hex chars). Refusing to start."
)
sys.exit(1)
init_crypto(app_config.encryption_key)
logger.info("crypto_initialized")
# P0: Warn if admin_token not set
if not app_config.admin_token:
logger.warning(
"admin_token_not_set",
hint="Admin write endpoints disabled until SIDECAR_ADMIN_TOKEN is configured."
)
# Init DB
init_db()
@@ -101,6 +183,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
# Start background tasks
health_task = asyncio.create_task(_health_check_loop())
stats_task = asyncio.create_task(_stats_aggregation_loop())
backup_task = asyncio.create_task(_backup_loop())
logger.info(
"sidecar_v2_started",
@@ -112,16 +195,12 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
try:
yield
finally:
health_task.cancel()
stats_task.cancel()
try:
await health_task
except asyncio.CancelledError:
pass
try:
await stats_task
except asyncio.CancelledError:
pass
for task in [health_task, stats_task, backup_task]:
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
logger.info("sidecar_v2_stopped")
@@ -132,21 +211,86 @@ app = FastAPI(
)
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Background tasks
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
def _record_probe(
    backend_id: str,
    *,
    healthy: bool,
    latency_ms: Optional[int] = None,
    status_code: Optional[int] = None,
) -> None:
    """Upsert one probe outcome into ``backend_health`` and update the failure streak.

    Args:
        backend_id: Backend whose health row is written.
        healthy: Whether the probe counts as a success.
        latency_ms: Round-trip time, when a response was received.
        status_code: HTTP status, when a response was received. ``None``
            means the request itself failed (timeout, connection error, ...).
    """
    from storage.db import get_connection as _gc

    with _gc() as conn:
        if status_code is None:
            # No response at all: leave the previously stored latency/status
            # untouched and only flip the state.
            conn.execute(
                """INSERT INTO backend_health
                   (backend_id, state, last_check_at)
                   VALUES (?, 'degraded', datetime('now'))
                   ON CONFLICT(backend_id) DO UPDATE SET
                       state = 'degraded',
                       last_check_at = excluded.last_check_at""",
                (backend_id,),
            )
        else:
            conn.execute(
                """INSERT INTO backend_health
                   (backend_id, state, last_latency_ms, last_status_code,
                    last_check_at)
                   VALUES (?, ?, ?, ?, datetime('now'))
                   ON CONFLICT(backend_id) DO UPDATE SET
                       state = excluded.state,
                       last_latency_ms = excluded.last_latency_ms,
                       last_status_code = excluded.last_status_code,
                       last_check_at = excluded.last_check_at""",
                (
                    backend_id,
                    "healthy" if healthy else "degraded",
                    latency_ms,
                    status_code,
                ),
            )
        if healthy:
            # A success ends the streak; otherwise the counter only ever grows
            # and stops meaning "consecutive".
            conn.execute(
                """UPDATE backend_health SET
                       consecutive_failures = 0
                   WHERE backend_id = ?""",
                (backend_id,),
            )
        else:
            conn.execute(
                """UPDATE backend_health SET
                       consecutive_failures = consecutive_failures + 1
                   WHERE backend_id = ?""",
                (backend_id,),
            )
        conn.commit()


async def _probe_backend(client: "httpx.AsyncClient", backend: "Backend") -> None:
    """Send one GET health probe to *backend* and persist the result.

    A probe is a failure when the request raises or when the backend answers
    with a 5xx status; any other response counts as healthy (reachable).
    """
    probe_url = backend.api_base_url.rstrip("/") + app_config.health_probe_endpoint
    headers: dict[str, str] = {}
    if backend.api_key_plain:
        headers["Authorization"] = f"Bearer {backend.api_key_plain}"

    started = time.monotonic()
    try:
        resp = await client.get(probe_url, headers=headers)
    except Exception as probe_err:
        # Only the HTTP call is guarded here, so a database error further
        # down is not misreported as an unreachable backend.
        logger.warning(
            "health_probe_failed",
            backend_id=backend.id,
            error=str(probe_err),
        )
        _record_probe(backend.id, healthy=False)
        return
    elapsed_ms = int((time.monotonic() - started) * 1000)

    if resp.status_code >= 500:
        # The server answered but is erroring: not healthy.
        logger.warning(
            "health_probe_failed",
            backend_id=backend.id,
            status=resp.status_code,
            latency_ms=elapsed_ms,
            error=f"HTTP {resp.status_code}",
        )
        _record_probe(
            backend.id,
            healthy=False,
            latency_ms=elapsed_ms,
            status_code=resp.status_code,
        )
        return

    _record_probe(
        backend.id,
        healthy=True,
        latency_ms=elapsed_ms,
        status_code=resp.status_code,
    )
    logger.debug(
        "health_probe_ok",
        backend_id=backend.id,
        status=resp.status_code,
        latency_ms=elapsed_ms,
    )


async def _health_check_loop() -> None:
    """Periodic health checks: clear expired cooldowns + active probing of backends.

    Each sweep lists all backends (with decrypted keys, needed for the probe's
    Authorization header) and, per backend:

    1. calls ``check_and_clear_cooldown`` when its status is ``"cooling"``;
    2. probes it over HTTP when its status is ``"healthy"`` and it is enabled.

    The loop then sleeps ``app_config.health_check_interval_seconds`` and
    repeats until the task is cancelled.
    """
    from cooldown_manager import check_and_clear_cooldown
    import httpx

    while True:
        try:
            backends = list_backends(decrypt_key=True)
            # Timeout is re-read every sweep so a changed setting takes effect.
            timeout = httpx.Timeout(app_config.health_check_timeout_seconds)
            # One client per sweep instead of one per backend.
            async with httpx.AsyncClient(timeout=timeout) as client:
                for b in backends:
                    try:
                        # 1. Clear expired cooldowns
                        if b.status == "cooling":
                            check_and_clear_cooldown(b.id)
                        # 2. Active health probing for healthy/enabled backends
                        if b.status == "healthy" and b.enabled:
                            await _probe_backend(client, b)
                    except Exception:
                        # Isolate failures so one bad backend (or one failed
                        # DB write) does not skip the rest of the sweep.
                        logger.exception(
                            "health_check_backend_error", backend_id=b.id
                        )
        except Exception:
            logger.exception("health_check_error")

        await asyncio.sleep(app_config.health_check_interval_seconds)
@@ -163,9 +307,50 @@ async def _stats_aggregation_loop() -> None:
await asyncio.sleep(app_config.stats_refresh_interval_seconds)
# ──────────────────────────────────────
def _create_db_backup(db_path: str, backup_path: str) -> None:
    """Copy the SQLite database at *db_path* to *backup_path* via the backup API.

    Both connections are always closed. If the copy fails, the partially
    written destination file is removed before the error is re-raised.
    """
    import sqlite3
    from contextlib import closing, suppress

    try:
        with closing(sqlite3.connect(db_path)) as source, closing(
            sqlite3.connect(backup_path)
        ) as dest:
            source.backup(dest)
    except Exception:
        # Do not leave a truncated file that looks like a usable backup.
        with suppress(OSError):
            os.remove(backup_path)
        raise


def _prune_old_backups(backup_dir: str, retention_days: float, keep: str) -> list[str]:
    """Delete ``sidecar_v2_*.db`` files in *backup_dir* older than *retention_days*.

    The file at *keep* (the backup just created) is never deleted.

    Returns:
        Paths of the files that were removed.
    """
    cutoff = time.time() - retention_days * 86400
    removed: list[str] = []
    for fname in os.listdir(backup_dir):
        if not (fname.startswith("sidecar_v2_") and fname.endswith(".db")):
            continue
        fpath = os.path.join(backup_dir, fname)
        if fpath == keep:
            continue
        try:
            if os.path.getmtime(fpath) < cutoff:
                os.remove(fpath)
                removed.append(fpath)
        except OSError:
            # A file that vanished or cannot be removed must not stop the
            # pruning of the remaining ones.
            logger.warning("db_backup_retire_failed", path=fpath)
    return removed


async def _backup_loop() -> None:
    """Daily SQLite backup with retention.

    Every 24 hours, if ``app_config.backup_dir`` is set, the database is
    copied to ``<backup_dir>/sidecar_v2_<UTC timestamp>.db`` and backups older
    than ``app_config.backup_retention_days`` are deleted. The first backup
    happens 24 hours after the task starts. Errors are logged and the loop
    continues; it runs until the task is cancelled.
    """
    while True:
        # Sleeping first also guarantees a pause after ``continue`` or an
        # error below, so the loop can never spin.
        await asyncio.sleep(86400)  # 24 hours
        try:
            backup_dir = app_config.backup_dir
            if not backup_dir:
                continue

            os.makedirs(backup_dir, exist_ok=True)
            backup_name = f"sidecar_v2_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}.db"
            backup_path = os.path.join(backup_dir, backup_name)

            # Imported at use time so the current value of _DB_PATH is read.
            from storage.db import _DB_PATH as db_path

            # The copy is blocking file I/O; run it in a worker thread so
            # request handling on the event loop is not stalled.
            await asyncio.to_thread(_create_db_backup, db_path, backup_path)
            logger.info("db_backup_created", path=backup_path)

            # Retention: remove old backups. A missing or non-positive value
            # is treated as "retention disabled"; it would otherwise delete
            # every backup, including the one just written.
            retention_days = app_config.backup_retention_days
            if retention_days and retention_days > 0:
                retired = await asyncio.to_thread(
                    _prune_old_backups, backup_dir, retention_days, backup_path
                )
                for fpath in retired:
                    logger.info("db_backup_retired", path=fpath)
        except Exception:
            logger.exception("backup_error")
# ──────────────────────────────────────────────────────────
# Health / Metrics
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
@app.get("/health")
async def health() -> dict[str, Any]:
return {
@@ -175,9 +360,54 @@ async def health() -> dict[str, Any]:
}
# ──────────────────────────────────────
@app.get("/metrics")
async def metrics() -> Response:
    """Prometheus-compatible metrics endpoint.

    Emits, in the Prometheus text exposition format:

    - ``sidecar_pool_providers{pool,type}``: numeric entries of the pool status
    - ``sidecar_cooldown_active``: number of backends with status ``cooling``
    - ``sidecar_emergency_count``: in-memory ``emergency_count`` counter
    - ``sidecar_db_size_bytes`` / ``sidecar_wal_size_bytes``
    - ``sidecar_requests_total`` / ``sidecar_errors_total`` /
      ``sidecar_tokens_total`` / ``sidecar_cost_total``
    - ``sidecar_uptime_seconds``

    Returns:
        A ``text/plain`` response, one sample per line.
    """
    from storage.db import get_db_sizes

    def _label(value: Any) -> str:
        # Label values must escape backslash, double quote and newline.
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    lines: list[str] = []

    # Pool provider counts
    pool_status = get_pm().get_pool_status()
    for pool_name, stats in pool_status.items():
        for key, val in stats.items():
            if isinstance(val, bool):
                val = int(val)
            if not isinstance(val, (int, float)):
                # A non-numeric sample value would make the whole scrape
                # unparseable, so such entries are left out.
                continue
            lines.append(
                f'sidecar_pool_providers{{pool="{_label(pool_name)}",type="{_label(key)}"}} {val}'
            )

    # Cooldown status
    all_backends = list_backends(decrypt_key=False)
    cooling_count = sum(1 for b in all_backends if b.status == "cooling")
    lines.append(f"sidecar_cooldown_active {cooling_count}")

    # Emergency count
    emergency_count = _metrics_counters.get("emergency_count", 0)
    lines.append(f"sidecar_emergency_count {emergency_count}")

    # DB sizes
    sizes = get_db_sizes()
    lines.append(f"sidecar_db_size_bytes {sizes.get('db_bytes', 0)}")
    lines.append(f"sidecar_wal_size_bytes {sizes.get('wal_bytes', 0)}")

    # Total stats (``or 0`` maps a stored None to zero)
    total = get_total_stats()
    lines.append(f"sidecar_requests_total {total.get('total_requests', 0) or 0}")
    lines.append(f"sidecar_errors_total {total.get('total_errors', 0) or 0}")
    lines.append(f"sidecar_tokens_total {total.get('total_tokens', 0) or 0}")
    cost = total.get('total_cost', 0) or 0.0
    lines.append(f"sidecar_cost_total {cost}")

    # Uptime
    lines.append(f"sidecar_uptime_seconds {int(time.time() - start_time)}")

    return Response(
        content="\n".join(lines) + "\n",
        # ``version=0.0.4`` identifies the Prometheus text exposition format.
        media_type="text/plain; version=0.0.4; charset=utf-8",
    )
# ──────────────────────────────────────────────────────────
# Dashboard SSE
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
@app.get("/dashboard/sse")
async def dashboard_sse() -> StreamingResponse:
"""SSE endpoint for real-time dashboard data."""
@@ -214,7 +444,7 @@ async def dashboard_sse() -> StreamingResponse:
"uptime_seconds": int(time.time() - start_time),
"timestamp": time.time(),
}
yield f"data: {__import__('json').dumps(snapshot)}\n\n"
yield f"data: {json.dumps(snapshot)}\n\n"
except Exception:
logger.exception("sse_error")
@@ -231,20 +461,20 @@ async def dashboard_sse() -> StreamingResponse:
)
# ──────────────────────────────────────
# Admin: Backend CRUD
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Admin: Backend CRUD (READ: public, WRITE: auth required)
# ──────────────────────────────────────────────────────────
@app.get("/api/admin/backends")
async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
    """Return all backends as dicts with their API keys masked (public read).

    Args:
        pool: Optional pool name, passed through to ``list_backends`` as a filter.
    """
    masked: list[dict] = []
    for backend in list_backends(pool=pool, decrypt_key=True):
        masked.append(backend.to_dict(mask_key=True))
    return masked
@app.get("/api/admin/backends/{backend_id}")
async def admin_get_backend(backend_id: str) -> dict:
"""Get a single backend (key masked)."""
"""Get a single backend (public read, key masked)."""
b = get_backend(backend_id, decrypt_key=True)
if b is None:
raise HTTPException(404, "Backend not found")
@@ -252,8 +482,11 @@ async def admin_get_backend(backend_id: str) -> dict:
@app.post("/api/admin/backends")
async def admin_create_backend(body: dict[str, Any]) -> dict:
"""Create a new backend."""
async def admin_create_backend(
body: dict[str, Any],
_auth=Depends(require_admin),
) -> dict:
"""Create a new backend (auth required)."""
required = ["name", "api_base_url", "api_key"]
for field in required:
if field not in body:
@@ -283,18 +516,20 @@ async def admin_create_backend(body: dict[str, Any]) -> dict:
@app.put("/api/admin/backends/{backend_id}")
async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
"""Update a backend."""
async def admin_update_backend(
backend_id: str,
body: dict[str, Any],
_auth=Depends(require_admin),
) -> dict:
"""Update a backend (auth required)."""
updates = dict(body)
# Handle model_mappings
if "model_mappings" in updates:
raw = updates["model_mappings"]
updates["model_mappings"] = {
k: ModelMapping.from_dict(v) for k, v in raw.items()
}
# Handle api_key
if "api_key" in updates:
updates["api_key_plain"] = updates.pop("api_key")
@@ -305,40 +540,40 @@ async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
@app.delete("/api/admin/backends/{backend_id}")
async def admin_delete_backend(
    backend_id: str,
    _auth=Depends(require_admin),
) -> dict:
    """Delete the backend with the given id (admin token required).

    Raises:
        HTTPException: 404 when ``delete_backend`` reports nothing was deleted.
    """
    if delete_backend(backend_id):
        return {"status": "deleted", "id": backend_id}
    raise HTTPException(404, "Backend not found")
# ──────────────────────────────────────
# Admin: Pool Status
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Admin: Pool Status (public read)
# ──────────────────────────────────────────────────────────
@app.get("/api/admin/pools")
async def admin_pool_status() -> dict:
    """Return the pool manager's status summary (public read)."""
    summary = pool_manager.get_pool_status()
    return summary
# ──────────────────────────────────────
# Admin: Usage / Stats
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Admin: Usage / Stats (public read)
# ──────────────────────────────────────────────────────────
@app.get("/api/admin/stats/total")
async def admin_total_stats() -> dict:
    """Return the aggregate usage statistics from ``get_total_stats`` (public read)."""
    totals = get_total_stats()
    return totals
@app.get("/api/admin/stats/hourly")
async def admin_hourly_usage(
backend_id: str | None = None,
backend_id: Optional[str] = None,
hours: int = 168,
) -> list[dict]:
"""Get hourly usage data."""
since = None
if hours > 0:
since = time.strftime(
@@ -350,32 +585,28 @@ async def admin_hourly_usage(
@app.get("/api/admin/stats/daily")
async def admin_daily_stats(days: int = 30) -> list[dict]:
    """Return daily aggregated stats (public read).

    Args:
        days: Passed through to ``get_daily_stats``; defaults to 30.
    """
    daily_rows = get_daily_stats(days=days)
    return daily_rows
@app.get("/api/admin/stats/cooldown")
async def admin_cooldown_history(
    backend_id: Optional[str] = None,
    limit: int = 50,
) -> list[dict]:
    """Return cooldown event history (public read).

    Args:
        backend_id: Optional backend id, passed through as a filter.
        limit: Passed through to ``get_cooldown_history``; defaults to 50.
    """
    events = get_cooldown_history(backend_id=backend_id, limit=limit)
    return events
# ──────────────────────────────────────
# Admin: System Config
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Admin: System Config (read public, write auth required)
# ──────────────────────────────────────────────────────────
@app.get("/api/admin/config")
async def admin_get_all_config() -> list[dict]:
    """Return every system config entry from ``list_configs`` (public read)."""
    entries = list_configs()
    return entries
@app.get("/api/admin/config/{key}")
async def admin_get_config(key: str) -> dict:
"""Get a single config value."""
value = get_config(key)
if value is None:
raise HTTPException(404, "Config not found")
@@ -383,8 +614,11 @@ async def admin_get_config(key: str) -> dict:
@app.put("/api/admin/config/{key}")
async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
"""Set a config value."""
async def admin_set_config(
key: str,
body: dict[str, Any],
_auth=Depends(require_admin),
) -> dict:
value = str(body.get("value", ""))
description = str(body.get("description", ""))
set_config(key, value, description)
@@ -392,21 +626,22 @@ async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
@app.delete("/api/admin/config/{key}")
async def admin_delete_config(
    key: str,
    _auth=Depends(require_admin),
) -> dict:
    """Delete the config entry named *key* (admin token required).

    Raises:
        HTTPException: 404 when ``delete_config`` reports nothing was deleted.
    """
    if delete_config(key):
        return {"status": "deleted", "key": key}
    raise HTTPException(404, "Config not found")
# ──────────────────────────────────────
# Dashboard HTML
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Dashboard HTML (public, but respects admin_token for writes in JS)
# ──────────────────────────────────────────────────────────
@app.get("/dashboard")
async def dashboard_html() -> HTMLResponse:
"""Serve the dashboard WebUI."""
dashboard_path = os.path.join(
os.path.dirname(__file__), "dashboard.html"
)
@@ -416,12 +651,13 @@ async def dashboard_html() -> HTMLResponse:
return HTMLResponse("<h1>Dashboard not found</h1>", status_code=404)
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Proxy Endpoints
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
@app.post("/v1/chat/completions")
async def chat_completions(request: Request) -> Response:
    """Count the request, then proxy it to ``/v1/chat/completions``."""
    _inc_metric("proxy_requests_total")
    upstream_path = "/v1/chat/completions"
    response = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
    return response
@@ -443,7 +679,7 @@ async def embeddings(request: Request) -> Response:
@app.get("/v1/models")
@app.get("/v1/models/{model_id:path}")
async def list_models(request: Request, model_id: str | None = None) -> Response:
async def list_models(request: Request, model_id: Optional[str] = None) -> Response:
path = f"/v1/models/{model_id}" if model_id else "/v1/models"
return await handle_proxy_request(
pool_manager, rate_limiter, router, request, path
@@ -458,9 +694,9 @@ async def catch_all(request: Request, path: str) -> Response:
)
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
# Main
# ──────────────────────────────────────
# ──────────────────────────────────────────────────────────
def main() -> None:
import uvicorn