"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE.""" import asyncio import json import os import sys import time from collections.abc import AsyncGenerator from contextlib import asynccontextmanager from typing import Any, Optional import logging import structlog from fastapi import Depends, FastAPI, HTTPException, Request, Response from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials from config import config as app_config from crypto import init_crypto, is_initialized from pool_manager import PoolManager from rate_limiter import PerBackendRateLimiter from router import Router from proxy import ( handle_proxy_request, get_emergency_count, get_primary_wait_enter_count, get_primary_wait_recovery_count, get_primary_wait_exhausted_count, ) from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH from storage.backend_store import ( create_backend, get_backend, list_backends, update_backend, delete_backend, get_pool_stats, ) from storage.usage_store import get_total_stats, get_hourly_usage, get_daily_stats, aggregate_daily_stats from storage.cooldown_store import get_cooldown_history from storage.config_store import get_config, set_config, list_configs, delete_config from storage.models import Backend, ModelMapping # ────────────────────────────────────────────────────────── # Logging # ────────────────────────────────────────────────────────── _LOG_FORMAT = os.getenv("LOG_FORMAT", "console").lower() logging.basicConfig(level=logging.INFO) structlog.configure( processors=[ structlog.stdlib.filter_by_level, structlog.stdlib.add_logger_name, structlog.stdlib.add_log_level, structlog.stdlib.PositionalArgumentsFormatter(), structlog.processors.TimeStamper(fmt="iso"), structlog.processors.StackInfoRenderer(), structlog.processors.format_exc_info, structlog.processors.UnicodeDecoder(), ( structlog.processors.JSONRenderer() if _LOG_FORMAT == "json" else structlog.dev.ConsoleRenderer() ), ], context_class=dict, logger_factory=structlog.stdlib.LoggerFactory(), wrapper_class=structlog.stdlib.BoundLogger, cache_logger_on_first_use=True, ) logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server") # ────────────────────────────────────────────────────────── # Admin Auth middleware # ────────────────────────────────────────────────────────── _security = HTTPBearer(auto_error=False) def verify_admin_token( credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security), ) -> bool: """Verify Bearer Token against config.admin_token. If admin_token is empty, write operations are rejected. READ operations are allowed without auth for dashboard use. """ if not app_config.admin_token: # No token configured — allow read, reject write (checked per-endpoint) if credentials is None: return False return False if credentials is None: return False return credentials.credentials == app_config.admin_token def require_admin(credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security)): """Require admin auth — raise 401 if not authorized.""" if not app_config.admin_token: raise HTTPException( status_code=401, detail="Admin API not configured: set SIDECAR_ADMIN_TOKEN", ) if credentials is None: raise HTTPException( status_code=401, detail="Missing Authorization header", headers={"WWW-Authenticate": "Bearer"}, ) if credentials.credentials != app_config.admin_token: raise HTTPException( status_code=401, detail="Invalid admin token", ) # ────────────────────────────────────────────────────────── # Global runtime state # ────────────────────────────────────────────────────────── pool_manager: Optional[PoolManager] = None rate_limiter: Optional[PerBackendRateLimiter] = None router: Optional[Router] = None start_time: float = 0.0 # In-memory metrics counters _metrics_counters: dict[str, int] = {} _metrics_lock = asyncio.Lock() def _inc_metric(key: str, delta: int = 1) -> None: """Thread-safe counter increment (deferred via asyncio).""" _metrics_counters[key] = _metrics_counters.get(key, 0) + delta def get_pm() -> PoolManager: assert pool_manager is not None return pool_manager def get_rl() -> PerBackendRateLimiter: assert rate_limiter is not None return rate_limiter def get_router() -> Router: assert router is not None return router # ────────────────────────────────────────────────────────── # Lifespan # ────────────────────────────────────────────────────────── @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]: global pool_manager, rate_limiter, router, start_time # P0: Encryption key is mandatory — refuse to start without it if not app_config.encryption_key: logger.critical( "missing_encryption_key", hint="Set SIDECAR_ENCRYPTION_KEY (64 hex chars). Refusing to start." ) sys.exit(1) init_crypto(app_config.encryption_key) logger.info("crypto_initialized") # P0: Warn if admin_token not set if not app_config.admin_token: logger.warning( "admin_token_not_set", hint="Admin write endpoints disabled until SIDECAR_ADMIN_TOKEN is configured." ) # Init DB init_db() create_tables() ok = run_integrity_check() if not ok: logger.error("db_integrity_check_failed") # Init runtime components pool_manager = PoolManager() rate_limiter = PerBackendRateLimiter( refill_interval_ms=app_config.rate_limiter_refill_interval_ms, ) router = Router(pool_manager, rate_limiter) start_time = time.time() # Start background tasks health_task = asyncio.create_task(_health_check_loop()) stats_task = asyncio.create_task(_stats_aggregation_loop()) backup_task = asyncio.create_task(_backup_loop()) logger.info( "sidecar_v2_started", host=app_config.host, port=app_config.port, metrics_port=app_config.metrics_port, ) try: yield finally: for task in [health_task, stats_task, backup_task]: task.cancel() try: await task except asyncio.CancelledError: pass logger.info("sidecar_v2_stopped") app = FastAPI( title="Sidecar V2 — Multi-Pool Provider Proxy", version="2.0.0", lifespan=lifespan, ) # ────────────────────────────────────────────────────────── # Background tasks # ────────────────────────────────────────────────────────── async def _health_check_loop() -> None: """Periodic health checks: clear expired cooldowns + active probing of backends.""" from cooldown_manager import check_and_clear_cooldown import httpx while True: try: backends = list_backends(decrypt_key=True) for b in backends: # 1. Clear expired cooldowns if b.status == "cooling": check_and_clear_cooldown(b.id) # 2. Active health probing for healthy/enabled backends if b.status == "healthy" and b.enabled: try: async with httpx.AsyncClient(timeout=httpx.Timeout( app_config.health_check_timeout_seconds )) as client: probe_url = b.api_base_url.rstrip("/") + app_config.health_probe_endpoint headers = {} if b.api_key_plain: headers["Authorization"] = f"Bearer {b.api_key_plain}" start = time.monotonic() resp = await client.get(probe_url, headers=headers) elapsed_ms = int((time.monotonic() - start) * 1000) # Update health state in DB from storage.db import get_connection as _gc with _gc() as conn: conn.execute( """INSERT INTO backend_health (backend_id, state, last_latency_ms, last_status_code, last_check_at) VALUES (?, 'healthy', ?, ?, datetime('now')) ON CONFLICT(backend_id) DO UPDATE SET state = excluded.state, last_latency_ms = excluded.last_latency_ms, last_status_code = excluded.last_status_code, last_check_at = excluded.last_check_at""", (b.id, elapsed_ms, resp.status_code), ) conn.commit() logger.debug( "health_probe_ok", backend_id=b.id, status=resp.status_code, latency_ms=elapsed_ms, ) except Exception as probe_err: logger.warning( "health_probe_failed", backend_id=b.id, error=str(probe_err), ) # Mark as degraded from storage.db import get_connection as _gc with _gc() as conn: conn.execute( """INSERT INTO backend_health (backend_id, state, last_check_at) VALUES (?, 'degraded', datetime('now')) ON CONFLICT(backend_id) DO UPDATE SET state = 'degraded', last_check_at = excluded.last_check_at""", (b.id,), ) conn.execute( """UPDATE backend_health SET consecutive_failures = consecutive_failures + 1 WHERE backend_id = ?""", (b.id,), ) conn.commit() except Exception: logger.exception("health_check_error") await asyncio.sleep(app_config.health_check_interval_seconds) async def _stats_aggregation_loop() -> None: """Periodically aggregate daily stats.""" while True: try: today = time.strftime("%Y-%m-%d", time.gmtime()) aggregate_daily_stats(today) except Exception: logger.exception("stats_aggregation_error") await asyncio.sleep(app_config.stats_refresh_interval_seconds) async def _backup_loop() -> None: """Daily SQLite backup with retention.""" import shutil while True: try: await asyncio.sleep(86400) # 24 hours backup_dir = app_config.backup_dir if not backup_dir: continue os.makedirs(backup_dir, exist_ok=True) backup_name = f"sidecar_v2_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}.db" backup_path = os.path.join(backup_dir, backup_name) from storage.db import _DB_PATH as db_path import sqlite3 source = sqlite3.connect(db_path) dest = sqlite3.connect(backup_path) source.backup(dest) dest.close() source.close() logger.info("db_backup_created", path=backup_path) # Retention: remove old backups retention_days = app_config.backup_retention_days cutoff = time.time() - retention_days * 86400 for fname in os.listdir(backup_dir): if fname.startswith("sidecar_v2_") and fname.endswith(".db"): fpath = os.path.join(backup_dir, fname) if os.path.getmtime(fpath) < cutoff: os.remove(fpath) logger.info("db_backup_retired", path=fpath) except Exception: logger.exception("backup_error") # ────────────────────────────────────────────────────────── # Health / Metrics # ────────────────────────────────────────────────────────── @app.get("/health") async def health() -> dict[str, Any]: return { "status": "ok", "version": "2.0.0", "uptime_seconds": int(time.time() - start_time), } @app.get("/metrics") async def metrics() -> Response: """Prometheus-compatible metrics endpoint.""" lines = [] # Pool provider counts pool_status = pool_manager.get_pool_status() for pool_name, stats in pool_status.items(): for key, val in stats.items(): lines.append( f"sidecar_pool_providers{{pool=\"{pool_name}\",type=\"{key}\"}} {val}" ) # Cooldown status all_backends = list_backends(decrypt_key=False) cooling_count = sum(1 for b in all_backends if b.status == "cooling") lines.append(f"sidecar_cooldown_active {cooling_count}") # Emergency count (from proxy module) lines.append(f"sidecar_emergency_count {get_emergency_count()}") # Primary-Wait metrics lines.append(f"sidecar_primary_wait_enter_total {get_primary_wait_enter_count()}") lines.append(f"sidecar_primary_wait_recovery_total {get_primary_wait_recovery_count()}") lines.append(f"sidecar_primary_wait_exhausted_total {get_primary_wait_exhausted_count()}") # DB sizes from storage.db import get_db_sizes sizes = get_db_sizes() lines.append(f"sidecar_db_size_bytes {sizes.get('db_bytes', 0)}") lines.append(f"sidecar_wal_size_bytes {sizes.get('wal_bytes', 0)}") # Total stats total = get_total_stats() lines.append(f"sidecar_requests_total {total.get('total_requests', 0) or 0}") lines.append(f"sidecar_errors_total {total.get('total_errors', 0) or 0}") lines.append(f"sidecar_tokens_total {total.get('total_tokens', 0) or 0}") cost = total.get('total_cost', 0) or 0.0 lines.append(f"sidecar_cost_total {cost}") # Uptime lines.append(f"sidecar_uptime_seconds {int(time.time() - start_time)}") return Response( content="\n".join(lines) + "\n", media_type="text/plain; charset=utf-8", ) # ────────────────────────────────────────────────────────── # Dashboard SSE # ────────────────────────────────────────────────────────── @app.get("/dashboard/sse") async def dashboard_sse() -> StreamingResponse: """SSE endpoint for real-time dashboard data.""" async def event_generator(): while True: try: pool_status = pool_manager.get_pool_status() total_stats = get_total_stats() all_backends = list_backends(decrypt_key=False) backends_list = [] for b in all_backends: rl_status = rate_limiter.get_status(b.id) backends_list.append({ "id": b.id, "name": b.name, "label": b.label, "pool": b.pool, "enabled": b.enabled, "status": b.status, "rpm_limit": b.rpm_limit, "cooldown_until": b.cooldown_until, "consecutive_429_count": b.consecutive_429_count, "model_count": len(b.model_mappings), "rate_limiter": rl_status, }) snapshot = { "type": "snapshot", "pool": pool_status, "total": total_stats, "backends": backends_list, "uptime_seconds": int(time.time() - start_time), "timestamp": time.time(), } yield f"data: {json.dumps(snapshot)}\n\n" except Exception: logger.exception("sse_error") await asyncio.sleep(app_config.dashboard_sse_interval_seconds) return StreamingResponse( event_generator(), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", }, ) # ────────────────────────────────────────────────────────── # Admin: Backend CRUD (READ: public, WRITE: auth required) # ────────────────────────────────────────────────────────── @app.get("/api/admin/backends") async def admin_list_backends(pool: Optional[str] = None) -> list[dict]: """List all backends with masked keys (public read).""" backends = list_backends(pool=pool, decrypt_key=True) return [b.to_dict(mask_key=True) for b in backends] @app.get("/api/admin/backends/{backend_id}") async def admin_get_backend(backend_id: str) -> dict: """Get a single backend (public read, key masked).""" b = get_backend(backend_id, decrypt_key=True) if b is None: raise HTTPException(404, "Backend not found") return b.to_dict(mask_key=True) @app.post("/api/admin/backends") async def admin_create_backend( body: dict[str, Any], _auth=Depends(require_admin), ) -> dict: """Create a new backend (auth required).""" required = ["name", "api_base_url", "api_key"] for field in required: if field not in body: raise HTTPException(400, f"Missing required field: {field}") model_mappings_raw = body.get("model_mappings", {}) model_mappings = {} for canonical_name, mm in model_mappings_raw.items(): model_mappings[canonical_name] = ModelMapping.from_dict(mm) backend = Backend( name=body["name"], label=body.get("label", ""), api_base_url=body["api_base_url"], api_key_plain=body["api_key"], api=body.get("api", "openai-completions"), timeout_seconds=body.get("timeout_seconds", 120), rpm_limit=body.get("rpm_limit", app_config.default_rpm_limit), pool=body.get("pool", "primary"), enabled=body.get("enabled", True), model_mappings=model_mappings, source=body.get("source", "webui"), ) created = create_backend(backend) return created.to_dict(mask_key=True) @app.put("/api/admin/backends/{backend_id}") async def admin_update_backend( backend_id: str, body: dict[str, Any], _auth=Depends(require_admin), ) -> dict: """Update a backend (auth required).""" updates = dict(body) if "model_mappings" in updates: raw = updates["model_mappings"] updates["model_mappings"] = { k: ModelMapping.from_dict(v) for k, v in raw.items() } if "api_key" in updates: updates["api_key_plain"] = updates.pop("api_key") updated = update_backend(backend_id, updates) if updated is None: raise HTTPException(404, "Backend not found") return updated.to_dict(mask_key=True) @app.delete("/api/admin/backends/{backend_id}") async def admin_delete_backend( backend_id: str, _auth=Depends(require_admin), ) -> dict: """Delete a backend (auth required).""" ok = delete_backend(backend_id) if not ok: raise HTTPException(404, "Backend not found") return {"status": "deleted", "id": backend_id} # ────────────────────────────────────────────────────────── # Admin: Pool Status (public read) # ────────────────────────────────────────────────────────── @app.get("/api/admin/pools") async def admin_pool_status() -> dict: return pool_manager.get_pool_status() # ────────────────────────────────────────────────────────── # Admin: Usage / Stats (public read) # ────────────────────────────────────────────────────────── @app.get("/api/admin/stats/total") async def admin_total_stats() -> dict: return get_total_stats() @app.get("/api/admin/stats/hourly") async def admin_hourly_usage( backend_id: Optional[str] = None, hours: int = 168, ) -> list[dict]: since = None if hours > 0: since = time.strftime( "%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() - hours * 3600), ) return get_hourly_usage(backend_id=backend_id, since=since, limit=hours) @app.get("/api/admin/stats/daily") async def admin_daily_stats(days: int = 30) -> list[dict]: return get_daily_stats(days=days) @app.get("/api/admin/stats/cooldown") async def admin_cooldown_history( backend_id: Optional[str] = None, limit: int = 50, ) -> list[dict]: return get_cooldown_history(backend_id=backend_id, limit=limit) # ────────────────────────────────────────────────────────── # Admin: System Config (read public, write auth required) # ────────────────────────────────────────────────────────── @app.get("/api/admin/config") async def admin_get_all_config() -> list[dict]: return list_configs() @app.get("/api/admin/config/{key}") async def admin_get_config(key: str) -> dict: value = get_config(key) if value is None: raise HTTPException(404, "Config not found") return {"key": key, "value": value} @app.put("/api/admin/config/{key}") async def admin_set_config( key: str, body: dict[str, Any], _auth=Depends(require_admin), ) -> dict: value = str(body.get("value", "")) description = str(body.get("description", "")) set_config(key, value, description) return {"key": key, "value": value} @app.delete("/api/admin/config/{key}") async def admin_delete_config( key: str, _auth=Depends(require_admin), ) -> dict: ok = delete_config(key) if not ok: raise HTTPException(404, "Config not found") return {"status": "deleted", "key": key} # ────────────────────────────────────────────────────────── # Dashboard HTML (public, but respects admin_token for writes in JS) # ────────────────────────────────────────────────────────── @app.get("/dashboard") async def dashboard_html() -> HTMLResponse: dashboard_path = os.path.join( os.path.dirname(__file__), "dashboard.html" ) if os.path.exists(dashboard_path): with open(dashboard_path, "r") as f: return HTMLResponse(f.read()) return HTMLResponse("