"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE.""" import asyncio import time from collections.abc import AsyncGenerator from contextlib import asynccontextmanager from typing import Any import structlog from fastapi import Depends, FastAPI, HTTPException, Request, Response from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse from fastapi.staticfiles import StaticFiles from config import config as app_config from crypto import init_crypto from pool_manager import PoolManager from rate_limiter import PerBackendRateLimiter from router import Router from proxy import handle_proxy_request from storage.db import init_db, create_tables, run_integrity_check from storage.backend_store import ( create_backend, get_backend, list_backends, update_backend, delete_backend, get_pool_stats, ) from storage.usage_store import get_total_stats, get_hourly_usage, get_daily_stats, aggregate_daily_stats from storage.cooldown_store import get_cooldown_history from storage.config_store import get_config, set_config, list_configs, delete_config from storage.models import Backend, ModelMapping import os structlog.configure( processors=[ structlog.stdlib.filter_by_level, structlog.stdlib.add_logger_name, structlog.stdlib.add_log_level, structlog.stdlib.PositionalArgumentsFormatter(), structlog.processors.TimeStamper(fmt="iso"), structlog.processors.StackInfoRenderer(), structlog.processors.format_exc_info, structlog.processors.UnicodeDecoder(), structlog.dev.ConsoleRenderer(), ], context_class=dict, logger_factory=structlog.stdlib.LoggerFactory(), wrapper_class=structlog.stdlib.BoundLogger, cache_logger_on_first_use=True, ) logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server") # ────────────────────────────────────── # Global runtime state # ────────────────────────────────────── pool_manager: PoolManager | None = None rate_limiter: PerBackendRateLimiter | None = None router: Router | None = None start_time: float = 0.0 def get_pm() -> PoolManager: assert pool_manager is not None return pool_manager def get_rl() -> PerBackendRateLimiter: assert rate_limiter is not None return rate_limiter def get_router() -> Router: assert router is not None return router # ────────────────────────────────────── # Lifespan # ────────────────────────────────────── @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]: global pool_manager, rate_limiter, router, start_time # Init crypto if app_config.encryption_key: init_crypto(app_config.encryption_key) # Init DB init_db() create_tables() ok = run_integrity_check() if not ok: logger.error("db_integrity_check_failed") # Init runtime components pool_manager = PoolManager() rate_limiter = PerBackendRateLimiter( refill_interval_ms=app_config.rate_limiter_refill_interval_ms, ) router = Router(pool_manager, rate_limiter) start_time = time.time() # Start background tasks health_task = asyncio.create_task(_health_check_loop()) stats_task = asyncio.create_task(_stats_aggregation_loop()) logger.info( "sidecar_v2_started", host=app_config.host, port=app_config.port, metrics_port=app_config.metrics_port, ) try: yield finally: health_task.cancel() stats_task.cancel() try: await health_task except asyncio.CancelledError: pass try: await stats_task except asyncio.CancelledError: pass logger.info("sidecar_v2_stopped") app = FastAPI( title="Sidecar V2 — Multi-Pool Provider Proxy", version="2.0.0", lifespan=lifespan, ) # ────────────────────────────────────── # Background tasks # ────────────────────────────────────── async def _health_check_loop() -> None: """Periodically check and clear expired cooldowns.""" from cooldown_manager import check_and_clear_cooldown from storage.backend_store import list_backends as lb while True: try: backends = lb(decrypt_key=False) for b in backends: if b.status == "cooling": check_and_clear_cooldown(b.id) except Exception: logger.exception("health_check_error") await asyncio.sleep(app_config.health_check_interval_seconds) async def _stats_aggregation_loop() -> None: """Periodically aggregate daily stats.""" while True: try: today = time.strftime("%Y-%m-%d", time.gmtime()) aggregate_daily_stats(today) except Exception: logger.exception("stats_aggregation_error") await asyncio.sleep(app_config.stats_refresh_interval_seconds) # ────────────────────────────────────── # Health / Metrics # ────────────────────────────────────── @app.get("/health") async def health() -> dict[str, Any]: return { "status": "ok", "version": "2.0.0", "uptime_seconds": int(time.time() - start_time), } # ────────────────────────────────────── # Dashboard SSE # ────────────────────────────────────── @app.get("/dashboard/sse") async def dashboard_sse() -> StreamingResponse: """SSE endpoint for real-time dashboard data.""" async def event_generator(): while True: try: pool_status = pool_manager.get_pool_status() total_stats = get_total_stats() all_backends = list_backends(decrypt_key=False) backends_list = [] for b in all_backends: rl_status = rate_limiter.get_status(b.id) backends_list.append({ "id": b.id, "name": b.name, "label": b.label, "pool": b.pool, "enabled": b.enabled, "status": b.status, "rpm_limit": b.rpm_limit, "cooldown_until": b.cooldown_until, "consecutive_429_count": b.consecutive_429_count, "model_count": len(b.model_mappings), "rate_limiter": rl_status, }) snapshot = { "type": "snapshot", "pool": pool_status, "total": total_stats, "backends": backends_list, "uptime_seconds": int(time.time() - start_time), "timestamp": time.time(), } yield f"data: {__import__('json').dumps(snapshot)}\n\n" except Exception: logger.exception("sse_error") await asyncio.sleep(app_config.dashboard_sse_interval_seconds) return StreamingResponse( event_generator(), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", }, ) # ────────────────────────────────────── # Admin: Backend CRUD # ────────────────────────────────────── @app.get("/api/admin/backends") async def admin_list_backends(pool: str | None = None) -> list[dict]: """List all backends with masked keys.""" backends = list_backends(pool=pool, decrypt_key=True) return [b.to_dict(mask_key=True) for b in backends] @app.get("/api/admin/backends/{backend_id}") async def admin_get_backend(backend_id: str) -> dict: """Get a single backend (key masked).""" b = get_backend(backend_id, decrypt_key=True) if b is None: raise HTTPException(404, "Backend not found") return b.to_dict(mask_key=True) @app.post("/api/admin/backends") async def admin_create_backend(body: dict[str, Any]) -> dict: """Create a new backend.""" required = ["name", "api_base_url", "api_key"] for field in required: if field not in body: raise HTTPException(400, f"Missing required field: {field}") model_mappings_raw = body.get("model_mappings", {}) model_mappings = {} for canonical_name, mm in model_mappings_raw.items(): model_mappings[canonical_name] = ModelMapping.from_dict(mm) backend = Backend( name=body["name"], label=body.get("label", ""), api_base_url=body["api_base_url"], api_key_plain=body["api_key"], api=body.get("api", "openai-completions"), timeout_seconds=body.get("timeout_seconds", 120), rpm_limit=body.get("rpm_limit", app_config.default_rpm_limit), pool=body.get("pool", "primary"), enabled=body.get("enabled", True), model_mappings=model_mappings, source=body.get("source", "webui"), ) created = create_backend(backend) return created.to_dict(mask_key=True) @app.put("/api/admin/backends/{backend_id}") async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict: """Update a backend.""" updates = dict(body) # Handle model_mappings if "model_mappings" in updates: raw = updates["model_mappings"] updates["model_mappings"] = { k: ModelMapping.from_dict(v) for k, v in raw.items() } # Handle api_key if "api_key" in updates: updates["api_key_plain"] = updates.pop("api_key") updated = update_backend(backend_id, updates) if updated is None: raise HTTPException(404, "Backend not found") return updated.to_dict(mask_key=True) @app.delete("/api/admin/backends/{backend_id}") async def admin_delete_backend(backend_id: str) -> dict: """Delete a backend.""" ok = delete_backend(backend_id) if not ok: raise HTTPException(404, "Backend not found") return {"status": "deleted", "id": backend_id} # ────────────────────────────────────── # Admin: Pool Status # ────────────────────────────────────── @app.get("/api/admin/pools") async def admin_pool_status() -> dict: """Get pool summary.""" return pool_manager.get_pool_status() # ────────────────────────────────────── # Admin: Usage / Stats # ────────────────────────────────────── @app.get("/api/admin/stats/total") async def admin_total_stats() -> dict: """Get aggregate usage stats.""" return get_total_stats() @app.get("/api/admin/stats/hourly") async def admin_hourly_usage( backend_id: str | None = None, hours: int = 168, ) -> list[dict]: """Get hourly usage data.""" since = None if hours > 0: since = time.strftime( "%Y-%m-%dT%H:%M:%SZ", time.gmtime(time.time() - hours * 3600), ) return get_hourly_usage(backend_id=backend_id, since=since, limit=hours) @app.get("/api/admin/stats/daily") async def admin_daily_stats(days: int = 30) -> list[dict]: """Get daily aggregated stats.""" return get_daily_stats(days=days) @app.get("/api/admin/stats/cooldown") async def admin_cooldown_history( backend_id: str | None = None, limit: int = 50, ) -> list[dict]: """Get cooldown event history.""" return get_cooldown_history(backend_id=backend_id, limit=limit) # ────────────────────────────────────── # Admin: System Config # ────────────────────────────────────── @app.get("/api/admin/config") async def admin_get_all_config() -> list[dict]: """List all system config entries.""" return list_configs() @app.get("/api/admin/config/{key}") async def admin_get_config(key: str) -> dict: """Get a single config value.""" value = get_config(key) if value is None: raise HTTPException(404, "Config not found") return {"key": key, "value": value} @app.put("/api/admin/config/{key}") async def admin_set_config(key: str, body: dict[str, Any]) -> dict: """Set a config value.""" value = str(body.get("value", "")) description = str(body.get("description", "")) set_config(key, value, description) return {"key": key, "value": value} @app.delete("/api/admin/config/{key}") async def admin_delete_config(key: str) -> dict: """Delete a config entry.""" ok = delete_config(key) if not ok: raise HTTPException(404, "Config not found") return {"status": "deleted", "key": key} # ────────────────────────────────────── # Dashboard HTML # ────────────────────────────────────── @app.get("/dashboard") async def dashboard_html() -> HTMLResponse: """Serve the dashboard WebUI.""" dashboard_path = os.path.join( os.path.dirname(__file__), "dashboard.html" ) if os.path.exists(dashboard_path): with open(dashboard_path, "r") as f: return HTMLResponse(f.read()) return HTMLResponse("