# EnterpriseArchitect/services/nvidia_sidecar/server.py

"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE."""

import asyncio
import time
from collections.abc import AsyncGenerator
from contextlib import asynccontextmanager
from typing import Any

import structlog
from fastapi import Depends, FastAPI, HTTPException, Request, Response
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles

from config import config as app_config
from crypto import init_crypto
from pool_manager import PoolManager
from rate_limiter import PerBackendRateLimiter
from router import Router
from proxy import handle_proxy_request

from storage.db import init_db, create_tables, run_integrity_check
from storage.backend_store import (
    create_backend, get_backend, list_backends, update_backend,
    delete_backend, get_pool_stats,
)
from storage.usage_store import get_total_stats, get_hourly_usage, get_daily_stats, aggregate_daily_stats
from storage.cooldown_store import get_cooldown_history
from storage.config_store import get_config, set_config, list_configs, delete_config
from storage.models import Backend, ModelMapping

import os

# Configure structlog once at import time. Loggers are created through the
# stdlib logger factory and wrapped in the stdlib BoundLogger; events pass
# through the processor chain below in order and are finally rendered by the
# dev console renderer.
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        # Timestamps are emitted in ISO format.
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        # The renderer must stay last: it turns the event dict into output.
        structlog.dev.ConsoleRenderer(),
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)
# Module-level logger used by every handler and background task in this file.
logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server")


# ──────────────────────────────────────
# Global runtime state
# ──────────────────────────────────────
# Shared runtime components. They are assigned by ``lifespan`` during startup
# and remain ``None`` until then.
pool_manager: PoolManager | None = None
rate_limiter: PerBackendRateLimiter | None = None
router: Router | None = None
# ``time.time()`` captured by ``lifespan`` at startup; used to report uptime.
start_time: float = 0.0


def get_pm() -> PoolManager:
    """Return the module-level ``PoolManager``.

    Raises:
        RuntimeError: If ``pool_manager`` has not been assigned yet.
    """
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let ``None`` leak out to callers.
    if pool_manager is None:
        raise RuntimeError("pool_manager is not initialised")
    return pool_manager

def get_rl() -> PerBackendRateLimiter:
    """Return the module-level ``PerBackendRateLimiter``.

    Raises:
        RuntimeError: If ``rate_limiter`` has not been assigned yet.
    """
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let ``None`` leak out to callers.
    if rate_limiter is None:
        raise RuntimeError("rate_limiter is not initialised")
    return rate_limiter

def get_router() -> Router:
    """Return the module-level ``Router``.

    Raises:
        RuntimeError: If ``router`` has not been assigned yet.
    """
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let ``None`` leak out to callers.
    if router is None:
        raise RuntimeError("router is not initialised")
    return router


# ──────────────────────────────────────
# Lifespan
# ──────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
    """Set up shared runtime state on startup and stop background tasks on shutdown.

    Startup initialises crypto (only when an encryption key is configured),
    prepares the database, builds the pool manager, rate limiter and router,
    records the start time and launches the two background loops. Control is
    then yielded to the application. On exit both loops are cancelled and
    awaited before the stop event is logged.
    """
    global pool_manager, rate_limiter, router, start_time

    # Init crypto
    if app_config.encryption_key:
        init_crypto(app_config.encryption_key)

    # Init DB. A failed integrity check is logged but does not abort startup.
    init_db()
    create_tables()
    if not run_integrity_check():
        logger.error("db_integrity_check_failed")

    # Init runtime components
    pool_manager = PoolManager()
    rate_limiter = PerBackendRateLimiter(
        refill_interval_ms=app_config.rate_limiter_refill_interval_ms,
    )
    router = Router(pool_manager, rate_limiter)
    start_time = time.time()

    # Start background tasks
    background_tasks = {
        "health_check": asyncio.create_task(_health_check_loop()),
        "stats_aggregation": asyncio.create_task(_stats_aggregation_loop()),
    }

    logger.info(
        "sidecar_v2_started",
        host=app_config.host,
        port=app_config.port,
        metrics_port=app_config.metrics_port,
    )

    try:
        yield
    finally:
        for task in background_tasks.values():
            task.cancel()
        # Await both tasks together so that an unexpected error from one of
        # them cannot prevent the other from being awaited or the stop event
        # from being logged. Cancellation results are expected and ignored.
        outcomes = await asyncio.gather(
            *background_tasks.values(), return_exceptions=True
        )
        for task_name, outcome in zip(background_tasks, outcomes):
            if isinstance(outcome, Exception):
                logger.error(
                    "background_task_failed",
                    task=task_name,
                    error=repr(outcome),
                )
        logger.info("sidecar_v2_stopped")


# The ASGI application. Every route in this module is registered on it, and
# ``lifespan`` is wired in to run the startup/shutdown logic.
app = FastAPI(
    title="Sidecar V2 — Multi-Pool Provider Proxy",
    version="2.0.0",
    lifespan=lifespan,
)


# ──────────────────────────────────────
# Background tasks
# ──────────────────────────────────────

async def _health_check_loop() -> None:
    """Run forever, re-checking the cooldown of every backend marked "cooling".

    Each pass lists the backends (keys left encrypted), calls
    ``check_and_clear_cooldown`` for those whose status is ``"cooling"``, then
    sleeps for the configured health-check interval. Any exception raised
    during a pass is logged and the loop carries on.
    """
    from cooldown_manager import check_and_clear_cooldown
    from storage.backend_store import list_backends as fetch_backends

    while True:
        try:
            for backend in fetch_backends(decrypt_key=False):
                if backend.status != "cooling":
                    continue
                check_and_clear_cooldown(backend.id)
        except Exception:
            logger.exception("health_check_error")
        await asyncio.sleep(app_config.health_check_interval_seconds)


async def _stats_aggregation_loop() -> None:
    """Run forever, aggregating daily stats for the current UTC date.

    Failures are logged and do not stop the loop; each pass is followed by a
    sleep of the configured stats refresh interval.
    """
    while True:
        try:
            utc_now = time.gmtime()
            aggregate_daily_stats(time.strftime("%Y-%m-%d", utc_now))
        except Exception:
            logger.exception("stats_aggregation_error")
        await asyncio.sleep(app_config.stats_refresh_interval_seconds)


# ──────────────────────────────────────
# Health / Metrics
# ──────────────────────────────────────
@app.get("/health")
async def health() -> dict[str, Any]:
    """Report a fixed "ok" status, the service version and whole seconds of uptime."""
    elapsed = time.time() - start_time
    return dict(status="ok", version="2.0.0", uptime_seconds=int(elapsed))


# ──────────────────────────────────────
# Dashboard SSE
# ──────────────────────────────────────
@app.get("/dashboard/sse")
async def dashboard_sse() -> StreamingResponse:
    """Stream periodic dashboard snapshots as Server-Sent Events.

    Each event is a single ``data:`` line holding a JSON snapshot with the
    pool status, total usage stats, a per-backend summary (including the rate
    limiter status of each backend), uptime and the current timestamp. A
    snapshot that fails to build or serialise is logged and skipped; the
    stream keeps running and retries after the configured interval.

    Returns:
        A ``text/event-stream`` response with caching and proxy buffering
        disabled through response headers.
    """
    import json  # local import: the module does not import json at top level

    def build_snapshot() -> dict[str, Any]:
        """Collect pool, usage and per-backend state into one JSON-ready dict."""
        # Go through the accessors rather than the Optional module globals.
        pool_status = get_pm().get_pool_status()
        total_stats = get_total_stats()
        limiter = get_rl()
        backends_list = [
            {
                "id": b.id,
                "name": b.name,
                "label": b.label,
                "pool": b.pool,
                "enabled": b.enabled,
                "status": b.status,
                "rpm_limit": b.rpm_limit,
                "cooldown_until": b.cooldown_until,
                "consecutive_429_count": b.consecutive_429_count,
                "model_count": len(b.model_mappings),
                "rate_limiter": limiter.get_status(b.id),
            }
            for b in list_backends(decrypt_key=False)
        ]
        return {
            "type": "snapshot",
            "pool": pool_status,
            "total": total_stats,
            "backends": backends_list,
            "uptime_seconds": int(time.time() - start_time),
            "timestamp": time.time(),
        }

    async def event_generator() -> AsyncGenerator[str, None]:
        while True:
            # Only snapshot construction is guarded; the ``yield`` sits in the
            # ``else`` branch so errors raised at the yield point are not
            # swallowed by the broad handler.
            try:
                payload = json.dumps(build_snapshot())
            except Exception:
                logger.exception("sse_error")
            else:
                yield f"data: {payload}\n\n"

            await asyncio.sleep(app_config.dashboard_sse_interval_seconds)

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )


# ──────────────────────────────────────
# Admin: Backend CRUD
# ──────────────────────────────────────

@app.get("/api/admin/backends")
async def admin_list_backends(pool: str | None = None) -> list[dict]:
    """Return all backends as dicts with masked keys.

    ``pool`` is forwarded unchanged to ``list_backends``.
    """
    masked: list[dict] = []
    for backend in list_backends(pool=pool, decrypt_key=True):
        masked.append(backend.to_dict(mask_key=True))
    return masked


@app.get("/api/admin/backends/{backend_id}")
async def admin_get_backend(backend_id: str) -> dict:
    """Return one backend as a dict with its key masked, or respond 404 if absent."""
    backend = get_backend(backend_id, decrypt_key=True)
    if backend is not None:
        return backend.to_dict(mask_key=True)
    raise HTTPException(404, "Backend not found")


@app.post("/api/admin/backends")
async def admin_create_backend(body: dict[str, Any]) -> dict:
    """Create a backend from a JSON body and return it with the key masked.

    ``name``, ``api_base_url`` and ``api_key`` are required. Every other field
    falls back to the default shown in the ``Backend(...)`` call below.

    Raises:
        HTTPException: 400 when a required field is missing, or when
            ``model_mappings`` is present but is not a JSON object.
    """
    for field in ("name", "api_base_url", "api_key"):
        if field not in body:
            raise HTTPException(400, f"Missing required field: {field}")

    # A missing or null ``model_mappings`` means "no mappings". Any other
    # non-object value is rejected up front so the client gets a 400 rather
    # than an unhandled AttributeError from ``.items()``.
    model_mappings_raw = body.get("model_mappings")
    if model_mappings_raw is None:
        model_mappings_raw = {}
    if not isinstance(model_mappings_raw, dict):
        raise HTTPException(400, "model_mappings must be an object")
    model_mappings = {
        canonical_name: ModelMapping.from_dict(mapping)
        for canonical_name, mapping in model_mappings_raw.items()
    }

    backend = Backend(
        name=body["name"],
        label=body.get("label", ""),
        api_base_url=body["api_base_url"],
        api_key_plain=body["api_key"],
        api=body.get("api", "openai-completions"),
        timeout_seconds=body.get("timeout_seconds", 120),
        rpm_limit=body.get("rpm_limit", app_config.default_rpm_limit),
        pool=body.get("pool", "primary"),
        enabled=body.get("enabled", True),
        model_mappings=model_mappings,
        source=body.get("source", "webui"),
    )

    created = create_backend(backend)
    return created.to_dict(mask_key=True)


@app.put("/api/admin/backends/{backend_id}")
async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
    """Apply the fields in ``body`` to a backend and return it with the key masked.

    ``model_mappings`` values are converted with ``ModelMapping.from_dict``
    and ``api_key`` is renamed to ``api_key_plain`` before the update is
    handed to ``update_backend``.

    Raises:
        HTTPException: 400 when ``model_mappings`` is present but is not a
            JSON object; 404 when ``update_backend`` returns ``None``.
    """
    # Work on a copy so the request body itself is never mutated.
    updates = dict(body)

    # Handle model_mappings
    if "model_mappings" in updates:
        raw = updates["model_mappings"]
        # Reject non-object values here instead of failing inside ``.items()``
        # with an unhandled AttributeError.
        if not isinstance(raw, dict):
            raise HTTPException(400, "model_mappings must be an object")
        updates["model_mappings"] = {
            k: ModelMapping.from_dict(v) for k, v in raw.items()
        }

    # Handle api_key
    if "api_key" in updates:
        updates["api_key_plain"] = updates.pop("api_key")

    updated = update_backend(backend_id, updates)
    if updated is None:
        raise HTTPException(404, "Backend not found")
    return updated.to_dict(mask_key=True)


@app.delete("/api/admin/backends/{backend_id}")
async def admin_delete_backend(backend_id: str) -> dict:
    """Delete a backend by id; respond 404 when ``delete_backend`` reports failure."""
    if delete_backend(backend_id):
        return {"status": "deleted", "id": backend_id}
    raise HTTPException(404, "Backend not found")


# ──────────────────────────────────────
# Admin: Pool Status
# ──────────────────────────────────────

@app.get("/api/admin/pools")
async def admin_pool_status() -> dict:
    """Return the pool manager's status summary."""
    # Use the accessor rather than dereferencing the Optional module global.
    return get_pm().get_pool_status()


# ──────────────────────────────────────
# Admin: Usage / Stats
# ──────────────────────────────────────

@app.get("/api/admin/stats/total")
async def admin_total_stats() -> dict:
    """Return whatever ``get_total_stats`` reports for overall usage."""
    totals = get_total_stats()
    return totals


@app.get("/api/admin/stats/hourly")
async def admin_hourly_usage(
    backend_id: str | None = None,
    hours: int = 168,
) -> list[dict]:
    """Return hourly usage rows, optionally for a single backend.

    When ``hours`` is positive, ``since`` is the UTC timestamp ``hours`` hours
    before now, formatted as ``%Y-%m-%dT%H:%M:%SZ``. Otherwise no ``since``
    bound is passed.
    """
    if hours > 0:
        window_start = time.time() - hours * 3600
        since = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(window_start))
    else:
        since = None
    # NOTE(review): ``limit`` is tied to ``hours``, so it is non-positive when
    # ``hours <= 0`` — confirm how ``get_hourly_usage`` treats that.
    return get_hourly_usage(backend_id=backend_id, since=since, limit=hours)


@app.get("/api/admin/stats/daily")
async def admin_daily_stats(days: int = 30) -> list[dict]:
    """Return the daily aggregated stats, forwarding ``days`` to ``get_daily_stats``."""
    daily_rows = get_daily_stats(days=days)
    return daily_rows


@app.get("/api/admin/stats/cooldown")
async def admin_cooldown_history(
    backend_id: str | None = None,
    limit: int = 50,
) -> list[dict]:
    """Return cooldown events, forwarding both filters to ``get_cooldown_history``."""
    events = get_cooldown_history(backend_id=backend_id, limit=limit)
    return events


# ──────────────────────────────────────
# Admin: System Config
# ──────────────────────────────────────

@app.get("/api/admin/config")
async def admin_get_all_config() -> list[dict]:
    """Return every system config entry as reported by ``list_configs``."""
    entries = list_configs()
    return entries


@app.get("/api/admin/config/{key}")
async def admin_get_config(key: str) -> dict:
    """Return ``{"key", "value"}`` for one config entry, or respond 404 if unset."""
    stored = get_config(key)
    if stored is not None:
        return {"key": key, "value": stored}
    raise HTTPException(404, "Config not found")


@app.put("/api/admin/config/{key}")
async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
    """Store a config value and echo back the key and stored value.

    ``value`` and ``description`` are read from the body, default to an empty
    string when absent, and are coerced with ``str`` before being stored.
    """
    value, description = (
        str(body.get(field, "")) for field in ("value", "description")
    )
    set_config(key, value, description)
    return {"key": key, "value": value}


@app.delete("/api/admin/config/{key}")
async def admin_delete_config(key: str) -> dict:
    """Delete a config entry; respond 404 when ``delete_config`` reports failure."""
    if delete_config(key):
        return {"status": "deleted", "key": key}
    raise HTTPException(404, "Config not found")


# ──────────────────────────────────────
# Dashboard HTML
# ──────────────────────────────────────

@app.get("/dashboard")
async def dashboard_html() -> HTMLResponse:
    """Serve ``dashboard.html`` from the directory containing this module.

    Returns:
        The file contents as an HTML response, or a small 404 HTML page when
        the file does not exist.
    """
    dashboard_path = os.path.join(os.path.dirname(__file__), "dashboard.html")
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which differs between hosts.
        with open(dashboard_path, "r", encoding="utf-8") as f:
            html = f.read()
    except FileNotFoundError:
        # Try-then-handle avoids the gap between an existence check and the
        # subsequent open.
        return HTMLResponse("<h1>Dashboard not found</h1>", status_code=404)
    return HTMLResponse(html)


# ──────────────────────────────────────
# Proxy Endpoints
# ──────────────────────────────────────

@app.post("/v1/chat/completions")
async def chat_completions(request: Request) -> Response:
    """Hand a chat-completions request to the shared proxy handler."""
    upstream_path = "/v1/chat/completions"
    response = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
    return response


@app.post("/v1/completions")
async def completions(request: Request) -> Response:
    """Hand a completions request to the shared proxy handler."""
    upstream_path = "/v1/completions"
    response = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
    return response


@app.post("/v1/embeddings")
async def embeddings(request: Request) -> Response:
    """Hand an embeddings request to the shared proxy handler."""
    upstream_path = "/v1/embeddings"
    response = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
    return response


@app.get("/v1/models")
@app.get("/v1/models/{model_id:path}")
async def list_models(request: Request, model_id: str | None = None) -> Response:
    """Proxy a models request, appending ``model_id`` to the path when one is given."""
    if model_id:
        path = f"/v1/models/{model_id}"
    else:
        path = "/v1/models"
    response = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, path
    )
    return response


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"])
async def catch_all(request: Request, path: str) -> Response:
    """Proxy any request not matched by an earlier route.

    The captured path is given a leading slash if it lacks one before being
    passed to the proxy handler.
    """
    target_path = path if path.startswith("/") else f"/{path}"
    response = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, target_path
    )
    return response


# ──────────────────────────────────────
# Main
# ──────────────────────────────────────

def main() -> None:
    """Run ``server:app`` under uvicorn with host, port and log level from config."""
    import uvicorn

    server_options = {
        "host": app_config.host,
        "port": app_config.port,
        # The configured level is lower-cased before being handed to uvicorn.
        "log_level": app_config.log_level.lower(),
    }
    uvicorn.run("server:app", **server_options)


# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()