feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI
BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,477 @@
|
||||
"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE."""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections.abc import AsyncGenerator
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, Response
|
||||
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
from config import config as app_config
|
||||
from crypto import init_crypto
|
||||
from pool_manager import PoolManager
|
||||
from rate_limiter import PerBackendRateLimiter
|
||||
from router import Router
|
||||
from proxy import handle_proxy_request
|
||||
|
||||
from storage.db import init_db, create_tables, run_integrity_check
|
||||
from storage.backend_store import (
|
||||
create_backend, get_backend, list_backends, update_backend,
|
||||
delete_backend, get_pool_stats,
|
||||
)
|
||||
from storage.usage_store import get_total_stats, get_hourly_usage, get_daily_stats, aggregate_daily_stats
|
||||
from storage.cooldown_store import get_cooldown_history
|
||||
from storage.config_store import get_config, set_config, list_configs, delete_config
|
||||
from storage.models import Backend, ModelMapping
|
||||
|
||||
import os
|
||||
|
||||
structlog.configure(
|
||||
processors=[
|
||||
structlog.stdlib.filter_by_level,
|
||||
structlog.stdlib.add_logger_name,
|
||||
structlog.stdlib.add_log_level,
|
||||
structlog.stdlib.PositionalArgumentsFormatter(),
|
||||
structlog.processors.TimeStamper(fmt="iso"),
|
||||
structlog.processors.StackInfoRenderer(),
|
||||
structlog.processors.format_exc_info,
|
||||
structlog.processors.UnicodeDecoder(),
|
||||
structlog.dev.ConsoleRenderer(),
|
||||
],
|
||||
context_class=dict,
|
||||
logger_factory=structlog.stdlib.LoggerFactory(),
|
||||
wrapper_class=structlog.stdlib.BoundLogger,
|
||||
cache_logger_on_first_use=True,
|
||||
)
|
||||
logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server")
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Global runtime state
|
||||
# ──────────────────────────────────────
|
||||
pool_manager: PoolManager | None = None
|
||||
rate_limiter: PerBackendRateLimiter | None = None
|
||||
router: Router | None = None
|
||||
start_time: float = 0.0
|
||||
|
||||
|
||||
def get_pm() -> PoolManager:
|
||||
assert pool_manager is not None
|
||||
return pool_manager
|
||||
|
||||
def get_rl() -> PerBackendRateLimiter:
|
||||
assert rate_limiter is not None
|
||||
return rate_limiter
|
||||
|
||||
def get_router() -> Router:
|
||||
assert router is not None
|
||||
return router
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Lifespan
|
||||
# ──────────────────────────────────────
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
|
||||
global pool_manager, rate_limiter, router, start_time
|
||||
|
||||
# Init crypto
|
||||
if app_config.encryption_key:
|
||||
init_crypto(app_config.encryption_key)
|
||||
|
||||
# Init DB
|
||||
init_db()
|
||||
create_tables()
|
||||
ok = run_integrity_check()
|
||||
if not ok:
|
||||
logger.error("db_integrity_check_failed")
|
||||
|
||||
# Init runtime components
|
||||
pool_manager = PoolManager()
|
||||
rate_limiter = PerBackendRateLimiter(
|
||||
refill_interval_ms=app_config.rate_limiter_refill_interval_ms,
|
||||
)
|
||||
router = Router(pool_manager, rate_limiter)
|
||||
start_time = time.time()
|
||||
|
||||
# Start background tasks
|
||||
health_task = asyncio.create_task(_health_check_loop())
|
||||
stats_task = asyncio.create_task(_stats_aggregation_loop())
|
||||
|
||||
logger.info(
|
||||
"sidecar_v2_started",
|
||||
host=app_config.host,
|
||||
port=app_config.port,
|
||||
metrics_port=app_config.metrics_port,
|
||||
)
|
||||
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
health_task.cancel()
|
||||
stats_task.cancel()
|
||||
try:
|
||||
await health_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
try:
|
||||
await stats_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
logger.info("sidecar_v2_stopped")
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="Sidecar V2 — Multi-Pool Provider Proxy",
|
||||
version="2.0.0",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Background tasks
|
||||
# ──────────────────────────────────────
|
||||
|
||||
async def _health_check_loop() -> None:
|
||||
"""Periodically check and clear expired cooldowns."""
|
||||
from cooldown_manager import check_and_clear_cooldown
|
||||
from storage.backend_store import list_backends as lb
|
||||
|
||||
while True:
|
||||
try:
|
||||
backends = lb(decrypt_key=False)
|
||||
for b in backends:
|
||||
if b.status == "cooling":
|
||||
check_and_clear_cooldown(b.id)
|
||||
except Exception:
|
||||
logger.exception("health_check_error")
|
||||
await asyncio.sleep(app_config.health_check_interval_seconds)
|
||||
|
||||
|
||||
async def _stats_aggregation_loop() -> None:
|
||||
"""Periodically aggregate daily stats."""
|
||||
while True:
|
||||
try:
|
||||
today = time.strftime("%Y-%m-%d", time.gmtime())
|
||||
aggregate_daily_stats(today)
|
||||
except Exception:
|
||||
logger.exception("stats_aggregation_error")
|
||||
await asyncio.sleep(app_config.stats_refresh_interval_seconds)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Health / Metrics
|
||||
# ──────────────────────────────────────
|
||||
@app.get("/health")
|
||||
async def health() -> dict[str, Any]:
|
||||
return {
|
||||
"status": "ok",
|
||||
"version": "2.0.0",
|
||||
"uptime_seconds": int(time.time() - start_time),
|
||||
}
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Dashboard SSE
|
||||
# ──────────────────────────────────────
|
||||
@app.get("/dashboard/sse")
|
||||
async def dashboard_sse() -> StreamingResponse:
|
||||
"""SSE endpoint for real-time dashboard data."""
|
||||
|
||||
async def event_generator():
|
||||
while True:
|
||||
try:
|
||||
pool_status = pool_manager.get_pool_status()
|
||||
total_stats = get_total_stats()
|
||||
all_backends = list_backends(decrypt_key=False)
|
||||
|
||||
backends_list = []
|
||||
for b in all_backends:
|
||||
rl_status = rate_limiter.get_status(b.id)
|
||||
backends_list.append({
|
||||
"id": b.id,
|
||||
"name": b.name,
|
||||
"label": b.label,
|
||||
"pool": b.pool,
|
||||
"enabled": b.enabled,
|
||||
"status": b.status,
|
||||
"rpm_limit": b.rpm_limit,
|
||||
"cooldown_until": b.cooldown_until,
|
||||
"consecutive_429_count": b.consecutive_429_count,
|
||||
"model_count": len(b.model_mappings),
|
||||
"rate_limiter": rl_status,
|
||||
})
|
||||
|
||||
snapshot = {
|
||||
"type": "snapshot",
|
||||
"pool": pool_status,
|
||||
"total": total_stats,
|
||||
"backends": backends_list,
|
||||
"uptime_seconds": int(time.time() - start_time),
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
yield f"data: {__import__('json').dumps(snapshot)}\n\n"
|
||||
except Exception:
|
||||
logger.exception("sse_error")
|
||||
|
||||
await asyncio.sleep(app_config.dashboard_sse_interval_seconds)
|
||||
|
||||
return StreamingResponse(
|
||||
event_generator(),
|
||||
media_type="text/event-stream",
|
||||
headers={
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"X-Accel-Buffering": "no",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: Backend CRUD
|
||||
# ──────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/backends")
|
||||
async def admin_list_backends(pool: str | None = None) -> list[dict]:
|
||||
"""List all backends with masked keys."""
|
||||
backends = list_backends(pool=pool, decrypt_key=True)
|
||||
return [b.to_dict(mask_key=True) for b in backends]
|
||||
|
||||
|
||||
@app.get("/api/admin/backends/{backend_id}")
|
||||
async def admin_get_backend(backend_id: str) -> dict:
|
||||
"""Get a single backend (key masked)."""
|
||||
b = get_backend(backend_id, decrypt_key=True)
|
||||
if b is None:
|
||||
raise HTTPException(404, "Backend not found")
|
||||
return b.to_dict(mask_key=True)
|
||||
|
||||
|
||||
@app.post("/api/admin/backends")
|
||||
async def admin_create_backend(body: dict[str, Any]) -> dict:
|
||||
"""Create a new backend."""
|
||||
required = ["name", "api_base_url", "api_key"]
|
||||
for field in required:
|
||||
if field not in body:
|
||||
raise HTTPException(400, f"Missing required field: {field}")
|
||||
|
||||
model_mappings_raw = body.get("model_mappings", {})
|
||||
model_mappings = {}
|
||||
for canonical_name, mm in model_mappings_raw.items():
|
||||
model_mappings[canonical_name] = ModelMapping.from_dict(mm)
|
||||
|
||||
backend = Backend(
|
||||
name=body["name"],
|
||||
label=body.get("label", ""),
|
||||
api_base_url=body["api_base_url"],
|
||||
api_key_plain=body["api_key"],
|
||||
api=body.get("api", "openai-completions"),
|
||||
timeout_seconds=body.get("timeout_seconds", 120),
|
||||
rpm_limit=body.get("rpm_limit", app_config.default_rpm_limit),
|
||||
pool=body.get("pool", "primary"),
|
||||
enabled=body.get("enabled", True),
|
||||
model_mappings=model_mappings,
|
||||
source=body.get("source", "webui"),
|
||||
)
|
||||
|
||||
created = create_backend(backend)
|
||||
return created.to_dict(mask_key=True)
|
||||
|
||||
|
||||
@app.put("/api/admin/backends/{backend_id}")
|
||||
async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
|
||||
"""Update a backend."""
|
||||
updates = dict(body)
|
||||
|
||||
# Handle model_mappings
|
||||
if "model_mappings" in updates:
|
||||
raw = updates["model_mappings"]
|
||||
updates["model_mappings"] = {
|
||||
k: ModelMapping.from_dict(v) for k, v in raw.items()
|
||||
}
|
||||
|
||||
# Handle api_key
|
||||
if "api_key" in updates:
|
||||
updates["api_key_plain"] = updates.pop("api_key")
|
||||
|
||||
updated = update_backend(backend_id, updates)
|
||||
if updated is None:
|
||||
raise HTTPException(404, "Backend not found")
|
||||
return updated.to_dict(mask_key=True)
|
||||
|
||||
|
||||
@app.delete("/api/admin/backends/{backend_id}")
|
||||
async def admin_delete_backend(backend_id: str) -> dict:
|
||||
"""Delete a backend."""
|
||||
ok = delete_backend(backend_id)
|
||||
if not ok:
|
||||
raise HTTPException(404, "Backend not found")
|
||||
return {"status": "deleted", "id": backend_id}
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: Pool Status
|
||||
# ──────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/pools")
|
||||
async def admin_pool_status() -> dict:
|
||||
"""Get pool summary."""
|
||||
return pool_manager.get_pool_status()
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: Usage / Stats
|
||||
# ──────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/stats/total")
|
||||
async def admin_total_stats() -> dict:
|
||||
"""Get aggregate usage stats."""
|
||||
return get_total_stats()
|
||||
|
||||
|
||||
@app.get("/api/admin/stats/hourly")
|
||||
async def admin_hourly_usage(
|
||||
backend_id: str | None = None,
|
||||
hours: int = 168,
|
||||
) -> list[dict]:
|
||||
"""Get hourly usage data."""
|
||||
since = None
|
||||
if hours > 0:
|
||||
since = time.strftime(
|
||||
"%Y-%m-%dT%H:%M:%SZ",
|
||||
time.gmtime(time.time() - hours * 3600),
|
||||
)
|
||||
return get_hourly_usage(backend_id=backend_id, since=since, limit=hours)
|
||||
|
||||
|
||||
@app.get("/api/admin/stats/daily")
|
||||
async def admin_daily_stats(days: int = 30) -> list[dict]:
|
||||
"""Get daily aggregated stats."""
|
||||
return get_daily_stats(days=days)
|
||||
|
||||
|
||||
@app.get("/api/admin/stats/cooldown")
|
||||
async def admin_cooldown_history(
|
||||
backend_id: str | None = None,
|
||||
limit: int = 50,
|
||||
) -> list[dict]:
|
||||
"""Get cooldown event history."""
|
||||
return get_cooldown_history(backend_id=backend_id, limit=limit)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: System Config
|
||||
# ──────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/config")
|
||||
async def admin_get_all_config() -> list[dict]:
|
||||
"""List all system config entries."""
|
||||
return list_configs()
|
||||
|
||||
|
||||
@app.get("/api/admin/config/{key}")
|
||||
async def admin_get_config(key: str) -> dict:
|
||||
"""Get a single config value."""
|
||||
value = get_config(key)
|
||||
if value is None:
|
||||
raise HTTPException(404, "Config not found")
|
||||
return {"key": key, "value": value}
|
||||
|
||||
|
||||
@app.put("/api/admin/config/{key}")
|
||||
async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
|
||||
"""Set a config value."""
|
||||
value = str(body.get("value", ""))
|
||||
description = str(body.get("description", ""))
|
||||
set_config(key, value, description)
|
||||
return {"key": key, "value": value}
|
||||
|
||||
|
||||
@app.delete("/api/admin/config/{key}")
|
||||
async def admin_delete_config(key: str) -> dict:
|
||||
"""Delete a config entry."""
|
||||
ok = delete_config(key)
|
||||
if not ok:
|
||||
raise HTTPException(404, "Config not found")
|
||||
return {"status": "deleted", "key": key}
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Dashboard HTML
|
||||
# ──────────────────────────────────────
|
||||
|
||||
@app.get("/dashboard")
|
||||
async def dashboard_html() -> HTMLResponse:
|
||||
"""Serve the dashboard WebUI."""
|
||||
dashboard_path = os.path.join(
|
||||
os.path.dirname(__file__), "dashboard.html"
|
||||
)
|
||||
if os.path.exists(dashboard_path):
|
||||
with open(dashboard_path, "r") as f:
|
||||
return HTMLResponse(f.read())
|
||||
return HTMLResponse("<h1>Dashboard not found</h1>", status_code=404)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Proxy Endpoints
|
||||
# ──────────────────────────────────────
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
async def chat_completions(request: Request) -> Response:
|
||||
return await handle_proxy_request(
|
||||
pool_manager, rate_limiter, router, request, "/v1/chat/completions"
|
||||
)
|
||||
|
||||
|
||||
@app.post("/v1/completions")
|
||||
async def completions(request: Request) -> Response:
|
||||
return await handle_proxy_request(
|
||||
pool_manager, rate_limiter, router, request, "/v1/completions"
|
||||
)
|
||||
|
||||
|
||||
@app.post("/v1/embeddings")
|
||||
async def embeddings(request: Request) -> Response:
|
||||
return await handle_proxy_request(
|
||||
pool_manager, rate_limiter, router, request, "/v1/embeddings"
|
||||
)
|
||||
|
||||
|
||||
@app.get("/v1/models")
|
||||
@app.get("/v1/models/{model_id:path}")
|
||||
async def list_models(request: Request, model_id: str | None = None) -> Response:
|
||||
path = f"/v1/models/{model_id}" if model_id else "/v1/models"
|
||||
return await handle_proxy_request(
|
||||
pool_manager, rate_limiter, router, request, path
|
||||
)
|
||||
|
||||
|
||||
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"])
|
||||
async def catch_all(request: Request, path: str) -> Response:
|
||||
target_path = f"/{path}" if not path.startswith("/") else path
|
||||
return await handle_proxy_request(
|
||||
pool_manager, rate_limiter, router, request, target_path
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Main
|
||||
# ──────────────────────────────────────
|
||||
|
||||
def main() -> None:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(
|
||||
"server:app",
|
||||
host=app_config.host,
|
||||
port=app_config.port,
|
||||
log_level=app_config.log_level.lower(),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user