4f415fb500
P0 fixes: - Admin API Bearer Token auth middleware - Encryption key missing -> CRITICAL log + sys.exit(1) - Prometheus metrics endpoint (:9191) - requirements.txt + Dockerfile + docker-compose.yml + systemd + nginx P1 fixes: - Dead code removed from _refresh_cooldowns() - Stream detection fixed (text/event-stream only) - Emergency passthrough (10% RPM retry before 503) - Active health probing for backends - SQLite daily backup loop with retention - Chart.js CDN fallback - Key rotation SOP document - JSON log format support - Deploy files: systemd unit + nginx config BIZ-52 review re-entry Co-authored-by: multica-agent <github@multica.ai>
713 lines
27 KiB
Python
713 lines
27 KiB
Python
"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE."""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from collections.abc import AsyncGenerator
|
|
from contextlib import asynccontextmanager
|
|
from typing import Any, Optional
|
|
|
|
import structlog
|
|
from fastapi import Depends, FastAPI, HTTPException, Request, Response
|
|
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
|
|
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
|
|
|
from config import config as app_config
|
|
from crypto import init_crypto, is_initialized
|
|
from pool_manager import PoolManager
|
|
from rate_limiter import PerBackendRateLimiter
|
|
from router import Router
|
|
from proxy import handle_proxy_request
|
|
|
|
from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH
|
|
from storage.backend_store import (
|
|
create_backend, get_backend, list_backends, update_backend,
|
|
delete_backend, get_pool_stats,
|
|
)
|
|
from storage.usage_store import get_total_stats, get_hourly_usage, get_daily_stats, aggregate_daily_stats
|
|
from storage.cooldown_store import get_cooldown_history
|
|
from storage.config_store import get_config, set_config, list_configs, delete_config
|
|
from storage.models import Backend, ModelMapping
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Logging
|
|
# ──────────────────────────────────────────────────────────
|
|
# Output format for log lines, read once at import time from the LOG_FORMAT
# environment variable (case-insensitive). "json" selects the JSON renderer;
# any other value (default "console") selects the console renderer.
_LOG_FORMAT = os.getenv("LOG_FORMAT", "console").lower()

# Processors run in list order; the renderer chosen from _LOG_FORMAT is last.
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        (
            structlog.processors.JSONRenderer()
            if _LOG_FORMAT == "json"
            else structlog.dev.ConsoleRenderer()
        ),
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)
# Module-wide logger used by every handler and background task in this file.
logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server")
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Admin Auth middleware
|
|
# ──────────────────────────────────────────────────────────
|
|
# Bearer-token extractor shared by the auth dependencies in this file.
# auto_error=False: the dependants declare the credentials as Optional and
# handle the missing-header case (None) themselves.
_security = HTTPBearer(auto_error=False)
|
|
|
|
|
|
def verify_admin_token(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> bool:
    """Report whether the request carries the configured admin token.

    This is the non-raising counterpart of ``require_admin``: it never raises
    for a missing or wrong token, it only returns a boolean.

    Args:
        credentials: Bearer credentials extracted from the Authorization
            header, or None when the header is absent.

    Returns:
        True only when ``app_config.admin_token`` is set and equals the
        presented token. False when no admin token is configured, when no
        credentials were supplied, or when the token does not match.
    """
    import hmac

    expected = app_config.admin_token
    if not expected or credentials is None:
        return False

    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Encoding to bytes lets compare_digest accept
    # non-ASCII input instead of raising TypeError.
    return hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
|
|
|
|
|
|
def require_admin(credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security)):
    """Require admin auth — raise 401 if not authorized.

    Intended as a FastAPI dependency on write endpoints.

    Args:
        credentials: Bearer credentials extracted from the Authorization
            header, or None when the header is absent.

    Raises:
        HTTPException: status 401 when no admin token is configured, when the
            Authorization header is missing, or when the token does not match.
    """
    import hmac

    # Every 401 carries the challenge header so clients know which scheme to use.
    challenge = {"WWW-Authenticate": "Bearer"}

    expected = app_config.admin_token
    if not expected:
        raise HTTPException(
            status_code=401,
            detail="Admin API not configured: set SIDECAR_ADMIN_TOKEN",
            headers=challenge,
        )
    if credentials is None:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header",
            headers=challenge,
        )

    # Constant-time comparison so response timing does not leak how much of
    # the token matched; bytes avoid a TypeError on non-ASCII input.
    token_ok = hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
    if not token_ok:
        raise HTTPException(
            status_code=401,
            detail="Invalid admin token",
            headers=challenge,
        )
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Global runtime state
|
|
# ──────────────────────────────────────────────────────────
|
|
# Runtime singletons. They start as None and are assigned by lifespan()
# during application startup.
pool_manager: Optional[PoolManager] = None
rate_limiter: Optional[PerBackendRateLimiter] = None
router: Optional[Router] = None
# Wall-clock timestamp (time.time()) recorded by lifespan(); the uptime values
# reported by this module are computed relative to it.
start_time: float = 0.0

# In-memory metrics counters
_metrics_counters: dict[str, int] = {}
# NOTE(review): this lock is not acquired anywhere in the visible part of this
# file — confirm whether counter updates were meant to be guarded by it.
_metrics_lock = asyncio.Lock()
|
|
|
|
|
|
def _inc_metric(key: str, delta: int = 1) -> None:
    """Add ``delta`` to the in-memory counter ``key``.

    A counter that does not exist yet is treated as zero. The update is a
    plain read-then-write on the module-level dict; no lock is taken here.
    """
    previous = _metrics_counters.get(key, 0)
    _metrics_counters[key] = previous + delta
|
|
|
|
|
|
def get_pm() -> PoolManager:
    """Return the module-level PoolManager.

    Raises:
        RuntimeError: if the pool manager has not been created yet (it is
            assigned in lifespan()).
    """
    if pool_manager is None:
        # Explicit raise instead of `assert`: assertions are removed under
        # `python -O`, which would let None escape as a PoolManager.
        raise RuntimeError("pool_manager is not initialised")
    return pool_manager
|
|
|
|
|
|
def get_rl() -> PerBackendRateLimiter:
    """Return the module-level PerBackendRateLimiter.

    Raises:
        RuntimeError: if the rate limiter has not been created yet (it is
            assigned in lifespan()).
    """
    if rate_limiter is None:
        # Explicit raise instead of `assert`: assertions are removed under
        # `python -O`, which would let None escape as a rate limiter.
        raise RuntimeError("rate_limiter is not initialised")
    return rate_limiter
|
|
|
|
|
|
def get_router() -> Router:
    """Return the module-level Router.

    Raises:
        RuntimeError: if the router has not been created yet (it is assigned
            in lifespan()).
    """
    if router is None:
        # Explicit raise instead of `assert`: assertions are removed under
        # `python -O`, which would let None escape as a Router.
        raise RuntimeError("router is not initialised")
    return router
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Lifespan
|
|
# ──────────────────────────────────────────────────────────
|
|
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
    """Application lifespan: initialise state on startup, stop tasks on shutdown.

    Startup, in order: refuse to run without an encryption key, initialise
    crypto, warn if no admin token is set, initialise the database and run
    its integrity check, create the pool manager / rate limiter / router,
    record the start time, and launch the three background loops.

    Shutdown: cancel every background loop, wait for all of them to finish,
    then log that the service stopped.

    Args:
        app: The FastAPI application (not used by the body).

    Raises:
        SystemExit: exit code 1 when ``app_config.encryption_key`` is empty.
    """
    global pool_manager, rate_limiter, router, start_time

    # P0: Encryption key is mandatory — refuse to start without it
    if not app_config.encryption_key:
        logger.critical(
            "missing_encryption_key",
            hint="Set SIDECAR_ENCRYPTION_KEY (64 hex chars). Refusing to start."
        )
        sys.exit(1)

    init_crypto(app_config.encryption_key)
    logger.info("crypto_initialized")

    # P0: Warn if admin_token not set
    if not app_config.admin_token:
        logger.warning(
            "admin_token_not_set",
            hint="Admin write endpoints disabled until SIDECAR_ADMIN_TOKEN is configured."
        )

    # Init DB
    init_db()
    create_tables()
    ok = run_integrity_check()
    if not ok:
        # A failed integrity check is reported but does not abort startup.
        logger.error("db_integrity_check_failed")

    # Init runtime components
    pool_manager = PoolManager()
    rate_limiter = PerBackendRateLimiter(
        refill_interval_ms=app_config.rate_limiter_refill_interval_ms,
    )
    router = Router(pool_manager, rate_limiter)
    start_time = time.time()

    # Start background tasks
    background_tasks = [
        asyncio.create_task(_health_check_loop()),
        asyncio.create_task(_stats_aggregation_loop()),
        asyncio.create_task(_backup_loop()),
    ]

    logger.info(
        "sidecar_v2_started",
        host=app_config.host,
        port=app_config.port,
        metrics_port=app_config.metrics_port,
    )

    try:
        yield
    finally:
        # Cancel everything first, then wait for all tasks together, so one
        # task ending with an unexpected error cannot leave the others
        # running or skip the final log line.
        for task in background_tasks:
            task.cancel()
        outcomes = await asyncio.gather(*background_tasks, return_exceptions=True)
        for outcome in outcomes:
            cancelled = isinstance(outcome, asyncio.CancelledError)
            if isinstance(outcome, BaseException) and not cancelled:
                logger.error("background_task_failed", error=repr(outcome))
        logger.info("sidecar_v2_stopped")
|
|
|
|
|
|
# The ASGI application. Startup and shutdown work is delegated to lifespan();
# every route in this file is registered on this object.
app = FastAPI(
    title="Sidecar V2 — Multi-Pool Provider Proxy",
    version="2.0.0",
    lifespan=lifespan,
)
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Background tasks
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
def _record_probe_success(backend_id: Any, latency_ms: int, status_code: int) -> None:
    """Upsert a 'healthy' backend_health row with the probe latency and status."""
    from storage.db import get_connection

    # NOTE(review): the row is written as 'healthy' for any HTTP response,
    # whatever its status code, and consecutive_failures is not reset here —
    # confirm both are intended.
    with get_connection() as conn:
        conn.execute(
            """INSERT INTO backend_health
                   (backend_id, state, last_latency_ms, last_status_code,
                    last_check_at)
               VALUES (?, 'healthy', ?, ?, datetime('now'))
               ON CONFLICT(backend_id) DO UPDATE SET
                   state = excluded.state,
                   last_latency_ms = excluded.last_latency_ms,
                   last_status_code = excluded.last_status_code,
                   last_check_at = excluded.last_check_at""",
            (backend_id, latency_ms, status_code),
        )
        conn.commit()


def _record_probe_failure(backend_id: Any) -> None:
    """Upsert a 'degraded' backend_health row and bump consecutive_failures."""
    from storage.db import get_connection

    with get_connection() as conn:
        conn.execute(
            """INSERT INTO backend_health
                   (backend_id, state, last_check_at)
               VALUES (?, 'degraded', datetime('now'))
               ON CONFLICT(backend_id) DO UPDATE SET
                   state = 'degraded',
                   last_check_at = excluded.last_check_at""",
            (backend_id,),
        )
        conn.execute(
            """UPDATE backend_health SET
                   consecutive_failures = consecutive_failures + 1
               WHERE backend_id = ?""",
            (backend_id,),
        )
        conn.commit()


async def _probe_backend(client: Any, backend: Any) -> None:
    """Send one GET health probe to ``backend`` and persist the outcome."""
    try:
        probe_url = backend.api_base_url.rstrip("/") + app_config.health_probe_endpoint
        headers = {}
        if backend.api_key_plain:
            headers["Authorization"] = f"Bearer {backend.api_key_plain}"

        started = time.monotonic()
        resp = await client.get(probe_url, headers=headers)
        elapsed_ms = int((time.monotonic() - started) * 1000)

        # Update health state in DB
        _record_probe_success(backend.id, elapsed_ms, resp.status_code)

        logger.debug(
            "health_probe_ok",
            backend_id=backend.id,
            status=resp.status_code,
            latency_ms=elapsed_ms,
        )
    except Exception as probe_err:
        # Any failure above (URL building, the request, or the success write)
        # counts as a failed probe.
        logger.warning(
            "health_probe_failed",
            backend_id=backend.id,
            error=str(probe_err),
        )
        # Mark as degraded
        _record_probe_failure(backend.id)


async def _health_check_loop() -> None:
    """Periodic health checks: clear expired cooldowns + active probing of backends.

    Runs until cancelled. Each sweep lists all backends, asks the cooldown
    manager to re-check those in status "cooling", and probes those that are
    "healthy" and enabled. An error during a sweep is logged and the loop
    carries on after ``app_config.health_check_interval_seconds``.
    """
    from cooldown_manager import check_and_clear_cooldown
    import httpx

    while True:
        try:
            backends = list_backends(decrypt_key=True)
            # One client per sweep instead of one per backend: the timeout is
            # the same for every probe, so the client is loop-invariant.
            timeout = httpx.Timeout(app_config.health_check_timeout_seconds)
            async with httpx.AsyncClient(timeout=timeout) as client:
                for b in backends:
                    # 1. Clear expired cooldowns
                    if b.status == "cooling":
                        check_and_clear_cooldown(b.id)

                    # 2. Active health probing for healthy/enabled backends
                    if b.status == "healthy" and b.enabled:
                        await _probe_backend(client, b)
        except Exception:
            logger.exception("health_check_error")
        await asyncio.sleep(app_config.health_check_interval_seconds)
|
|
|
|
|
|
async def _stats_aggregation_loop() -> None:
    """Re-aggregate the current UTC day's stats on a fixed interval, forever.

    An aggregation failure is logged and does not stop the loop; the next
    attempt happens after ``app_config.stats_refresh_interval_seconds``.
    """
    while True:
        try:
            # The day key is derived from UTC (gmtime), not local time.
            aggregate_daily_stats(time.strftime("%Y-%m-%d", time.gmtime()))
        except Exception:
            logger.exception("stats_aggregation_error")
        await asyncio.sleep(app_config.stats_refresh_interval_seconds)
|
|
|
|
|
|
def _copy_sqlite_database(src_path: str, dest_path: str) -> None:
    """Copy the SQLite database at ``src_path`` to ``dest_path`` via Connection.backup."""
    import sqlite3
    from contextlib import closing

    # closing() guarantees both connections are released even if backup() raises.
    with closing(sqlite3.connect(src_path)) as source, \
            closing(sqlite3.connect(dest_path)) as dest:
        source.backup(dest)


def _prune_old_backups(backup_dir: str, cutoff: float) -> None:
    """Delete sidecar_v2_*.db files in ``backup_dir`` last modified before ``cutoff``."""
    for fname in os.listdir(backup_dir):
        if not (fname.startswith("sidecar_v2_") and fname.endswith(".db")):
            continue
        fpath = os.path.join(backup_dir, fname)
        if os.path.getmtime(fpath) < cutoff:
            os.remove(fpath)
            logger.info("db_backup_retired", path=fpath)


async def _backup_loop() -> None:
    """Daily SQLite backup with retention.

    Runs until cancelled. Every 24 hours, if ``app_config.backup_dir`` is set,
    writes a timestamped copy of the database there and then removes backup
    files older than ``app_config.backup_retention_days`` days. Any error in
    a cycle is logged and the loop continues with the next cycle.
    """
    while True:
        try:
            await asyncio.sleep(86400)  # 24 hours
            backup_dir = app_config.backup_dir
            if not backup_dir:
                # Backups are disabled; check again after the next sleep.
                continue

            os.makedirs(backup_dir, exist_ok=True)

            backup_name = f"sidecar_v2_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}.db"
            backup_path = os.path.join(backup_dir, backup_name)

            # Imported at backup time, as the original did — presumably so the
            # path current at this moment is used; confirm against storage.db.
            from storage.db import _DB_PATH as db_path

            # The copy is blocking file I/O; run it in a worker thread so the
            # event loop keeps serving requests meanwhile.
            await asyncio.to_thread(_copy_sqlite_database, db_path, backup_path)

            logger.info("db_backup_created", path=backup_path)

            # Retention: remove old backups
            retention_days = app_config.backup_retention_days
            cutoff = time.time() - retention_days * 86400
            _prune_old_backups(backup_dir, cutoff)

        except Exception:
            logger.exception("backup_error")
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Health / Metrics
|
|
# ──────────────────────────────────────────────────────────
|
|
@app.get("/health")
async def health() -> dict[str, Any]:
    """Liveness endpoint: fixed status and version plus whole-second uptime."""
    uptime = int(time.time() - start_time)
    return {"status": "ok", "version": "2.0.0", "uptime_seconds": uptime}
|
|
|
|
|
|
@app.get("/metrics")
async def metrics() -> Response:
    """Render current gauges and totals as plain-text metric lines.

    Emits, in order: one line per (pool, stat) pair from the pool manager,
    the number of backends in status "cooling", the in-memory emergency
    counter, database and WAL sizes, request/error/token/cost totals, and
    uptime in whole seconds.
    """
    from storage.db import get_db_sizes

    out: list[str] = []

    # Pool provider counts
    for pool_name, stats in pool_manager.get_pool_status().items():
        out.extend(
            f"sidecar_pool_providers{{pool=\"{pool_name}\",type=\"{key}\"}} {val}"
            for key, val in stats.items()
        )

    # Cooldown status
    cooling = [b for b in list_backends(decrypt_key=False) if b.status == "cooling"]
    out.append(f"sidecar_cooldown_active {len(cooling)}")

    # Emergency count
    out.append(f"sidecar_emergency_count {_metrics_counters.get('emergency_count', 0)}")

    # DB sizes
    sizes = get_db_sizes()
    out.append(f"sidecar_db_size_bytes {sizes.get('db_bytes', 0)}")
    out.append(f"sidecar_wal_size_bytes {sizes.get('wal_bytes', 0)}")

    # Total stats; `or 0` turns a stored None into a printable number.
    total = get_total_stats()
    out.append(f"sidecar_requests_total {total.get('total_requests', 0) or 0}")
    out.append(f"sidecar_errors_total {total.get('total_errors', 0) or 0}")
    out.append(f"sidecar_tokens_total {total.get('total_tokens', 0) or 0}")
    out.append(f"sidecar_cost_total {total.get('total_cost', 0) or 0.0}")

    # Uptime
    out.append(f"sidecar_uptime_seconds {int(time.time() - start_time)}")

    body = "\n".join(out) + "\n"
    return Response(content=body, media_type="text/plain; charset=utf-8")
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Dashboard SSE
|
|
# ──────────────────────────────────────────────────────────
|
|
@app.get("/dashboard/sse")
async def dashboard_sse() -> StreamingResponse:
    """Stream dashboard snapshots as server-sent events.

    Each event is a JSON "snapshot" holding pool status, total stats, one
    summary per backend (including its rate-limiter status), uptime, and a
    timestamp. A failure while building or sending one snapshot is logged
    and the stream continues after the configured interval.
    """

    def summarize(b) -> dict:
        # Rate-limiter status is fetched first, then the backend's fields.
        rl_status = rate_limiter.get_status(b.id)
        return {
            "id": b.id,
            "name": b.name,
            "label": b.label,
            "pool": b.pool,
            "enabled": b.enabled,
            "status": b.status,
            "rpm_limit": b.rpm_limit,
            "cooldown_until": b.cooldown_until,
            "consecutive_429_count": b.consecutive_429_count,
            "model_count": len(b.model_mappings),
            "rate_limiter": rl_status,
        }

    def build_snapshot() -> dict:
        pool_status = pool_manager.get_pool_status()
        total_stats = get_total_stats()
        all_backends = list_backends(decrypt_key=False)
        return {
            "type": "snapshot",
            "pool": pool_status,
            "total": total_stats,
            "backends": [summarize(b) for b in all_backends],
            "uptime_seconds": int(time.time() - start_time),
            "timestamp": time.time(),
        }

    async def event_stream():
        while True:
            try:
                payload = json.dumps(build_snapshot())
                yield f"data: {payload}\n\n"
            except Exception:
                logger.exception("sse_error")

            await asyncio.sleep(app_config.dashboard_sse_interval_seconds)

    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers=sse_headers,
    )
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Admin: Backend CRUD (READ: public, WRITE: auth required)
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
@app.get("/api/admin/backends")
async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
    """Return every backend, optionally filtered by pool, with keys masked.

    No auth dependency is attached: this is a public read endpoint.
    """
    masked: list[dict] = []
    for backend in list_backends(pool=pool, decrypt_key=True):
        masked.append(backend.to_dict(mask_key=True))
    return masked
|
|
|
|
|
|
@app.get("/api/admin/backends/{backend_id}")
async def admin_get_backend(backend_id: str) -> dict:
    """Return one backend with its key masked, or 404 if the id is unknown."""
    backend = get_backend(backend_id, decrypt_key=True)
    if backend is not None:
        return backend.to_dict(mask_key=True)
    raise HTTPException(404, "Backend not found")
|
|
|
|
|
|
@app.post("/api/admin/backends")
async def admin_create_backend(
    body: dict[str, Any],
    _auth=Depends(require_admin),
) -> dict:
    """Create a new backend (auth required).

    Args:
        body: JSON object. ``name``, ``api_base_url`` and ``api_key`` are
            required; the remaining fields fall back to the defaults below.
            ``model_mappings``, when present, must be an object mapping a
            canonical model name to a mapping definition.

    Returns:
        The created backend as a dict with its key masked.

    Raises:
        HTTPException: 400 when a required field is missing or
            ``model_mappings`` is not an object.
    """
    required = ["name", "api_base_url", "api_key"]
    for field in required:
        if field not in body:
            raise HTTPException(400, f"Missing required field: {field}")

    model_mappings_raw = body.get("model_mappings", {})
    if not isinstance(model_mappings_raw, dict):
        # Without this check a list/string/null here failed on `.items()`
        # and surfaced as a 500 instead of a client error.
        raise HTTPException(400, "model_mappings must be an object")
    model_mappings = {
        canonical_name: ModelMapping.from_dict(mm)
        for canonical_name, mm in model_mappings_raw.items()
    }

    backend = Backend(
        name=body["name"],
        label=body.get("label", ""),
        api_base_url=body["api_base_url"],
        api_key_plain=body["api_key"],
        api=body.get("api", "openai-completions"),
        timeout_seconds=body.get("timeout_seconds", 120),
        rpm_limit=body.get("rpm_limit", app_config.default_rpm_limit),
        pool=body.get("pool", "primary"),
        enabled=body.get("enabled", True),
        model_mappings=model_mappings,
        source=body.get("source", "webui"),
    )

    created = create_backend(backend)
    return created.to_dict(mask_key=True)
|
|
|
|
|
|
@app.put("/api/admin/backends/{backend_id}")
async def admin_update_backend(
    backend_id: str,
    body: dict[str, Any],
    _auth=Depends(require_admin),
) -> dict:
    """Update a backend (auth required).

    Args:
        backend_id: Id of the backend to change.
        body: JSON object of fields to update. ``model_mappings``, when
            present, must be an object; ``api_key`` is passed to the store
            under the name ``api_key_plain``.

    Returns:
        The updated backend as a dict with its key masked.

    Raises:
        HTTPException: 400 when ``model_mappings`` is not an object; 404 when
            the backend does not exist.
    """
    # Work on a copy so the request body itself is never mutated.
    updates = dict(body)

    if "model_mappings" in updates:
        raw = updates["model_mappings"]
        if not isinstance(raw, dict):
            # Without this check a list/string/null here failed on `.items()`
            # and surfaced as a 500 instead of a client error.
            raise HTTPException(400, "model_mappings must be an object")
        updates["model_mappings"] = {
            k: ModelMapping.from_dict(v) for k, v in raw.items()
        }

    if "api_key" in updates:
        updates["api_key_plain"] = updates.pop("api_key")

    updated = update_backend(backend_id, updates)
    if updated is None:
        raise HTTPException(404, "Backend not found")
    return updated.to_dict(mask_key=True)
|
|
|
|
|
|
@app.delete("/api/admin/backends/{backend_id}")
async def admin_delete_backend(
    backend_id: str,
    _auth=Depends(require_admin),
) -> dict:
    """Remove a backend (auth required); 404 if the store reports no deletion."""
    if delete_backend(backend_id):
        return {"status": "deleted", "id": backend_id}
    raise HTTPException(404, "Backend not found")
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Admin: Pool Status (public read)
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
@app.get("/api/admin/pools")
async def admin_pool_status() -> dict:
    """Return the pool manager's current pool status (no auth dependency)."""
    status = pool_manager.get_pool_status()
    return status
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Admin: Usage / Stats (public read)
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
@app.get("/api/admin/stats/total")
async def admin_total_stats() -> dict:
    """Return the usage store's overall totals (no auth dependency)."""
    totals = get_total_stats()
    return totals
|
|
|
|
|
|
@app.get("/api/admin/stats/hourly")
async def admin_hourly_usage(
    backend_id: Optional[str] = None,
    hours: int = 168,
) -> list[dict]:
    """Return hourly usage rows, optionally for one backend.

    When ``hours`` is positive the query is bounded to the last ``hours``
    hours (UTC, ISO-8601 with a trailing Z); otherwise no lower bound is
    passed. ``hours`` is also forwarded as the row limit.
    """
    if hours > 0:
        window_start = time.gmtime(time.time() - hours * 3600)
        since = time.strftime("%Y-%m-%dT%H:%M:%SZ", window_start)
    else:
        since = None
    # NOTE(review): limit=hours caps the total row count; with no backend_id
    # filter that may truncate multi-backend results — confirm against
    # get_hourly_usage.
    return get_hourly_usage(backend_id=backend_id, since=since, limit=hours)
|
|
|
|
|
|
@app.get("/api/admin/stats/daily")
async def admin_daily_stats(days: int = 30) -> list[dict]:
    """Return daily stats for the requested number of days (default 30)."""
    rows = get_daily_stats(days=days)
    return rows
|
|
|
|
|
|
@app.get("/api/admin/stats/cooldown")
async def admin_cooldown_history(
    backend_id: Optional[str] = None,
    limit: int = 50,
) -> list[dict]:
    """Return cooldown history rows, optionally for one backend (default limit 50)."""
    history = get_cooldown_history(backend_id=backend_id, limit=limit)
    return history
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Admin: System Config (read public, write auth required)
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
@app.get("/api/admin/config")
async def admin_get_all_config() -> list[dict]:
    """Return every stored config entry (no auth dependency)."""
    entries = list_configs()
    return entries
|
|
|
|
|
|
@app.get("/api/admin/config/{key}")
async def admin_get_config(key: str) -> dict:
    """Return one config entry as ``{"key", "value"}``, or 404 if it is not set."""
    value = get_config(key)
    if value is not None:
        return {"key": key, "value": value}
    raise HTTPException(404, "Config not found")
|
|
|
|
|
|
@app.put("/api/admin/config/{key}")
async def admin_set_config(
    key: str,
    body: dict[str, Any],
    _auth=Depends(require_admin),
) -> dict:
    """Store a config value (auth required).

    ``value`` and ``description`` are read from the body, default to the
    empty string when absent, and are coerced to ``str`` before storing.
    """
    value = str(body.get("value", ""))
    set_config(key, value, str(body.get("description", "")))
    return {"key": key, "value": value}
|
|
|
|
|
|
@app.delete("/api/admin/config/{key}")
async def admin_delete_config(
    key: str,
    _auth=Depends(require_admin),
) -> dict:
    """Remove a config entry (auth required); 404 if the store reports no deletion."""
    if delete_config(key):
        return {"status": "deleted", "key": key}
    raise HTTPException(404, "Config not found")
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Dashboard HTML (public, but respects admin_token for writes in JS)
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
@app.get("/dashboard")
async def dashboard_html() -> HTMLResponse:
    """Serve dashboard.html from this module's directory.

    Returns:
        The file's contents as an HTML response, or a 404 HTML response when
        the file does not exist.
    """
    dashboard_path = os.path.join(
        os.path.dirname(__file__), "dashboard.html"
    )
    # Open directly instead of checking existence first: avoids a race if the
    # file disappears between the check and the open. The explicit encoding
    # keeps decoding independent of the host's locale.
    try:
        with open(dashboard_path, "r", encoding="utf-8") as f:
            return HTMLResponse(f.read())
    except FileNotFoundError:
        return HTMLResponse("<h1>Dashboard not found</h1>", status_code=404)
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Proxy Endpoints
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request) -> Response:
    """Count the request, then hand it to the proxy for /v1/chat/completions."""
    _inc_metric("proxy_requests_total")
    upstream_path = "/v1/chat/completions"
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
|
|
|
|
|
|
@app.post("/v1/completions")
async def completions(request: Request) -> Response:
    """Hand the request to the proxy for /v1/completions."""
    upstream_path = "/v1/completions"
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
|
|
|
|
|
|
@app.post("/v1/embeddings")
async def embeddings(request: Request) -> Response:
    """Hand the request to the proxy for /v1/embeddings."""
    upstream_path = "/v1/embeddings"
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
|
|
|
|
|
|
@app.get("/v1/models")
@app.get("/v1/models/{model_id:path}")
async def list_models(request: Request, model_id: Optional[str] = None) -> Response:
    """Proxy a model listing, or a single-model lookup when ``model_id`` is given."""
    if model_id:
        path = f"/v1/models/{model_id}"
    else:
        # Also taken for an empty model_id, matching the bare listing route.
        path = "/v1/models"
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, path
    )
|
|
|
|
|
|
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"])
async def catch_all(request: Request, path: str) -> Response:
    """Proxy any request that no earlier route matched.

    The captured path is forwarded with exactly one leading slash added when
    it does not already start with one.
    """
    target_path = path if path.startswith("/") else f"/{path}"
    return await handle_proxy_request(
        pool_manager, rate_limiter, router, request, target_path
    )
|
|
|
|
|
|
# ──────────────────────────────────────────────────────────
|
|
# Main
|
|
# ──────────────────────────────────────────────────────────
|
|
|
|
def main() -> None:
    """Run the app under uvicorn using host, port and log level from config."""
    import uvicorn

    server_options = {
        "host": app_config.host,
        "port": app_config.port,
        # The configured level is lower-cased before being handed to uvicorn.
        "log_level": app_config.log_level.lower(),
    }
    uvicorn.run("server:app", **server_options)
|
|
|
|
|
|
# Script entry point: start the server only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()