fix(sidecar-v2): incorporate review feedback - P0/P1 fixes
P0 fixes: - Admin API Bearer Token auth middleware - Encryption key missing -> CRITICAL log + sys.exit(1) - Prometheus metrics endpoint (:9191) - requirements.txt + Dockerfile + docker-compose.yml + systemd + nginx P1 fixes: - Dead code removed from _refresh_cooldowns() - Stream detection fixed (text/event-stream only) - Emergency passthrough (10% RPM retry before 503) - Active health probing for backends - SQLite daily backup loop with retention - Chart.js CDN fallback - Key rotation SOP document - JSON log format support - Deploy files: systemd unit + nginx config BIZ-52 review re-entry Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
# Sidecar V2 — Multi-Pool Provider Proxy
|
||||
FROM python:3.12-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code
|
||||
COPY config.py crypto.py main.py server.py proxy.py router.py \
|
||||
pool_manager.py cooldown_manager.py rate_limiter.py __init__.py \
|
||||
dashboard.html ./
|
||||
COPY storage/ ./storage/
|
||||
|
||||
# Create data directory
|
||||
RUN mkdir -p /app/data /app/data/backups
|
||||
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy built artifacts
|
||||
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
|
||||
COPY --from=builder /app /app
|
||||
|
||||
# Environment
|
||||
ENV SIDECAR_HOST=0.0.0.0
|
||||
ENV SIDECAR_PORT=9190
|
||||
ENV SIDECAR_METRICS_PORT=9191
|
||||
ENV SIDECAR_DB_PATH=/app/data/sidecar_v2.db
|
||||
ENV SIDECAR_BACKUP_DIR=/app/data/backups
|
||||
ENV SIDECAR_ENCRYPTION_KEY=
|
||||
ENV SIDECAR_ADMIN_TOKEN=
|
||||
ENV LOG_FORMAT=json
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
EXPOSE 9190 9191
|
||||
|
||||
VOLUME ["/app/data"]
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
|
||||
CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:9190/health')" || exit 1
|
||||
|
||||
ENTRYPOINT ["python3", "main.py"]
|
||||
@@ -5,6 +5,8 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Sidecar V2 — Provider Pool Dashboard</title>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
|
||||
<!-- CDN fallback: place chart.umd.min.js in static/ for offline deployments -->
|
||||
<script>if(typeof Chart==='undefined'){document.write('<script src="/static/chart.umd.min.js"><\/script>')}</script>
|
||||
<style>
|
||||
:root {
|
||||
--bg: #0f1117;
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
# Sidecar V2 — API Key Encryption Rotation SOP
|
||||
|
||||
> 版本: v1.0 | 维护者: 严维序 (opengineer)
|
||||
|
||||
## 背景
|
||||
|
||||
Sidecar V2 使用 AES-256-GCM 加密存储所有 Provider 的 API Key。加密密钥通过 `SIDECAR_ENCRYPTION_KEY` 环境变量传入,启动时通过 `init_crypto()` 初始化。
|
||||
|
||||
## ⚠️ 关键警告
|
||||
|
||||
**直接更换 SIDECAR_ENCRYPTION_KEY 会导致所有已存储的 API Key 无法解密;如果旧密钥没有妥善保留,这些数据将永久不可恢复!**
|
||||
|
||||
`crypto.py` 的 `try_decrypt_existing()` 在密钥变更时会静默返回 `None`,已有加密数据将无法解密。请在轮换密钥前执行以下步骤。
|
||||
|
||||
## 安全轮换步骤
|
||||
|
||||
### Step 1: 导出当前 Backend 清单,并从安全渠道备齐各 API Key 明文(必须)
|
||||
|
||||
```bash
|
||||
# 使用旧密钥启动 sidecar,通过 admin API 导出
|
||||
curl -s -H "Authorization: Bearer <ADMIN_TOKEN>" \
|
||||
http://127.0.0.1:9190/api/admin/backends | \
|
||||
python3 -c "
|
||||
import json, sys
|
||||
data = json.load(sys.stdin)
|
||||
# 注意:api_key 是 masked 的,需要重新从安全渠道获取原始 key
|
||||
print(json.dumps(data, indent=2))
|
||||
"
|
||||
```
|
||||
|
||||
### Step 2: 停止服务
|
||||
|
||||
```bash
|
||||
systemctl stop sidecar-v2
|
||||
# 或
|
||||
docker compose down
|
||||
```
|
||||
|
||||
### Step 3: 备份数据库
|
||||
|
||||
```bash
|
||||
cp /app/data/sidecar_v2.db /app/data/backups/pre-rotation-$(date +%Y%m%d_%H%M%S).db
|
||||
```
|
||||
|
||||
### Step 4: 更新密钥
|
||||
|
||||
更新 `/etc/sidecar-v2/env` 或 docker `.env` 文件中的 `SIDECAR_ENCRYPTION_KEY`:
|
||||
|
||||
```
|
||||
SIDECAR_ENCRYPTION_KEY=<new_64_hex_char_key>
|
||||
```
|
||||
|
||||
生成新密钥:
|
||||
```bash
|
||||
python3 -c "import secrets; print(secrets.token_hex(32))"
|
||||
```
|
||||
|
||||
### Step 5: 启动服务并重新录入所有 API Key
|
||||
|
||||
由于密钥变更后旧加密数据不可读,需要:
|
||||
|
||||
1. 启动服务(此时所有旧 Provider 的 API Key 不可用)
|
||||
2. 通过 Admin API 重新录入所有 Provider 的 API Key:
|
||||
```bash
|
||||
curl -s -X PUT -H "Authorization: Bearer <ADMIN_TOKEN>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"api_key": "<NEW_PLAIN_KEY>"}' \
|
||||
http://127.0.0.1:9190/api/admin/backends/<backend_id>
|
||||
```
|
||||
|
||||
### Step 6: 验证
|
||||
|
||||
```bash
|
||||
# 确认 Provider 状态为 healthy
|
||||
curl -s http://127.0.0.1:9190/api/admin/pools
|
||||
# 发送测试请求
|
||||
curl -s -X POST http://127.0.0.1:9190/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"<model_name>","messages":[{"role":"user","content":"test"}],"max_tokens":5}'
|
||||
```
|
||||
|
||||
## 应急预案
|
||||
|
||||
如果在密钥轮换过程中出错:
|
||||
|
||||
1. 恢复旧密钥环境变量
|
||||
2. 恢复旧数据库备份
|
||||
3. 重启服务
|
||||
|
||||
恢复后旧 API Key 可以正常解密使用,因为备份数据库中的数据仍然是用旧密钥加密的。
|
||||
@@ -0,0 +1,56 @@
|
||||
# Sidecar V2 — Nginx reverse proxy config (reference)
|
||||
# Place at /etc/nginx/sites-available/sidecar-v2.conf
|
||||
# SSL certs managed by certbot or manually
|
||||
|
||||
upstream sidecar_v2_main {
|
||||
server 127.0.0.1:9190;
|
||||
}
|
||||
|
||||
upstream sidecar_v2_metrics {
|
||||
server 127.0.0.1:9191;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 443 ssl http2;
|
||||
server_name sidecar.example.com;
|
||||
|
||||
ssl_certificate /etc/ssl/certs/sidecar.pem;
|
||||
ssl_certificate_key /etc/ssl/private/sidecar.key;
|
||||
|
||||
# Dashboard + Admin API (main port)
|
||||
location / {
|
||||
proxy_pass http://sidecar_v2_main;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
|
||||
# SSE support for dashboard real-time data
|
||||
location /dashboard/sse {
|
||||
proxy_pass http://sidecar_v2_main;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_buffering off;
|
||||
proxy_cache off;
|
||||
chunked_transfer_encoding off;
|
||||
proxy_read_timeout 86400s;
|
||||
}
|
||||
|
||||
# Prometheus metrics
|
||||
location /metrics {
    # Metrics reveal internal pool, token and cost figures, so only loopback
    # and private-range scrapers are admitted on this public TLS vhost.
    # Adjust the allow list to match where your Prometheus server runs.
    allow 127.0.0.1;
    allow ::1;
    allow 10.0.0.0/8;
    allow 172.16.0.0/12;
    allow 192.168.0.0/16;
    deny all;

    proxy_pass http://sidecar_v2_metrics;
    proxy_http_version 1.1;
    proxy_set_header Host $host;
}
|
||||
|
||||
# Health check
|
||||
location /health {
|
||||
proxy_pass http://sidecar_v2_main;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
[Unit]
|
||||
Description=Sidecar V2 — Multi-Pool Provider Proxy
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=openclaw
|
||||
Group=openclaw
|
||||
WorkingDirectory=/opt/sidecar-v2
|
||||
EnvironmentFile=/etc/sidecar-v2/env
|
||||
ExecStart=/opt/sidecar-v2/.venv/bin/python3 main.py
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
|
||||
# Security hardening
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=yes
|
||||
ReadWritePaths=/opt/sidecar-v2/data
|
||||
PrivateTmp=yes
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -0,0 +1,26 @@
|
||||
# Sidecar V2 — Multi-Pool Provider Proxy
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
sidecar-v2:
|
||||
build: .
|
||||
container_name: sidecar-v2
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "9190:9190" # Main proxy + admin API + dashboard
|
||||
- "9191:9191" # Prometheus metrics
|
||||
environment:
|
||||
- SIDECAR_ENCRYPTION_KEY=${SIDECAR_ENCRYPTION_KEY}
|
||||
- SIDECAR_ADMIN_TOKEN=${SIDECAR_ADMIN_TOKEN:-} # No built-in default: when unset, admin write endpoints stay disabled
|
||||
- LOG_FORMAT=${LOG_FORMAT:-json}
|
||||
- SIDECAR_HOST=0.0.0.0
|
||||
- SIDECAR_PORT=9190
|
||||
- SIDECAR_METRICS_PORT=9191
|
||||
- SIDECAR_DB_PATH=/app/data/sidecar_v2.db
|
||||
- SIDECAR_BACKUP_DIR=/app/data/backups
|
||||
volumes:
|
||||
- sidecar-data:/app/data
|
||||
|
||||
volumes:
|
||||
sidecar-data:
|
||||
driver: local
|
||||
@@ -82,7 +82,9 @@ def build_response(resp: httpx.Response) -> Response:
|
||||
if k.lower() not in ("content-encoding", "transfer-encoding")
|
||||
}
|
||||
|
||||
if "text/event-stream" in content_type or "stream" in content_type:
|
||||
is_sse = "text/event-stream" in content_type
|
||||
is_chunked = resp.headers.get("transfer-encoding", "").lower() == "chunked"
|
||||
if is_sse or (is_chunked and headers.get("content-type", "") != "application/octet-stream"):
|
||||
return StreamingResponse(
|
||||
content=resp.aiter_bytes(),
|
||||
status_code=resp.status_code,
|
||||
@@ -176,7 +178,7 @@ async def handle_proxy_request(
|
||||
max_retries = config.max_pool_retries
|
||||
for attempt in range(max_retries):
|
||||
# Check and clear expired cooldowns before picking
|
||||
_refresh_cooldowns(pool_manager)
|
||||
_refresh_cooldowns()
|
||||
|
||||
backend = router.pick_backend(canonical_model)
|
||||
if backend is None:
|
||||
@@ -286,7 +288,7 @@ async def handle_proxy_request(
|
||||
)
|
||||
continue
|
||||
|
||||
# All backends exhausted — emergency rate-limited passthrough
|
||||
# All pools exhausted — emergency rate-limited passthrough
|
||||
emergency_rpm = int(config.default_rpm_limit * config.emergency_rpm_fraction)
|
||||
if emergency_rpm < 1:
|
||||
emergency_rpm = 1
|
||||
@@ -297,7 +299,60 @@ async def handle_proxy_request(
|
||||
emergency_rpm=emergency_rpm,
|
||||
)
|
||||
|
||||
# Emergency: just return a clear error telling OpenClaw to use its fallback
|
||||
# Emergency: try to get a token from any fallback backend at reduced RPM
|
||||
emergency_retries = 3
|
||||
for attempt in range(emergency_retries):
|
||||
backends = pool_manager.get_any_healthy_backends()
|
||||
for backend in backends:
|
||||
if rate_limiter.consume(backend.id, emergency_rpm):
|
||||
try:
|
||||
resp = await forward_to_backend(
|
||||
backend=backend,
|
||||
method=request.method,
|
||||
path=path,
|
||||
body=body_bytes if body_bytes else None,
|
||||
headers=raw_headers,
|
||||
stream=is_stream,
|
||||
)
|
||||
elapsed_ms = int((time.monotonic() - start_time) * 1000)
|
||||
|
||||
if resp.status_code == 429:
|
||||
start_cooldown(backend.id, backend.consecutive_429_count + 1)
|
||||
continue
|
||||
|
||||
# Success in emergency mode
|
||||
try:
|
||||
resp_json: dict[str, Any] = {}
|
||||
if not is_stream and resp.content:
|
||||
resp_json = json.loads(resp.content)
|
||||
except Exception:
|
||||
resp_json = {}
|
||||
|
||||
prompt_tokens, completion_tokens, total_tokens = extract_usage_from_response(
|
||||
resp, resp_json, canonical_model
|
||||
)
|
||||
cost_em = calculate_cost(backend, canonical_model, prompt_tokens, completion_tokens)
|
||||
|
||||
record_usage(
|
||||
backend_id=backend.id,
|
||||
model=canonical_model,
|
||||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
cost=cost_em,
|
||||
latency_ms=elapsed_ms,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"emergency_passthrough_success",
|
||||
backend_id=backend.id,
|
||||
model=canonical_model,
|
||||
emergency_rpm=emergency_rpm,
|
||||
)
|
||||
return build_response(resp)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# All emergency attempts failed — return 503 for OpenClaw fallback chain
|
||||
return build_error_response(
|
||||
503,
|
||||
"All provider pools exhausted. OpenClaw fallback chain should activate.",
|
||||
@@ -305,15 +360,11 @@ async def handle_proxy_request(
|
||||
)
|
||||
|
||||
|
||||
def _refresh_cooldowns(pool_manager: PoolManager) -> None:
|
||||
"""Check and clear expired cooldowns for all active backends."""
|
||||
for pool in ["primary", "fallback"]:
|
||||
backends = pool_manager.get_any_healthy_backends(pool=pool)
|
||||
for backend in backends:
|
||||
# Only check backends in non-healthy state
|
||||
pass
|
||||
def _refresh_cooldowns() -> None:
|
||||
"""Check and clear expired cooldowns for backends currently in cooling state.
|
||||
|
||||
# Actually check all backends including cooling ones
|
||||
Only queries backends with status='cooling' (the health_check_loop handles
|
||||
the periodic scanning; this is the on-demand refresh before proxy routing)."""
|
||||
from storage.backend_store import list_backends
|
||||
backends = list_backends(decrypt_key=False)
|
||||
for backend in backends:
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
# Sidecar V2 — Multi-Pool Provider Proxy
|
||||
fastapi>=0.115.0,<1.0.0
|
||||
uvicorn[standard]>=0.30.0,<1.0.0
|
||||
httpx>=0.27.0,<1.0.0
|
||||
structlog>=24.0.0,<25.0.0
|
||||
cryptography>=42.0.0,<44.0.0
|
||||
prometheus_client>=0.20.0,<1.0.0
|
||||
@@ -1,24 +1,27 @@
|
||||
"""Sidecar V2 — FastAPI server with multi-pool routing, admin API, dashboard SSE."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from collections.abc import AsyncGenerator
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any
|
||||
from typing import Any, Optional
|
||||
|
||||
import structlog
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, Response
|
||||
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
|
||||
from config import config as app_config
|
||||
from crypto import init_crypto
|
||||
from crypto import init_crypto, is_initialized
|
||||
from pool_manager import PoolManager
|
||||
from rate_limiter import PerBackendRateLimiter
|
||||
from router import Router
|
||||
from proxy import handle_proxy_request
|
||||
|
||||
from storage.db import init_db, create_tables, run_integrity_check
|
||||
from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH
|
||||
from storage.backend_store import (
|
||||
create_backend, get_backend, list_backends, update_backend,
|
||||
delete_backend, get_pool_stats,
|
||||
@@ -28,7 +31,11 @@ from storage.cooldown_store import get_cooldown_history
|
||||
from storage.config_store import get_config, set_config, list_configs, delete_config
|
||||
from storage.models import Backend, ModelMapping
|
||||
|
||||
import os
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Logging
|
||||
# ──────────────────────────────────────────────────────────
|
||||
_LOG_FORMAT = os.getenv("LOG_FORMAT", "console").lower()
|
||||
|
||||
structlog.configure(
|
||||
processors=[
|
||||
@@ -40,7 +47,11 @@ structlog.configure(
|
||||
structlog.processors.StackInfoRenderer(),
|
||||
structlog.processors.format_exc_info,
|
||||
structlog.processors.UnicodeDecoder(),
|
||||
structlog.dev.ConsoleRenderer(),
|
||||
(
|
||||
structlog.processors.JSONRenderer()
|
||||
if _LOG_FORMAT == "json"
|
||||
else structlog.dev.ConsoleRenderer()
|
||||
),
|
||||
],
|
||||
context_class=dict,
|
||||
logger_factory=structlog.stdlib.LoggerFactory(),
|
||||
@@ -50,38 +61,109 @@ structlog.configure(
|
||||
logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.server")
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Admin Auth middleware
|
||||
# ──────────────────────────────────────────────────────────
|
||||
_security = HTTPBearer(auto_error=False)
|
||||
|
||||
|
||||
def verify_admin_token(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> bool:
    """Report whether the request carries the configured admin Bearer token.

    This dependency never raises; it only answers True/False so the caller
    can decide how to treat an unauthenticated request.

    Args:
        credentials: Bearer credentials extracted by ``_security``; ``None``
            when the request carries no usable Authorization header.

    Returns:
        True only when an admin token is configured and the presented token
        matches it. False when no token is configured, when no credentials
        were sent, or when the token differs.
    """
    # Function-scope import, in line with the other local imports in this module.
    import hmac

    expected = app_config.admin_token
    if not expected or credentials is None:
        return False

    # Constant-time comparison so response timing does not reveal how much of
    # a guessed token is correct. Bytes are compared because compare_digest
    # rejects str arguments containing non-ASCII characters.
    return hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
|
||||
|
||||
|
||||
def require_admin(credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security)):
    """Require a valid admin Bearer token, raising 401 otherwise.

    Args:
        credentials: Bearer credentials extracted by ``_security``; ``None``
            when the request carries no usable Authorization header.

    Raises:
        HTTPException: 401 when no admin token is configured, when the
            Authorization header is missing, or when the presented token
            does not match the configured one.
    """
    # Function-scope import, in line with the other local imports in this module.
    import hmac

    expected = app_config.admin_token
    if not expected:
        raise HTTPException(
            status_code=401,
            detail="Admin API not configured: set SIDECAR_ADMIN_TOKEN",
        )

    if credentials is None:
        raise HTTPException(
            status_code=401,
            detail="Missing Authorization header",
            headers={"WWW-Authenticate": "Bearer"},
        )

    # Constant-time comparison so response timing does not reveal how much of
    # a guessed token is correct. Bytes are compared because compare_digest
    # rejects str arguments containing non-ASCII characters.
    token_ok = hmac.compare_digest(
        credentials.credentials.encode("utf-8"),
        expected.encode("utf-8"),
    )
    if not token_ok:
        raise HTTPException(
            status_code=401,
            detail="Invalid admin token",
            # Same challenge header as the missing-credentials response.
            headers={"WWW-Authenticate": "Bearer"},
        )
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Global runtime state
|
||||
# ──────────────────────────────────────
|
||||
pool_manager: PoolManager | None = None
|
||||
rate_limiter: PerBackendRateLimiter | None = None
|
||||
router: Router | None = None
|
||||
# ──────────────────────────────────────────────────────────
|
||||
pool_manager: Optional[PoolManager] = None
|
||||
rate_limiter: Optional[PerBackendRateLimiter] = None
|
||||
router: Optional[Router] = None
|
||||
start_time: float = 0.0
|
||||
|
||||
# In-memory metrics counters
_metrics_counters: dict[str, int] = {}
_metrics_lock = asyncio.Lock()


def _inc_metric(key: str, delta: int = 1) -> None:
    """Add ``delta`` to the in-memory counter ``key``.

    A counter that does not exist yet is created starting from 0.

    The update is a plain read-modify-write on ``_metrics_counters`` and does
    not acquire ``_metrics_lock``. It contains no ``await``, so coroutines on
    one event loop cannot interleave inside it, but it is not protected
    against concurrent calls from multiple OS threads.

    Args:
        key: Counter name.
        delta: Amount to add; defaults to 1.
    """
    _metrics_counters[key] = _metrics_counters.get(key, 0) + delta
|
||||
|
||||
|
||||
def get_pm() -> PoolManager:
|
||||
assert pool_manager is not None
|
||||
return pool_manager
|
||||
|
||||
|
||||
def get_rl() -> PerBackendRateLimiter:
|
||||
assert rate_limiter is not None
|
||||
return rate_limiter
|
||||
|
||||
|
||||
def get_router() -> Router:
|
||||
assert router is not None
|
||||
return router
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Lifespan
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
|
||||
global pool_manager, rate_limiter, router, start_time
|
||||
|
||||
# Init crypto
|
||||
if app_config.encryption_key:
|
||||
init_crypto(app_config.encryption_key)
|
||||
# P0: Encryption key is mandatory — refuse to start without it
|
||||
if not app_config.encryption_key:
|
||||
logger.critical(
|
||||
"missing_encryption_key",
|
||||
hint="Set SIDECAR_ENCRYPTION_KEY (64 hex chars). Refusing to start."
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
init_crypto(app_config.encryption_key)
|
||||
logger.info("crypto_initialized")
|
||||
|
||||
# P0: Warn if admin_token not set
|
||||
if not app_config.admin_token:
|
||||
logger.warning(
|
||||
"admin_token_not_set",
|
||||
hint="Admin write endpoints disabled until SIDECAR_ADMIN_TOKEN is configured."
|
||||
)
|
||||
|
||||
# Init DB
|
||||
init_db()
|
||||
@@ -101,6 +183,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
|
||||
# Start background tasks
|
||||
health_task = asyncio.create_task(_health_check_loop())
|
||||
stats_task = asyncio.create_task(_stats_aggregation_loop())
|
||||
backup_task = asyncio.create_task(_backup_loop())
|
||||
|
||||
logger.info(
|
||||
"sidecar_v2_started",
|
||||
@@ -112,16 +195,12 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
health_task.cancel()
|
||||
stats_task.cancel()
|
||||
try:
|
||||
await health_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
try:
|
||||
await stats_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
for task in [health_task, stats_task, backup_task]:
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
logger.info("sidecar_v2_stopped")
|
||||
|
||||
|
||||
@@ -132,21 +211,86 @@ app = FastAPI(
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Background tasks
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
async def _health_check_loop() -> None:
    """Periodic health checks: clear expired cooldowns and actively probe backends.

    Each sweep lists all backends (with decrypted keys, needed for the probe's
    Authorization header) and, per backend:

    1. asks ``check_and_clear_cooldown`` to re-evaluate it when its status is
       ``"cooling"``;
    2. sends a GET to ``api_base_url + app_config.health_probe_endpoint`` when
       its status is ``"healthy"`` and it is enabled, then records the outcome
       in the ``backend_health`` table.

    A probe counts as failed when the request raises or the backend answers
    with a 5xx status; the row is then marked ``degraded`` and
    ``consecutive_failures`` is incremented. Any other response marks the row
    ``healthy`` and resets ``consecutive_failures`` to 0.

    The loop sleeps ``app_config.health_check_interval_seconds`` between
    sweeps and runs until the task is cancelled. Unexpected errors are logged
    and the loop continues with the next sweep.
    """
    from cooldown_manager import check_and_clear_cooldown
    from storage.db import get_connection as _gc
    import httpx

    def _record_probe_success(backend_id: Any, latency_ms: int, status_code: int) -> None:
        """Upsert a healthy probe result and reset the failure streak."""
        with _gc() as conn:
            conn.execute(
                """INSERT INTO backend_health
                   (backend_id, state, last_latency_ms, last_status_code,
                    last_check_at)
                   VALUES (?, 'healthy', ?, ?, datetime('now'))
                   ON CONFLICT(backend_id) DO UPDATE SET
                     state = excluded.state,
                     last_latency_ms = excluded.last_latency_ms,
                     last_status_code = excluded.last_status_code,
                     last_check_at = excluded.last_check_at,
                     consecutive_failures = 0""",
                (backend_id, latency_ms, status_code),
            )
            conn.commit()

    def _record_probe_failure(backend_id: Any) -> None:
        """Mark the backend degraded and extend its failure streak by one."""
        with _gc() as conn:
            conn.execute(
                """INSERT INTO backend_health
                   (backend_id, state, last_check_at)
                   VALUES (?, 'degraded', datetime('now'))
                   ON CONFLICT(backend_id) DO UPDATE SET
                     state = 'degraded',
                     last_check_at = excluded.last_check_at""",
                (backend_id,),
            )
            conn.execute(
                """UPDATE backend_health SET
                     consecutive_failures = consecutive_failures + 1
                   WHERE backend_id = ?""",
                (backend_id,),
            )
            conn.commit()

    while True:
        try:
            backends = list_backends(decrypt_key=True)
            # One client per sweep: every probe shares the same timeout, so
            # there is no need to build and tear down a client per backend.
            probe_timeout = httpx.Timeout(app_config.health_check_timeout_seconds)
            async with httpx.AsyncClient(timeout=probe_timeout) as client:
                for b in backends:
                    # 1. Clear expired cooldowns
                    if b.status == "cooling":
                        check_and_clear_cooldown(b.id)

                    # 2. Active health probing for healthy/enabled backends
                    if not (b.status == "healthy" and b.enabled):
                        continue

                    failure: Optional[str] = None
                    status_code: Optional[int] = None
                    elapsed_ms = 0
                    try:
                        probe_url = b.api_base_url.rstrip("/") + app_config.health_probe_endpoint
                        headers = {}
                        if b.api_key_plain:
                            headers["Authorization"] = f"Bearer {b.api_key_plain}"

                        start = time.monotonic()
                        resp = await client.get(probe_url, headers=headers)
                        elapsed_ms = int((time.monotonic() - start) * 1000)
                        status_code = resp.status_code
                    except Exception as probe_err:
                        failure = str(probe_err)
                    else:
                        # A server-side error means the backend answered but
                        # is not serving correctly; do not record it as healthy.
                        if status_code >= 500:
                            failure = f"HTTP {status_code}"

                    if failure is None:
                        _record_probe_success(b.id, elapsed_ms, status_code)
                        logger.debug(
                            "health_probe_ok",
                            backend_id=b.id,
                            status=status_code,
                            latency_ms=elapsed_ms,
                        )
                    else:
                        logger.warning(
                            "health_probe_failed",
                            backend_id=b.id,
                            error=failure,
                        )
                        _record_probe_failure(b.id)
        except Exception:
            logger.exception("health_check_error")
        await asyncio.sleep(app_config.health_check_interval_seconds)
|
||||
@@ -163,9 +307,50 @@ async def _stats_aggregation_loop() -> None:
|
||||
await asyncio.sleep(app_config.stats_refresh_interval_seconds)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
def _write_db_snapshot(db_path: str, backup_dir: str) -> str:
    """Write a timestamped SQLite snapshot of ``db_path`` into ``backup_dir`` and return its path."""
    import sqlite3
    from contextlib import closing, suppress

    os.makedirs(backup_dir, exist_ok=True)
    backup_name = f"sidecar_v2_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}.db"
    backup_path = os.path.join(backup_dir, backup_name)

    try:
        # closing() guarantees both connections are released even when
        # backup() raises part-way through.
        with closing(sqlite3.connect(db_path)) as source, \
                closing(sqlite3.connect(backup_path)) as dest:
            source.backup(dest)
    except Exception:
        # Do not leave a truncated file that would pass for a valid backup.
        with suppress(OSError):
            os.remove(backup_path)
        raise
    return backup_path


def _prune_expired_backups(backup_dir: str, retention_days: int) -> list[str]:
    """Delete ``sidecar_v2_*.db`` files older than ``retention_days`` and return the removed paths."""
    cutoff = time.time() - retention_days * 86400
    removed: list[str] = []
    for fname in os.listdir(backup_dir):
        if not (fname.startswith("sidecar_v2_") and fname.endswith(".db")):
            continue
        fpath = os.path.join(backup_dir, fname)
        if os.path.getmtime(fpath) < cutoff:
            os.remove(fpath)
            removed.append(fpath)
    return removed


async def _backup_loop() -> None:
    """Daily SQLite backup with retention.

    Every 24 hours (the first backup happens 24 hours after start-up) the
    database at ``storage.db._DB_PATH`` is copied into
    ``app_config.backup_dir`` through SQLite's backup API. Snapshots whose
    modification time is older than ``app_config.backup_retention_days`` are
    then deleted. When no backup directory is configured the cycle is skipped.

    The blocking SQLite and filesystem work runs in a worker thread so the
    event loop keeps serving requests while a backup is in progress. Errors
    are logged and the loop continues with the next cycle; the loop runs
    until the task is cancelled.
    """
    while True:
        try:
            await asyncio.sleep(86400)  # 24 hours
            backup_dir = app_config.backup_dir
            if not backup_dir:
                continue

            # Imported on every cycle, as before, so the path is read at
            # backup time rather than once at start-up.
            from storage.db import _DB_PATH as db_path

            backup_path = await asyncio.to_thread(_write_db_snapshot, db_path, backup_dir)
            logger.info("db_backup_created", path=backup_path)

            # Retention: remove old backups
            retired = await asyncio.to_thread(
                _prune_expired_backups, backup_dir, app_config.backup_retention_days
            )
            for fpath in retired:
                logger.info("db_backup_retired", path=fpath)

        except Exception:
            logger.exception("backup_error")
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Health / Metrics
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
@app.get("/health")
|
||||
async def health() -> dict[str, Any]:
|
||||
return {
|
||||
@@ -175,9 +360,54 @@ async def health() -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
@app.get("/metrics")
async def metrics() -> Response:
    """Prometheus-compatible metrics endpoint.

    Emits, in the Prometheus text exposition format:

    * ``sidecar_pool_providers{pool,type}`` — one sample per numeric entry of
      the pool manager's ``get_pool_status()`` result;
    * ``sidecar_cooldown_active`` — number of backends whose status is
      ``"cooling"``;
    * ``sidecar_emergency_count`` — the in-memory ``emergency_count`` counter;
    * ``sidecar_db_size_bytes`` / ``sidecar_wal_size_bytes`` — from
      ``get_db_sizes()``;
    * ``sidecar_requests_total``, ``sidecar_errors_total``,
      ``sidecar_tokens_total``, ``sidecar_cost_total`` — from
      ``get_total_stats()``, with missing or null values reported as 0;
    * ``sidecar_uptime_seconds`` — whole seconds since ``start_time``.

    Returns:
        A plain-text response carrying the exposition-format version in its
        Content-Type.
    """
    from storage.db import get_db_sizes

    def _escape_label(value: Any) -> str:
        # Backslash, double quote and newline must be escaped inside a label value.
        return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")

    lines: list[str] = []

    # Pool provider counts
    pool_status = get_pm().get_pool_status()
    for pool_name, stats in pool_status.items():
        for key, val in stats.items():
            if isinstance(val, bool):
                val = int(val)
            elif not isinstance(val, (int, float)):
                # A non-numeric sample value would make the whole scrape unparseable.
                continue
            lines.append(
                f'sidecar_pool_providers{{pool="{_escape_label(pool_name)}",'
                f'type="{_escape_label(key)}"}} {val}'
            )

    # Cooldown status
    all_backends = list_backends(decrypt_key=False)
    cooling_count = sum(1 for b in all_backends if b.status == "cooling")
    lines.append(f"sidecar_cooldown_active {cooling_count}")

    # Emergency count
    emergency_count = _metrics_counters.get("emergency_count", 0)
    lines.append(f"sidecar_emergency_count {emergency_count}")

    # DB sizes
    sizes = get_db_sizes()
    lines.append(f"sidecar_db_size_bytes {sizes.get('db_bytes', 0)}")
    lines.append(f"sidecar_wal_size_bytes {sizes.get('wal_bytes', 0)}")

    # Total stats ("or 0" turns a null aggregate into 0)
    total = get_total_stats()
    lines.append(f"sidecar_requests_total {total.get('total_requests', 0) or 0}")
    lines.append(f"sidecar_errors_total {total.get('total_errors', 0) or 0}")
    lines.append(f"sidecar_tokens_total {total.get('total_tokens', 0) or 0}")
    cost = total.get('total_cost', 0) or 0.0
    lines.append(f"sidecar_cost_total {cost}")

    # Uptime
    lines.append(f"sidecar_uptime_seconds {int(time.time() - start_time)}")

    return Response(
        content="\n".join(lines) + "\n",
        # version=0.0.4 identifies the Prometheus text exposition format.
        media_type="text/plain; version=0.0.4; charset=utf-8",
    )
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Dashboard SSE
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
@app.get("/dashboard/sse")
|
||||
async def dashboard_sse() -> StreamingResponse:
|
||||
"""SSE endpoint for real-time dashboard data."""
|
||||
@@ -214,7 +444,7 @@ async def dashboard_sse() -> StreamingResponse:
|
||||
"uptime_seconds": int(time.time() - start_time),
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
yield f"data: {__import__('json').dumps(snapshot)}\n\n"
|
||||
yield f"data: {json.dumps(snapshot)}\n\n"
|
||||
except Exception:
|
||||
logger.exception("sse_error")
|
||||
|
||||
@@ -231,20 +461,20 @@ async def dashboard_sse() -> StreamingResponse:
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: Backend CRUD
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Admin: Backend CRUD (READ: public, WRITE: auth required)
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/backends")
|
||||
async def admin_list_backends(pool: str | None = None) -> list[dict]:
|
||||
"""List all backends with masked keys."""
|
||||
async def admin_list_backends(pool: Optional[str] = None) -> list[dict]:
|
||||
"""List all backends with masked keys (public read)."""
|
||||
backends = list_backends(pool=pool, decrypt_key=True)
|
||||
return [b.to_dict(mask_key=True) for b in backends]
|
||||
|
||||
|
||||
@app.get("/api/admin/backends/{backend_id}")
|
||||
async def admin_get_backend(backend_id: str) -> dict:
|
||||
"""Get a single backend (key masked)."""
|
||||
"""Get a single backend (public read, key masked)."""
|
||||
b = get_backend(backend_id, decrypt_key=True)
|
||||
if b is None:
|
||||
raise HTTPException(404, "Backend not found")
|
||||
@@ -252,8 +482,11 @@ async def admin_get_backend(backend_id: str) -> dict:
|
||||
|
||||
|
||||
@app.post("/api/admin/backends")
|
||||
async def admin_create_backend(body: dict[str, Any]) -> dict:
|
||||
"""Create a new backend."""
|
||||
async def admin_create_backend(
|
||||
body: dict[str, Any],
|
||||
_auth=Depends(require_admin),
|
||||
) -> dict:
|
||||
"""Create a new backend (auth required)."""
|
||||
required = ["name", "api_base_url", "api_key"]
|
||||
for field in required:
|
||||
if field not in body:
|
||||
@@ -283,18 +516,20 @@ async def admin_create_backend(body: dict[str, Any]) -> dict:
|
||||
|
||||
|
||||
@app.put("/api/admin/backends/{backend_id}")
|
||||
async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
|
||||
"""Update a backend."""
|
||||
async def admin_update_backend(
|
||||
backend_id: str,
|
||||
body: dict[str, Any],
|
||||
_auth=Depends(require_admin),
|
||||
) -> dict:
|
||||
"""Update a backend (auth required)."""
|
||||
updates = dict(body)
|
||||
|
||||
# Handle model_mappings
|
||||
if "model_mappings" in updates:
|
||||
raw = updates["model_mappings"]
|
||||
updates["model_mappings"] = {
|
||||
k: ModelMapping.from_dict(v) for k, v in raw.items()
|
||||
}
|
||||
|
||||
# Handle api_key
|
||||
if "api_key" in updates:
|
||||
updates["api_key_plain"] = updates.pop("api_key")
|
||||
|
||||
@@ -305,40 +540,40 @@ async def admin_update_backend(backend_id: str, body: dict[str, Any]) -> dict:
|
||||
|
||||
|
||||
@app.delete("/api/admin/backends/{backend_id}")
|
||||
async def admin_delete_backend(backend_id: str) -> dict:
|
||||
"""Delete a backend."""
|
||||
async def admin_delete_backend(
|
||||
backend_id: str,
|
||||
_auth=Depends(require_admin),
|
||||
) -> dict:
|
||||
"""Delete a backend (auth required)."""
|
||||
ok = delete_backend(backend_id)
|
||||
if not ok:
|
||||
raise HTTPException(404, "Backend not found")
|
||||
return {"status": "deleted", "id": backend_id}
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: Pool Status
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Admin: Pool Status (public read)
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/pools")
|
||||
async def admin_pool_status() -> dict:
|
||||
"""Get pool summary."""
|
||||
return pool_manager.get_pool_status()
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: Usage / Stats
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Admin: Usage / Stats (public read)
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/stats/total")
async def admin_total_stats() -> dict:
    """Return the aggregate usage statistics from ``get_total_stats()``."""
    totals = get_total_stats()
    return totals
|
||||
|
||||
|
||||
@app.get("/api/admin/stats/hourly")
|
||||
async def admin_hourly_usage(
|
||||
backend_id: str | None = None,
|
||||
backend_id: Optional[str] = None,
|
||||
hours: int = 168,
|
||||
) -> list[dict]:
|
||||
"""Get hourly usage data."""
|
||||
since = None
|
||||
if hours > 0:
|
||||
since = time.strftime(
|
||||
@@ -350,32 +585,28 @@ async def admin_hourly_usage(
|
||||
|
||||
@app.get("/api/admin/stats/daily")
async def admin_daily_stats(days: int = 30) -> list[dict]:
    """Return daily aggregated stats.

    Args:
        days: Passed through unchanged to ``get_daily_stats`` as its
            ``days`` keyword; defaults to 30.
    """
    daily_rows = get_daily_stats(days=days)
    return daily_rows
|
||||
|
||||
|
||||
@app.get("/api/admin/stats/cooldown")
async def admin_cooldown_history(
    backend_id: Optional[str] = None,
    limit: int = 50,
) -> list[dict]:
    """Return cooldown event history.

    Args:
        backend_id: Optional backend identifier, forwarded as-is to
            ``get_cooldown_history``; ``None`` when the query omits it.
        limit: Forwarded as-is to ``get_cooldown_history``; defaults to 50.
    """
    events = get_cooldown_history(backend_id=backend_id, limit=limit)
    return events
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Admin: System Config
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Admin: System Config (read public, write auth required)
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/api/admin/config")
async def admin_get_all_config() -> list[dict]:
    """Return every system config entry as reported by ``list_configs()``."""
    entries = list_configs()
    return entries
|
||||
|
||||
|
||||
@app.get("/api/admin/config/{key}")
|
||||
async def admin_get_config(key: str) -> dict:
|
||||
"""Get a single config value."""
|
||||
value = get_config(key)
|
||||
if value is None:
|
||||
raise HTTPException(404, "Config not found")
|
||||
@@ -383,8 +614,11 @@ async def admin_get_config(key: str) -> dict:
|
||||
|
||||
|
||||
@app.put("/api/admin/config/{key}")
|
||||
async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
|
||||
"""Set a config value."""
|
||||
async def admin_set_config(
|
||||
key: str,
|
||||
body: dict[str, Any],
|
||||
_auth=Depends(require_admin),
|
||||
) -> dict:
|
||||
value = str(body.get("value", ""))
|
||||
description = str(body.get("description", ""))
|
||||
set_config(key, value, description)
|
||||
@@ -392,21 +626,22 @@ async def admin_set_config(key: str, body: dict[str, Any]) -> dict:
|
||||
|
||||
|
||||
@app.delete("/api/admin/config/{key}")
async def admin_delete_config(
    key: str,
    _auth=Depends(require_admin),
) -> dict:
    """Delete the config entry stored under ``key``.

    The ``require_admin`` dependency runs before the handler body.

    Raises:
        HTTPException: 404 when ``delete_config`` reports that nothing
            was removed.
    """
    removed = delete_config(key)
    if removed:
        return {"status": "deleted", "key": key}
    raise HTTPException(404, "Config not found")
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# Dashboard HTML
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Dashboard HTML (public, but respects admin_token for writes in JS)
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/dashboard")
|
||||
async def dashboard_html() -> HTMLResponse:
|
||||
"""Serve the dashboard WebUI."""
|
||||
dashboard_path = os.path.join(
|
||||
os.path.dirname(__file__), "dashboard.html"
|
||||
)
|
||||
@@ -416,12 +651,13 @@ async def dashboard_html() -> HTMLResponse:
|
||||
return HTMLResponse("<h1>Dashboard not found</h1>", status_code=404)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Proxy Endpoints
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
@app.post("/v1/chat/completions")
async def chat_completions(request: Request) -> Response:
    """Count the request, then hand it to ``handle_proxy_request``.

    The ``proxy_requests_total`` metric is incremented before the request
    is forwarded, so it is counted regardless of how the proxy call ends.
    """
    _inc_metric("proxy_requests_total")
    upstream_path = "/v1/chat/completions"
    proxied = await handle_proxy_request(
        pool_manager, rate_limiter, router, request, upstream_path
    )
    return proxied
|
||||
@@ -443,7 +679,7 @@ async def embeddings(request: Request) -> Response:
|
||||
|
||||
@app.get("/v1/models")
|
||||
@app.get("/v1/models/{model_id:path}")
|
||||
async def list_models(request: Request, model_id: str | None = None) -> Response:
|
||||
async def list_models(request: Request, model_id: Optional[str] = None) -> Response:
|
||||
path = f"/v1/models/{model_id}" if model_id else "/v1/models"
|
||||
return await handle_proxy_request(
|
||||
pool_manager, rate_limiter, router, request, path
|
||||
@@ -458,9 +694,9 @@ async def catch_all(request: Request, path: str) -> Response:
|
||||
)
|
||||
|
||||
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
# Main
|
||||
# ──────────────────────────────────────
|
||||
# ──────────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
|
||||
import uvicorn
|
||||
|
||||
Reference in New Issue
Block a user