feat: Sidecar V2 — multi-pool provider proxy with 429 cooldown
- proxy.py: Fix route path duplication (v1/v1 → v1) when upstream base URL already includes /v1 prefix - proxy.py: Fix _emergency_count global variable for metrics tracking - server.py: Add logging.basicConfig(level=logging.INFO) for structlog INFO-level log visibility - Full multi-pool routing: primary → fallback → emergency passthrough - Per-backend rate limiting with RPM-based token bucket - 429 cooldown mechanism with automatic recovery - Dashboard with SSE real-time monitoring - Admin API for backend/pool/config management - SQLite-backed persistence with encrypted API key storage - Docker compose deployment Deployed by opengineer 严维序 as BIZ-50 Step 4
This commit is contained in:
@@ -0,0 +1 @@
|
||||
# Sidecar V2 storage module
|
||||
@@ -0,0 +1,252 @@
|
||||
"""CRUD operations for Backend (provider) management."""
|
||||
|
||||
import json
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from storage.db import get_connection, generate_id
|
||||
from storage.models import Backend, ModelMapping
|
||||
from crypto import encrypt, decrypt
|
||||
|
||||
|
||||
def create_backend(backend: Backend) -> Backend:
    """Insert ``backend`` as a new row and return the same (mutated) object.

    A ``bkd``-prefixed id is generated when ``backend.id`` is empty, and both
    timestamps are set to the current UTC time. Only the output of
    ``encrypt(backend.api_key_plain)`` is written to the database; the plain
    key itself is never stored.
    """
    backend.id = backend.id or generate_id("bkd")

    timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    backend.created_at = timestamp
    backend.updated_at = timestamp

    encrypted_key = encrypt(backend.api_key_plain)

    insert_sql = """INSERT INTO backends (id, name, label, api_base_url, api_key_encrypted,
        api, timeout_seconds, rpm_limit, pool, enabled, status, model_mappings_json,
        source, cooldown_until, consecutive_429_count, metadata_json, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    # Values are listed in exactly the column order of insert_sql.
    values = (
        backend.id,
        backend.name,
        backend.label,
        backend.api_base_url,
        encrypted_key,
        backend.api,
        backend.timeout_seconds,
        backend.rpm_limit,
        backend.pool,
        1 if backend.enabled else 0,  # stored as an integer flag
        backend.status,
        json.dumps(_mappings_to_dict(backend.model_mappings)),
        backend.source,
        backend.cooldown_until,
        backend.consecutive_429_count,
        json.dumps(backend.metadata),
        backend.created_at,
        backend.updated_at,
    )

    with get_connection() as conn:
        conn.execute(insert_sql, values)
        conn.commit()

    return backend
|
||||
|
||||
|
||||
def get_backend(backend_id: str, decrypt_key: bool = True) -> Optional[Backend]:
    """Fetch one backend by primary key.

    Returns ``None`` when no row has ``backend_id``. ``decrypt_key`` is
    forwarded to ``_row_to_backend`` and controls whether the stored key is
    decrypted into ``api_key_plain``.
    """
    query = "SELECT * FROM backends WHERE id = ?"
    with get_connection() as conn:
        record = conn.execute(query, (backend_id,)).fetchone()

    if record is not None:
        return _row_to_backend(record, decrypt_key=decrypt_key)
    return None
|
||||
|
||||
|
||||
def list_backends(
    pool: Optional[str] = None,
    enabled_only: bool = False,
    decrypt_key: bool = False,
) -> list[Backend]:
    """Return backends, optionally restricted to one pool and/or enabled rows.

    With a truthy ``pool`` the rows of that pool are returned ordered by
    creation time; otherwise all rows are returned ordered by pool and then
    creation time. The ``enabled_only`` filter is applied in Python after the
    rows have been converted.
    """
    if pool:
        query = "SELECT * FROM backends WHERE pool = ? ORDER BY created_at"
        args = (pool,)
    else:
        query = "SELECT * FROM backends ORDER BY pool, created_at"
        args = ()

    with get_connection() as conn:
        records = conn.execute(query, args).fetchall()

    converted = [_row_to_backend(record, decrypt_key=decrypt_key) for record in records]
    if not enabled_only:
        return converted
    return [item for item in converted if item.enabled]
|
||||
|
||||
|
||||
def update_backend(backend_id: str, updates: dict) -> Optional[Backend]:
    """Update selected fields of a backend and return the refreshed row.

    Only the columns actually named in ``updates`` are written. The previous
    implementation rewrote every column from a snapshot read at the start of
    the call, so an unrelated edit (e.g. a rename) could overwrite ``status``,
    ``cooldown_until`` or ``consecutive_429_count`` values that
    ``set_backend_cooldown`` / ``clear_backend_cooldown`` had written in the
    meantime.

    Args:
        backend_id: Primary key of the backend to modify.
        updates: Mapping of field name to new value. Unknown keys are ignored.
            A truthy ``api_key_plain`` is re-encrypted and stored; a
            ``model_mappings`` entry replaces the stored mappings.

    Returns:
        The backend as re-read from the database (key not decrypted), or
        ``None`` if no backend has ``backend_id``.
    """
    # Existence check only; the decrypted key is not needed for an update.
    if get_backend(backend_id, decrypt_key=False) is None:
        return None

    editable = (
        "name", "label", "api_base_url", "api", "timeout_seconds",
        "rpm_limit", "pool", "enabled", "status", "source",
        "cooldown_until", "consecutive_429_count", "metadata",
    )

    set_clauses: list[str] = []
    params: list = []
    for field_name in editable:
        if field_name not in updates:
            continue
        column = field_name
        value = updates[field_name]
        if field_name == "enabled":
            value = 1 if value else 0  # stored as an integer flag
        elif field_name == "metadata":
            column = "metadata_json"
            value = json.dumps(value)
        # Column names come from the fixed whitelist above, never from input.
        set_clauses.append(f"{column} = ?")
        params.append(value)

    # An empty/missing api_key_plain leaves the stored key untouched.
    if updates.get("api_key_plain"):
        set_clauses.append("api_key_encrypted = ?")
        params.append(encrypt(updates["api_key_plain"]))

    if "model_mappings" in updates:
        set_clauses.append("model_mappings_json = ?")
        params.append(json.dumps(_mappings_to_dict(updates["model_mappings"])))

    # updated_at is always bumped, even when no other field changed.
    set_clauses.append("updated_at = ?")
    params.append(time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
    params.append(backend_id)

    with get_connection() as conn:
        conn.execute(
            f"UPDATE backends SET {', '.join(set_clauses)} WHERE id = ?",
            params,
        )
        conn.commit()

    return get_backend(backend_id, decrypt_key=False)
|
||||
|
||||
|
||||
def delete_backend(backend_id: str) -> bool:
    """Remove the backend row with ``backend_id``.

    Returns ``True`` when a row was deleted, ``False`` when none matched.
    """
    with get_connection() as conn:
        outcome = conn.execute(
            "DELETE FROM backends WHERE id = ?",
            (backend_id,),
        )
        conn.commit()
        removed = outcome.rowcount
    return removed > 0
|
||||
|
||||
|
||||
def set_backend_status(backend_id: str, status: str) -> bool:
    """Write ``status`` (healthy/cooling/error/disabled) for one backend.

    Also refreshes ``updated_at`` to the current UTC time. Returns ``True``
    when a row was updated.
    """
    statement = "UPDATE backends SET status = ?, updated_at = ? WHERE id = ?"
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    with get_connection() as conn:
        outcome = conn.execute(statement, (status, stamp, backend_id))
        conn.commit()
        touched = outcome.rowcount
    return touched > 0
|
||||
|
||||
|
||||
def set_backend_cooldown(backend_id: str, cooldown_until: str, count: int) -> bool:
    """Put a backend into the ``cooling`` state.

    Stores ``cooldown_until`` and ``count`` (as ``consecutive_429_count``)
    and refreshes ``updated_at``. Returns ``True`` when a row was updated.
    """
    statement = """UPDATE backends SET status = 'cooling', cooldown_until = ?,
        consecutive_429_count = ?, updated_at = ? WHERE id = ?"""
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    with get_connection() as conn:
        outcome = conn.execute(statement, (cooldown_until, count, stamp, backend_id))
        conn.commit()
        touched = outcome.rowcount
    return touched > 0
|
||||
|
||||
|
||||
def clear_backend_cooldown(backend_id: str) -> bool:
    """Return a backend to ``healthy`` and reset its cooldown bookkeeping.

    ``cooldown_until`` becomes NULL and ``consecutive_429_count`` becomes 0;
    ``updated_at`` is refreshed. Returns ``True`` when a row was updated.
    """
    statement = """UPDATE backends SET status = 'healthy', cooldown_until = NULL,
        consecutive_429_count = 0, updated_at = ? WHERE id = ?"""
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    with get_connection() as conn:
        outcome = conn.execute(statement, (stamp, backend_id))
        conn.commit()
        touched = outcome.rowcount
    return touched > 0
|
||||
|
||||
|
||||
def get_pool_stats() -> dict:
    """Summarize backend counts per pool.

    Returns a dict keyed by pool name; each value holds the ``total``,
    ``enabled``, ``healthy``, ``cooling`` and ``error`` counts for that pool.
    Pools without any backend row do not appear in the result.
    """
    counters = ("total", "enabled", "healthy", "cooling", "error")
    query = """SELECT pool, COUNT(*) as total,
        SUM(CASE WHEN enabled = 1 THEN 1 ELSE 0 END) as enabled,
        SUM(CASE WHEN status = 'healthy' THEN 1 ELSE 0 END) as healthy,
        SUM(CASE WHEN status = 'cooling' THEN 1 ELSE 0 END) as cooling,
        SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) as error
        FROM backends GROUP BY pool"""

    with get_connection() as conn:
        records = conn.execute(query).fetchall()

    return {
        record["pool"]: {name: record[name] for name in counters}
        for record in records
    }
|
||||
|
||||
|
||||
def _row_to_backend(row, decrypt_key: bool = True) -> Backend:
    """Build a ``Backend`` from a ``backends`` table row.

    JSON columns are decoded (NULL/empty is treated as ``{}``) and each model
    mapping is rebuilt through ``ModelMapping.from_dict``. When ``decrypt_key``
    is true and an encrypted key is present, ``try_decrypt_existing`` is used
    to fill ``api_key_plain``; a falsy result leaves it at its default.
    """
    raw_mappings = json.loads(row["model_mappings_json"] or "{}")
    parsed_mappings = {
        canonical: ModelMapping.from_dict(spec)
        for canonical, spec in raw_mappings.items()
    }

    # Columns copied verbatim into the dataclass field of the same name.
    verbatim = (
        "id", "name", "label", "api_base_url", "api", "timeout_seconds",
        "rpm_limit", "pool", "status", "source", "cooldown_until",
        "consecutive_429_count", "created_at", "updated_at",
    )
    result = Backend(
        api_key_encrypted=row["api_key_encrypted"] or "",
        enabled=bool(row["enabled"]),
        model_mappings=parsed_mappings,
        metadata=json.loads(row["metadata_json"] or "{}"),
        **{column: row[column] for column in verbatim},
    )

    if not (decrypt_key and result.api_key_encrypted):
        return result

    from crypto import try_decrypt_existing

    decrypted = try_decrypt_existing(result.api_key_encrypted)
    if decrypted:
        result.api_key_plain = decrypted
    return result
|
||||
|
||||
|
||||
def _mappings_to_dict(mappings: dict[str, ModelMapping]) -> dict:
    """Turn a name -> ``ModelMapping`` dict into a JSON-serializable dict.

    Each value is replaced by the result of its ``to_dict()`` method; keys
    and their order are preserved.
    """
    serializable = {}
    for canonical, mapping in mappings.items():
        serializable[canonical] = mapping.to_dict()
    return serializable
|
||||
@@ -0,0 +1,55 @@
|
||||
"""System configuration KV store operations."""
|
||||
|
||||
import time
|
||||
from typing import Optional, Any
|
||||
|
||||
from storage.db import get_connection
|
||||
|
||||
|
||||
def get_config(key: str) -> Optional[str]:
    """Look up one ``system_config`` value.

    Returns the stored value for ``key``, or ``None`` when the key is absent.
    """
    query = "SELECT value FROM system_config WHERE key = ?"
    with get_connection() as conn:
        hit = conn.execute(query, (key,)).fetchone()
    if not hit:
        return None
    return hit["value"]
|
||||
|
||||
|
||||
def set_config(key: str, value: str, description: str = "") -> None:
    """Insert a config entry, or overwrite it when ``key`` already exists.

    On conflict the value, description and ``updated_at`` of the existing row
    are all replaced by the new ones.
    """
    upsert = """INSERT INTO system_config (key, value, description, updated_at)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(key) DO UPDATE SET
        value = excluded.value,
        description = excluded.description,
        updated_at = excluded.updated_at"""
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    with get_connection() as conn:
        conn.execute(upsert, (key, value, description, stamp))
        conn.commit()
|
||||
|
||||
|
||||
def delete_config(key: str) -> bool:
    """Remove the config entry named ``key``.

    Returns ``True`` when a row was deleted, ``False`` when none matched.
    """
    with get_connection() as conn:
        outcome = conn.execute(
            "DELETE FROM system_config WHERE key = ?",
            (key,),
        )
        conn.commit()
        removed = outcome.rowcount
    return removed > 0
|
||||
|
||||
|
||||
def list_configs() -> list[dict]:
    """Return every ``system_config`` row as a plain dict, ordered by key."""
    with get_connection() as conn:
        records = conn.execute("SELECT * FROM system_config ORDER BY key").fetchall()
    entries = []
    for record in records:
        entries.append(dict(record))
    return entries
|
||||
|
||||
|
||||
def get_all_configs_as_dict() -> dict[str, str]:
    """Return the whole config table as a flat ``{key: value}`` mapping."""
    with get_connection() as conn:
        records = conn.execute("SELECT key, value FROM system_config").fetchall()
    flat = {}
    for record in records:
        flat[record["key"]] = record["value"]
    return flat
|
||||
@@ -0,0 +1,74 @@
|
||||
"""Cooldown event logging."""
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from storage.db import get_connection, generate_id
|
||||
from storage.models import CooldownEvent
|
||||
|
||||
|
||||
def log_cooldown_event(
    backend_id: str,
    consecutive_count: int,
    cooldown_seconds: int,
    response_summary: str = "",
) -> CooldownEvent:
    """Persist a new cooldown event and return it.

    The event gets a fresh ``cev``-prefixed id and a ``started_at`` of the
    current UTC time; ``ended_at`` is not written by this function.
    """
    started = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    event = CooldownEvent(
        id=generate_id("cev"),
        backend_id=backend_id,
        consecutive_count=consecutive_count,
        cooldown_seconds=cooldown_seconds,
        response_summary=response_summary,
        started_at=started,
    )

    insert_sql = """INSERT INTO cooldown_events
        (id, backend_id, consecutive_count, cooldown_seconds,
        response_summary, started_at)
        VALUES (?, ?, ?, ?, ?, ?)"""
    values = (
        event.id,
        event.backend_id,
        event.consecutive_count,
        event.cooldown_seconds,
        event.response_summary,
        event.started_at,
    )
    with get_connection() as conn:
        conn.execute(insert_sql, values)
        conn.commit()

    return event
|
||||
|
||||
|
||||
def end_cooldown_event(backend_id: str) -> bool:
    """Mark the latest still-open cooldown event of a backend as ended.

    The target row is chosen with a subquery rather than
    ``UPDATE ... ORDER BY ... LIMIT``: SQLite only accepts ORDER BY/LIMIT on
    UPDATE when built with ``SQLITE_ENABLE_UPDATE_DELETE_LIMIT``, and on
    builds without it the original statement fails with a syntax error.

    Args:
        backend_id: Backend whose most recent open event should be closed.

    Returns:
        ``True`` when an open event was found and stamped with ``ended_at``,
        ``False`` when the backend has no open event.
    """
    ended_at = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    with get_connection() as conn:
        cursor = conn.execute(
            """UPDATE cooldown_events SET ended_at = ?
               WHERE id = (
                   SELECT id FROM cooldown_events
                   WHERE backend_id = ? AND ended_at IS NULL
                   ORDER BY started_at DESC LIMIT 1
               )""",
            (ended_at, backend_id),
        )
        conn.commit()
        return cursor.rowcount > 0
|
||||
|
||||
|
||||
def get_cooldown_history(
    backend_id: Optional[str] = None,
    limit: int = 50,
) -> list[dict]:
    """Return the most recent cooldown events, newest first.

    With a truthy ``backend_id`` only that backend's events are returned.
    At most ``limit`` rows come back, each converted to a plain dict.
    """
    if backend_id:
        query = """SELECT * FROM cooldown_events
            WHERE backend_id = ?
            ORDER BY started_at DESC LIMIT ?"""
        args = (backend_id, limit)
    else:
        query = """SELECT * FROM cooldown_events
            ORDER BY started_at DESC LIMIT ?"""
        args = (limit,)

    with get_connection() as conn:
        records = conn.execute(query, args).fetchall()

    history = []
    for record in records:
        history.append(dict(record))
    return history
|
||||
+193
@@ -0,0 +1,193 @@
|
||||
"""SQLite database connection management with WAL mode."""
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
import uuid
|
||||
import structlog
|
||||
from contextlib import contextmanager
|
||||
from typing import Generator
|
||||
|
||||
from config import config
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# Module-level DB path
|
||||
_DB_PATH: str = ""
|
||||
|
||||
|
||||
def init_db(db_path: str = "") -> None:
    """Set the module-level database path and switch the file to WAL mode.

    Creates the parent directory of the database file when the path has one,
    then opens a throwaway connection to apply the PRAGMAs below. No
    integrity check is performed here; ``run_integrity_check`` does that.

    Args:
        db_path: Database file path. Falls back to ``config.db_path`` when
            empty.
    """
    global _DB_PATH
    _DB_PATH = db_path or config.db_path

    # A bare filename (or ":memory:") has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when the path actually contains one.
    data_dir = os.path.dirname(_DB_PATH)
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)

    # Test connection and enable WAL
    conn = _get_raw_connection()
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA wal_autocheckpoint=1000")
        conn.execute("PRAGMA foreign_keys=ON")
        conn.execute("PRAGMA busy_timeout=5000")
        logger.info("db_initialized", path=_DB_PATH, mode="WAL")
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _get_raw_connection() -> sqlite3.Connection:
    """Open a new sqlite3 connection to the module-level database path.

    Rows are returned as ``sqlite3.Row``, the connection may be used from
    other threads (``check_same_thread=False``), and WAL journaling and
    foreign-key enforcement are requested on it. The caller is responsible
    for closing the connection.
    """
    connection = sqlite3.connect(_DB_PATH, check_same_thread=False)
    connection.row_factory = sqlite3.Row
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON"):
        connection.execute(pragma)
    return connection
|
||||
|
||||
|
||||
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Context manager yielding a fresh connection that is closed on exit.

    The connection comes from ``_get_raw_connection``. Nothing is committed
    automatically; callers must call ``commit()`` themselves before the block
    ends.
    """
    connection = _get_raw_connection()
    try:
        yield connection
    finally:
        # Always release the handle, including when the body raised.
        connection.close()
|
||||
|
||||
|
||||
def generate_id(prefix: str = "") -> str:
    """Return a random 12-hex-character id, optionally prefixed.

    With a non-empty ``prefix`` the result is ``"<prefix>_<12 hex chars>"``;
    otherwise just the 12 hex characters taken from a UUID4.
    """
    token = uuid.uuid4().hex[:12]
    if not prefix:
        return token
    return f"{prefix}_{token}"
|
||||
|
||||
|
||||
def create_tables() -> None:
    """Run the ``_DDL`` script so that all tables and indexes exist.

    Every statement in the script uses ``IF NOT EXISTS``, so calling this on
    an already-initialized database leaves existing objects in place.
    """
    with get_connection() as connection:
        connection.executescript(_DDL)
        connection.commit()
    logger.info("tables_created")
|
||||
|
||||
|
||||
def run_integrity_check() -> bool:
    """Run ``PRAGMA integrity_check`` and report whether it returned ``ok``.

    Only the first result row is inspected. When it is not ``"ok"`` its text
    is logged at error level and ``False`` is returned.
    """
    with get_connection() as connection:
        first_row = connection.execute("PRAGMA integrity_check").fetchone()

    verdict = first_row[0]
    if verdict == "ok":
        return True
    logger.error("integrity_check_failed", result=verdict)
    return False
|
||||
|
||||
|
||||
def get_db_sizes() -> dict:
    """Report the on-disk size of the database file and its WAL file.

    Returns ``{"db_bytes": int, "wal_bytes": int}``; a file that does not
    exist is reported as 0 bytes.
    """
    sizes = {"db_bytes": 0, "wal_bytes": 0}
    # The WAL file sits next to the main database file with a "-wal" suffix.
    candidates = (
        ("db_bytes", _DB_PATH),
        ("wal_bytes", _DB_PATH + "-wal"),
    )
    for label, path in candidates:
        if os.path.exists(path):
            sizes[label] = os.path.getsize(path)
    return sizes
|
||||
|
||||
|
||||
def wal_checkpoint(mode: str = "TRUNCATE") -> None:
    """Execute a WAL checkpoint in the given mode.

    ``mode`` is interpolated into the PRAGMA text (PRAGMA arguments cannot be
    bound as parameters), so it is validated against the checkpoint modes
    SQLite defines before any SQL is built.

    Args:
        mode: One of ``PASSIVE``, ``FULL``, ``RESTART`` or ``TRUNCATE``
            (case-insensitive).

    Raises:
        ValueError: If ``mode`` is not a recognized checkpoint mode.
    """
    normalized = str(mode).upper()
    if normalized not in ("PASSIVE", "FULL", "RESTART", "TRUNCATE"):
        raise ValueError(f"invalid WAL checkpoint mode: {mode!r}")

    with get_connection() as conn:
        conn.execute(f"PRAGMA wal_checkpoint({normalized})")
|
||||
|
||||
|
||||
# Schema script executed by create_tables(); every statement is idempotent
# (IF NOT EXISTS), so it can be run on each startup.
_DDL = """
-- Backend configuration table (core)
CREATE TABLE IF NOT EXISTS backends (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    label TEXT DEFAULT '',
    api_base_url TEXT NOT NULL,
    api_key_encrypted TEXT NOT NULL,
    api TEXT NOT NULL DEFAULT 'openai-completions',
    timeout_seconds INTEGER NOT NULL DEFAULT 120,
    rpm_limit INTEGER NOT NULL DEFAULT 40,
    pool TEXT NOT NULL DEFAULT 'primary'
        CHECK(pool IN ('primary', 'fallback')),
    enabled INTEGER NOT NULL DEFAULT 1,
    status TEXT NOT NULL DEFAULT 'healthy'
        CHECK(status IN ('healthy', 'cooling', 'error', 'disabled')),
    model_mappings_json TEXT DEFAULT '{}',
    source TEXT NOT NULL DEFAULT 'webui'
        CHECK(source IN ('webui', 'env', 'import')),
    cooldown_until TEXT,
    consecutive_429_count INTEGER DEFAULT 0,
    metadata_json TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- Usage logs (hour-bucketed, UPSERT-safe)
CREATE TABLE IF NOT EXISTS backend_usage_logs (
    id TEXT PRIMARY KEY,
    backend_id TEXT NOT NULL REFERENCES backends(id) ON DELETE CASCADE,
    model TEXT DEFAULT 'unknown',
    prompt_tokens INTEGER DEFAULT 0,
    completion_tokens INTEGER DEFAULT 0,
    total_tokens INTEGER DEFAULT 0,
    cost REAL DEFAULT 0.0,
    request_count INTEGER DEFAULT 0,
    error_count INTEGER DEFAULT 0,
    avg_latency_ms INTEGER DEFAULT 0,
    ttft_ms INTEGER DEFAULT 0,
    hour_bucket TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_usage_backend_hour
    ON backend_usage_logs(backend_id, hour_bucket);

-- Cooldown event log
CREATE TABLE IF NOT EXISTS cooldown_events (
    id TEXT PRIMARY KEY,
    backend_id TEXT NOT NULL REFERENCES backends(id) ON DELETE CASCADE,
    consecutive_count INTEGER NOT NULL DEFAULT 1,
    cooldown_seconds INTEGER NOT NULL,
    response_summary TEXT DEFAULT '',
    started_at TEXT NOT NULL DEFAULT (datetime('now')),
    ended_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_cooldown_backend_time
    ON cooldown_events(backend_id, started_at);

-- Backend health state
CREATE TABLE IF NOT EXISTS backend_health (
    backend_id TEXT PRIMARY KEY REFERENCES backends(id) ON DELETE CASCADE,
    state TEXT NOT NULL DEFAULT 'healthy'
        CHECK(state IN ('healthy', 'degraded', 'down')),
    last_latency_ms INTEGER DEFAULT 0,
    last_status_code INTEGER DEFAULT 200,
    success_rate_5m REAL DEFAULT 1.0,
    consecutive_failures INTEGER DEFAULT 0,
    last_check_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- System configuration KV store
CREATE TABLE IF NOT EXISTS system_config (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    description TEXT DEFAULT '',
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);

-- Daily aggregated stats
CREATE TABLE IF NOT EXISTS daily_stats (
    id TEXT PRIMARY KEY,
    date TEXT NOT NULL,
    pool TEXT NOT NULL CHECK(pool IN ('primary', 'fallback')),
    total_requests INTEGER DEFAULT 0,
    total_errors INTEGER DEFAULT 0,
    total_tokens INTEGER DEFAULT 0,
    total_cost REAL DEFAULT 0.0,
    unique_backends INTEGER DEFAULT 0,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_daily_date_pool ON daily_stats(date, pool);
"""
|
||||
@@ -0,0 +1,161 @@
|
||||
"""Data models for Sidecar V2 — backend-centric, Canonical Name routing."""
|
||||
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Optional
|
||||
import json
|
||||
|
||||
|
||||
@dataclass
class ModelMapping:
    """One backend-specific entry for a Canonical Name.

    Holds the backend's own model id (``native_id``) plus the properties
    recorded for that model on this backend.
    """

    native_id: str
    reasoning: bool = False
    reasoning_effort: bool = False
    input_modalities: list[str] = field(default_factory=lambda: ["text"])
    cost: dict = field(
        default_factory=lambda: {
            "input": 0.0,
            "output": 0.0,
            "cacheRead": 0.0,
            "cacheWrite": 0.0,
        }
    )
    context_window: int = 128000
    max_tokens: int = 65536
    compat: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return all fields as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d: dict) -> "ModelMapping":
        """Build a mapping from ``d``, filling defaults and dropping unknown keys.

        Keys missing from ``d`` take the values listed below; keys that are
        not dataclass fields are ignored rather than raising.
        """
        merged = {
            "native_id": "",
            "reasoning": False,
            "reasoning_effort": False,
            "input_modalities": ["text"],
            "cost": {"input": 0.0, "output": 0.0, "cacheRead": 0.0, "cacheWrite": 0.0},
            "context_window": 128000,
            "max_tokens": 65536,
            "compat": {},
            **d,
        }
        known_fields = cls.__dataclass_fields__
        accepted = {name: value for name, value in merged.items() if name in known_fields}
        return cls(**accepted)
|
||||
|
||||
|
||||
@dataclass
class Backend:
    """One physical API endpoint: a base URL together with one API key.

    Several backends may serve the same Canonical Model; each one records
    its own translation in ``model_mappings``.
    """

    id: str = ""
    name: str = ""
    label: str = ""  # e.g., "nvidia", "siliconflow" — WebUI tag only
    api_base_url: str = ""
    api_key_encrypted: str = ""
    api: str = "openai-completions"
    timeout_seconds: int = 120
    rpm_limit: int = 40
    pool: str = "primary"  # primary | fallback
    enabled: bool = True
    status: str = "healthy"  # healthy | cooling | error | disabled
    model_mappings: dict[str, ModelMapping] = field(default_factory=dict)
    source: str = "webui"  # webui | env | import
    cooldown_until: Optional[str] = None
    consecutive_429_count: int = 0
    metadata: dict = field(default_factory=dict)
    created_at: str = ""
    updated_at: str = ""

    # Runtime-only: the decrypted key, filled at load time and never written
    # to the database.
    api_key_plain: str = ""

    def has_model(self, canonical_name: str) -> bool:
        """Return whether this backend has a mapping for ``canonical_name``."""
        return canonical_name in self.model_mappings

    def get_native_id(self, canonical_name: str) -> str:
        """Translate a Canonical Name to this backend's own model id.

        Falls back to ``canonical_name`` itself when no mapping exists.
        """
        mapping = self.model_mappings.get(canonical_name)
        if mapping:
            return mapping.native_id
        return canonical_name

    def get_model_cost(self, canonical_name: str) -> dict:
        """Return the cost dict of a mapped model, or all-zero costs if unmapped."""
        mapping = self.model_mappings.get(canonical_name)
        if mapping:
            return mapping.cost
        return {"input": 0.0, "output": 0.0, "cacheRead": 0.0, "cacheWrite": 0.0}

    def to_dict(self, mask_key: bool = True) -> dict:
        """Serialize for API responses.

        ``api_key_plain`` and ``api_key_encrypted`` are removed and replaced
        by a single ``api_key`` entry: the masked key when ``mask_key`` is
        true, the plain key otherwise, or ``""`` when no plain key is loaded.
        """
        payload = asdict(self)
        for hidden in ("api_key_plain", "api_key_encrypted"):
            payload.pop(hidden, None)

        if not self.api_key_plain:
            shown_key = ""
        elif mask_key:
            shown_key = _mask_key(self.api_key_plain)
        else:
            shown_key = self.api_key_plain
        payload["api_key"] = shown_key

        # Serialize each mapping through its own to_dict().
        payload["model_mappings"] = {
            canonical: mapping.to_dict()
            for canonical, mapping in self.model_mappings.items()
        }
        return payload
|
||||
|
||||
|
||||
def _mask_key(key: str) -> str:
    """Return a display form of ``key`` with its middle replaced by ``****``.

    Keys longer than 10 characters keep their first 6 and last 4 characters;
    shorter keys keep only their first 2 characters.
    """
    stars = "****"
    if len(key) > 10:
        return f"{key[:6]}{stars}{key[-4:]}"
    return key[:2] + stars
|
||||
|
||||
|
||||
@dataclass
class CooldownEvent:
    """In-memory form of one ``cooldown_events`` row."""

    id: str = ""
    backend_id: str = ""
    consecutive_count: int = 1
    cooldown_seconds: int = 60
    response_summary: str = ""
    started_at: str = ""
    # None until an end time has been recorded for the event.
    ended_at: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class BackendHealth:
    """In-memory form of one ``backend_health`` row."""

    backend_id: str = ""
    state: str = "healthy"  # healthy | degraded | down
    last_latency_ms: int = 0
    last_status_code: int = 200
    success_rate_5m: float = 1.0
    consecutive_failures: int = 0
    last_check_at: str = ""
|
||||
|
||||
|
||||
@dataclass
class UsageLog:
    """In-memory form of one ``backend_usage_logs`` row (one hour bucket)."""

    id: str = ""
    backend_id: str = ""
    model: str = "unknown"
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    cost: float = 0.0
    request_count: int = 0
    error_count: int = 0
    avg_latency_ms: int = 0
    ttft_ms: int = 0
    hour_bucket: str = ""
|
||||
|
||||
|
||||
@dataclass
class DailyStats:
    """In-memory form of one ``daily_stats`` row (one date and pool)."""

    id: str = ""
    date: str = ""
    pool: str = "primary"
    total_requests: int = 0
    total_errors: int = 0
    total_tokens: int = 0
    total_cost: float = 0.0
    unique_backends: int = 0
|
||||
@@ -0,0 +1,155 @@
|
||||
"""Usage logging and daily statistics aggregation."""
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from storage.db import get_connection, generate_id
|
||||
|
||||
|
||||
def record_usage(
    backend_id: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    cost: float,
    latency_ms: int,
    ttft_ms: int = 0,
    is_error: bool = False,
) -> None:
    """Fold one request into the current UTC hour bucket of a backend.

    An existing (backend, hour) row has its counters incremented and its
    running averages recomputed; if no such row exists a new one is inserted
    with ``request_count`` 1. ``model`` is only written when the row is first
    created. A ``ttft_ms`` of 0 or less leaves the stored TTFT average
    untouched on update.
    """
    bucket = time.strftime("%Y-%m-%dT%H:00:00Z", time.gmtime())
    new_row_id = generate_id("use")
    token_sum = prompt_tokens + completion_tokens
    error_increment = 1 if is_error else 0

    # In the UPDATE, column names on the right-hand side refer to the values
    # stored before this statement, so request_count there is the old count.
    bump_sql = """UPDATE backend_usage_logs SET
        prompt_tokens = prompt_tokens + ?,
        completion_tokens = completion_tokens + ?,
        total_tokens = total_tokens + ?,
        cost = cost + ?,
        request_count = request_count + 1,
        error_count = error_count + ?,
        avg_latency_ms = CAST((avg_latency_ms * request_count + ?) / (request_count + 1) AS INTEGER),
        ttft_ms = CASE WHEN ? > 0 THEN CAST((ttft_ms * request_count + ?) / (request_count + 1) AS INTEGER) ELSE ttft_ms END
        WHERE backend_id = ? AND hour_bucket = ?"""
    bump_args = (
        prompt_tokens,
        completion_tokens,
        token_sum,
        cost,
        error_increment,
        latency_ms,
        ttft_ms,
        ttft_ms,
        backend_id,
        bucket,
    )

    create_sql = """INSERT INTO backend_usage_logs
        (id, backend_id, model, prompt_tokens, completion_tokens,
        total_tokens, cost, request_count, error_count,
        avg_latency_ms, ttft_ms, hour_bucket)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    create_args = (
        new_row_id,
        backend_id,
        model,
        prompt_tokens,
        completion_tokens,
        token_sum,
        cost,
        1,
        error_increment,
        latency_ms,
        ttft_ms,
        bucket,
    )

    with get_connection() as conn:
        updated = conn.execute(bump_sql, bump_args).rowcount
        if updated == 0:
            # First request of this hour for this backend.
            conn.execute(create_sql, create_args)
        conn.commit()
|
||||
|
||||
|
||||
def get_hourly_usage(
    backend_id: Optional[str] = None,
    since: Optional[str] = None,
    limit: int = 168,
) -> list[dict]:
    """Return hourly usage rows, newest bucket first.

    A truthy ``backend_id`` restricts the result to that backend and a truthy
    ``since`` to buckets at or after that value; both may be combined. At
    most ``limit`` rows are returned, each as a plain dict.
    """
    conditions = []
    args = []
    if backend_id:
        conditions.append("backend_id = ?")
        args.append(backend_id)
    if since:
        conditions.append("hour_bucket >= ?")
        args.append(since)
    args.append(limit)

    where_clause = f" WHERE {' AND '.join(conditions)}" if conditions else ""
    query = (
        "SELECT * FROM backend_usage_logs"
        f"{where_clause}"
        " ORDER BY hour_bucket DESC LIMIT ?"
    )

    with get_connection() as conn:
        records = conn.execute(query, tuple(args)).fetchall()

    usage = []
    for record in records:
        usage.append(dict(record))
    return usage
|
||||
|
||||
|
||||
def get_total_stats() -> dict:
    """Get aggregate usage totals across all backends.

    An aggregate ``SELECT`` without ``GROUP BY`` always yields exactly one
    row, and ``SUM`` over an empty table is NULL. The totals are therefore
    wrapped in ``COALESCE`` so an empty ``backend_usage_logs`` table reports
    zeros instead of ``None`` values.

    Returns:
        Dict with ``total_requests``, ``total_errors``, ``total_tokens``,
        ``total_prompt_tokens``, ``total_completion_tokens`` and
        ``total_cost``.
    """
    with get_connection() as conn:
        row = conn.execute(
            """SELECT
                COALESCE(SUM(request_count), 0) AS total_requests,
                COALESCE(SUM(error_count), 0) AS total_errors,
                COALESCE(SUM(total_tokens), 0) AS total_tokens,
                COALESCE(SUM(prompt_tokens), 0) AS total_prompt_tokens,
                COALESCE(SUM(completion_tokens), 0) AS total_completion_tokens,
                COALESCE(SUM(cost), 0.0) AS total_cost
            FROM backend_usage_logs"""
        ).fetchone()
        return dict(row)
|
||||
|
||||
|
||||
def aggregate_daily_stats(date: str) -> None:
    """Rebuild the ``daily_stats`` rows for ``date`` from hourly usage.

    Existing rows for the date are deleted first, then one row per pool is
    inserted by summing every hour bucket whose value starts with ``date``.
    Pools are taken from the current ``backends`` table via a join.
    """
    purge_sql = """DELETE FROM daily_stats WHERE date = ?"""
    rollup_sql = """INSERT INTO daily_stats (id, date, pool, total_requests,
        total_errors, total_tokens, total_cost, unique_backends)
        SELECT
        ? || '-' || b.pool,
        ?,
        b.pool,
        SUM(u.request_count),
        SUM(u.error_count),
        SUM(u.total_tokens),
        SUM(u.cost),
        COUNT(DISTINCT u.backend_id)
        FROM backend_usage_logs u
        JOIN backends b ON u.backend_id = b.id
        WHERE u.hour_bucket LIKE ?
        GROUP BY b.pool"""

    with get_connection() as conn:
        conn.execute(purge_sql, (date,))
        # One generated id is shared by the rows; the pool suffix added in
        # SQL keeps the resulting primary keys distinct.
        rollup_args = (generate_id("day"), date, date + "%")
        conn.execute(rollup_sql, rollup_args)
        conn.commit()
|
||||
|
||||
|
||||
def get_daily_stats(days: int = 30) -> list[dict]:
    """Get daily aggregated stats for the most recent ``days`` dates.

    ``daily_stats`` holds one row per (date, pool), so a plain ``LIMIT`` on
    rows would cover fewer than ``days`` days whenever more than one pool has
    data. The limit is applied to distinct dates instead, and every pool row
    of those dates is returned.

    Args:
        days: Number of most recent distinct dates to include.

    Returns:
        Rows as dicts, newest date first and ordered by pool within a date.
    """
    with get_connection() as conn:
        rows = conn.execute(
            """SELECT * FROM daily_stats
               WHERE date IN (
                   SELECT DISTINCT date FROM daily_stats
                   ORDER BY date DESC LIMIT ?
               )
               ORDER BY date DESC, pool""",
            (days,),
        ).fetchall()
        return [dict(row) for row in rows]
|
||||
Reference in New Issue
Block a user