feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI
BIZ-52 Step3 开发实现:
- storage: backend/usage/cooldown/config CRUD with SQLite WAL
- crypto: AES-256-GCM API key encryption
- pool_manager: primary/fallback pool routing
- cooldown_manager: 429 exponential backoff cooldown
- rate_limiter: per-backend token bucket RPM control
- router: model → backend routing with pool priority
- proxy: multi-pool request forwarding with retry
- server: FastAPI admin API + OpenAI-compatible proxy + SSE
- dashboard: WebUI with provider CRUD, stats, charts

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
"""System configuration management for Sidecar V2."""
|
||||
|
||||
import os
|
||||
import json
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class Config:
    """Sidecar V2 runtime configuration.

    Sources (priority order):
      1. Environment variables (highest)
      2. system_config table in SQLite
      3. Defaults defined here

    Typical use is ``Config.merge_db(Config.from_env(), db_rows)``: the
    environment is applied first, then stored values fill in every field
    that was not explicitly overridden by an environment variable.
    """

    # Listen
    host: str = "127.0.0.1"
    port: int = 9190
    metrics_port: int = 9191

    # Queue
    queue_max_depth: int = 500
    queue_timeout_seconds: float = 30.0

    # Provider
    default_rpm_limit: int = 40

    # Cooldown
    cooldown_base_seconds: float = 30.0
    cooldown_max_seconds: float = 600.0
    cooldown_exponential_backoff: bool = True

    # Emergency channel: RPM fraction when all pools exhausted
    emergency_rpm_fraction: float = 0.10

    # Health check
    health_check_interval_seconds: int = 60
    health_check_timeout_seconds: int = 10
    health_probe_endpoint: str = "/v1/models"

    # Admin auth
    admin_token: str = ""

    # Encryption
    encryption_key: str = ""

    # Logging
    log_level: str = "INFO"

    # Database
    db_path: str = ""
    backup_dir: str = ""
    backup_retention_days: int = 7

    # Rate limiter
    rate_limiter_refill_interval_ms: int = 50

    # Router
    router_refresh_interval_seconds: float = 5.0

    # Max pool-internal retries
    max_pool_retries: int = 5

    # Pre-check cooldown threshold (seconds remaining)
    cooldown_precheck_threshold_seconds: float = 10.0

    # Dashboard
    dashboard_sse_interval_seconds: float = 1.0

    # Stats
    stats_refresh_interval_seconds: float = 30.0

    # Request timeout
    default_request_timeout_seconds: int = 120

    # Field name -> environment variable that overrides it.  Deliberately
    # left un-annotated so the dataclass machinery treats it as a plain class
    # attribute rather than a field.  Shared by from_env() and merge_db() so
    # both agree on what counts as "overridden by the environment".
    _ENV_VARS = {
        "host": "SIDECAR_HOST",
        "port": "SIDECAR_PORT",
        "metrics_port": "SIDECAR_METRICS_PORT",
        "queue_max_depth": "SIDECAR_QUEUE_MAX",
        "queue_timeout_seconds": "SIDECAR_QUEUE_TIMEOUT",
        "default_rpm_limit": "SIDECAR_RATE_RPM",
        "cooldown_base_seconds": "SIDECAR_COOLDOWN_BASE",
        "cooldown_max_seconds": "SIDECAR_COOLDOWN_MAX",
        "admin_token": "SIDECAR_ADMIN_TOKEN",
        "encryption_key": "SIDECAR_ENCRYPTION_KEY",
        "log_level": "LOG_LEVEL",
        "db_path": "SIDECAR_DB_PATH",
        "backup_dir": "SIDECAR_BACKUP_DIR",
    }

    # Strings accepted as boolean "true" when reading stored values.
    _TRUE_STRINGS = ("1", "true", "yes", "on")

    @staticmethod
    def _coerce(current, raw):
        """Convert *raw* to the type of *current* (the field's present value).

        Booleans are parsed textually because ``bool("false")`` is ``True``;
        any string outside ``_TRUE_STRINGS`` (case-insensitive) is ``False``.
        Every other type is converted with its own constructor, so a
        malformed number raises ``ValueError``.
        """
        if isinstance(current, bool):
            if isinstance(raw, str):
                return raw.strip().lower() in Config._TRUE_STRINGS
            return bool(raw)
        return type(current)(raw)

    @classmethod
    def from_env(cls) -> "Config":
        """Build a Config from defaults overlaid with environment variables.

        Returns:
            A new Config.  ``db_path`` and ``backup_dir`` fall back to
            ``<cwd>/data/...`` when their variables are unset.

        Raises:
            ValueError: if a numeric variable holds a non-numeric string.
        """
        c = cls()

        for name, env_name in cls._ENV_VARS.items():
            raw = os.environ.get(env_name)
            if raw is not None:
                setattr(c, name, cls._coerce(getattr(c, name), raw))

        # Normalised regardless of where the value came from.
        c.log_level = c.log_level.upper()

        # Database locations default relative to the current working directory.
        data_dir = os.path.join(os.getcwd(), "data")
        if "SIDECAR_DB_PATH" not in os.environ:
            c.db_path = os.path.join(data_dir, "sidecar_v2.db")
        if "SIDECAR_BACKUP_DIR" not in os.environ:
            c.backup_dir = os.path.join(data_dir, "backups")

        # V1 compatibility: migrate env vars
        c._migrate_v1_env()

        return c

    def _migrate_v1_env(self) -> None:
        """Stage V1 environment variables for the initial V1 -> V2 migration.

        When both a V1 ``SIDECAR_API_KEY`` and a V2 encryption key are
        present, the key and upstream URL are copied into the private
        ``_SIDECAR_V1_*`` variables of this process's environment.
        """
        upstream = os.getenv("SIDECAR_UPSTREAM")
        api_key = os.getenv("SIDECAR_API_KEY")
        if api_key and self.encryption_key:
            # NOTE(review): values written to os.environ are inherited by any
            # child process spawned afterwards -- confirm that is acceptable
            # for the plaintext API key.
            os.environ["_SIDECAR_V1_API_KEY"] = api_key
            os.environ["_SIDECAR_V1_UPSTREAM"] = (
                upstream or "https://integrate.api.nvidia.com/v1"
            )

    def to_db_dict(self) -> dict:
        """Serialize every field to a dict for system_config storage.

        Booleans become ``"true"``/``"false"``, numbers become their ``str()``
        form, and all other values are passed through unchanged.
        """
        # NOTE(review): this includes admin_token and encryption_key; confirm
        # callers filter secrets before persisting them.
        result = {}
        for key, value in asdict(self).items():
            # bool must be tested first: it is a subclass of int.
            if isinstance(value, bool):
                result[key] = "true" if value else "false"
            elif isinstance(value, (int, float)):
                result[key] = str(value)
            else:
                result[key] = value
        return result

    @classmethod
    def merge_db(cls, base: "Config", db_config: dict) -> "Config":
        """Merge DB config into base config (env vars already applied to base).

        A stored value is applied only when the field's environment variable
        (see ``_ENV_VARS``) is not set, so the environment keeps the highest
        priority.  Fields without an environment variable always take the
        stored value.

        Args:
            base: Config to update in place, normally from ``from_env()``.
            db_config: Mapping of field name to stored value, e.g. the output
                of ``to_db_dict()``.  Keys that are not fields are ignored.

        Returns:
            The same ``base`` instance, mutated.

        Raises:
            ValueError: if a stored value cannot be converted to the field's
                type.
        """
        # Snapshot the items: the instance is mutated while we walk it.
        for key, current in list(vars(base).items()):
            if key not in db_config:
                continue
            env_name = cls._ENV_VARS.get(key)
            if env_name is not None and env_name in os.environ:
                # Explicit environment override wins over the database.
                continue
            setattr(base, key, cls._coerce(current, db_config[key]))
        return base
|
||||
|
||||
|
||||
# Singleton
|
||||
# Process-wide configuration instance, built once when this module is first
# imported.  Construction reads environment variables, so a malformed numeric
# value (e.g. a non-integer SIDECAR_PORT) raises ValueError at import time.
# It may also write the private _SIDECAR_V1_* variables into os.environ when
# a V1 API key and an encryption key are both present.
config = Config.from_env()
|
||||
Reference in New Issue
Block a user