Files
sidecar-v2/config.py
T
vincent 18dfb2901b fix: add Primary-Wait Prometheus counters + conservative defaults — BIZ-60 review
P0 changes per 4-reviewer consensus (严维序/陆怀瑾/沈路明/梁思筑):

1. Prometheus metrics counters (proxy.py + server.py):
   - sidecar_primary_wait_enter_total: requests entering Primary-Wait
   - sidecar_primary_wait_recovery_total: successful primary recoveries
   - sidecar_primary_wait_exhausted_total: wait exhausted → emergency

2. Conservative default (config.py):
   - primary_wait_max_retries: 6 → 3 (15s total wait, safe start)
   - Observe recovery rate before increasing to 6

Counters form complete funnel: enter - recovery = exhausted,
enabling Grafana monitoring and ROI validation per COO/PM/Ops.
2026-06-25 22:48:09 +08:00

179 lines
5.1 KiB
Python

"""System configuration management for Sidecar V2."""
import os
import json
from dataclasses import dataclass, field, asdict
from typing import Optional
@dataclass
class Config:
    """Sidecar V2 runtime configuration.

    Sources (priority order):
    1. Environment variables (highest)
    2. system_config table in SQLite
    3. Defaults defined here

    ``from_env`` applies sources 3 and 1; ``merge_db`` then layers source 2
    underneath any environment override.
    """

    # Field name -> environment variable that overrides it.  This single table
    # drives both from_env() and merge_db() so the two can never disagree about
    # which variable belongs to which field.  It is deliberately left
    # unannotated so that @dataclass does not treat it as a field.
    _ENV_VARS = {
        "host": "SIDECAR_HOST",
        "port": "SIDECAR_PORT",
        "metrics_port": "SIDECAR_METRICS_PORT",
        "queue_max_depth": "SIDECAR_QUEUE_MAX",
        "queue_timeout_seconds": "SIDECAR_QUEUE_TIMEOUT",
        "default_rpm_limit": "SIDECAR_RATE_RPM",
        "cooldown_base_seconds": "SIDECAR_COOLDOWN_BASE",
        "cooldown_max_seconds": "SIDECAR_COOLDOWN_MAX",
        "admin_token": "SIDECAR_ADMIN_TOKEN",
        "encryption_key": "SIDECAR_ENCRYPTION_KEY",
        "log_level": "LOG_LEVEL",
        "primary_wait_ms": "SIDECAR_PRIMARY_WAIT_MS",
        "primary_wait_max_retries": "SIDECAR_PRIMARY_WAIT_MAX_RETRIES",
        "db_path": "SIDECAR_DB_PATH",
        "backup_dir": "SIDECAR_BACKUP_DIR",
    }

    # Accepted spellings when a boolean arrives as text (to_db_dict() writes
    # "true" / "false").
    _TRUE_STRINGS = ("true", "1", "yes", "on")
    _FALSE_STRINGS = ("false", "0", "no", "off", "")

    # Listen
    host: str = "127.0.0.1"
    port: int = 9190
    metrics_port: int = 9191
    # Queue
    queue_max_depth: int = 500
    queue_timeout_seconds: float = 30.0
    # Provider
    default_rpm_limit: int = 40
    # Cooldown
    cooldown_base_seconds: float = 30.0
    cooldown_max_seconds: float = 600.0
    cooldown_exponential_backoff: bool = True
    # Emergency channel: RPM fraction when all pools exhausted
    emergency_rpm_fraction: float = 0.10
    # Health check
    health_check_interval_seconds: int = 60
    health_check_timeout_seconds: int = 10
    health_probe_endpoint: str = "/v1/models"
    # Admin auth
    admin_token: str = ""
    # Encryption
    encryption_key: str = ""
    # Logging
    log_level: str = "INFO"
    # Database
    db_path: str = ""
    backup_dir: str = ""
    backup_retention_days: int = 7
    # Rate limiter
    rate_limiter_refill_interval_ms: int = 50
    # Router
    router_refresh_interval_seconds: float = 5.0
    # Max pool-internal retries
    max_pool_retries: int = 5
    # Pre-check cooldown threshold (seconds remaining)
    cooldown_precheck_threshold_seconds: float = 10.0
    # Dashboard
    dashboard_sse_interval_seconds: float = 1.0
    # Stats
    stats_refresh_interval_seconds: float = 30.0
    # Primary-Wait: when all primary backends are cooling, wait before fallback
    primary_wait_ms: int = 5000
    primary_wait_max_retries: int = 3
    # Request timeout
    default_request_timeout_seconds: int = 120

    @staticmethod
    def _coerce(template, raw, source):
        """Convert ``raw`` to the type of ``template``.

        Args:
            template: Current value of the field; only its type is used.
            raw: Incoming value (text from the environment or the DB).
            source: Human-readable origin, used in the error message.

        Returns:
            ``raw`` converted to ``type(template)``.

        Raises:
            ValueError: If ``raw`` cannot be parsed as the target type.
        """
        target = type(template)
        try:
            if target is bool:
                # bool("false") is True, so textual booleans need real parsing.
                if not isinstance(raw, str):
                    return bool(raw)
                text = raw.strip().lower()
                if text in Config._TRUE_STRINGS:
                    return True
                if text in Config._FALSE_STRINGS:
                    return False
                raise ValueError("not a recognised boolean")
            return target(raw)
        except ValueError as err:
            raise ValueError(
                f"Invalid {target.__name__} value for {source}: {raw!r}"
            ) from err

    @classmethod
    def from_env(cls) -> "Config":
        """Load configuration from environment variables.

        Starts from the class defaults and overrides every field listed in
        ``_ENV_VARS`` whose variable is set.

        Returns:
            A new ``Config`` instance.

        Raises:
            ValueError: If a numeric variable holds a non-numeric value; the
                message names the offending variable.
        """
        c = cls()
        for attr, env_name in cls._ENV_VARS.items():
            raw = os.getenv(env_name)
            if raw is not None:
                setattr(c, attr, cls._coerce(getattr(c, attr), raw, env_name))

        # Level names are normalised to upper case (also applied to the default).
        c.log_level = c.log_level.upper()

        # Database locations default to paths under the current working
        # directory rather than to the empty-string field defaults.
        if os.getenv(cls._ENV_VARS["db_path"]) is None:
            c.db_path = os.path.join(os.getcwd(), "data", "sidecar_v2.db")
        if os.getenv(cls._ENV_VARS["backup_dir"]) is None:
            c.backup_dir = os.path.join(os.getcwd(), "data", "backups")

        # V1 compatibility: migrate env vars
        c._migrate_v1_env()
        return c

    def _migrate_v1_env(self) -> None:
        """Migrate V1 environment variables to V2 defaults.

        When both a V1 API key (``SIDECAR_API_KEY``) and an encryption key are
        present, the V1 key and upstream endpoint are copied into the private
        ``_SIDECAR_V1_*`` variables of this process's environment.
        """
        # V1 UPSTREAM endpoint
        upstream = os.getenv("SIDECAR_UPSTREAM")
        api_key = os.getenv("SIDECAR_API_KEY")
        if api_key and self.encryption_key:
            # These will be used during initial migration
            os.environ["_SIDECAR_V1_API_KEY"] = api_key
            os.environ["_SIDECAR_V1_UPSTREAM"] = (
                upstream or "https://integrate.api.nvidia.com/v1"
            )

    def to_db_dict(self) -> dict:
        """Serialize to dict for system_config storage.

        Booleans become ``"true"`` / ``"false"``, other numbers become their
        ``str()`` form, and every other value is passed through unchanged.

        Returns:
            Mapping of field name to serialized value, covering every field.
        """
        # NOTE(review): every field is included, so admin_token and
        # encryption_key are part of the result - confirm callers do not
        # persist them in plaintext.
        result = {}
        for key, value in asdict(self).items():
            # bool must be tested first: it is a subclass of int.
            if isinstance(value, bool):
                result[key] = "true" if value else "false"
            elif isinstance(value, (int, float)):
                result[key] = str(value)
            else:
                result[key] = value
        return result

    @classmethod
    def merge_db(cls, base: "Config", db_config: dict) -> "Config":
        """Merge DB config into base config (env vars already applied to base).

        A DB value is applied only when the field's environment variable (see
        ``_ENV_VARS``) is not set, so environment overrides keep the highest
        priority.  Keys in ``db_config`` that are not attributes of ``base``
        are ignored.

        Args:
            base: Configuration to update in place.
            db_config: Mapping of field name to stored value.

        Returns:
            ``base``, after the merge.

        Raises:
            ValueError: If a stored value cannot be parsed as the field's type.
        """
        for key, value in list(vars(base).items()):
            if key not in db_config:
                continue
            env_name = cls._ENV_VARS.get(key)
            if env_name is not None and env_name in os.environ:
                # DB values only apply when no env var override
                continue
            setattr(
                base,
                key,
                cls._coerce(value, db_config[key], f"system_config[{key!r}]"),
            )
        return base
# Singleton: the shared module-level configuration instance.  It is built from
# the process environment as soon as this module is imported, so importing the
# module reads os.environ and raises ValueError if a numeric variable is
# malformed.
config = Config.from_env()