feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI
BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,116 @@
|
||||
"""429 Cooldown management for backends using exponential backoff."""
|
||||
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
import structlog
|
||||
from config import config
|
||||
from storage.backend_store import set_backend_cooldown, clear_backend_cooldown
|
||||
from storage.cooldown_store import log_cooldown_event, end_cooldown_event
|
||||
|
||||
logger = structlog.get_logger("sidecar_v2.cooldown_manager")
|
||||
|
||||
|
||||
def calculate_cooldown(consecutive_count: int) -> float:
    """Calculate the cooldown duration for a backend.

    When ``config.cooldown_exponential_backoff`` is enabled the duration is
    ``base * 2 ** (consecutive_count - 1)``; otherwise it grows linearly as
    ``base * consecutive_count``. In both modes the result is capped at
    ``config.cooldown_max_seconds``.

    Args:
        consecutive_count: How many cooldowns in a row have been triggered.
            Values below 1 are treated as 1 so the result is never shorter
            than one base interval (and never zero or negative).

    Returns:
        The cooldown duration in seconds.
    """
    base = config.cooldown_base_seconds
    max_seconds = config.cooldown_max_seconds

    # A cooldown is only ever started after at least one failure; a count of
    # 0 or less would otherwise yield base / 2 (exponential) or a
    # non-positive duration (linear).
    count = max(consecutive_count, 1)

    if config.cooldown_exponential_backoff:
        # 2 ** 1023 is the largest power of two that still converts to a
        # float. Bounding the exponent avoids OverflowError when ``base`` is
        # a float, and avoids building enormous integers for large counts;
        # the result is clamped to ``max_seconds`` below anyway.
        exponent = min(count - 1, 1023)
        duration = base * (2 ** exponent)
    else:
        duration = base * count

    return min(duration, max_seconds)
|
||||
|
||||
|
||||
def start_cooldown(backend_id: str, consecutive_count: int) -> float:
    """Put a backend into cooldown after a 429 response.

    Computes the duration with ``calculate_cooldown``, persists the expiry
    through ``set_backend_cooldown`` as a UTC ``YYYY-MM-DDTHH:MM:SSZ`` string,
    records the event through ``log_cooldown_event`` and emits a
    ``cooldown_started`` log entry.

    Args:
        backend_id: Identifier of the backend to cool down.
        consecutive_count: Consecutive cooldown counter passed on to the
            duration calculation, the backend store and the event log.

    Returns:
        The cooldown duration in seconds (not the expiry timestamp).
    """
    import math

    duration = calculate_cooldown(consecutive_count)

    # The persisted expiry only has whole-second resolution. Round up so the
    # stored cooldown never ends earlier than the computed duration
    # (time.gmtime alone would truncate the fractional part).
    cooldown_until_ts = math.ceil(time.time() + duration)
    cooldown_until = time.strftime(
        "%Y-%m-%dT%H:%M:%SZ", time.gmtime(cooldown_until_ts)
    )

    set_backend_cooldown(backend_id, cooldown_until, consecutive_count)
    log_cooldown_event(
        backend_id=backend_id,
        consecutive_count=consecutive_count,
        cooldown_seconds=int(duration),
        response_summary=f"429 cooldown triggered (consecutive #{consecutive_count})",
    )

    logger.info(
        "cooldown_started",
        backend_id=backend_id,
        duration=round(duration, 1),
        consecutive=consecutive_count,
    )
    return duration
|
||||
|
||||
|
||||
def check_and_clear_cooldown(backend_id: str) -> bool:
    """Clear a backend's cooldown if it has expired.

    The backend is looked up with ``get_backend``. Nothing happens unless its
    status is ``"cooling"``. A cooling backend is cleared when its
    ``cooldown_until`` timestamp has passed, and also when that timestamp is
    missing or cannot be parsed (there is then no way to know when the
    cooldown should end, so the backend is released rather than left stuck).

    Every clear path calls ``clear_backend_cooldown`` and
    ``end_cooldown_event`` and logs ``cooldown_cleared``, so the cooldown
    event is closed no matter why the backend was released.

    Args:
        backend_id: Identifier of the backend to check.

    Returns:
        True if the cooldown was cleared (backend is back online), False if
        the backend is unknown, not cooling, or still cooling down.
    """
    from storage.backend_store import get_backend

    backend = get_backend(backend_id, decrypt_key=False)
    if backend is None or backend.status != "cooling":
        return False

    cooldown_until = backend.cooldown_until

    # ``cooldown_ts`` stays None when there is no usable expiry; that case is
    # treated like an already-expired cooldown below.
    cooldown_ts = None
    if cooldown_until:
        try:
            # "Z" is rewritten to an explicit offset because
            # datetime.fromisoformat does not accept it on older Pythons.
            parsed = datetime.fromisoformat(cooldown_until.replace("Z", "+00:00"))
            cooldown_ts = parsed.timestamp()
        except ValueError:
            logger.warning(
                "cooldown_until_unparseable",
                backend_id=backend_id,
                cooldown_until=cooldown_until,
            )

    now = time.time()
    if cooldown_ts is not None and now < cooldown_ts:
        remaining = cooldown_ts - now
        logger.debug(
            "cooldown_active",
            backend_id=backend_id,
            remaining_seconds=round(remaining, 1),
        )
        return False

    clear_backend_cooldown(backend_id)
    end_cooldown_event(backend_id)
    logger.info("cooldown_cleared", backend_id=backend_id)
    return True
|
||||
|
||||
|
||||
def precheck_cooldown(backend_id: str) -> bool:
    """Tell whether a cooling backend is close to the end of its cooldown.

    Args:
        backend_id: Identifier of the backend to inspect.

    Returns:
        True only when the backend's status is ``"cooling"`` and its
        ``cooldown_until`` lies in the future by no more than
        ``config.cooldown_precheck_threshold_seconds``. False in every other
        case: unknown backend, backend not cooling, missing or unparseable
        timestamp, cooldown already expired, or expiry further away than the
        threshold.
    """
    from storage.backend_store import get_backend

    record = get_backend(backend_id, decrypt_key=False)
    if record is None:
        return False
    if record.status != "cooling":
        return False

    expiry_text = record.cooldown_until
    if not expiry_text:
        return False

    try:
        # Rewrite the trailing "Z" as an explicit UTC offset before parsing.
        expiry = datetime.fromisoformat(expiry_text.replace("Z", "+00:00"))
        expiry_ts = expiry.timestamp()
    except ValueError:
        return False

    seconds_left = expiry_ts - time.time()
    if seconds_left <= 0:
        # Already expired: not "near expiry", simply over.
        return False
    return seconds_left <= config.cooldown_precheck_threshold_seconds
|
||||
Reference in New Issue
Block a user