611ebd11a8
BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
116 lines
3.6 KiB
Python
116 lines
3.6 KiB
Python
"""429 Cooldown management for backends using exponential backoff."""
|
|
|
|
import time
|
|
from datetime import datetime, timezone
|
|
import structlog
|
|
from config import config
|
|
from storage.backend_store import set_backend_cooldown, clear_backend_cooldown
|
|
from storage.cooldown_store import log_cooldown_event, end_cooldown_event
|
|
|
|
logger = structlog.get_logger("sidecar_v2.cooldown_manager")
|
|
|
|
|
|
def calculate_cooldown(consecutive_count: int) -> float:
    """Calculate the cooldown duration for a backend.

    When ``config.cooldown_exponential_backoff`` is truthy the duration is
    ``base * 2 ** (consecutive_count - 1)``; otherwise it grows linearly as
    ``base * consecutive_count``.  In both modes the result is capped at
    ``config.cooldown_max_seconds``.

    Args:
        consecutive_count: Consecutive-429 counter for the backend.  Values
            below 1 are treated as 1 so the uncapped duration never drops
            below one base interval.

    Returns:
        The cooldown duration in seconds.
    """
    base = config.cooldown_base_seconds
    max_seconds = config.cooldown_max_seconds

    # A zero or negative count would otherwise halve the base (exponential
    # mode) or produce a zero/negative duration (linear mode).
    count = max(consecutive_count, 1)

    if config.cooldown_exponential_backoff:
        # 2 ** 1023 is the largest power of two that still converts to a
        # float.  Without this clamp a float base multiplied by a larger
        # power raises OverflowError after a very long streak instead of
        # simply hitting the cap below.
        exponent = min(count - 1, 1023)
        duration = base * (2 ** exponent)
    else:
        duration = base * count

    return min(duration, max_seconds)
|
|
|
|
|
|
def start_cooldown(backend_id: str, consecutive_count: int) -> float:
    """Put a backend into cooldown after a 429 response.

    The cooldown deadline is formatted as a UTC ``YYYY-MM-DDTHH:MM:SSZ``
    string and handed to ``set_backend_cooldown`` together with the
    consecutive count; a cooldown event is then recorded and the start is
    logged.

    Args:
        backend_id: Identifier of the backend being cooled down.
        consecutive_count: Consecutive-429 counter used to size the cooldown.

    Returns:
        The cooldown duration in seconds (the value produced by
        ``calculate_cooldown``), not the end timestamp.
    """
    seconds = calculate_cooldown(consecutive_count)

    # Deadline is "now + duration", rendered in UTC with a trailing "Z".
    deadline = time.gmtime(time.time() + seconds)
    deadline_iso = time.strftime("%Y-%m-%dT%H:%M:%SZ", deadline)

    set_backend_cooldown(backend_id, deadline_iso, consecutive_count)

    summary = f"429 cooldown triggered (consecutive #{consecutive_count})"
    log_cooldown_event(
        backend_id=backend_id,
        consecutive_count=consecutive_count,
        cooldown_seconds=int(seconds),
        response_summary=summary,
    )

    logger.info(
        "cooldown_started",
        backend_id=backend_id,
        duration=round(seconds, 1),
        consecutive=consecutive_count,
    )
    return seconds
|
|
|
|
|
|
def check_and_clear_cooldown(backend_id: str) -> bool:
    """Clear a backend's cooldown if it has expired.

    The backend is looked up without decrypting its key.  If it is in the
    ``"cooling"`` state and its ``cooldown_until`` deadline has passed (or is
    missing or unparseable), the cooldown is cleared.

    Args:
        backend_id: Identifier of the backend to check.

    Returns:
        True if the cooldown was cleared (backend is back online); False if
        the backend does not exist, is not cooling, or is still cooling.
    """
    from storage.backend_store import get_backend

    backend = get_backend(backend_id, decrypt_key=False)
    if backend is None or backend.status != "cooling":
        return False

    cooldown_until = backend.cooldown_until
    if not cooldown_until:
        # Cooling state without a deadline: nothing to wait for.
        # NOTE(review): this path (and the parse-failure path below) does not
        # call end_cooldown_event -- confirm whether an open event should be
        # closed here as well.
        clear_backend_cooldown(backend_id)
        return True

    try:
        # fromisoformat() only accepts a trailing "Z" on newer Python
        # versions, so normalise it to an explicit UTC offset first.
        dt = datetime.fromisoformat(cooldown_until.replace("Z", "+00:00"))
    except ValueError:
        # An unreadable deadline must not keep the backend offline forever;
        # clear it, but leave a trace for operators.
        logger.warning(
            "cooldown_timestamp_invalid",
            backend_id=backend_id,
            cooldown_until=cooldown_until,
        )
        clear_backend_cooldown(backend_id)
        return True

    # start_cooldown writes deadlines in UTC.  A value stored without an
    # offset would otherwise be read as *local* time by timestamp(), shifting
    # the expiry by the host's UTC offset.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    cooldown_ts = dt.timestamp()

    now = time.time()
    if now >= cooldown_ts:
        clear_backend_cooldown(backend_id)
        end_cooldown_event(backend_id)
        logger.info("cooldown_cleared", backend_id=backend_id)
        return True

    remaining = cooldown_ts - now
    logger.debug(
        "cooldown_active",
        backend_id=backend_id,
        remaining_seconds=round(remaining, 1),
    )
    return False
|
|
|
|
|
|
def precheck_cooldown(backend_id: str) -> bool:
    """Report whether a cooling backend is about to leave its cooldown.

    A backend whose cooldown expires within
    ``config.cooldown_precheck_threshold_seconds`` should be skipped so it is
    not hit again right as the cooldown ends.

    Args:
        backend_id: Identifier of the backend to check.

    Returns:
        True if the backend is cooling and its remaining cooldown is greater
        than zero and at most the configured threshold.  False if the backend
        does not exist, is not cooling, has no (or an unparseable) deadline,
        or its remaining time falls outside that window.
    """
    from storage.backend_store import get_backend

    backend = get_backend(backend_id, decrypt_key=False)
    if backend is None or backend.status != "cooling":
        return False

    cooldown_until = backend.cooldown_until
    if not cooldown_until:
        return False

    try:
        # fromisoformat() only accepts a trailing "Z" on newer Python
        # versions, so normalise it to an explicit UTC offset first.
        dt = datetime.fromisoformat(cooldown_until.replace("Z", "+00:00"))
    except ValueError:
        return False

    # Deadlines are written in UTC; a value without an offset would otherwise
    # be interpreted as local time by timestamp().
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    remaining = dt.timestamp() - time.time()
    return 0 < remaining <= config.cooldown_precheck_threshold_seconds