feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI

BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
2026-06-25 16:39:01 +08:00
parent 4fd89b038d
commit 611ebd11a8
19 changed files with 3034 additions and 0 deletions
@@ -0,0 +1,116 @@
+"""429 Cooldown management for backends using exponential backoff."""
+
+import time
+from datetime import datetime, timezone
+import structlog
+from config import config
+from storage.backend_store import set_backend_cooldown, clear_backend_cooldown
+from storage.cooldown_store import log_cooldown_event, end_cooldown_event
+
+logger = structlog.get_logger("sidecar_v2.cooldown_manager")
+
+
+def calculate_cooldown(consecutive_count: int) -> float:
+    """Calculate cooldown duration using exponential backoff.
+
+    Formula: base * 2^(consecutive-1), capped at max.
+    """
+    base = config.cooldown_base_seconds
+    max_seconds = config.cooldown_max_seconds
+    if config.cooldown_exponential_backoff:
+        duration = base * (2 ** (consecutive_count - 1))
+    else:
+        duration = base * consecutive_count
+    return min(duration, max_seconds)
+
+
+def start_cooldown(backend_id: str, consecutive_count: int) -> float:
+    """Start cooldown for a backend after 429.
+
+    Returns: cooldown end timestamp.
+    """
+    duration = calculate_cooldown(consecutive_count)
+    cooldown_until_ts = time.time() + duration
+    cooldown_until = time.strftime(
+        "%Y-%m-%dT%H:%M:%SZ", time.gmtime(cooldown_until_ts)
+    )
+
+    set_backend_cooldown(backend_id, cooldown_until, consecutive_count)
+    log_cooldown_event(
+        backend_id=backend_id,
+        consecutive_count=consecutive_count,
+        cooldown_seconds=int(duration),
+        response_summary=f"429 cooldown triggered (consecutive #{consecutive_count})",
+    )
+
+    logger.info(
+        "cooldown_started",
+        backend_id=backend_id,
+        duration=round(duration, 1),
+        consecutive=consecutive_count,
+    )
+    return duration
+
+
+def check_and_clear_cooldown(backend_id: str) -> bool:
+    """Check if cooldown has expired for a backend.
+
+    Returns True if cooldown was cleared (backend is back online).
+    """
+    from storage.backend_store import get_backend
+    backend = get_backend(backend_id, decrypt_key=False)
+    if backend is None:
+        return False
+
+    if backend.status != "cooling":
+        return False
+
+    cooldown_until = backend.cooldown_until
+    if not cooldown_until:
+        clear_backend_cooldown(backend_id)
+        return True
+
+    # Parse cooldown_until as ISO timestamp
+    try:
+        dt = datetime.fromisoformat(cooldown_until.replace("Z", "+00:00"))
+        cooldown_ts = dt.timestamp()
+    except ValueError:
+        # If parsing fails, clear and move on
+        clear_backend_cooldown(backend_id)
+        return True
+
+    now = time.time()
+    if now >= cooldown_ts:
+        clear_backend_cooldown(backend_id)
+        end_cooldown_event(backend_id)
+        logger.info("cooldown_cleared", backend_id=backend_id)
+        return True
+
+    remaining = cooldown_ts - now
+    logger.debug("cooldown_active", backend_id=backend_id, remaining_seconds=round(remaining, 1))
+    return False
+
+
+def precheck_cooldown(backend_id: str) -> bool:
+    """Check if backend should be skipped due to near-expiry cooldown.
+
+    If cooldown will expire within config.cooldown_precheck_threshold_seconds,
+    skip the backend so we don't hit it again right as it expires.
+    """
+    from storage.backend_store import get_backend
+    backend = get_backend(backend_id, decrypt_key=False)
+    if backend is None or backend.status != "cooling":
+        return False
+
+    cooldown_until = backend.cooldown_until
+    if not cooldown_until:
+        return False
+
+    try:
+        dt = datetime.fromisoformat(cooldown_until.replace("Z", "+00:00"))
+        cooldown_ts = dt.timestamp()
+    except ValueError:
+        return False
+
+    remaining = cooldown_ts - time.time()
+    return 0 < remaining <= config.cooldown_precheck_threshold_seconds