Files
sidecar-v2/router.py
T
vincent 2d95ae50a5 feat: Sidecar V2 — multi-pool provider proxy with 429 cooldown
- proxy.py: Fix route path duplication (v1/v1 → v1) when upstream
  base URL already includes /v1 prefix
- proxy.py: Fix _emergency_count global variable for metrics tracking
- server.py: Add logging.basicConfig(level=logging.INFO) for structlog
  INFO-level log visibility
- Full multi-pool routing: primary → fallback → emergency passthrough
- Per-backend rate limiting with RPM-based token bucket
- 429 cooldown mechanism with automatic recovery
- Dashboard with SSE real-time monitoring
- Admin API for backend/pool/config management
- SQLite-backed persistence with encrypted API key storage
- Docker compose deployment

Deployed by opengineer 严维序 as BIZ-50 Step 4
2026-06-25 21:20:32 +08:00

62 lines
2.1 KiB
Python

"""Model → Backend routing logic for Sidecar V2."""
import structlog
from typing import Optional
from storage.models import Backend
from pool_manager import PoolManager
from rate_limiter import PerBackendRateLimiter
# Module-level structlog logger; every routing event in this file is emitted
# under the "sidecar_v2.router" logger name.
logger = structlog.get_logger("sidecar_v2.router")
class Router:
    """Route model requests to the best available backend.

    Pick strategy:
        1. Primary pool → healthy backends supporting the model
        2. Rate-limiter check → skip if RPM exhausted
        3. Fallback pool → repeat above
        4. If all exhausted → return None (caller handles emergency)
    """

    # Pools are tried strictly in this order; the first pool that yields a
    # backend accepted by the rate limiter wins.
    POOL_ORDER = ("primary", "fallback")

    def __init__(self, pool_manager: PoolManager, rate_limiter: PerBackendRateLimiter):
        """Store the collaborators used to make routing decisions.

        Args:
            pool_manager: Queried through ``get_available_backends`` for the
                candidate backends of each pool and through
                ``is_any_pool_available`` for overall availability.
            rate_limiter: Queried through ``consume`` before a backend is
                handed out.
        """
        self._pool_manager = pool_manager
        self._rate_limiter = rate_limiter

    def pick_backend(self, canonical_model: str) -> Optional[Backend]:
        """Pick the best available backend for a model.

        Pools are walked in ``POOL_ORDER`` (primary first, then fallback).
        Within a pool, candidates are tried in the order the pool manager
        returns them, and the first one the rate limiter accepts is returned
        immediately, so later candidates are never passed to ``consume``.

        Args:
            canonical_model: Model name forwarded unchanged to the pool
                manager when asking for candidate backends.

        Returns:
            The first backend accepted by the rate limiter, or ``None`` when
            no pool produced an acceptable backend.
        """
        for pool in self.POOL_ORDER:
            # Materialize once so the emptiness check below stays reliable
            # even if the pool manager hands back a lazy iterable.
            candidates = list(
                self._pool_manager.get_available_backends(
                    canonical_model, pool=pool
                )
            )

            for backend in candidates:
                if self._rate_limiter.consume(backend.id, backend.rpm_limit):
                    return backend
                # Rejected by the rate limiter: note it and try the next one.
                logger.debug(
                    "backend_rate_limited",
                    backend_id=backend.id,
                    pool=pool,
                    model=canonical_model,
                )

            # Reaching this point means the pool produced nothing usable.
            # Distinguish "no candidates at all" from "every candidate was
            # rejected by the rate limiter" in the logs.
            if not candidates:
                logger.debug("pool_exhausted", pool=pool, model=canonical_model)
            else:
                logger.debug("pool_rpm_exhausted", pool=pool, model=canonical_model)

        return None

    def get_all_pools_exhausted_info(self, canonical_model: str) -> bool:
        """Check if ALL pools are exhausted for a model.

        Args:
            canonical_model: Model name forwarded to the pool manager.

        Returns:
            ``True`` when the pool manager's ``is_any_pool_available`` result
            for the model is falsy, ``False`` otherwise.
        """
        return not self._pool_manager.is_any_pool_available(canonical_model)