"""Model → Backend routing logic for Sidecar V2."""

import structlog
from typing import Optional

from storage.models import Backend
from pool_manager import PoolManager
from rate_limiter import PerBackendRateLimiter

logger = structlog.get_logger("sidecar_v2.router")


class Router:
    """Choose a backend for a model using pool order and per-backend RPM checks.

    Selection order used by ``pick_backend``:

    1. Ask the pool manager for the model's available backends in the
       "primary" pool.
    2. Walk them in the order returned and hand back the first one for which
       the rate limiter's ``consume`` call returns a truthy value (treated
       here as "this backend still has RPM budget").
    3. If none qualifies, repeat the same steps against the "fallback" pool.
    4. If both pools come up empty, return ``None``; the caller is
       responsible for any emergency handling.
    """

    def __init__(self, pool_manager: PoolManager, rate_limiter: PerBackendRateLimiter):
        """Store the collaborators used for backend lookup and rate limiting.

        Args:
            pool_manager: Source of available backends per model and pool.
            rate_limiter: Per-backend limiter consulted before a backend is
                handed out.
        """
        self._pool_manager = pool_manager
        self._rate_limiter = rate_limiter

    def pick_backend(self, canonical_model: str) -> Optional[Backend]:
        """Return the first usable backend for a model, or ``None``.

        The "primary" pool is tried before the "fallback" pool. Within a
        pool, a backend whose rate-limit check fails is skipped (with a debug
        log) and the next one is tried. After each pool that yields nothing,
        one debug event records whether the pool had no available backends at
        all or only rate-limited ones.

        Args:
            canonical_model: Model identifier passed to the pool manager.

        Returns:
            The selected backend, or ``None`` when neither pool yields one.
        """
        for pool_name in ("primary", "fallback"):
            candidates = self._pool_manager.get_available_backends(
                canonical_model, pool=pool_name
            )

            for candidate in candidates:
                has_budget = self._rate_limiter.consume(
                    candidate.id, candidate.rpm_limit
                )
                if has_budget:
                    return candidate

                # This candidate is at its limit; note it and move on.
                logger.debug(
                    "backend_rate_limited",
                    backend_id=candidate.id,
                    pool=pool_name,
                    model=canonical_model,
                )

            # Nothing was returned from this pool: distinguish "no candidates
            # at all" from "candidates existed but all were rate limited".
            event = "pool_rpm_exhausted" if candidates else "pool_exhausted"
            logger.debug(event, pool=pool_name, model=canonical_model)

        return None

    def pick_primary_backend(self, canonical_model: str) -> Optional[Backend]:
        """Return a usable backend from the "primary" pool only, or ``None``.

        Unlike ``pick_backend`` this never consults the fallback pool and
        emits no log events. It is used by Primary-Wait: when all primary
        backends are cooling, the caller waits and retries primary
        exclusively before falling through to fallback.

        Args:
            canonical_model: Model identifier passed to the pool manager.

        Returns:
            The first primary backend that passes the rate-limit check, or
            ``None`` if there is none.
        """
        candidates = self._pool_manager.get_available_backends(
            canonical_model, pool="primary"
        )
        for candidate in candidates:
            if not self._rate_limiter.consume(candidate.id, candidate.rpm_limit):
                continue
            return candidate
        return None

    def get_all_pools_exhausted_info(self, canonical_model: str) -> bool:
        """Report whether every pool is exhausted for a model.

        Args:
            canonical_model: Model identifier passed to the pool manager.

        Returns:
            ``True`` when the pool manager reports that no pool is available
            for the model, ``False`` otherwise.
        """
        any_available = self._pool_manager.is_any_pool_available(canonical_model)
        return not any_available