2d95ae50a5
- proxy.py: Fix route path duplication (v1/v1 → v1) when upstream base URL already includes /v1 prefix - proxy.py: Fix _emergency_count global variable for metrics tracking - server.py: Add logging.basicConfig(level=logging.INFO) for structlog INFO-level log visibility - Full multi-pool routing: primary → fallback → emergency passthrough - Per-backend rate limiting with RPM-based token bucket - 429 cooldown mechanism with automatic recovery - Dashboard with SSE real-time monitoring - Admin API for backend/pool/config management - SQLite-backed persistence with encrypted API key storage - Docker compose deployment Deployed by opengineer 严维序 as BIZ-50 Step 4
62 lines
2.1 KiB
Python
62 lines
2.1 KiB
Python
"""Model → Backend routing logic for Sidecar V2."""
|
|
|
|
import structlog
|
|
from typing import Optional
|
|
|
|
from storage.models import Backend
|
|
from pool_manager import PoolManager
|
|
from rate_limiter import PerBackendRateLimiter
|
|
|
|
# Module-level structlog logger; Router emits its debug events through it.
logger = structlog.get_logger("sidecar_v2.router")
|
|
|
|
|
|
class Router:
    """Select a backend for a model by walking the pools in priority order.

    Selection order:
      1. Ask the pool manager for the backends available to the model in
         the "primary" pool.
      2. Offer each of them, in the order received, to the rate limiter;
         the first one the limiter accepts is returned.
      3. If none is accepted, repeat the same steps for the "fallback" pool.
      4. If both pools yield nothing, return None and let the caller decide
         what to do next.
    """

    def __init__(self, pool_manager: PoolManager, rate_limiter: PerBackendRateLimiter):
        # Collaborators are only stored here; no calls are made on them
        # until a backend is actually requested.
        self._rate_limiter = rate_limiter
        self._pool_manager = pool_manager

    def pick_backend(self, canonical_model: str) -> Optional[Backend]:
        """Return the first backend the rate limiter accepts, or None.

        Pools are tried in the order "primary", then "fallback". Inside a
        pool, backends are tried in the order the pool manager returns them.
        ``consume`` is invoked once per backend tried, so a successful pick
        has already been counted by the rate limiter when this returns.

        Args:
            canonical_model: Model identifier passed through to the pool
                manager and attached to every debug event.

        Returns:
            The accepted backend, or None when neither pool produced one.
        """
        for pool_name in ("primary", "fallback"):
            candidates = self._pool_manager.get_available_backends(
                canonical_model, pool=pool_name
            )

            for candidate in candidates:
                accepted = self._rate_limiter.consume(
                    candidate.id, candidate.rpm_limit
                )
                if not accepted:
                    # Limiter refused this backend; record it and move on.
                    logger.debug(
                        "backend_rate_limited",
                        backend_id=candidate.id,
                        pool=pool_name,
                        model=canonical_model,
                    )
                    continue
                return candidate

            # Reaching this point means the pool gave us nothing usable.
            # Tell apart "no backends at all" from "all refused by limiter".
            event = "pool_rpm_exhausted" if candidates else "pool_exhausted"
            logger.debug(event, pool=pool_name, model=canonical_model)

        return None

    def get_all_pools_exhausted_info(self, canonical_model: str) -> bool:
        """Return True when the pool manager reports no available pool.

        This is the negation of ``is_any_pool_available`` for the model.
        """
        any_available = self._pool_manager.is_any_pool_available(canonical_model)
        return not any_available