feat: Sidecar V2 — multi-pool provider proxy with 429 cooldown
- proxy.py: Fix route path duplication (v1/v1 → v1) when the upstream base URL already includes the /v1 prefix
- proxy.py: Fix _emergency_count global variable for metrics tracking
- server.py: Add logging.basicConfig(level=logging.INFO) for structlog INFO-level log visibility
- Full multi-pool routing: primary → fallback → emergency passthrough
- Per-backend rate limiting with RPM-based token bucket
- 429 cooldown mechanism with automatic recovery
- Dashboard with SSE real-time monitoring
- Admin API for backend/pool/config management
- SQLite-backed persistence with encrypted API key storage
- Docker Compose deployment

Deployed by opengineer 严维序 as BIZ-50 Step 4
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
"""Model → Backend routing logic for Sidecar V2."""
|
||||
|
||||
import structlog
|
||||
from typing import Optional
|
||||
|
||||
from storage.models import Backend
|
||||
from pool_manager import PoolManager
|
||||
from rate_limiter import PerBackendRateLimiter
|
||||
|
||||
# Shared structlog logger for this module, created with the identifier
# "sidecar_v2.router".
logger = structlog.get_logger("sidecar_v2.router")
|
||||
|
||||
|
||||
class Router:
    """Choose a backend for a model by walking the pools in priority order.

    Selection order:
        1. Ask the pool manager for the backends it reports as available
           for the model in the "primary" pool.
        2. Return the first of those whose rate-limiter ``consume`` call
           succeeds; backends that fail the check are skipped.
        3. If none is returned, repeat both steps for the "fallback" pool.
        4. If neither pool yields a backend, return ``None`` and leave the
           next step (e.g. emergency handling) to the caller.
    """

    def __init__(self, pool_manager: PoolManager, rate_limiter: PerBackendRateLimiter):
        """Store the collaborators used for backend lookup and rate limiting."""
        self._rate_limiter = rate_limiter
        self._pool_manager = pool_manager

    def pick_backend(self, canonical_model: str) -> Optional[Backend]:
        """Return the first usable backend for ``canonical_model``.

        The "primary" pool is consulted before the "fallback" pool. Inside
        a pool, candidates are tried in the order the pool manager returns
        them, and a candidate is used only if the rate limiter's
        ``consume(backend.id, backend.rpm_limit)`` call returns a truthy
        value.

        Returns:
            The selected backend, or ``None`` when no pool produced one.
        """
        for pool_name in ("primary", "fallback"):
            candidates = self._pool_manager.get_available_backends(
                canonical_model, pool=pool_name
            )

            for candidate in candidates:
                admitted = self._rate_limiter.consume(
                    candidate.id, candidate.rpm_limit
                )
                if not admitted:
                    # Rate limiter refused this backend; move on to the next.
                    logger.debug(
                        "backend_rate_limited",
                        backend_id=candidate.id,
                        pool=pool_name,
                        model=canonical_model,
                    )
                    continue
                return candidate

            # Reaching this point means the pool gave us nothing usable:
            # either it had no candidates at all, or every candidate was
            # refused by the rate limiter.
            event = "pool_rpm_exhausted" if candidates else "pool_exhausted"
            logger.debug(event, pool=pool_name, model=canonical_model)

        return None

    def get_all_pools_exhausted_info(self, canonical_model: str) -> bool:
        """Return True when the pool manager reports no available pool for the model."""
        any_available = self._pool_manager.is_any_pool_available(canonical_model)
        return not any_available
|
||||
Reference in New Issue
Block a user