611ebd11a8
BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
62 lines
2.1 KiB
Python
62 lines
2.1 KiB
Python
"""Model → Backend routing logic for Sidecar V2."""
|
|
|
|
import structlog
|
|
from typing import Optional
|
|
|
|
from storage.models import Backend
|
|
from pool_manager import PoolManager
|
|
from rate_limiter import PerBackendRateLimiter
|
|
|
|
# Module-level structlog logger, requested under the name "sidecar_v2.router".
logger = structlog.get_logger("sidecar_v2.router")
|
|
|
|
|
|
class Router:
    """Select a backend to serve a request for a given model.

    Selection order:
      * Pools are visited in a fixed priority order: "primary", then
        "fallback".
      * Within a pool, candidates come from the pool manager's
        ``get_available_backends`` and are tried in the order returned.
      * The first candidate for which the rate limiter's ``consume``
        returns a truthy value is selected.
      * When neither pool yields a backend, ``None`` is returned and the
        caller decides how to handle it.
    """

    def __init__(self, pool_manager: PoolManager, rate_limiter: PerBackendRateLimiter):
        """Store the collaborators used for backend selection.

        Args:
            pool_manager: Source of candidate backends per pool.
            rate_limiter: Per-backend limiter consulted for each candidate.
        """
        self._rate_limiter = rate_limiter
        self._pool_manager = pool_manager

    def pick_backend(self, canonical_model: str) -> Optional[Backend]:
        """Return the first usable backend for ``canonical_model``.

        Each pool is queried in priority order. A candidate is accepted as
        soon as the rate limiter's ``consume`` call succeeds for it;
        rejected candidates are logged and skipped.

        NOTE(review): the check goes through ``consume``, which presumably
        spends one unit of the backend's RPM budget on success — confirm
        against the rate limiter implementation.

        Args:
            canonical_model: Model name passed to the pool manager.

        Returns:
            The selected backend, or ``None`` if no pool produced one.
        """
        for pool_name in ("primary", "fallback"):
            candidates = self._pool_manager.get_available_backends(
                canonical_model, pool=pool_name
            )

            for candidate in candidates:
                granted = self._rate_limiter.consume(
                    candidate.id, candidate.rpm_limit
                )
                if not granted:
                    # Limiter refused this candidate; record it and move on.
                    logger.debug(
                        "backend_rate_limited",
                        backend_id=candidate.id,
                        pool=pool_name,
                        model=canonical_model,
                    )
                    continue
                return candidate

            # Nothing was selected from this pool. Distinguish "no candidates
            # at all" from "candidates existed but the limiter refused all".
            event = "pool_rpm_exhausted" if candidates else "pool_exhausted"
            logger.debug(event, pool=pool_name, model=canonical_model)

        return None

    def get_all_pools_exhausted_info(self, canonical_model: str) -> bool:
        """Report whether no pool is available for ``canonical_model``.

        Returns:
            The negation of the pool manager's ``is_any_pool_available``
            result for the model.
        """
        any_available = self._pool_manager.is_any_pool_available(canonical_model)
        return not any_available