feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI
BIZ-52 Step 3 implementation (开发实现):
- storage: backend/usage/cooldown/config CRUD with SQLite WAL
- crypto: AES-256-GCM API key encryption
- pool_manager: primary/fallback pool routing
- cooldown_manager: 429 exponential backoff cooldown
- rate_limiter: per-backend token bucket RPM control
- router: model → backend routing with pool priority
- proxy: multi-pool request forwarding with retry
- server: FastAPI admin API + OpenAI-compatible proxy + SSE
- dashboard: WebUI with provider CRUD, stats, charts

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,111 @@
|
||||
"""Per-backend rate limiter using token bucket algorithm."""
|
||||
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
|
||||
class PerBackendRateLimiter:
|
||||
"""Manages independent token buckets for each backend.
|
||||
|
||||
Thread-safe. Each backend gets its own bucket with configurable RPM.
|
||||
"""
|
||||
|
||||
def __init__(self, refill_interval_ms: int = 50):
|
||||
self._buckets: dict[str, _TokenBucket] = {}
|
||||
self._lock = threading.Lock()
|
||||
self._refill_interval_ms = refill_interval_ms
|
||||
|
||||
def ensure_bucket(self, backend_id: str, rpm_limit: int) -> None:
|
||||
"""Create or update a bucket for a backend."""
|
||||
with self._lock:
|
||||
if backend_id in self._buckets:
|
||||
existing = self._buckets[backend_id]
|
||||
existing.update_rate(rpm_limit)
|
||||
else:
|
||||
self._buckets[backend_id] = _TokenBucket(
|
||||
rate=rpm_limit / 60.0,
|
||||
capacity=max(rpm_limit, 1),
|
||||
)
|
||||
|
||||
def remove_bucket(self, backend_id: str) -> None:
|
||||
"""Remove a backend's bucket."""
|
||||
with self._lock:
|
||||
self._buckets.pop(backend_id, None)
|
||||
|
||||
def consume(self, backend_id: str, rpm_limit: int, tokens: int = 1) -> bool:
|
||||
"""Try to consume tokens for a backend. Returns True if allowed.
|
||||
|
||||
Auto-creates the bucket if needed.
|
||||
"""
|
||||
self.ensure_bucket(backend_id, rpm_limit)
|
||||
|
||||
with self._lock:
|
||||
bucket = self._buckets.get(backend_id)
|
||||
if bucket is None:
|
||||
return False
|
||||
|
||||
return bucket.consume(tokens)
|
||||
|
||||
def get_status(self, backend_id: str) -> dict[str, Any] | None:
|
||||
"""Get bucket status for a backend."""
|
||||
with self._lock:
|
||||
bucket = self._buckets.get(backend_id)
|
||||
if bucket is None:
|
||||
return None
|
||||
return bucket.get_status()
|
||||
|
||||
def get_all_status(self) -> dict[str, dict[str, Any]]:
|
||||
"""Get status of all buckets."""
|
||||
with self._lock:
|
||||
return {bid: b.get_status() for bid, b in self._buckets.items()}
|
||||
|
||||
|
||||
class _TokenBucket:
    """Token bucket that refills lazily from a monotonic clock.

    Tokens accrue continuously at ``rate`` tokens per second, up to
    ``capacity``. There is no background timer: the balance is brought up to
    date at the start of every ``consume``, ``update_rate`` and ``get_status``
    call. An internal lock guards all state.
    """

    def __init__(self, rate: float, capacity: int):
        """Start with a full bucket.

        Args:
            rate: Refill speed in tokens per second; values <= 0 disable
                refilling.
            capacity: Maximum number of tokens held (converted with ``int``).
        """
        self._rate = float(rate)
        self._capacity = int(capacity)
        # Derive the starting balance from the normalised capacity so the
        # balance can never exceed the capacity for non-integer input.
        self._tokens = float(self._capacity)
        self._last_refill = time.monotonic()
        self._lock = threading.Lock()

    def _refill(self) -> None:
        """Credit tokens for the time elapsed since the previous refill.

        The caller must hold ``self._lock``.
        """
        now = time.monotonic()
        elapsed = now - self._last_refill
        # Advance the timestamp unconditionally. If it only moved when
        # tokens were actually added, time spent at rate 0 would be credited
        # retroactively at the new rate after update_rate().
        self._last_refill = now
        if elapsed > 0 and self._rate > 0:
            self._tokens = min(
                self._tokens + elapsed * self._rate,
                float(self._capacity),
            )

    def consume(self, tokens: int = 1) -> bool:
        """Take ``tokens`` from the bucket if that many are available.

        Args:
            tokens: Number of tokens requested; values <= 0 succeed without
                touching the bucket.

        Returns:
            True if the tokens were deducted, False otherwise (nothing is
            deducted on failure).
        """
        if tokens <= 0:
            return True
        with self._lock:
            self._refill()
            if self._tokens >= tokens:
                self._tokens -= tokens
                return True
            return False

    def update_rate(self, rpm_limit: int) -> None:
        """Switch the bucket to a new requests-per-minute limit.

        Time elapsed so far is settled at the old rate first; then the rate
        becomes ``rpm_limit / 60`` tokens per second, the capacity becomes
        ``max(rpm_limit, 1)``, and the balance is clamped to the new capacity.
        """
        new_rate = rpm_limit / 60.0
        with self._lock:
            self._refill()
            self._rate = new_rate
            self._capacity = max(rpm_limit, 1)
            self._tokens = min(self._tokens, float(self._capacity))

    def get_status(self) -> dict[str, Any]:
        """Return a snapshot of the bucket after refilling it.

        Returns:
            A dict with ``tokens`` (balance, 2 decimals), ``capacity``,
            ``rate_per_minute`` (1 decimal) and ``utilization`` (fraction of
            capacity currently used, 4 decimals; 0.0 when capacity is 0).
        """
        with self._lock:
            self._refill()
            rate_per_minute = self._rate * 60.0
            utilization = 0.0 if self._capacity == 0 else (
                (self._capacity - self._tokens) / self._capacity
            )
            return {
                "tokens": round(self._tokens, 2),
                "capacity": self._capacity,
                "rate_per_minute": round(rate_per_minute, 1),
                "utilization": round(utilization, 4),
            }
|
||||
Reference in New Issue
Block a user