611ebd11a8
BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
111 lines
3.7 KiB
Python
111 lines
3.7 KiB
Python
"""Per-backend rate limiter using token bucket algorithm."""
|
|
|
|
import threading
|
|
import time
|
|
from typing import Any
|
|
|
|
|
|
class PerBackendRateLimiter:
|
|
"""Manages independent token buckets for each backend.
|
|
|
|
Thread-safe. Each backend gets its own bucket with configurable RPM.
|
|
"""
|
|
|
|
def __init__(self, refill_interval_ms: int = 50):
|
|
self._buckets: dict[str, _TokenBucket] = {}
|
|
self._lock = threading.Lock()
|
|
self._refill_interval_ms = refill_interval_ms
|
|
|
|
def ensure_bucket(self, backend_id: str, rpm_limit: int) -> None:
|
|
"""Create or update a bucket for a backend."""
|
|
with self._lock:
|
|
if backend_id in self._buckets:
|
|
existing = self._buckets[backend_id]
|
|
existing.update_rate(rpm_limit)
|
|
else:
|
|
self._buckets[backend_id] = _TokenBucket(
|
|
rate=rpm_limit / 60.0,
|
|
capacity=max(rpm_limit, 1),
|
|
)
|
|
|
|
def remove_bucket(self, backend_id: str) -> None:
|
|
"""Remove a backend's bucket."""
|
|
with self._lock:
|
|
self._buckets.pop(backend_id, None)
|
|
|
|
def consume(self, backend_id: str, rpm_limit: int, tokens: int = 1) -> bool:
|
|
"""Try to consume tokens for a backend. Returns True if allowed.
|
|
|
|
Auto-creates the bucket if needed.
|
|
"""
|
|
self.ensure_bucket(backend_id, rpm_limit)
|
|
|
|
with self._lock:
|
|
bucket = self._buckets.get(backend_id)
|
|
if bucket is None:
|
|
return False
|
|
|
|
return bucket.consume(tokens)
|
|
|
|
def get_status(self, backend_id: str) -> dict[str, Any] | None:
|
|
"""Get bucket status for a backend."""
|
|
with self._lock:
|
|
bucket = self._buckets.get(backend_id)
|
|
if bucket is None:
|
|
return None
|
|
return bucket.get_status()
|
|
|
|
def get_all_status(self) -> dict[str, dict[str, Any]]:
|
|
"""Get status of all buckets."""
|
|
with self._lock:
|
|
return {bid: b.get_status() for bid, b in self._buckets.items()}
|
|
|
|
|
|
class _TokenBucket:
    """Internal token bucket with lazy, time-based refill.

    Tokens are not added by a background timer; the balance is brought up
    to date from ``time.monotonic()`` each time the bucket is consumed
    from, re-rated, or inspected. All state changes happen under the
    bucket's own lock.
    """

    def __init__(self, rate: float, capacity: int):
        """Create a full bucket.

        Args:
            rate: Refill rate in tokens per second.
            capacity: Maximum number of tokens the bucket can hold.
        """
        self._rate = float(rate)
        self._capacity = int(capacity)
        # Seed from the normalised capacity so the balance never starts
        # above the limit it is later clamped to.
        self._tokens = float(self._capacity)
        self._last_refill = time.monotonic()
        self._lock = threading.Lock()

    def _refill(self) -> None:
        """Credit tokens for the time elapsed since the last refill.

        The caller must hold ``self._lock``.
        """
        now = time.monotonic()
        elapsed = now - self._last_refill
        # Always advance the timestamp, even when nothing is credited.
        # Otherwise time spent at a zero rate would be credited
        # retroactively at the new rate after a later update_rate().
        self._last_refill = now
        if elapsed > 0 and self._rate > 0:
            self._tokens = min(
                self._tokens + elapsed * self._rate,
                float(self._capacity),
            )

    def consume(self, tokens: int = 1) -> bool:
        """Take *tokens* from the bucket if available.

        Returns:
            True if the tokens were taken (or *tokens* is not positive),
            False if the current balance is too low.
        """
        if tokens <= 0:
            return True
        with self._lock:
            self._refill()
            if self._tokens >= tokens:
                self._tokens -= tokens
                return True
            return False

    def update_rate(self, rpm_limit: int) -> None:
        """Switch the bucket to a new requests-per-minute limit.

        Time already elapsed is credited at the old rate first; the
        balance is then clamped to the new capacity.
        """
        new_rate = rpm_limit / 60.0
        with self._lock:
            self._refill()
            self._rate = new_rate
            self._capacity = max(rpm_limit, 1)
            self._tokens = min(self._tokens, float(self._capacity))

    def get_status(self) -> dict[str, Any]:
        """Return a snapshot of the bucket after bringing it up to date.

        Returns:
            A dict with the current token balance, the capacity, the refill
            rate per minute, and the used fraction of the capacity.
        """
        with self._lock:
            self._refill()
            rate_per_minute = self._rate * 60.0
            # Guard the division for a bucket built with zero capacity.
            utilization = 0.0 if self._capacity == 0 else (
                (self._capacity - self._tokens) / self._capacity
            )
            return {
                "tokens": round(self._tokens, 2),
                "capacity": self._capacity,
                "rate_per_minute": round(rate_per_minute, 1),
                "utilization": round(utilization, 4),
            }