"""Per-backend rate limiter using token bucket algorithm.""" import threading import time from typing import Any class PerBackendRateLimiter: """Manages independent token buckets for each backend. Thread-safe. Each backend gets its own bucket with configurable RPM. """ def __init__(self, refill_interval_ms: int = 50): self._buckets: dict[str, _TokenBucket] = {} self._lock = threading.Lock() self._refill_interval_ms = refill_interval_ms def ensure_bucket(self, backend_id: str, rpm_limit: int) -> None: """Create or update a bucket for a backend.""" with self._lock: if backend_id in self._buckets: existing = self._buckets[backend_id] existing.update_rate(rpm_limit) else: self._buckets[backend_id] = _TokenBucket( rate=rpm_limit / 60.0, capacity=max(rpm_limit, 1), ) def remove_bucket(self, backend_id: str) -> None: """Remove a backend's bucket.""" with self._lock: self._buckets.pop(backend_id, None) def consume(self, backend_id: str, rpm_limit: int, tokens: int = 1) -> bool: """Try to consume tokens for a backend. Returns True if allowed. Auto-creates the bucket if needed. """ self.ensure_bucket(backend_id, rpm_limit) with self._lock: bucket = self._buckets.get(backend_id) if bucket is None: return False return bucket.consume(tokens) def get_status(self, backend_id: str) -> dict[str, Any] | None: """Get bucket status for a backend.""" with self._lock: bucket = self._buckets.get(backend_id) if bucket is None: return None return bucket.get_status() def get_all_status(self) -> dict[str, dict[str, Any]]: """Get status of all buckets.""" with self._lock: return {bid: b.get_status() for bid, b in self._buckets.items()} class _TokenBucket: """Internal token bucket with refill.""" def __init__(self, rate: float, capacity: int): self._rate = float(rate) self._capacity = int(capacity) self._tokens = float(capacity) self._last_refill = time.monotonic() self._lock = threading.Lock() def _refill(self) -> None: now = time.monotonic() elapsed = now - self._last_refill if elapsed > 0 and self._rate > 0: self._tokens = min(self._tokens + elapsed * self._rate, float(self._capacity)) self._last_refill = now def consume(self, tokens: int = 1) -> bool: if tokens <= 0: return True with self._lock: self._refill() if self._tokens >= tokens: self._tokens -= tokens return True return False def update_rate(self, rpm_limit: int) -> None: new_rate = rpm_limit / 60.0 with self._lock: self._refill() self._rate = new_rate self._capacity = max(rpm_limit, 1) self._tokens = min(self._tokens, float(self._capacity)) def get_status(self) -> dict[str, Any]: with self._lock: self._refill() rate_per_minute = self._rate * 60.0 utilization = 0.0 if self._capacity == 0 else ( (self._capacity - self._tokens) / self._capacity ) return { "tokens": round(self._tokens, 2), "capacity": self._capacity, "rate_per_minute": round(rate_per_minute, 1), "utilization": round(utilization, 4), }