feat: Sidecar V2 — multi-pool provider proxy with 429 cooldown

- proxy.py: Fix route path duplication (v1/v1 → v1) when the upstream base URL already includes the /v1 prefix
- proxy.py: Fix the _emergency_count global variable used for metrics tracking
- server.py: Add logging.basicConfig(level=logging.INFO) so structlog INFO-level logs are visible
- Full multi-pool routing: primary → fallback → emergency passthrough
- Per-backend rate limiting with an RPM-based token bucket
- 429 cooldown mechanism with automatic recovery
- Dashboard with SSE real-time monitoring
- Admin API for backend/pool/config management
- SQLite-backed persistence with encrypted API key storage
- Docker Compose deployment

Deployed by opengineer 严维序 as BIZ-50 Step 4
2026-06-25 21:20:32 +08:00
commit 2d95ae50a5
26 changed files with 3625 additions and 0 deletions
@@ -0,0 +1,111 @@
+"""Per-backend rate limiter using token bucket algorithm."""
+
+import threading
+import time
+from typing import Any
+
+
+class PerBackendRateLimiter:
+    """Manages independent token buckets for each backend.
+
+    Thread-safe. Each backend gets its own bucket with configurable RPM.
+    """
+
+    def __init__(self, refill_interval_ms: int = 50):
+        self._buckets: dict[str, _TokenBucket] = {}
+        self._lock = threading.Lock()
+        self._refill_interval_ms = refill_interval_ms
+
+    def ensure_bucket(self, backend_id: str, rpm_limit: int) -> None:
+        """Create or update a bucket for a backend."""
+        with self._lock:
+            if backend_id in self._buckets:
+                existing = self._buckets[backend_id]
+                existing.update_rate(rpm_limit)
+            else:
+                self._buckets[backend_id] = _TokenBucket(
+                    rate=rpm_limit / 60.0,
+                    capacity=max(rpm_limit, 1),
+                )
+
+    def remove_bucket(self, backend_id: str) -> None:
+        """Remove a backend's bucket."""
+        with self._lock:
+            self._buckets.pop(backend_id, None)
+
+    def consume(self, backend_id: str, rpm_limit: int, tokens: int = 1) -> bool:
+        """Try to consume tokens for a backend. Returns True if allowed.
+
+        Auto-creates the bucket if needed.
+        """
+        self.ensure_bucket(backend_id, rpm_limit)
+
+        with self._lock:
+            bucket = self._buckets.get(backend_id)
+            if bucket is None:
+                return False
+
+        return bucket.consume(tokens)
+
+    def get_status(self, backend_id: str) -> dict[str, Any] | None:
+        """Get bucket status for a backend."""
+        with self._lock:
+            bucket = self._buckets.get(backend_id)
+            if bucket is None:
+                return None
+            return bucket.get_status()
+
+    def get_all_status(self) -> dict[str, dict[str, Any]]:
+        """Get status of all buckets."""
+        with self._lock:
+            return {bid: b.get_status() for bid, b in self._buckets.items()}
+
+
+class _TokenBucket:
+    """Internal token bucket with refill."""
+
+    def __init__(self, rate: float, capacity: int):
+        self._rate = float(rate)
+        self._capacity = int(capacity)
+        self._tokens = float(capacity)
+        self._last_refill = time.monotonic()
+        self._lock = threading.Lock()
+
+    def _refill(self) -> None:
+        now = time.monotonic()
+        elapsed = now - self._last_refill
+        if elapsed > 0 and self._rate > 0:
+            self._tokens = min(self._tokens + elapsed * self._rate, float(self._capacity))
+        self._last_refill = now
+
+    def consume(self, tokens: int = 1) -> bool:
+        if tokens <= 0:
+            return True
+        with self._lock:
+            self._refill()
+            if self._tokens >= tokens:
+                self._tokens -= tokens
+                return True
+            return False
+
+    def update_rate(self, rpm_limit: int) -> None:
+        new_rate = rpm_limit / 60.0
+        with self._lock:
+            self._refill()
+            self._rate = new_rate
+            self._capacity = max(rpm_limit, 1)
+            self._tokens = min(self._tokens, float(self._capacity))
+
+    def get_status(self) -> dict[str, Any]:
+        with self._lock:
+            self._refill()
+            rate_per_minute = self._rate * 60.0
+            utilization = 0.0 if self._capacity == 0 else (
+                (self._capacity - self._tokens) / self._capacity
+            )
+            return {
+                "tokens": round(self._tokens, 2),
+                "capacity": self._capacity,
+                "rate_per_minute": round(rate_per_minute, 1),
+                "utilization": round(utilization, 4),
+            }