feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI

BIZ-52 Step3 开发实现:

- storage: backend/usage/cooldown/config CRUD with SQLite WAL
- crypto: AES-256-GCM API key encryption
- pool_manager: primary/fallback pool routing
- cooldown_manager: 429 exponential backoff cooldown
- rate_limiter: per-backend token bucket RPM control
- router: model → backend routing with pool priority
- proxy: multi-pool request forwarding with retry
- server: FastAPI admin API + OpenAI-compatible proxy + SSE
- dashboard: WebUI with provider CRUD, stats, charts

Co-authored-by: multica-agent <github@multica.ai>
2026-06-25 16:39:01 +08:00
parent 4fd89b038d
commit 611ebd11a8
19 changed files with 3034 additions and 0 deletions
@@ -0,0 +1,155 @@
+"""Usage logging and daily statistics aggregation."""
+
+import time
+from typing import Optional
+
+from storage.db import get_connection, generate_id
+
+
+def record_usage(
+    backend_id: str,
+    model: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+    cost: float,
+    latency_ms: int,
+    ttft_ms: int = 0,
+    is_error: bool = False,
+) -> None:
+    """Record a single request's usage, hour-bucketed with UPSERT."""
+    hour_bucket = time.strftime("%Y-%m-%dT%H:00:00Z", time.gmtime())
+    uid = generate_id("use")
+
+    with get_connection() as conn:
+        # Try update existing hour bucket
+        cursor = conn.execute(
+            """UPDATE backend_usage_logs SET
+               prompt_tokens = prompt_tokens + ?,
+               completion_tokens = completion_tokens + ?,
+               total_tokens = total_tokens + ?,
+               cost = cost + ?,
+               request_count = request_count + 1,
+               error_count = error_count + ?,
+               avg_latency_ms = CAST((avg_latency_ms * request_count + ?) / (request_count + 1) AS INTEGER),
+               ttft_ms = CASE WHEN ? > 0 THEN CAST((ttft_ms * request_count + ?) / (request_count + 1) AS INTEGER) ELSE ttft_ms END
+               WHERE backend_id = ? AND hour_bucket = ?""",
+            (
+                prompt_tokens, completion_tokens,
+                prompt_tokens + completion_tokens,
+                cost,
+                1 if is_error else 0,
+                latency_ms,
+                ttft_ms, ttft_ms,
+                backend_id, hour_bucket,
+            ),
+        )
+        if cursor.rowcount == 0:
+            # Insert new hour bucket
+            conn.execute(
+                """INSERT INTO backend_usage_logs
+                   (id, backend_id, model, prompt_tokens, completion_tokens,
+                    total_tokens, cost, request_count, error_count,
+                    avg_latency_ms, ttft_ms, hour_bucket)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                (
+                    uid, backend_id, model,
+                    prompt_tokens, completion_tokens,
+                    prompt_tokens + completion_tokens,
+                    cost, 1, 1 if is_error else 0,
+                    latency_ms, ttft_ms, hour_bucket,
+                ),
+            )
+        conn.commit()
+
+
+def get_hourly_usage(
+    backend_id: Optional[str] = None,
+    since: Optional[str] = None,
+    limit: int = 168,
+) -> list[dict]:
+    """Get hourly usage data, optionally filtered by backend and time range."""
+    with get_connection() as conn:
+        if backend_id and since:
+            rows = conn.execute(
+                """SELECT * FROM backend_usage_logs
+                   WHERE backend_id = ? AND hour_bucket >= ?
+                   ORDER BY hour_bucket DESC LIMIT ?""",
+                (backend_id, since, limit),
+            ).fetchall()
+        elif backend_id:
+            rows = conn.execute(
+                """SELECT * FROM backend_usage_logs
+                   WHERE backend_id = ? ORDER BY hour_bucket DESC LIMIT ?""",
+                (backend_id, limit),
+            ).fetchall()
+        elif since:
+            rows = conn.execute(
+                """SELECT * FROM backend_usage_logs
+                   WHERE hour_bucket >= ? ORDER BY hour_bucket DESC LIMIT ?""",
+                (since, limit),
+            ).fetchall()
+        else:
+            rows = conn.execute(
+                """SELECT * FROM backend_usage_logs
+                   ORDER BY hour_bucket DESC LIMIT ?""",
+                (limit,),
+            ).fetchall()
+    return [dict(row) for row in rows]
+
+
+def get_total_stats() -> dict:
+    """Get aggregate stats across all backends."""
+    with get_connection() as conn:
+        row = conn.execute(
+            """SELECT
+               SUM(request_count) as total_requests,
+               SUM(error_count) as total_errors,
+               SUM(total_tokens) as total_tokens,
+               SUM(prompt_tokens) as total_prompt_tokens,
+               SUM(completion_tokens) as total_completion_tokens,
+               SUM(cost) as total_cost
+               FROM backend_usage_logs"""
+        ).fetchone()
+    if row is None:
+        return {
+            "total_requests": 0, "total_errors": 0,
+            "total_tokens": 0, "total_prompt_tokens": 0,
+            "total_completion_tokens": 0, "total_cost": 0.0,
+        }
+    return dict(row)
+
+
+def aggregate_daily_stats(date: str) -> None:
+    """Aggregate hourly usage into daily stats table."""
+    with get_connection() as conn:
+        # Aggregate per pool
+        conn.execute("""DELETE FROM daily_stats WHERE date = ?""", (date,))
+        conn.execute(
+            """INSERT INTO daily_stats (id, date, pool, total_requests,
+               total_errors, total_tokens, total_cost, unique_backends)
+               SELECT
+                   ? || '-' || b.pool,
+                   ?,
+                   b.pool,
+                   SUM(u.request_count),
+                   SUM(u.error_count),
+                   SUM(u.total_tokens),
+                   SUM(u.cost),
+                   COUNT(DISTINCT u.backend_id)
+               FROM backend_usage_logs u
+               JOIN backends b ON u.backend_id = b.id
+               WHERE u.hour_bucket LIKE ?
+               GROUP BY b.pool""",
+            (generate_id("day"), date, date + "%"),
+        )
+        conn.commit()
+
+
+def get_daily_stats(days: int = 30) -> list[dict]:
+    """Get daily aggregated stats."""
+    with get_connection() as conn:
+        rows = conn.execute(
+            """SELECT * FROM daily_stats ORDER BY date DESC LIMIT ?""",
+            (days,),
+        ).fetchall()
+    return [dict(row) for row in rows]