feat(sidecar-v2): implement multi-pool provider proxy with cooldown, rate limiting, WebUI
BIZ-52 Step3 开发实现:
- storage: backend/usage/cooldown/config CRUD with SQLite WAL
- crypto: AES-256-GCM API key encryption
- pool_manager: primary/fallback pool routing
- cooldown_manager: 429 exponential backoff cooldown
- rate_limiter: per-backend token bucket RPM control
- router: model → backend routing with pool priority
- proxy: multi-pool request forwarding with retry
- server: FastAPI admin API + OpenAI-compatible proxy + SSE
- dashboard: WebUI with provider CRUD, stats, charts

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,155 @@
|
||||
"""Usage logging and daily statistics aggregation."""
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from storage.db import get_connection, generate_id
|
||||
|
||||
|
||||
def record_usage(
    backend_id: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    cost: float,
    latency_ms: int,
    ttft_ms: int = 0,
    is_error: bool = False,
) -> None:
    """Fold one request's metrics into the backend's row for the current hour.

    The ``backend_usage_logs`` row matching ``backend_id`` and the current
    UTC hour is updated in place when it exists; otherwise a new row is
    inserted for that hour.

    Args:
        backend_id: Identifier of the backend that served the request.
        model: Model name; written only when a new hourly row is inserted.
        prompt_tokens: Prompt tokens to add to the hourly total.
        completion_tokens: Completion tokens to add to the hourly total.
        cost: Request cost, added to the hourly total.
        latency_ms: Request latency, folded into the running average.
        ttft_ms: Time to first token; on update, values <= 0 leave the
            stored average unchanged.
        is_error: Whether the request increments ``error_count``.
    """
    bucket = time.strftime("%Y-%m-%dT%H:00:00Z", time.gmtime())
    new_row_id = generate_id("use")
    token_sum = prompt_tokens + completion_tokens
    error_increment = int(bool(is_error))

    # SQLite evaluates every right-hand side against the pre-update row, so
    # ``request_count`` inside the averages is the count before this request.
    update_sql = """UPDATE backend_usage_logs SET
        prompt_tokens = prompt_tokens + ?,
        completion_tokens = completion_tokens + ?,
        total_tokens = total_tokens + ?,
        cost = cost + ?,
        request_count = request_count + 1,
        error_count = error_count + ?,
        avg_latency_ms = CAST((avg_latency_ms * request_count + ?) / (request_count + 1) AS INTEGER),
        ttft_ms = CASE WHEN ? > 0 THEN CAST((ttft_ms * request_count + ?) / (request_count + 1) AS INTEGER) ELSE ttft_ms END
        WHERE backend_id = ? AND hour_bucket = ?"""
    update_args = (
        prompt_tokens,
        completion_tokens,
        token_sum,
        cost,
        error_increment,
        latency_ms,
        ttft_ms,  # guard of the CASE expression
        ttft_ms,  # value folded into the average
        backend_id,
        bucket,
    )

    insert_sql = """INSERT INTO backend_usage_logs
        (id, backend_id, model, prompt_tokens, completion_tokens,
        total_tokens, cost, request_count, error_count,
        avg_latency_ms, ttft_ms, hour_bucket)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    insert_args = (
        new_row_id,
        backend_id,
        model,
        prompt_tokens,
        completion_tokens,
        token_sum,
        cost,
        1,
        error_increment,
        latency_ms,
        ttft_ms,
        bucket,
    )

    with get_connection() as conn:
        rows_touched = conn.execute(update_sql, update_args).rowcount
        if rows_touched == 0:
            # No row for this backend/hour yet: start a new bucket.
            conn.execute(insert_sql, insert_args)
        conn.commit()
|
||||
|
||||
|
||||
def get_hourly_usage(
    backend_id: Optional[str] = None,
    since: Optional[str] = None,
    limit: int = 168,
) -> list[dict]:
    """Return hourly usage rows, newest hour first.

    Args:
        backend_id: When truthy, only rows for this backend are returned.
        since: When truthy, only rows whose ``hour_bucket`` is >= this
            value are returned.
        limit: Maximum number of rows to return (default 168, i.e. 7 x 24).

    Returns:
        One dict per ``backend_usage_logs`` row, ordered by ``hour_bucket``
        descending.
    """
    # Each active filter contributes one fixed WHERE fragment and one bound
    # value, so all four filter combinations share a single query template.
    # Only constant fragments are interpolated; user values are always bound.
    clauses: list[str] = []
    params: list = []
    if backend_id:
        clauses.append("backend_id = ?")
        params.append(backend_id)
    if since:
        clauses.append("hour_bucket >= ?")
        params.append(since)

    where_sql = f" WHERE {' AND '.join(clauses)}" if clauses else ""
    query = (
        "SELECT * FROM backend_usage_logs"
        f"{where_sql}"
        " ORDER BY hour_bucket DESC LIMIT ?"
    )
    params.append(limit)

    with get_connection() as conn:
        rows = conn.execute(query, params).fetchall()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def get_total_stats() -> dict:
    """Return request, error, token and cost totals across all usage rows.

    An aggregate query without ``GROUP BY`` always yields exactly one row,
    and ``SUM`` over zero rows is NULL. Each total is therefore wrapped in
    ``COALESCE`` so an empty table reports zeros instead of ``None``.

    Returns:
        A dict with the keys ``total_requests``, ``total_errors``,
        ``total_tokens``, ``total_prompt_tokens``,
        ``total_completion_tokens`` and ``total_cost``.
    """
    with get_connection() as conn:
        row = conn.execute(
            """SELECT
                COALESCE(SUM(request_count), 0) AS total_requests,
                COALESCE(SUM(error_count), 0) AS total_errors,
                COALESCE(SUM(total_tokens), 0) AS total_tokens,
                COALESCE(SUM(prompt_tokens), 0) AS total_prompt_tokens,
                COALESCE(SUM(completion_tokens), 0) AS total_completion_tokens,
                COALESCE(SUM(cost), 0.0) AS total_cost
            FROM backend_usage_logs"""
        ).fetchone()

    if row is None:
        # Defensive only: the aggregate query is expected to return a row.
        return {
            "total_requests": 0,
            "total_errors": 0,
            "total_tokens": 0,
            "total_prompt_tokens": 0,
            "total_completion_tokens": 0,
            "total_cost": 0.0,
        }
    return dict(row)
|
||||
|
||||
|
||||
def aggregate_daily_stats(date: str) -> None:
    """Rebuild the per-pool ``daily_stats`` rows for one date.

    Existing rows for ``date`` are removed first. Hourly usage whose
    ``hour_bucket`` starts with ``date`` is then joined to ``backends``,
    summed per pool and inserted, so calling this again for the same date
    replaces the earlier rows.

    Args:
        date: Date string used both as the stored ``date`` value and as the
            prefix matched against ``hour_bucket``.
    """
    purge_sql = """DELETE FROM daily_stats WHERE date = ?"""
    rollup_sql = """INSERT INTO daily_stats (id, date, pool, total_requests,
        total_errors, total_tokens, total_cost, unique_backends)
        SELECT
        ? || '-' || b.pool,
        ?,
        b.pool,
        SUM(u.request_count),
        SUM(u.error_count),
        SUM(u.total_tokens),
        SUM(u.cost),
        COUNT(DISTINCT u.backend_id)
        FROM backend_usage_logs u
        JOIN backends b ON u.backend_id = b.id
        WHERE u.hour_bucket LIKE ?
        GROUP BY b.pool"""

    with get_connection() as conn:
        conn.execute(purge_sql, (date,))
        # One generated id is shared by the statement; the SQL appends the
        # pool name so each inserted row gets a distinct id.
        rollup_args = (generate_id("day"), date, date + "%")
        conn.execute(rollup_sql, rollup_args)
        conn.commit()
|
||||
|
||||
|
||||
def get_daily_stats(days: int = 30) -> list[dict]:
    """Return aggregated stats for the most recent ``days`` dates.

    ``aggregate_daily_stats`` writes one ``daily_stats`` row per pool for
    each date, so limiting the raw row count would cover fewer than ``days``
    days as soon as more than one pool has data. The limit is therefore
    applied to distinct dates, and every pool row for those dates is
    returned.

    Args:
        days: Maximum number of distinct dates to include (default 30).

    Returns:
        One dict per ``daily_stats`` row, newest date first and ordered by
        pool within a date.
    """
    with get_connection() as conn:
        rows = conn.execute(
            """SELECT * FROM daily_stats
            WHERE date IN (
                SELECT DISTINCT date FROM daily_stats
                ORDER BY date DESC LIMIT ?
            )
            ORDER BY date DESC, pool""",
            (days,),
        ).fetchall()
    return [dict(row) for row in rows]
|
||||
Reference in New Issue
Block a user