611ebd11a8
BIZ-52 Step3 开发实现: - storage: backend/usage/cooldown/config CRUD with SQLite WAL - crypto: AES-256-GCM API key encryption - pool_manager: primary/fallback pool routing - cooldown_manager: 429 exponential backoff cooldown - rate_limiter: per-backend token bucket RPM control - router: model → backend routing with pool priority - proxy: multi-pool request forwarding with retry - server: FastAPI admin API + OpenAI-compatible proxy + SSE - dashboard: WebUI with provider CRUD, stats, charts Co-authored-by: multica-agent <github@multica.ai>
155 lines
5.5 KiB
Python
155 lines
5.5 KiB
Python
"""Usage logging and daily statistics aggregation."""
|
|
|
|
import time
|
|
from typing import Optional
|
|
|
|
from storage.db import get_connection, generate_id
|
|
|
|
|
|
def record_usage(
    backend_id: str,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    cost: float,
    latency_ms: int,
    ttft_ms: int = 0,
    is_error: bool = False,
) -> None:
    """Fold one request's usage into the backend's row for the current hour.

    The current UTC time is truncated to the hour to form the bucket key. An
    UPDATE first tries to add this request to the row matching ``backend_id``
    and that bucket; when it touches no row, a fresh row is inserted with this
    request as its first sample. The change is committed before returning.

    Args:
        backend_id: Backend whose hourly row is updated or created.
        model: Model name. Only written when a new row is inserted; the
            UPDATE neither matches on it nor changes it, so later requests
            for other models land in the same row.
        prompt_tokens: Prompt tokens to add.
        completion_tokens: Completion tokens to add.
        cost: Cost to add.
        latency_ms: Request latency, folded into the running integer average.
        ttft_ms: Time to first token. On update, a value that is not greater
            than zero leaves the stored ``ttft_ms`` untouched.
        is_error: When true, the row's ``error_count`` grows by one.
    """
    bucket = time.strftime("%Y-%m-%dT%H:00:00Z", time.gmtime())
    new_row_id = generate_id("use")

    combined_tokens = prompt_tokens + completion_tokens
    error_increment = 1 if is_error else 0

    # SQLite evaluates every right-hand side of SET before assigning, so
    # `request_count` inside the average formulas is the count *before* this
    # request is added.
    update_sql = """UPDATE backend_usage_logs SET
            prompt_tokens = prompt_tokens + ?,
            completion_tokens = completion_tokens + ?,
            total_tokens = total_tokens + ?,
            cost = cost + ?,
            request_count = request_count + 1,
            error_count = error_count + ?,
            avg_latency_ms = CAST((avg_latency_ms * request_count + ?) / (request_count + 1) AS INTEGER),
            ttft_ms = CASE WHEN ? > 0 THEN CAST((ttft_ms * request_count + ?) / (request_count + 1) AS INTEGER) ELSE ttft_ms END
            WHERE backend_id = ? AND hour_bucket = ?"""
    update_args = (
        prompt_tokens,
        completion_tokens,
        combined_tokens,
        cost,
        error_increment,
        latency_ms,
        ttft_ms,  # CASE guard
        ttft_ms,  # value folded into the average
        backend_id,
        bucket,
    )

    insert_sql = """INSERT INTO backend_usage_logs
            (id, backend_id, model, prompt_tokens, completion_tokens,
            total_tokens, cost, request_count, error_count,
            avg_latency_ms, ttft_ms, hour_bucket)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    insert_args = (
        new_row_id,
        backend_id,
        model,
        prompt_tokens,
        completion_tokens,
        combined_tokens,
        cost,
        1,  # request_count: this is the bucket's first request
        error_increment,
        latency_ms,
        ttft_ms,
        bucket,
    )

    # NOTE(review): the UPDATE and the fallback INSERT are separate
    # statements. Whether two concurrent writers can both insert a row for
    # the same bucket depends on get_connection()'s transaction handling and
    # on table constraints that are not visible here -- confirm.
    with get_connection() as conn:
        touched = conn.execute(update_sql, update_args)
        if touched.rowcount == 0:
            # No row exists yet for this backend and hour: start one.
            conn.execute(insert_sql, insert_args)
        conn.commit()
|
|
|
|
|
|
def get_hourly_usage(
    backend_id: Optional[str] = None,
    since: Optional[str] = None,
    limit: int = 168,
) -> list[dict]:
    """Fetch hourly usage rows, newest bucket first.

    Args:
        backend_id: When truthy, only rows for this backend are returned.
        since: When truthy, only rows whose ``hour_bucket`` is greater than
            or equal to this string are returned.
        limit: Maximum number of rows to return (default 168, i.e. 7 * 24).

    Returns:
        One dict per matching ``backend_usage_logs`` row, ordered by
        ``hour_bucket`` descending.
    """
    # Choose the statement and its parameters first; a falsy filter
    # (None or an empty string) is treated as "not supplied".
    if backend_id and since:
        query = """SELECT * FROM backend_usage_logs
                WHERE backend_id = ? AND hour_bucket >= ?
                ORDER BY hour_bucket DESC LIMIT ?"""
        args = (backend_id, since, limit)
    elif backend_id:
        query = """SELECT * FROM backend_usage_logs
                WHERE backend_id = ? ORDER BY hour_bucket DESC LIMIT ?"""
        args = (backend_id, limit)
    elif since:
        query = """SELECT * FROM backend_usage_logs
                WHERE hour_bucket >= ? ORDER BY hour_bucket DESC LIMIT ?"""
        args = (since, limit)
    else:
        query = """SELECT * FROM backend_usage_logs
                ORDER BY hour_bucket DESC LIMIT ?"""
        args = (limit,)

    with get_connection() as conn:
        fetched = conn.execute(query, args).fetchall()
        return [dict(record) for record in fetched]
|
|
|
|
|
|
def get_total_stats() -> dict:
    """Return request, error, token and cost totals over all usage rows.

    Returns:
        A dict with the keys ``total_requests``, ``total_errors``,
        ``total_tokens``, ``total_prompt_tokens``,
        ``total_completion_tokens`` and ``total_cost``. Any total that has
        nothing to sum is reported as ``0`` (``0.0`` for ``total_cost``)
        rather than ``None``.
    """
    totals = {
        "total_requests": 0, "total_errors": 0,
        "total_tokens": 0, "total_prompt_tokens": 0,
        "total_completion_tokens": 0, "total_cost": 0.0,
    }
    with get_connection() as conn:
        row = conn.execute(
            """SELECT
                SUM(request_count) as total_requests,
                SUM(error_count) as total_errors,
                SUM(total_tokens) as total_tokens,
                SUM(prompt_tokens) as total_prompt_tokens,
                SUM(completion_tokens) as total_completion_tokens,
                SUM(cost) as total_cost
            FROM backend_usage_logs"""
        ).fetchone()
        if row is None:
            return totals
        # An aggregate query without GROUP BY yields one row even when the
        # table is empty, and SUM() over no rows is NULL. Overlay only the
        # non-NULL sums so the zero defaults survive in that case.
        for key, value in dict(row).items():
            if value is not None:
                totals[key] = value
        return totals
|
|
|
|
|
|
def aggregate_daily_stats(date: str) -> None:
    """Rebuild the ``daily_stats`` rows for one date from hourly usage.

    Existing ``daily_stats`` rows for ``date`` are deleted, then one row per
    backend pool is inserted, summing the hourly rows whose ``hour_bucket``
    starts with ``date``. Hourly rows whose ``backend_id`` has no match in
    ``backends`` are left out by the inner join. The change is committed
    before returning.

    Args:
        date: Prefix matched against ``hour_bucket`` and stored in the
            ``date`` column -- presumably ``YYYY-MM-DD``, given that
            ``record_usage`` writes buckets as ``YYYY-MM-DDTHH:00:00Z``.
    """
    purge_sql = """DELETE FROM daily_stats WHERE date = ?"""

    # Each inserted row's id is "<generated id>-<pool>".
    rollup_sql = """INSERT INTO daily_stats (id, date, pool, total_requests,
            total_errors, total_tokens, total_cost, unique_backends)
            SELECT
            ? || '-' || b.pool,
            ?,
            b.pool,
            SUM(u.request_count),
            SUM(u.error_count),
            SUM(u.total_tokens),
            SUM(u.cost),
            COUNT(DISTINCT u.backend_id)
            FROM backend_usage_logs u
            JOIN backends b ON u.backend_id = b.id
            WHERE u.hour_bucket LIKE ?
            GROUP BY b.pool"""

    with get_connection() as conn:
        # Drop any earlier aggregation for this date so a re-run replaces it.
        conn.execute(purge_sql, (date,))

        id_prefix = generate_id("day")
        bucket_pattern = date + "%"
        conn.execute(rollup_sql, (id_prefix, date, bucket_pattern))

        conn.commit()
|
|
|
|
|
|
def get_daily_stats(days: int = 30) -> list[dict]:
    """Return the aggregated rows for the most recent ``days`` dates.

    ``daily_stats`` holds one row per (date, pool), so limiting the number of
    rows would cover fewer than ``days`` dates whenever more than one pool
    has data. The limit is therefore applied to distinct dates, and every
    pool row for each selected date is returned.

    Args:
        days: Maximum number of distinct dates to include, newest first.

    Returns:
        One dict per ``daily_stats`` row, ordered by ``date`` descending and
        then by ``pool``.
    """
    with get_connection() as conn:
        rows = conn.execute(
            """SELECT * FROM daily_stats
            WHERE date IN (
                SELECT DISTINCT date FROM daily_stats
                ORDER BY date DESC LIMIT ?
            )
            ORDER BY date DESC, pool""",
            (days,),
        ).fetchall()
        return [dict(row) for row in rows]