From 18dfb2901bbce4cf9d9a44453d86d99af2a0a4a8 Mon Sep 17 00:00:00 2001 From: bizwings Date: Thu, 25 Jun 2026 22:48:09 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20add=20Primary-Wait=20Prometheus=20counte?= =?UTF-8?q?rs=20+=20conservative=20defaults=20=E2=80=94=20BIZ-60=20review?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P0 changes per 4-reviewer consensus (严维序/陆怀瑾/沈路明/梁思筑): 1. Prometheus metrics counters (proxy.py + server.py): - sidecar_primary_wait_enter_total: requests entering Primary-Wait - sidecar_primary_wait_recovery_total: successful primary recoveries - sidecar_primary_wait_exhausted_total: wait exhausted → emergency 2. Conservative default (config.py): - primary_wait_max_retries: 6 → 3 (15s total wait, safe start) - Observe recovery rate before increasing to 6 Counters are intended to form a complete funnel: enter = recovery + exhausted once no request is still inside Primary-Wait (requests that are still waiting count toward enter only), enabling Grafana monitoring and ROI validation per COO/PM/Ops. --- config.py | 2 +- proxy.py | 24 ++++++++++++++++++++++++ server.py | 13 ++++++++++++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index 8cbe147..b1f3d75 100644 --- a/config.py +++ b/config.py @@ -75,7 +75,7 @@ class Config: # Primary-Wait: when all primary backends are cooling, wait before fallback primary_wait_ms: int = 5000 - primary_wait_max_retries: int = 6 + primary_wait_max_retries: int = 3 # Request timeout default_request_timeout_seconds: int = 120 diff --git a/proxy.py b/proxy.py index ba8a651..7cec512 100644 --- a/proxy.py +++ b/proxy.py @@ -21,11 +21,28 @@ from storage.usage_store import record_usage # Emergency activation counter (read by metrics endpoint) _emergency_count: int = 0 +# Primary-Wait metrics counters (read by metrics endpoint) +_primary_wait_enter_count: int = 0 +_primary_wait_recovery_count: int = 0 +_primary_wait_exhausted_count: int = 0 + def get_emergency_count() -> int: return _emergency_count +def get_primary_wait_enter_count() -> int: + return 
_primary_wait_enter_count + + +def get_primary_wait_recovery_count() -> int: + return _primary_wait_recovery_count + + +def get_primary_wait_exhausted_count() -> int: + return _primary_wait_exhausted_count + + logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.proxy") @@ -309,6 +326,9 @@ async def handle_proxy_request( continue # --- Primary-Wait: wait for primary pool recovery before fallback/emergency --- + global _primary_wait_enter_count, _primary_wait_recovery_count, _primary_wait_exhausted_count + _primary_wait_enter_count += 1 + pwl = logger.bind(phase="primary_wait") for pw_attempt in range(config.primary_wait_max_retries): await asyncio.sleep(config.primary_wait_ms / 1000.0) @@ -356,6 +376,7 @@ async def handle_proxy_request( continue # Primary recovered — success + _primary_wait_recovery_count += 1 resp_json: dict[str, Any] = {} try: if not is_stream and resp.content: @@ -417,6 +438,9 @@ async def handle_proxy_request( ) continue + # Primary-Wait all retries exhausted + _primary_wait_exhausted_count += 1 + # All pools exhausted (including primary-wait retries) — emergency rate-limited passthrough emergency_rpm = int(config.default_rpm_limit * config.emergency_rpm_fraction) if emergency_rpm < 1: diff --git a/server.py b/server.py index a1b9b7d..ec22a4f 100644 --- a/server.py +++ b/server.py @@ -20,7 +20,13 @@ from crypto import init_crypto, is_initialized from pool_manager import PoolManager from rate_limiter import PerBackendRateLimiter from router import Router -from proxy import handle_proxy_request, get_emergency_count +from proxy import ( + handle_proxy_request, + get_emergency_count, + get_primary_wait_enter_count, + get_primary_wait_recovery_count, + get_primary_wait_exhausted_count, +) from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH from storage.backend_store import ( @@ -383,6 +389,11 @@ async def metrics() -> Response: # Emergency count (from proxy module) 
lines.append(f"sidecar_emergency_count {get_emergency_count()}") + # Primary-Wait metrics + lines.append(f"sidecar_primary_wait_enter_total {get_primary_wait_enter_count()}") + lines.append(f"sidecar_primary_wait_recovery_total {get_primary_wait_recovery_count()}") + lines.append(f"sidecar_primary_wait_exhausted_total {get_primary_wait_exhausted_count()}") + # DB sizes from storage.db import get_db_sizes sizes = get_db_sizes()