# Patch summary (three files), as shown by the hunks below:
#   config.py - lowers the Config.primary_wait_max_retries default from 6 to 3.
#               The Primary-Wait loop sleeps primary_wait_ms (default 5000) once
#               per attempt, so at defaults the sleeps add up to 15 s, down from 30 s.
#   proxy.py  - adds three module-level int counters initialised to 0
#               (_primary_wait_enter_count, _primary_wait_recovery_count,
#               _primary_wait_exhausted_count) and one zero-argument getter for each.
#               handle_proxy_request increments the enter counter just before the
#               Primary-Wait retry loop, the recovery counter at the
#               "Primary recovered" point, and the exhausted counter immediately
#               before the emergency rate-limited passthrough section.
#   server.py - imports the three getters and appends their values in metrics() as
#               sidecar_primary_wait_enter_total, sidecar_primary_wait_recovery_total
#               and sidecar_primary_wait_exhausted_total.
# NOTE(review): the `global` statement is introduced part-way through
#   handle_proxy_request; Python rejects a `global` that follows a use of the same
#   name in that function - confirm none of the three names is referenced earlier.
# NOTE(review): the new metric names end in `_total` while the neighbouring
#   sidecar_emergency_count does not - confirm the naming difference is intended.
# NOTE(review): this copy of the patch has lost its original line breaks and
#   indentation; presumably it must be re-wrapped before a patch tool will accept it.
diff --git a/config.py b/config.py index 8cbe147..b1f3d75 100644 --- a/config.py +++ b/config.py @@ -75,7 +75,7 @@ class Config: # Primary-Wait: when all primary backends are cooling, wait before fallback primary_wait_ms: int = 5000 - primary_wait_max_retries: int = 6 + primary_wait_max_retries: int = 3 # Request timeout default_request_timeout_seconds: int = 120 diff --git a/proxy.py b/proxy.py index ba8a651..7cec512 100644 --- a/proxy.py +++ b/proxy.py @@ -21,11 +21,28 @@ from storage.usage_store import record_usage # Emergency activation counter (read by metrics endpoint) _emergency_count: int = 0 +# Primary-Wait metrics counters (read by metrics endpoint) +_primary_wait_enter_count: int = 0 +_primary_wait_recovery_count: int = 0 +_primary_wait_exhausted_count: int = 0 + def get_emergency_count() -> int: return _emergency_count +def get_primary_wait_enter_count() -> int: + return _primary_wait_enter_count + + +def get_primary_wait_recovery_count() -> int: + return _primary_wait_recovery_count + + +def get_primary_wait_exhausted_count() -> int: + return _primary_wait_exhausted_count + + logger: structlog.stdlib.BoundLogger = structlog.get_logger("sidecar_v2.proxy") @@ -309,6 +326,9 @@ async def handle_proxy_request( continue # --- Primary-Wait: wait for primary pool recovery before fallback/emergency --- + global _primary_wait_enter_count, _primary_wait_recovery_count, _primary_wait_exhausted_count + _primary_wait_enter_count += 1 + pwl = logger.bind(phase="primary_wait") for pw_attempt in range(config.primary_wait_max_retries): await asyncio.sleep(config.primary_wait_ms / 1000.0) @@ -356,6 +376,7 @@ async def handle_proxy_request( continue # Primary recovered — success + _primary_wait_recovery_count += 1 resp_json: dict[str, Any] = {} try: if not is_stream and resp.content: @@ -417,6 +438,9 @@ async def handle_proxy_request( ) continue + # Primary-Wait all retries exhausted + _primary_wait_exhausted_count += 1 + # All pools exhausted (including primary-wait 
retries) — emergency rate-limited passthrough emergency_rpm = int(config.default_rpm_limit * config.emergency_rpm_fraction) if emergency_rpm < 1: diff --git a/server.py b/server.py index a1b9b7d..ec22a4f 100644 --- a/server.py +++ b/server.py @@ -20,7 +20,13 @@ from crypto import init_crypto, is_initialized from pool_manager import PoolManager from rate_limiter import PerBackendRateLimiter from router import Router -from proxy import handle_proxy_request, get_emergency_count +from proxy import ( + handle_proxy_request, + get_emergency_count, + get_primary_wait_enter_count, + get_primary_wait_recovery_count, + get_primary_wait_exhausted_count, +) from storage.db import init_db, create_tables, run_integrity_check, get_connection, _DB_PATH from storage.backend_store import ( @@ -383,6 +389,11 @@ async def metrics() -> Response: # Emergency count (from proxy module) lines.append(f"sidecar_emergency_count {get_emergency_count()}") + # Primary-Wait metrics + lines.append(f"sidecar_primary_wait_enter_total {get_primary_wait_enter_count()}") + lines.append(f"sidecar_primary_wait_recovery_total {get_primary_wait_recovery_count()}") + lines.append(f"sidecar_primary_wait_exhausted_total {get_primary_wait_exhausted_count()}") + # DB sizes from storage.db import get_db_sizes sizes = get_db_sizes()