fix(BIZ-26): 限流范围收窄到 NVIDIA 网关

- 新增网关识别逻辑:只识别 nvidia / nvidiavx18088980513 为限流目标
- volcengine-plan、siliconflow、deepseek 等非 NVIDIA 网关默认不进入令牌桶
- RequestScheduler 增加 gateway/model 参数与 _should_rate_limit 判断
- 未知网关默认不限流,避免误伤其他通道
- 补充网关范围测试与使用文档说明

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
2026-06-23 16:12:02 +08:00
parent 7f1edfb2fd
commit 4b31322be3
3 changed files with 153 additions and 16 deletions
+91 -4
View File
@@ -25,6 +25,57 @@ from enum import IntEnum
from datetime import datetime, timedelta
# ============================================================================
# 网关识别:只对 NVIDIA 网关限流
# ============================================================================
NVIDIA_GATEWAY_ALIASES = {
"nvidia",
"nvidia-gateway",
"nvidia_gateway",
"nvidiavx18088980513",
}
UNLIMITED_GATEWAY_ALIASES = {
"volcengine",
"volcengine-plan",
"siliconflow",
"deepseek",
"deepseek-api",
}
def normalize_gateway_name(value: Optional[str]) -> Optional[str]:
"""
归一化网关/模型名称。
输入可以是:
- provider: nvidia / volcengine-plan / siliconflow / deepseek
- model: nvidiavx18088980513/deepseek-ai/deepseek-v4-pro
- model: volcengine-plan/ark-code-latest
返回 provider 前缀的小写形式。未知则返回 None。
"""
if not value:
return None
text = str(value).strip().lower()
if not text:
return None
return text.split("/", 1)[0]
def is_nvidia_gateway(value: Optional[str]) -> bool:
"""判断请求是否走 NVIDIA 网关。未知网关默认不限流。"""
provider = normalize_gateway_name(value)
if provider is None:
return False
if provider in NVIDIA_GATEWAY_ALIASES:
return True
if provider in UNLIMITED_GATEWAY_ALIASES:
return False
return provider.startswith("nvidia")
# ============================================================================
# 优先级枚举
# ============================================================================
@@ -50,6 +101,8 @@ class Request:
payload: Any = field(compare=False)
callback: Optional[Callable] = field(compare=False, default=None)
fallback_model: Optional[str] = field(compare=False, default=None)
gateway: Optional[str] = field(compare=False, default=None)
model: Optional[str] = field(compare=False, default=None)
def __post_init__(self):
if self.timestamp is None:
@@ -69,7 +122,10 @@ class Request:
class TokenBucket:
"""
令牌桶限流器
NVIDIA 网关专用令牌桶限流器
注意:令牌桶本身只负责节流算法;是否启用由 RequestScheduler._should_rate_limit()
按 gateway/model 判断。volcengine-plan、siliconflow、DeepSeek 等非 NVIDIA 网关不会进入此桶。
参数:
rate: 令牌生成速率(个/秒),默认 40 RPM = 0.67 个/秒
@@ -362,6 +418,28 @@ class RequestScheduler:
# 记录错误但不中断工作线程
print(f"[RequestScheduler] Worker error: {e}")
def _extract_gateway_hint(self, request: Request) -> Optional[str]:
"""从 request.gateway / request.model / payload 中提取网关提示。"""
if request.gateway:
return request.gateway
if request.model:
return request.model
if isinstance(request.payload, dict):
for key in ("gateway", "provider", "model", "model_id"):
value = request.payload.get(key)
if value:
return str(value)
return None
def _should_rate_limit(self, request: Request) -> bool:
"""
只对 NVIDIA 网关请求启用令牌桶。
设计原则:未知网关默认不限制,避免误伤 volcengine-plan / siliconflow / DeepSeek
等其他 API 网关。要被限流,调用方必须显式传 gateway/model,且能识别为 NVIDIA。
"""
return is_nvidia_gateway(self._extract_gateway_hint(request))
def _process_request(self, request: Request) -> None:
"""
处理单个请求
@@ -372,7 +450,12 @@ class RequestScheduler:
"""
self.stats["total_requests"] += 1
# 尝试获取令牌
# 只对 NVIDIA 网关请求启用令牌桶;其他网关直接执行
if not self._should_rate_limit(request):
self._execute_request(request)
return
# NVIDIA 网关请求:尝试获取令牌
if request.priority <= Priority.HIGH:
# 高优先级:无限等待
got_token = self.token_bucket.wait_for_token(timeout=None)
@@ -419,7 +502,9 @@ class RequestScheduler:
priority: Priority = Priority.NORMAL,
callback: Optional[Callable] = None,
fallback_model: Optional[str] = None,
request_id: Optional[str] = None
request_id: Optional[str] = None,
gateway: Optional[str] = None,
model: Optional[str] = None
) -> str:
"""
提交请求到调度队列
@@ -440,7 +525,9 @@ class RequestScheduler:
request_id=request_id,
payload=payload,
callback=callback,
fallback_model=fallback_model
fallback_model=fallback_model,
gateway=gateway,
model=model
)
self.request_queue.put(req)