fix(BIZ-26): 限流范围收窄到 NVIDIA 网关

- 新增网关识别逻辑:只识别 nvidia / nvidiavx18088980513 为限流目标
- volcengine-plan、siliconflow、deepseek 等非 NVIDIA 网关默认不进入令牌桶
- RequestScheduler 增加 gateway/model 参数与 _should_rate_limit 判断
- 未知网关默认不限流,避免误伤其他通道
- 补充网关范围测试与使用文档说明

Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
2026-06-23 16:12:02 +08:00
parent 7f1edfb2fd
commit 4b31322be3
3 changed files with 153 additions and 16 deletions
+28 -2
View File
@@ -28,6 +28,7 @@ from rate_limiter import (
Priority,
retry_with_backoff,
CoordinatedPoller,
is_nvidia_gateway,
)
@@ -240,9 +241,10 @@ def test_rate_limit_stress():
for i in range(50):
priority = Priority.NORMAL if i % 10 != 0 else Priority.URGENT
scheduler.submit(
payload={"index": i},
payload={"index": i, "provider": "nvidia"},
priority=priority,
callback=callback
callback=callback,
gateway="nvidia"
)
print("提交完成,等待处理...")
@@ -266,6 +268,29 @@ def test_rate_limit_stress():
print("\n✅ 压力测试完成\n")
def test_gateway_scope():
"""测试限流范围:只对 NVIDIA 网关生效"""
print("=" * 60)
print("测试 7: 网关范围识别(只限 NVIDIA)")
print("=" * 60)
assert is_nvidia_gateway("nvidia") is True
assert is_nvidia_gateway("nvidiavx18088980513/deepseek-ai/deepseek-v4-pro") is True
assert is_nvidia_gateway("volcengine-plan/ark-code-latest") is False
assert is_nvidia_gateway("siliconflow/Qwen/Qwen3") is False
assert is_nvidia_gateway("deepseek/deepseek-chat") is False
assert is_nvidia_gateway(None) is False
scheduler = RequestScheduler(rate=1/60, capacity=1, enable_cache=True)
# 先耗尽 NVIDIA 桶
scheduler.submit(payload={"provider": "nvidia", "i": 1}, priority=Priority.NORMAL, callback=lambda x: x, gateway="nvidia")
# 非 NVIDIA 请求应直接执行,不受桶状态影响
non_nv = {"provider": "volcengine-plan", "i": 2}
assert scheduler._should_rate_limit(type("R", (), {"gateway": "volcengine-plan", "model": None, "payload": non_nv})()) is False
print("✅ 网关范围识别测试完成:volcengine-plan/siliconflow/DeepSeek 不限流,NVIDIA 限流\n")
def main():
"""运行所有测试"""
print("\n")
@@ -284,6 +309,7 @@ def main():
test_retry_decorator()
test_coordinated_poller()
test_rate_limit_stress()
test_gateway_scope()
print("\n")
print("" + "=" * 58 + "")