4b31322be3
- 新增网关识别逻辑:只识别 nvidia / nvidiavx18088980513 为限流目标 - volcengine-plan、siliconflow、deepseek 等非 NVIDIA 网关默认不进入令牌桶 - RequestScheduler 增加 gateway/model 参数与 _should_rate_limit 判断 - 未知网关默认不限流,避免误伤其他通道 - 补充网关范围测试与使用文档说明 Co-authored-by: multica-agent <github@multica.ai>
332 lines
9.8 KiB
Python
332 lines
9.8 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
BIZ-26 限流器测试脚本
|
||
|
||
测试场景:
|
||
1. 令牌桶限流功能
|
||
2. 优先级队列调度
|
||
3. 缓存管理器
|
||
4. 重试机制
|
||
5. 429 错误模拟
|
||
|
||
运行方式:
|
||
python3 scripts/test_rate_limiter.py
|
||
"""
|
||
|
||
import sys
|
||
import time
|
||
import threading
|
||
from datetime import datetime
|
||
|
||
# 添加脚本目录到路径
|
||
sys.path.insert(0, "/home/vincent/.openclaw/workspace/costcodev/EnterpriseArchitect/scripts")
|
||
|
||
from rate_limiter import (
|
||
TokenBucket,
|
||
CacheManager,
|
||
RequestScheduler,
|
||
Priority,
|
||
retry_with_backoff,
|
||
CoordinatedPoller,
|
||
is_nvidia_gateway,
|
||
)
|
||
|
||
|
||
def test_token_bucket():
|
||
"""测试令牌桶限流器"""
|
||
print("=" * 60)
|
||
print("测试 1: 令牌桶限流器")
|
||
print("=" * 60)
|
||
|
||
# 创建限流器:40 RPM = 0.67 令牌/秒
|
||
bucket = TokenBucket(rate=40/60, capacity=40)
|
||
|
||
print(f"\n初始状态:{bucket.get_status()}")
|
||
|
||
# 快速消费 10 个令牌
|
||
print("\n快速消费 10 个令牌...")
|
||
success_count = 0
|
||
for i in range(10):
|
||
if bucket.consume():
|
||
success_count += 1
|
||
|
||
print(f"成功消费:{success_count}/10")
|
||
print(f"消费后状态:{bucket.get_status()}")
|
||
|
||
# 测试等待获取令牌
|
||
print("\n测试等待获取令牌...")
|
||
start = time.time()
|
||
got_token = bucket.wait_for_token(timeout=2.0)
|
||
elapsed = time.time() - start
|
||
|
||
print(f"等待耗时:{elapsed:.3f}s, 获取成功:{got_token}")
|
||
print(f"等待后状态:{bucket.get_status()}")
|
||
|
||
print("\n✅ 令牌桶测试完成\n")
|
||
|
||
|
||
def test_cache_manager():
|
||
"""测试缓存管理器"""
|
||
print("=" * 60)
|
||
print("测试 2: 缓存管理器")
|
||
print("=" * 60)
|
||
|
||
cache = CacheManager()
|
||
|
||
# 测试 WorkBoard 缓存(TTL 5 分钟)
|
||
print("\n1. 设置 WorkBoard 缓存(TTL 5 分钟)")
|
||
cache.set("workboard", {"query": "status=todo"}, [{"id": "card1", "title": "Test"}])
|
||
|
||
# 立即读取
|
||
result = cache.get("workboard", {"query": "status=todo"})
|
||
print(f" 立即读取:{result is not None}")
|
||
|
||
# 测试配置缓存(TTL 1 小时)
|
||
print("\n2. 设置配置缓存(TTL 1 小时)")
|
||
cache.set("config", "agent_list", ["costcodev", "secretary", "coo"])
|
||
result = cache.get("config", "agent_list")
|
||
print(f" 读取配置:{result}")
|
||
|
||
# 测试缓存统计
|
||
print("\n3. 缓存统计")
|
||
stats = cache.get_stats()
|
||
print(f" 总条目数:{stats['total_entries']}")
|
||
print(f" 按类别:{stats['by_category']}")
|
||
|
||
# 测试缓存删除
|
||
print("\n4. 删除缓存")
|
||
deleted = cache.delete("workboard", {"query": "status=todo"})
|
||
print(f" 删除成功:{deleted}")
|
||
result = cache.get("workboard", {"query": "status=todo"})
|
||
print(f" 删除后读取:{result is None}")
|
||
|
||
print("\n✅ 缓存管理器测试完成\n")
|
||
|
||
|
||
def test_priority_queue():
|
||
"""测试优先级队列调度"""
|
||
print("=" * 60)
|
||
print("测试 3: 优先级队列调度(简化版,不启动工作线程)")
|
||
print("=" * 60)
|
||
|
||
scheduler = RequestScheduler(rate=40/60, capacity=40, enable_cache=True)
|
||
|
||
# 模拟请求处理结果
|
||
results = []
|
||
|
||
def record_result(data):
|
||
results.append((time.time(), data))
|
||
return data
|
||
|
||
# 提交不同优先级的请求(不启动工作线程,只测试队列)
|
||
print("\n提交请求(按顺序):")
|
||
scheduler.submit(
|
||
payload={"task": "normal_1"},
|
||
priority=Priority.NORMAL,
|
||
callback=record_result
|
||
)
|
||
print(" 1. 正常优先级:normal_1")
|
||
|
||
scheduler.submit(
|
||
payload={"task": "urgent_1"},
|
||
priority=Priority.URGENT,
|
||
callback=record_result
|
||
)
|
||
print(" 2. 紧急优先级:urgent_1")
|
||
|
||
scheduler.submit(
|
||
payload={"task": "low_1"},
|
||
priority=Priority.LOW,
|
||
callback=record_result
|
||
)
|
||
print(" 3. 低优先级:low_1")
|
||
|
||
scheduler.submit(
|
||
payload={"task": "high_1"},
|
||
priority=Priority.HIGH,
|
||
callback=record_result
|
||
)
|
||
print(" 4. 高优先级:high_1")
|
||
|
||
# 查看队列大小
|
||
print(f"\n队列大小:{scheduler.get_queue_size()}")
|
||
|
||
# 查看状态
|
||
status = scheduler.get_status()
|
||
print(f"初始令牌数:{status['token_bucket']['tokens']}")
|
||
|
||
print("\n✅ 优先级队列测试完成(仅提交,未处理)\n")
|
||
|
||
|
||
def test_retry_decorator():
|
||
"""测试重试装饰器"""
|
||
print("=" * 60)
|
||
print("测试 4: 重试装饰器")
|
||
print("=" * 60)
|
||
|
||
attempt_count = [0]
|
||
|
||
@retry_with_backoff(max_retries=3, base_delay=0.1, jitter=False)
|
||
def flaky_function():
|
||
attempt_count[0] += 1
|
||
if attempt_count[0] < 3:
|
||
raise Exception(f"模拟失败 (尝试 {attempt_count[0]})")
|
||
return f"成功 (尝试 {attempt_count[0]})"
|
||
|
||
print("\n调用易失败函数(前 2 次失败,第 3 次成功)...")
|
||
start = time.time()
|
||
result = flaky_function()
|
||
elapsed = time.time() - start
|
||
|
||
print(f"结果:{result}")
|
||
print(f"总尝试次数:{attempt_count[0]}")
|
||
print(f"总耗时:{elapsed:.3f}s")
|
||
|
||
print("\n✅ 重试装饰器测试完成\n")
|
||
|
||
|
||
def test_coordinated_poller():
|
||
"""测试统一轮询器"""
|
||
print("=" * 60)
|
||
print("测试 5: COO 统一轮询器(简化版,短间隔测试)")
|
||
print("=" * 60)
|
||
|
||
scheduler = RequestScheduler(rate=40/60, capacity=40)
|
||
poller = CoordinatedPoller(scheduler, poll_interval=2) # 2 秒轮询一次(测试用)
|
||
|
||
received_results = []
|
||
|
||
def on_poll_result(result):
|
||
received_results.append((datetime.now().strftime("%H:%M:%S"), result))
|
||
print(f" [{datetime.now().strftime('%H:%M:%S')}] 收到轮询结果")
|
||
|
||
poller.subscribe(on_poll_result)
|
||
|
||
print("\n启动轮询器(轮询间隔 2 秒,运行 5 秒后停止)...")
|
||
poller.start()
|
||
|
||
# 等待 5 秒
|
||
time.sleep(5)
|
||
|
||
poller.stop()
|
||
|
||
print(f"\n收到结果次数:{len(received_results)}")
|
||
for ts, result in received_results:
|
||
print(f" {ts}: {result['timestamp'][:19]}")
|
||
|
||
print("\n✅ 统一轮询器测试完成\n")
|
||
|
||
|
||
def test_rate_limit_stress():
|
||
"""压力测试:快速提交大量请求"""
|
||
print("=" * 60)
|
||
print("测试 6: 压力测试(40 RPM 限制下提交 50 个请求)")
|
||
print("=" * 60)
|
||
|
||
scheduler = RequestScheduler(rate=40/60, capacity=40, enable_cache=True)
|
||
scheduler.start()
|
||
|
||
completed = []
|
||
failed = []
|
||
lock = threading.Lock()
|
||
|
||
def callback(data):
|
||
with lock:
|
||
completed.append(data)
|
||
return data
|
||
|
||
print("\n快速提交 50 个请求...")
|
||
start_time = time.time()
|
||
|
||
for i in range(50):
|
||
priority = Priority.NORMAL if i % 10 != 0 else Priority.URGENT
|
||
scheduler.submit(
|
||
payload={"index": i, "provider": "nvidia"},
|
||
priority=priority,
|
||
callback=callback,
|
||
gateway="nvidia"
|
||
)
|
||
|
||
print("提交完成,等待处理...")
|
||
|
||
# 等待 10 秒
|
||
time.sleep(10)
|
||
|
||
elapsed = time.time() - start_time
|
||
|
||
# 查看统计
|
||
status = scheduler.get_status()
|
||
print(f"\n耗时:{elapsed:.2f}s")
|
||
print(f"队列大小:{status['queue_size']}")
|
||
print(f"已完成:{status['stats']['completed_requests']}")
|
||
print(f"失败:{status['stats']['failed_requests']}")
|
||
print(f"降级:{status['stats']['fallback_requests']}")
|
||
print(f"令牌桶状态:{status['token_bucket']}")
|
||
|
||
scheduler.stop()
|
||
|
||
print("\n✅ 压力测试完成\n")
|
||
|
||
|
||
def test_gateway_scope():
|
||
"""测试限流范围:只对 NVIDIA 网关生效"""
|
||
print("=" * 60)
|
||
print("测试 7: 网关范围识别(只限 NVIDIA)")
|
||
print("=" * 60)
|
||
|
||
assert is_nvidia_gateway("nvidia") is True
|
||
assert is_nvidia_gateway("nvidiavx18088980513/deepseek-ai/deepseek-v4-pro") is True
|
||
assert is_nvidia_gateway("volcengine-plan/ark-code-latest") is False
|
||
assert is_nvidia_gateway("siliconflow/Qwen/Qwen3") is False
|
||
assert is_nvidia_gateway("deepseek/deepseek-chat") is False
|
||
assert is_nvidia_gateway(None) is False
|
||
|
||
scheduler = RequestScheduler(rate=1/60, capacity=1, enable_cache=True)
|
||
# 先耗尽 NVIDIA 桶
|
||
scheduler.submit(payload={"provider": "nvidia", "i": 1}, priority=Priority.NORMAL, callback=lambda x: x, gateway="nvidia")
|
||
# 非 NVIDIA 请求应直接执行,不受桶状态影响
|
||
non_nv = {"provider": "volcengine-plan", "i": 2}
|
||
assert scheduler._should_rate_limit(type("R", (), {"gateway": "volcengine-plan", "model": None, "payload": non_nv})()) is False
|
||
|
||
print("✅ 网关范围识别测试完成:volcengine-plan/siliconflow/DeepSeek 不限流,NVIDIA 限流\n")
|
||
|
||
|
||
def main():
|
||
"""运行所有测试"""
|
||
print("\n")
|
||
print("╔" + "=" * 58 + "╗")
|
||
print("║" + " " * 58 + "║")
|
||
print("║" + " BIZ-26 限流器测试套件".center(58) + "║")
|
||
print("║" + " API 请求优先级队列 + 令牌桶限流".center(58) + "║")
|
||
print("║" + " " * 58 + "║")
|
||
print("╚" + "=" * 58 + "╝")
|
||
print()
|
||
|
||
try:
|
||
test_token_bucket()
|
||
test_cache_manager()
|
||
test_priority_queue()
|
||
test_retry_decorator()
|
||
test_coordinated_poller()
|
||
test_rate_limit_stress()
|
||
test_gateway_scope()
|
||
|
||
print("\n")
|
||
print("╔" + "=" * 58 + "╗")
|
||
print("║" + " " * 58 + "║")
|
||
print("║" + " ✅ 所有测试完成".center(58) + "║")
|
||
print("║" + " " * 58 + "║")
|
||
print("╚" + "=" * 58 + "╝")
|
||
print()
|
||
|
||
except KeyboardInterrupt:
|
||
print("\n\n⚠️ 测试被用户中断\n")
|
||
except Exception as e:
|
||
print(f"\n\n❌ 测试出错:{e}\n")
|
||
import traceback
|
||
traceback.print_exc()
|
||
sys.exit(1)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main() |