8a12ff9693
Co-authored-by: multica-agent <github@multica.ai>
41 lines
963 B
Python
41 lines
963 B
Python
"""
|
||
NVIDIA Sidecar 限流代理 — 核心代理模块。
|
||
|
||
为 OpenAI Chat Completions 兼容 API 提供四层防护:
|
||
1. 请求接收(FastAPI)
|
||
2. 网关识别 → 非 NVIDIA 直通
|
||
3. 优先级排队 → 令牌桶限流
|
||
4. httpx 异步转发到 NVIDIA 上游
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from nvidia_sidecar.config import SidecarConfig, load_config
|
||
from nvidia_sidecar.rate_limiter import (
|
||
Priority,
|
||
TokenBucket,
|
||
is_nvidia_gateway,
|
||
normalize_gateway_name,
|
||
)
|
||
from nvidia_sidecar.priority_queue import (
|
||
PriorityQueueItem,
|
||
PriorityRequestQueue,
|
||
QueueFullError,
|
||
QueueFullPassthrough,
|
||
QueueFullPolicy,
|
||
)
|
||
|
||
__version__ = "0.1.0"
|
||
__all__ = [
|
||
"SidecarConfig",
|
||
"load_config",
|
||
"Priority",
|
||
"TokenBucket",
|
||
"is_nvidia_gateway",
|
||
"normalize_gateway_name",
|
||
"PriorityQueueItem",
|
||
"PriorityRequestQueue",
|
||
"QueueFullError",
|
||
"QueueFullPassthrough",
|
||
"QueueFullPolicy",
|
||
] |