Compare commits

..

1 Commits

Author SHA1 Message Date
vincent f7302f6277 fix(BIZ-41): Phase0 前置修复 — NVIDIA_GATEWAY_ALIASES 补全 + README 超时默认值ASES 补全 + README 超时默认值修正
- NVIDIA_GATEWAY_ALIASES 新增 3 个 provider: nvidia98053, nvidialiuweicheng84, nvidiavx64391942
  同步自 openclaw.json 中的所有 5 个 NVIDIA provider 名称
  (修复徐聪在 BIZ-41 讨论中指出的 3/5 provider 不被 Sidecar 识别的问题)
- README SIDECAR_TIMEOUT 默认值修正: 6000→60s(与 config.py 实际默认值一致)"}

Co-authored-by: multica-agent <github@multica.ai>
2026-06-24 22:34:07 +08:00
3 changed files with 47 additions and 58 deletions
+1 -1
View File
@@ -24,7 +24,7 @@ nvidia-sidecar
| `SIDECAR_API_KEY` | — | NVIDIA API Key(必填) | | `SIDECAR_API_KEY` | — | NVIDIA API Key(必填) |
| `SIDECAR_RATE_RPM` | `40` | 每分钟请求数限制 | | `SIDECAR_RATE_RPM` | `40` | 每分钟请求数限制 |
| `SIDECAR_BUCKET_CAPACITY` | `40` | 令牌桶容量 | | `SIDECAR_BUCKET_CAPACITY` | `40` | 令牌桶容量 |
| `SIDECAR_TIMEOUT` | `60` | 上游请求超时(秒) | | `SIDECAR_TIMEOUT` | `60` | 上游请求超时(秒,上限截断 300s |
| `SIDECAR_QUEUE_MAX` | `500` | 队列最大长度 | | `SIDECAR_QUEUE_MAX` | `500` | 队列最大长度 |
| `SIDECAR_LOW_TIMEOUT` | `2.0` | 低优先级令牌等待超时(秒) | | `SIDECAR_LOW_TIMEOUT` | `2.0` | 低优先级令牌等待超时(秒) |
| `SIDECAR_FALLBACK_PASSTHROUGH` | `true` | 队列满时是否直通上游 | | `SIDECAR_FALLBACK_PASSTHROUGH` | `true` | 队列满时是否直通上游 |
+1
View File
@@ -31,6 +31,7 @@ class Priority(IntEnum):
NVIDIA_GATEWAY_ALIASES: set[str] = { NVIDIA_GATEWAY_ALIASES: set[str] = {
# OpenClaw 配置中全部的 NVIDIA provider 名称 # OpenClaw 配置中全部的 NVIDIA provider 名称
# 同步自 openclaw.json models.providers 中的 NVIDIA 条目
"nvidia", "nvidia",
"nvidia-gateway", "nvidia-gateway",
"nvidia98053", "nvidia98053",
+45 -57
View File
@@ -12,8 +12,6 @@ BIZ-46 Phase3: 架构解耦 — 所有全局状态收敛为 SidecarContext (§1)
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import json
import logging import logging
import time import time
from collections.abc import AsyncGenerator from collections.abc import AsyncGenerator
@@ -25,7 +23,7 @@ import structlog
import uvicorn import uvicorn
from fastapi import Depends, FastAPI, Request, Response from fastapi import Depends, FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, PlainTextResponse, StreamingResponse from fastapi.responses import JSONResponse, StreamingResponse
from nvidia_sidecar.config import load_config, SidecarConfig from nvidia_sidecar.config import load_config, SidecarConfig
from nvidia_sidecar.context import SidecarContext from nvidia_sidecar.context import SidecarContext
@@ -61,7 +59,7 @@ structlog.configure(
structlog.processors.JSONRenderer(), structlog.processors.JSONRenderer(),
], ],
context_class=dict, context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(), logger_factory=structlog.PrintLoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger, wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True, cache_logger_on_first_use=True,
) )
@@ -72,9 +70,9 @@ logger: structlog.stdlib.BoundLogger = structlog.get_logger("nvidia_sidecar")
# FastAPI 依赖注入 # FastAPI 依赖注入
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def get_context() -> SidecarContext: def get_context(request: Request) -> SidecarContext:
"""从 app.state 获取 SidecarContextFastAPI 依赖注入)。""" """从 app.state 获取 SidecarContextFastAPI 依赖注入)。"""
return app.state.sidecar # type: ignore[no-any-return] return request.app.state.sidecar # type: ignore[no-any-return]
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -139,12 +137,7 @@ async def _forward_to_upstream(
Raises: Raises:
httpx.HTTPError: HTTP 请求失败。 httpx.HTTPError: HTTP 请求失败。
""" """
# 构建上游 URL:如果 upstream_url 已经包含 /v1 路径,则避免路径重复 upstream_url = ctx.config.upstream_url.rstrip("/") + path
base_url = ctx.config.upstream_url.rstrip("/")
if base_url.endswith("/v1") and path.startswith("/v1"):
upstream_url = base_url + path[3:] # 去掉 path 中的 /v1 前缀
else:
upstream_url = base_url + path
forward_headers: dict[str, str] = { forward_headers: dict[str, str] = {
k: v for k, v in headers.items() k: v for k, v in headers.items()
if k.lower() not in ("host", "content-length", "transfer-encoding") if k.lower() not in ("host", "content-length", "transfer-encoding")
@@ -496,10 +489,28 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
# 启动 worker 协程 # 启动 worker 协程
worker_task = asyncio.create_task(_worker_loop(ctx)) worker_task = asyncio.create_task(_worker_loop(ctx))
# Metrics 通过主服务器 `/metrics` 端点提供 # 在独立端口 :9191 启动 Prometheus metrics 服务器
metrics_app = prometheus.build_asgi_app()
metrics_config = uvicorn.Config(
metrics_app,
host=config.listen_host,
port=config.metrics_port,
log_level="error",
)
metrics_server = uvicorn.Server(metrics_config)
_metrics_task = asyncio.create_task(metrics_server.serve())
# webui 路由(暂停挂载,排查路由匹配问题 # CORS 中间件(严维序评审 #8
# app.include_router(webui_router) app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=False,
allow_methods=["*"],
allow_headers=["*"],
)
# 挂载 webui 子路由
app.include_router(webui_router)
# upstream_api_key 启动检查(严维序评审 #5) # upstream_api_key 启动检查(严维序评审 #5)
if not config.upstream_api_key: if not config.upstream_api_key:
@@ -527,26 +538,16 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, Any]:
except asyncio.CancelledError: except asyncio.CancelledError:
pass pass
_metrics_task.cancel()
try:
await _metrics_task
except asyncio.CancelledError:
pass
await http_client.aclose() await http_client.aclose()
logger.info("sidecar_stopped") logger.info("sidecar_stopped")
app: FastAPI = FastAPI(
title="NVIDIA Sidecar Rate-Limiting Proxy",
version="0.1.0",
lifespan=lifespan,
)
# CORS 中间件(在 lifespan 前添加,避免 RuntimeError
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=False,
allow_methods=["*"],
allow_headers=["*"],
)
def _mask_api_key(key: str) -> str: def _mask_api_key(key: str) -> str:
"""对 API Key 进行脱敏处理,仅保留前 4 位以供识别。""" """对 API Key 进行脱敏处理,仅保留前 4 位以供识别。"""
if not key: if not key:
@@ -556,6 +557,13 @@ def _mask_api_key(key: str) -> str:
return key[:4] + "****" return key[:4] + "****"
app: FastAPI = FastAPI(
title="NVIDIA Sidecar Rate-Limiting Proxy",
version="0.1.0",
lifespan=lifespan,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 核心代理处理器 # 核心代理处理器
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -613,13 +621,7 @@ async def _handle_proxy_request(ctx: SidecarContext, request: Request, path: str
# 注入内部元数据到 payload # 注入内部元数据到 payload
payload_for_queue: dict[str, Any] = dict(body_json) payload_for_queue: dict[str, Any] = dict(body_json)
# 剥离 NVIDIA provider 前缀(如 "nvidia/deepseek-ai/deepseek-v4-pro" → "deepseek-ai/deepseek-v4-pro" payload_for_queue["_raw_body"] = body_bytes
if model and "/" in model:
stripped_model: str = model.split("/", 1)[1]
payload_for_queue["model"] = stripped_model
bytes_model_stripped: bytes = json.dumps(body_json).encode()
# Update model in the raw body bytes
payload_for_queue["_raw_body"] = json.dumps(payload_for_queue).encode()
# 尝试入队;PASSTHROUGH 策略下队列满时走直通路径 # 尝试入队;PASSTHROUGH 策略下队列满时走直通路径
try: try:
@@ -766,30 +768,26 @@ async def status(ctx: SidecarContext = Depends(get_context)) -> dict[str, Any]:
# ---- OpenAI 兼容端点 ---- # ---- OpenAI 兼容端点 ----
@app.post("/v1/chat/completions") @app.post("/v1/chat/completions")
async def chat_completions(request: Request) -> Response: async def chat_completions(request: Request, ctx: SidecarContext = Depends(get_context)) -> Response:
"""OpenAI Chat Completions API 代理(含流式支持)。""" """OpenAI Chat Completions API 代理(含流式支持)。"""
ctx: SidecarContext = get_context()
return await _handle_proxy_request(ctx, request, "/v1/chat/completions") return await _handle_proxy_request(ctx, request, "/v1/chat/completions")
@app.post("/v1/completions") @app.post("/v1/completions")
async def completions(request: Request) -> Response: async def completions(request: Request, ctx: SidecarContext = Depends(get_context)) -> Response:
ctx: SidecarContext = get_context()
"""OpenAI Completions API 代理(legacy)。""" """OpenAI Completions API 代理(legacy)。"""
return await _handle_proxy_request(ctx, request, "/v1/completions") return await _handle_proxy_request(ctx, request, "/v1/completions")
@app.post("/v1/embeddings") @app.post("/v1/embeddings")
async def embeddings(request: Request) -> Response: async def embeddings(request: Request, ctx: SidecarContext = Depends(get_context)) -> Response:
ctx: SidecarContext = get_context()
"""OpenAI Embeddings API 代理。""" """OpenAI Embeddings API 代理。"""
return await _handle_proxy_request(ctx, request, "/v1/embeddings") return await _handle_proxy_request(ctx, request, "/v1/embeddings")
@app.get("/v1/models") @app.get("/v1/models")
@app.get("/v1/models/{model_id:path}") @app.get("/v1/models/{model_id:path}")
async def list_models(request: Request, model_id: str | None = None) -> Response: async def list_models(request: Request, model_id: str | None = None, ctx: SidecarContext = Depends(get_context)) -> Response:
ctx: SidecarContext = get_context()
"""OpenAI Models API 代理。""" """OpenAI Models API 代理。"""
path = f"/v1/models/{model_id}" if model_id else "/v1/models" path = f"/v1/models/{model_id}" if model_id else "/v1/models"
return await _handle_proxy_request(ctx, request, path) return await _handle_proxy_request(ctx, request, path)
@@ -798,22 +796,12 @@ async def list_models(request: Request, model_id: str | None = None) -> Response
# ---- 通用代理(catch-all 用于非标准 NVIDIA 端点) ---- # ---- 通用代理(catch-all 用于非标准 NVIDIA 端点) ----
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]) @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"])
async def catch_all(request: Request, path: str) -> Response: async def catch_all(request: Request, path: str, ctx: SidecarContext = Depends(get_context)) -> Response:
ctx: SidecarContext = get_context()
"""通用代理端点:转发任何未匹配的路径到上游。""" """通用代理端点:转发任何未匹配的路径到上游。"""
target_path = f"/{path}" if not path.startswith("/") else path target_path = f"/{path}" if not path.startswith("/") else path
return await _handle_proxy_request(ctx, request, target_path) return await _handle_proxy_request(ctx, request, target_path)
@app.get("/metrics")
async def metrics(ctx: SidecarContext = Depends(get_context)) -> PlainTextResponse:
"""Prometheus 指标端点。"""
return PlainTextResponse(
content=ctx.prometheus.generate_latest().decode(),
media_type="text/plain; version=0.0.4",
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 入口 # 入口
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------