c50dcc9cb2
四轮评审反馈全部处理: 🔴 Critical (5): - _stats data race: 新增 _stats_lock (asyncio.Lock) + _increment_stat() helper - Admin API 无认证: 新增 SIDECAR_ADMIN_TOKEN Bearer Token 认证 - API Key 明文暴露: GET config 返回 masked api_key (前4位+****) - queue_max_size hot-reload 假生效: PriorityQueue.set_max_size() + 收缩保护 - SIDECAR_TIMEOUT 6000→60s + 上限截断 300s 🟠 Major (3): - upstream_api_key 启动检查: lifespan 阶段 warning 日志 - Dashboard HTML 无缓存: 300s TTL 内存缓存 - queue_stats 异常日志: logger.warning(queue_stats_unavailable) 🟡 Medium (3): - CORS middleware 配置 - httpx 连接池限制 (max_connections=100, keepalive=20) - SSE retry: 3000 字段 🟢 Minor (1): - _extract_model 类型注解 body: dict→Any - passthrough 硬编码 30s→_config.request_timeout mypy strict: 5 files, zero errors Reviewed-by: 梁思筑, 严维序, 陆怀瑾, 沈路明 Co-authored-by: multica-agent <github@multica.ai>
221 lines
6.5 KiB
Python
221 lines
6.5 KiB
Python
"""
NVIDIA Sidecar 限流代理 — 配置管理模块 (§3.1)

集中管理 Sidecar 运行参数,支持环境变量覆盖和 YAML 配置文件。
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import warnings
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
|
||
@dataclass
class SidecarConfig:
    """Runtime configuration for the sidecar.

    Each field records, under ``metadata["env"]``, the name of the
    environment variable that may override it. Precedence is:
    environment variable > YAML configuration file > default value.
    """

    # ---- Network ----
    listen_host: str = field(default="127.0.0.1", metadata={"env": "SIDECAR_HOST"})
    listen_port: int = field(default=9190, metadata={"env": "SIDECAR_PORT"})
    metrics_port: int = field(default=9191, metadata={"env": "SIDECAR_METRICS_PORT"})

    # ---- Upstream ----
    upstream_url: str = field(
        default="https://integrate.api.nvidia.com/v1",
        metadata={"env": "SIDECAR_UPSTREAM"},
    )
    upstream_api_key: str = field(default="", metadata={"env": "SIDECAR_API_KEY"})

    # ---- Rate limiting ----
    rate_rpm: int = field(default=40, metadata={"env": "SIDECAR_RATE_RPM"})
    bucket_capacity: int = field(default=40, metadata={"env": "SIDECAR_BUCKET_CAPACITY"})

    # ---- Timeouts ----
    request_timeout: float = field(default=60.0, metadata={"env": "SIDECAR_TIMEOUT"})

    # ---- Queue ----
    queue_max_size: int = field(default=500, metadata={"env": "SIDECAR_QUEUE_MAX"})
    low_priority_timeout: float = field(default=2.0, metadata={"env": "SIDECAR_LOW_TIMEOUT"})

    # ---- Fallback ----
    fallback_enabled_passthrough: bool = field(
        default=True,
        metadata={"env": "SIDECAR_FALLBACK_PASSTHROUGH"},
    )

    # ---- Logging ----
    log_level: str = field(default="INFO", metadata={"env": "SIDECAR_LOG_LEVEL"})
|
||
|
||
|
||
def _apply_env_overrides(config: SidecarConfig) -> SidecarConfig:
    """Override configuration fields from environment variables.

    Walks the dataclass fields of ``config``. For every field that declares
    ``metadata={"env": ...}`` and whose environment variable is set, the raw
    string is converted to the field's type (bool, int, float, otherwise kept
    as a string) and assigned to the field.

    A value that cannot be converted emits a warning and leaves the field
    unchanged. This also applies to booleans: only the tokens
    true/1/yes/on and false/0/no/off (case-insensitive, surrounding
    whitespace ignored) are accepted; an empty string counts as false.

    Args:
        config: The configuration instance to update in place.

    Returns:
        The same ``config`` instance, after overrides have been applied.
    """
    import dataclasses as _dc
    import typing as _typing

    truthy = frozenset({"true", "1", "yes", "on"})
    # The empty string is kept as "false" so that ``VAR=`` behaves as before.
    falsy = frozenset({"false", "0", "no", "off", ""})

    # ``from __future__ import annotations`` (PEP 563) turns annotations into
    # strings; get_type_hints resolves them back to real types. Resolution is
    # best-effort: on failure the raw ``fld.type`` (possibly a string such as
    # "int") is used, which the comparisons below also handle.
    try:
        resolved_types = _typing.get_type_hints(type(config))
    except Exception:
        resolved_types = {}

    for fld in _dc.fields(config):
        env_key: str | None = fld.metadata.get("env")
        if env_key is None:
            continue
        env_val = os.environ.get(env_key)
        if env_val is None:
            continue

        target_type = resolved_types.get(fld.name, fld.type)
        target_type_name: str = getattr(target_type, "__name__", str(target_type))
        try:
            if target_type is bool or target_type == "bool":
                token = env_val.strip().lower()
                if token in truthy:
                    value: object = True
                elif token in falsy:
                    value = False
                else:
                    # Do not silently turn a typo into False.
                    raise ValueError(
                        "不是有效的布尔值 (可用: true/false/1/0/yes/no/on/off)"
                    )
            elif target_type is int or target_type == "int":
                value = int(env_val)
            elif target_type is float or target_type == "float":
                value = float(env_val)
            else:
                value = env_val
        except (ValueError, TypeError) as exc:
            warnings.warn(
                f"无法将环境变量 {env_key}={env_val!r} 转换为 {target_type_name}: {exc}"
            )
            continue

        setattr(config, fld.name, value)

    return config
|
||
|
||
|
||
def _validate_config(config: SidecarConfig) -> list[str]:
    """Check the configuration for implausible values.

    A port clash between ``listen_port`` and ``metrics_port`` is only
    reported. Invalid values of ``rate_rpm``, ``bucket_capacity``,
    ``queue_max_size`` and ``request_timeout`` are reported and replaced in
    place: non-positive values fall back to the default, and a
    ``request_timeout`` above 300 is clamped to 300.

    Args:
        config: The configuration to inspect; may be mutated.

    Returns:
        One message per detected problem; empty when nothing was found.
    """
    issues: list[str] = []

    # Port clash check (reported only, nothing is changed).
    if config.listen_port == config.metrics_port:
        issues.append(
            f"listen_port ({config.listen_port}) 与 metrics_port ({config.metrics_port}) 相同"
        )

    # rate_rpm lower bound.
    if config.rate_rpm <= 0:
        issues.append(f"rate_rpm ({config.rate_rpm}) 无效,回退到默认值 40")
        config.rate_rpm = 40

    # bucket_capacity lower bound, handled the same way as rate_rpm.
    if config.bucket_capacity <= 0:
        issues.append(
            f"bucket_capacity ({config.bucket_capacity}) 无效,回退到默认值 40"
        )
        config.bucket_capacity = 40

    # queue_max_size lower bound.
    if config.queue_max_size <= 0:
        issues.append(
            f"queue_max_size ({config.queue_max_size}) 无效,回退到默认值 500"
        )
        config.queue_max_size = 500

    # request_timeout bounds. ``not (x > 0)`` rather than ``x <= 0`` so that
    # NaN, for which every comparison is False, is rejected as well.
    if not (config.request_timeout > 0):
        issues.append(
            f"request_timeout ({config.request_timeout}) 无效,回退到默认值 60"
        )
        config.request_timeout = 60.0
    elif config.request_timeout > 300.0:
        issues.append(
            f"request_timeout ({config.request_timeout}) 异常偏高,已截断为 300"
        )
        config.request_timeout = 300.0

    return issues
|
||
|
||
|
||
def load_config(path: str | None = None) -> SidecarConfig:
    """Load the sidecar configuration.

    Sources are applied in this order, later ones overriding earlier ones:

    1. Defaults declared on the ``SidecarConfig`` dataclass.
    2. The YAML configuration file, when ``path`` is given.
    3. Environment variable overrides.

    Validation issues are emitted as warnings; they do not raise.

    Args:
        path: Optional path to a YAML configuration file. When ``None`` only
            defaults and environment variables are used.

    Returns:
        The validated ``SidecarConfig`` instance.

    Raises:
        FileNotFoundError: ``path`` does not point to an existing file.
        yaml.YAMLError: The file cannot be parsed as YAML, or its top-level
            value is not a mapping.
    """
    config = SidecarConfig()

    if path is not None:
        import dataclasses as _dc

        import yaml

        cfg_path = Path(path)
        if not cfg_path.is_file():
            raise FileNotFoundError(f"配置文件不存在: {cfg_path}")

        try:
            # An empty document loads as None; treat it as "no overrides".
            raw: Any = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
        except yaml.YAMLError as exc:
            raise yaml.YAMLError(f"YAML 解析失败 ({cfg_path}): {exc}") from exc

        # A scalar or list at the top level cannot carry field overrides;
        # fail with a clear message instead of a TypeError further down.
        if not isinstance(raw, dict):
            raise yaml.YAMLError(
                f"YAML 顶层必须是映射 ({cfg_path}): 实际类型 {type(raw).__name__}"
            )

        # Override declared fields only; unknown keys in the file are ignored.
        for fld in _dc.fields(config):
            value = raw.get(fld.name)
            # A key with a null value keeps the default rather than storing
            # None in a typed field.
            if value is not None:
                setattr(config, fld.name, value)

    # Environment variables have the highest priority.
    config = _apply_env_overrides(config)

    # Validate and surface every issue as a warning.
    for issue in _validate_config(config):
        warnings.warn(issue)

    return config