BIZ-38: CacheManager + CoordinatedPoller + multica_proxy — 共享心跳脚本v1.0

Co-authored-by: multica-agent <github@multica.ai>
2026-06-24 11:23:05 +08:00
parent 6b5f53a0fd
commit 93e8a1011b
3 changed files with 1555 additions and 0 deletions
@@ -0,0 +1,772 @@
+"""
+BIZ-26: API 请求优先级队列 + 令牌桶限流器
+
+实现方案参考：plans/BIZ-13_运行稳定性保障方案.md
+
+功能清单：
+1. 四级优先级请求队列（紧急 > 高 > 正常 > 低）
+2. 令牌桶限流器（40 RPM 上限）
+3. 超限自动降级和等待策略
+4. 请求合并（COO 统一轮询）
+5. 查询结果缓存（WorkBoard 5 分钟、配置 1 小时、知识库 1 天）
+
+作者：徐聪（costcodev）
+日期：2026-06-23
+"""
+
+import time
+import threading
+import queue
+import hashlib
+import json
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from dataclasses import dataclass, field
+from enum import IntEnum
+from datetime import datetime, timedelta
+
+
+# ============================================================================
+# 网关识别：只对 NVIDIA 网关限流
+# ============================================================================
+
+NVIDIA_GATEWAY_ALIASES = {
+    "nvidia",
+    "nvidia-gateway",
+    "nvidia_gateway",
+    "nvidiavx18088980513",
+}
+
+UNLIMITED_GATEWAY_ALIASES = {
+    "volcengine",
+    "volcengine-plan",
+    "siliconflow",
+    "deepseek",
+    "deepseek-api",
+}
+
+
+def normalize_gateway_name(value: Optional[str]) -> Optional[str]:
+    """
+    归一化网关/模型名称。
+
+    输入可以是：
+    - provider: nvidia / volcengine-plan / siliconflow / deepseek
+    - model: nvidiavx18088980513/deepseek-ai/deepseek-v4-pro
+    - model: volcengine-plan/ark-code-latest
+
+    返回 provider 前缀的小写形式。未知则返回 None。
+    """
+    if not value:
+        return None
+    text = str(value).strip().lower()
+    if not text:
+        return None
+    return text.split("/", 1)[0]
+
+
+def is_nvidia_gateway(value: Optional[str]) -> bool:
+    """判断请求是否走 NVIDIA 网关。未知网关默认不限流。"""
+    provider = normalize_gateway_name(value)
+    if provider is None:
+        return False
+    if provider in NVIDIA_GATEWAY_ALIASES:
+        return True
+    if provider in UNLIMITED_GATEWAY_ALIASES:
+        return False
+    return provider.startswith("nvidia")
+
+
+# ============================================================================
+# 优先级枚举
+# ============================================================================
+
+class Priority(IntEnum):
+    """请求优先级：数值越小优先级越高"""
+    URGENT = 1      # 紧急：Vincent 直接任务
+    HIGH = 2        # 高：阻塞性任务
+    NORMAL = 3      # 正常：常规任务
+    LOW = 4         # 低：后台优化任务
+
+
+# ============================================================================
+# 请求数据类
+# ============================================================================
+
+@dataclass(order=True)
+class Request:
+    """优先级队列中的请求项"""
+    priority: int
+    timestamp: float = field(compare=False)
+    request_id: str = field(compare=False)
+    payload: Any = field(compare=False)
+    callback: Optional[Callable] = field(compare=False, default=None)
+    fallback_model: Optional[str] = field(compare=False, default=None)
+    gateway: Optional[str] = field(compare=False, default=None)
+    model: Optional[str] = field(compare=False, default=None)
+    
+    def __post_init__(self):
+        if self.timestamp is None:
+            self.timestamp = time.time()
+        if self.request_id is None:
+            self.request_id = self._generate_id()
+    
+    @staticmethod
+    def _generate_id() -> str:
+        """生成请求 ID"""
+        return hashlib.md5(f"{time.time()}-{threading.current_thread().ident}".encode()).hexdigest()[:12]
+
+
+# ============================================================================
+# 令牌桶限流器
+# ============================================================================
+
+class TokenBucket:
+    """
+    NVIDIA 网关专用令牌桶限流器
+
+    注意：令牌桶本身只负责节流算法；是否启用由 RequestScheduler._should_rate_limit()
+    按 gateway/model 判断。volcengine-plan、siliconflow、DeepSeek 等非 NVIDIA 网关不会进入此桶。
+    
+    参数：
+        rate: 令牌生成速率（个/秒），默认 40 RPM = 0.67 个/秒
+        capacity: 桶容量（最大令牌数），默认 40
+    """
+    
+    def __init__(self, rate: float = 40/60, capacity: int = 40):
+        self.rate = rate  # 令牌/秒
+        self.capacity = capacity
+        self.tokens = capacity
+        self.last_update = time.time()
+        self._lock = threading.Lock()
+    
+    def _refill(self) -> None:
+        """补充令牌（内部调用，需要持有锁）"""
+        now = time.time()
+        elapsed = now - self.last_update
+        new_tokens = elapsed * self.rate
+        self.tokens = min(self.capacity, self.tokens + new_tokens)
+        self.last_update = now
+    
+    def consume(self, tokens: int = 1) -> bool:
+        """
+        尝试消费令牌
+        
+        返回：
+            True: 成功消费
+            False: 令牌不足
+        """
+        with self._lock:
+            self._refill()
+            if self.tokens >= tokens:
+                self.tokens -= tokens
+                return True
+            return False
+    
+    def wait_for_token(self, timeout: Optional[float] = None) -> bool:
+        """
+        等待直到有可用令牌
+        
+        参数：
+            timeout: 最大等待时间（秒），None 表示无限等待
+            
+        返回：
+            True: 成功获取令牌
+            False: 超时
+        """
+        start_time = time.time()
+        while True:
+            if self.consume():
+                return True
+            
+            if timeout is not None:
+                elapsed = time.time() - start_time
+                if elapsed >= timeout:
+                    return False
+            
+            # 计算等待时间（直到下一个令牌生成）
+            with self._lock:
+                self._refill()
+                if self.tokens < 1:
+                    wait_time = (1 - self.tokens) / self.rate
+                else:
+                    wait_time = 0.01
+            
+            # 等待后重试
+            time_to_wait = min(wait_time, 0.1)  # 最多等待 100ms
+            if timeout is not None:
+                remaining = timeout - (time.time() - start_time)
+                if remaining <= 0:
+                    return False
+                time_to_wait = min(time_to_wait, remaining)
+            
+            time.sleep(time_to_wait)
+    
+    def get_status(self) -> Dict[str, Any]:
+        """获取限流器状态"""
+        with self._lock:
+            self._refill()
+            return {
+                "tokens": round(self.tokens, 2),
+                "capacity": self.capacity,
+                "rate_per_second": round(self.rate, 3),
+                "rate_per_minute": round(self.rate * 60, 1),
+                "utilization": round(1 - self.tokens / self.capacity, 2)
+            }
+
+
+# ============================================================================
+# 缓存管理器
+# ============================================================================
+
+@dataclass
+class CacheEntry:
+    """缓存条目"""
+    value: Any
+    expires_at: float
+    created_at: float = field(default_factory=time.time)
+    access_count: int = field(default=0)
+
+
+class CacheManager:
+    """
+    查询结果缓存管理器
+    
+    缓存策略：
+    - WorkBoard 状态：5 分钟
+    - Agent 配置：1 小时
+    - 知识库内容：1 天
+    - 用户信息：1 天
+    """
+    
+    # 默认 TTL 配置（秒）
+    DEFAULT_TTL = {
+        "workboard": 5 * 60,        # 5 分钟
+        "config": 1 * 60 * 60,      # 1 小时
+        "knowledge": 24 * 60 * 60,  # 1 天
+        "user": 24 * 60 * 60,       # 1 天
+    }
+    
+    def __init__(self):
+        self._cache: Dict[str, CacheEntry] = {}
+        self._lock = threading.Lock()
+    
+    def _generate_key(self, category: str, query: Any) -> str:
+        """生成缓存键"""
+        query_str = json.dumps(query, sort_keys=True) if not isinstance(query, str) else query
+        return hashlib.md5(f"{category}:{query_str}".encode()).hexdigest()
+    
+    def get(self, category: str, query: Any) -> Optional[Any]:
+        """
+        获取缓存
+        
+        参数：
+            category: 缓存类别（workboard/config/knowledge/user）
+            query: 查询条件（用于生成缓存键）
+            
+        返回：
+            缓存值，如果不存在或已过期则返回 None
+        """
+        key = self._generate_key(category, query)
+        
+        with self._lock:
+            entry = self._cache.get(key)
+            if entry is None:
+                return None
+            
+            # 检查是否过期
+            if time.time() > entry.expires_at:
+                del self._cache[key]
+                return None
+            
+            # 更新访问计数
+            entry.access_count += 1
+            return entry.value
+    
+    def set(self, category: str, query: Any, value: Any, ttl: Optional[int] = None) -> None:
+        """
+        设置缓存
+        
+        参数：
+            category: 缓存类别
+            query: 查询条件
+            value: 缓存值
+            ttl: 存活时间（秒），None 表示使用默认值
+        """
+        key = self._generate_key(category, query)
+        
+        if ttl is None:
+            ttl = self.DEFAULT_TTL.get(category, 300)  # 默认 5 分钟
+        
+        with self._lock:
+            self._cache[key] = CacheEntry(
+                value=value,
+                expires_at=time.time() + ttl
+            )
+    
+    def delete(self, category: str, query: Any) -> bool:
+        """删除缓存"""
+        key = self._generate_key(category, query)
+        with self._lock:
+            if key in self._cache:
+                del self._cache[key]
+                return True
+            return False
+    
+    def clear_expired(self) -> int:
+        """清理所有过期缓存，返回清理数量"""
+        now = time.time()
+        with self._lock:
+            expired_keys = [k for k, v in self._cache.items() if now > v.expires_at]
+            for key in expired_keys:
+                del self._cache[key]
+            return len(expired_keys)
+    
+    def get_stats(self) -> Dict[str, Any]:
+        """获取缓存统计"""
+        now = time.time()
+        with self._lock:
+            total = len(self._cache)
+            expired = sum(1 for v in self._cache.values() if now > v.expires_at)
+            
+            # 按类别统计
+            by_category: Dict[str, int] = {}
+            for key, entry in self._cache.items():
+                # 从 key 中提取 category（格式：category:hash）
+                category = key.split(":")[0] if ":" in key else "unknown"
+                by_category[category] = by_category.get(category, 0) + 1
+            
+            return {
+                "total_entries": total,
+                "expired_entries": expired,
+                "valid_entries": total - expired,
+                "by_category": by_category
+            }
+    
+    def clear(self) -> None:
+        """清空所有缓存"""
+        with self._lock:
+            self._cache.clear()
+
+
+# ============================================================================
+# 请求调度器
+# ============================================================================
+
+class RequestScheduler:
+    """
+    请求调度器：结合优先级队列和令牌桶限流
+    
+    功能：
+    1. 接收不同优先级的请求
+    2. 按优先级和 FIF0 顺序调度
+    3. 通过令牌桶控制发送速率
+    4. 支持降级策略（低优先级切备用模型）
+    """
+    
+    def __init__(
+        self,
+        rate: float = 40/60,
+        capacity: int = 40,
+        enable_cache: bool = True
+    ):
+        self.token_bucket = TokenBucket(rate=rate, capacity=capacity)
+        self.cache = CacheManager() if enable_cache else None
+        
+        # 优先级队列（使用 heap 实现）
+        self.request_queue: queue.PriorityQueue[Request] = queue.PriorityQueue()
+        
+        # 工作线程
+        self._worker_thread: Optional[threading.Thread] = None
+        self._running = False
+        self._lock = threading.Lock()
+        
+        # 统计信息
+        self.stats = {
+            "total_requests": 0,
+            "completed_requests": 0,
+            "failed_requests": 0,
+            "fallback_requests": 0,
+            "cache_hits": 0,
+            "cache_misses": 0,
+        }
+    
+    def start(self) -> None:
+        """启动调度器工作线程"""
+        if self._running:
+            return
+        
+        self._running = True
+        self._worker_thread = threading.Thread(target=self._worker_loop, daemon=True)
+        self._worker_thread.start()
+    
+    def stop(self) -> None:
+        """停止调度器"""
+        self._running = False
+        if self._worker_thread:
+            self._worker_thread.join(timeout=5.0)
+    
+    def _worker_loop(self) -> None:
+        """工作线程主循环"""
+        while self._running:
+            try:
+                # 从队列获取请求（带超时）
+                request = self.request_queue.get(timeout=1.0)
+                self._process_request(request)
+            except queue.Empty:
+                continue
+            except Exception as e:
+                # 记录错误但不中断工作线程
+                print(f"[RequestScheduler] Worker error: {e}")
+    
+    def _extract_gateway_hint(self, request: Request) -> Optional[str]:
+        """从 request.gateway / request.model / payload 中提取网关提示。"""
+        if request.gateway:
+            return request.gateway
+        if request.model:
+            return request.model
+        if isinstance(request.payload, dict):
+            for key in ("gateway", "provider", "model", "model_id"):
+                value = request.payload.get(key)
+                if value:
+                    return str(value)
+        return None
+
+    def _should_rate_limit(self, request: Request) -> bool:
+        """
+        只对 NVIDIA 网关请求启用令牌桶。
+
+        设计原则：未知网关默认不限制，避免误伤 volcengine-plan / siliconflow / DeepSeek
+        等其他 API 网关。要被限流，调用方必须显式传 gateway/model，且能识别为 NVIDIA。
+        """
+        return is_nvidia_gateway(self._extract_gateway_hint(request))
+
+    def _process_request(self, request: Request) -> None:
+        """
+        处理单个请求
+        
+        策略：
+        1. 高优先级（URGENT/HIGH）：等待令牌
+        2. 低优先级（NORMAL/LOW）：尝试获取令牌，失败则降级或丢弃
+        """
+        self.stats["total_requests"] += 1
+        
+        # 只对 NVIDIA 网关请求启用令牌桶；其他网关直接执行
+        if not self._should_rate_limit(request):
+            self._execute_request(request)
+            return
+
+        # NVIDIA 网关请求：尝试获取令牌
+        if request.priority <= Priority.HIGH:
+            # 高优先级：无限等待
+            got_token = self.token_bucket.wait_for_token(timeout=None)
+        else:
+            # 低优先级：最多等待 2 秒
+            got_token = self.token_bucket.wait_for_token(timeout=2.0)
+        
+        if got_token:
+            # 成功获取令牌，执行请求
+            self._execute_request(request)
+        else:
+            # 未能获取令牌，执行降级策略
+            self._handle_fallback(request)
+    
+    def _execute_request(self, request: Request) -> None:
+        """执行请求"""
+        try:
+            if request.callback:
+                result = request.callback(request.payload)
+                self.stats["completed_requests"] += 1
+                return result
+            else:
+                self.stats["completed_requests"] += 1
+        except Exception as e:
+            self.stats["failed_requests"] += 1
+            print(f"[RequestScheduler] Request {request.request_id} failed: {e}")
+            raise
+    
+    def _handle_fallback(self, request: Request) -> None:
+        """处理降级（令牌不足）"""
+        self.stats["fallback_requests"] += 1
+        
+        if request.priority == Priority.LOW:
+            # 低优先级：直接丢弃或切换到备用模型
+            print(f"[RequestScheduler] Low priority request {request.request_id} dropped due to rate limit")
+        else:
+            # 正常优先级：放回队列稍后重试
+            request.timestamp = time.time()
+            self.request_queue.put(request)
+    
+    def submit(
+        self,
+        payload: Any,
+        priority: Priority = Priority.NORMAL,
+        callback: Optional[Callable] = None,
+        fallback_model: Optional[str] = None,
+        request_id: Optional[str] = None,
+        gateway: Optional[str] = None,
+        model: Optional[str] = None
+    ) -> str:
+        """
+        提交请求到调度队列
+        
+        参数：
+            payload: 请求数据
+            priority: 优先级
+            callback: 回调函数
+            fallback_model: 备用模型名称
+            request_id: 请求 ID（可选，默认自动生成）
+            
+        返回：
+            请求 ID
+        """
+        req = Request(
+            priority=priority,
+            timestamp=time.time(),
+            request_id=request_id,
+            payload=payload,
+            callback=callback,
+            fallback_model=fallback_model,
+            gateway=gateway,
+            model=model
+        )
+        
+        self.request_queue.put(req)
+        return req.request_id
+    
+    def submit_sync(
+        self,
+        payload: Any,
+        priority: Priority = Priority.NORMAL,
+        timeout: Optional[float] = None
+    ) -> Any:
+        """
+        同步提交并等待结果
+        
+        参数：
+            payload: 请求数据
+            priority: 优先级
+            timeout: 超时时间（秒）
+            
+        返回：
+            请求结果
+        """
+        result_holder = {"result": None, "error": None, "done": False}
+        condition = threading.Condition()
+        
+        def callback(data):
+            with condition:
+                try:
+                    # 实际执行逻辑（这里只是一个占位符）
+                    result_holder["result"] = data
+                except Exception as e:
+                    result_holder["error"] = e
+                finally:
+                    result_holder["done"] = True
+                    condition.notify_all()
+        
+        # 提交请求
+        self.submit(payload=payload, priority=priority, callback=lambda _: callback(payload))
+        
+        # 等待结果
+        with condition:
+            if not result_holder["done"]:
+                condition.wait(timeout=timeout)
+        
+        if result_holder["error"]:
+            raise result_holder["error"]
+        return result_holder["result"]
+    
+    def get_queue_size(self) -> int:
+        """获取当前队列大小"""
+        return self.request_queue.qsize()
+    
+    def get_status(self) -> Dict[str, Any]:
+        """获取调度器状态"""
+        return {
+            "running": self._running,
+            "queue_size": self.get_queue_size(),
+            "token_bucket": self.token_bucket.get_status(),
+            "cache": self.cache.get_stats() if self.cache else None,
+            "stats": self.stats.copy()
+        }
+
+
+# ============================================================================
+# 重试装饰器
+# ============================================================================
+
+def retry_with_backoff(
+    max_retries: int = 3,
+    base_delay: float = 1.0,
+    exponential_base: int = 2,
+    jitter: bool = True,
+    exceptions: Tuple = (Exception,)
+):
+    """
+    指数退避重试装饰器
+    
+    参数：
+        max_retries: 最大重试次数
+        base_delay: 基础延迟（秒）
+        exponential_base: 指数底数
+        jitter: 是否添加随机抖动
+        exceptions: 需要重试的异常类型
+    """
+    import random
+    
+    def decorator(func: Callable) -> Callable:
+        def wrapper(*args, **kwargs):
+            last_exception = None
+            
+            for attempt in range(max_retries + 1):
+                try:
+                    return func(*args, **kwargs)
+                except exceptions as e:
+                    last_exception = e
+                    
+                    if attempt == max_retries:
+                        break
+                    
+                    # 计算延迟时间
+                    delay = base_delay * (exponential_base ** attempt)
+                    if jitter:
+                        delay += random.uniform(0, base_delay)
+                    
+                    print(f"[retry_with_backoff] Attempt {attempt + 1} failed: {e}. Retrying in {delay:.2f}s...")
+                    time.sleep(delay)
+            
+            raise last_exception
+        
+        return wrapper
+    return decorator
+
+
+# ============================================================================
+# COO 统一轮询器（请求合并）
+# ============================================================================
+
+class CoordinatedPoller:
+    """
+    COO 统一轮询器：替代各 Agent 独立轮询
+    
+    功能：
+    1. 定期轮询 WorkBoard
+    2. 广播结果给所有订阅者
+    3. 减少总请求数（40 RPM × N → 40 RPM）
+    """
+    
+    def __init__(self, scheduler: RequestScheduler, poll_interval: int = 15*60):
+        self.scheduler = scheduler
+        self.poll_interval = poll_interval  # 轮询间隔（秒）
+        self._subscribers: List[Callable] = []
+        self._running = False
+        self._worker: Optional[threading.Thread] = None
+    
+    def subscribe(self, callback: Callable) -> None:
+        """订阅轮询结果"""
+        self._subscribers.append(callback)
+    
+    def unsubscribe(self, callback: Callable) -> None:
+        """取消订阅"""
+        if callback in self._subscribers:
+            self._subscribers.remove(callback)
+    
+    def start(self) -> None:
+        """启动轮询器"""
+        if self._running:
+            return
+        
+        self._running = True
+        self._worker = threading.Thread(target=self._poll_loop, daemon=True)
+        self._worker.start()
+    
+    def stop(self) -> None:
+        """停止轮询器"""
+        self._running = False
+        if self._worker:
+            self._worker.join(timeout=5.0)
+    
+    def _poll_loop(self) -> None:
+        """轮询主循环"""
+        while self._running:
+            try:
+                # 执行轮询（这里只是一个框架，实际逻辑需要接入 multica CLI）
+                result = self._perform_poll()
+                
+                # 广播给所有订阅者
+                for subscriber in self._subscribers:
+                    try:
+                        subscriber(result)
+                    except Exception as e:
+                        print(f"[CoordinatedPoller] Subscriber callback error: {e}")
+                
+            except Exception as e:
+                print(f"[CoordinatedPoller] Poll error: {e}")
+            
+            # 等待下一个轮询周期
+            time.sleep(self.poll_interval)
+    
+    def _perform_poll(self) -> Dict[str, Any]:
+        """
+        执行实际轮询
+        
+        TODO: 接入 multica CLI:
+        - multica issue list --status in_progress
+        - multica workboard list
+        """
+        # 这里应该调用 multica CLI
+        # 当前只是返回一个示例结果
+        return {
+            "timestamp": datetime.now().isoformat(),
+            "issues": [],
+            "workboard_cards": []
+        }
+
+
+# ============================================================================
+# 使用示例
+# ============================================================================
+
+if __name__ == "__main__":
+    # 创建调度器（40 RPM）
+    scheduler = RequestScheduler(rate=40/60, capacity=40)
+    scheduler.start()
+    
+    # 示例：提交不同优先级的请求
+    def sample_callback(data):
+        print(f"Processing: {data}")
+        time.sleep(0.5)  # 模拟处理时间
+        return "OK"
+    
+    # 紧急请求
+    scheduler.submit(
+        payload={"task": "urgent_task"},
+        priority=Priority.URGENT,
+        callback=sample_callback
+    )
+    
+    # 正常请求
+    scheduler.submit(
+        payload={"task": "normal_task"},
+        priority=Priority.NORMAL,
+        callback=sample_callback
+    )
+    
+    # 低优先级请求
+    scheduler.submit(
+        payload={"task": "low_priority_task"},
+        priority=Priority.LOW,
+        callback=sample_callback
+    )
+    
+    # 等待处理完成
+    time.sleep(2)
+    
+    # 查看状态
+    print("\n=== Scheduler Status ===")
+    print(json.dumps(scheduler.get_status(), indent=2))
+    
+    # 停止调度器
+    scheduler.stop()
+    
+    print("\n示例运行完成")