#!/usr/bin/env python3
"""Redundancy detector for configuration files.

Scans Markdown files for long runs of inline content (consecutive lines that
are neither blank, headings, nor inside fenced code) and reports them together
with a rough estimate of the tokens they occupy.

Usage:
    python3 detect_redundancy.py <file_path> [file_path...]
"""
import sys
import os
import hashlib


def _summarize_block(run):
    """Build the report record for one run of ``(line_number, text)`` pairs."""
    return {
        'start_line': run[0][0],
        'end_line': run[-1][0],
        'line_count': len(run),
        # Preview: the first three lines joined, trimmed, capped at 100 chars.
        'preview': ''.join(text for _, text in run[:3]).strip()[:100],
    }


def detect_inline_blocks(file_path: str, threshold: int = 20) -> list:
    """Find inline content blocks that are at least ``threshold`` lines long.

    A block is a run of non-empty, non-heading lines outside fenced code.
    Blank lines and lines starting with ``#`` terminate the current run.
    Fence lines and fenced content are skipped *without* terminating the run,
    so a reported line range may span a fenced code block while
    ``line_count`` only counts the prose lines.

    Args:
        file_path: Path of the Markdown file to scan (read as UTF-8).
        threshold: Minimum number of lines for a run to be reported.

    Returns:
        A list of dicts with the keys ``start_line``, ``end_line`` (1-based,
        inclusive), ``line_count`` and ``preview``. The list is empty when the
        file is missing or cannot be read; a message is printed in that case.
    """
    if not os.path.exists(file_path):
        print(f"❌ 文件不存在:{file_path}")
        return []

    # A directory, a permission problem or a non-UTF-8 file must not abort a
    # multi-file run: report it and treat the file as having no blocks.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError) as exc:
        print(f"❌ 无法读取文件:{file_path} ({exc})")
        return []

    blocks = []
    current_block = []
    in_code_block = False

    for line_no, line in enumerate(lines, start=1):
        stripped = line.strip()

        # A fence line toggles code mode; neither the fence nor the fenced
        # content is counted, and neither ends the current run.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            continue

        # Dense content: non-empty and not a heading.
        if stripped and not stripped.startswith('#'):
            current_block.append((line_no, line))
            continue

        # Blank line or heading: close the current run.
        if current_block and len(current_block) >= threshold:
            blocks.append(_summarize_block(current_block))
        current_block = []

    # The file may end while a run is still open.
    if current_block and len(current_block) >= threshold:
        blocks.append(_summarize_block(current_block))

    return blocks


def estimate_token_savings(lines_count: int) -> int:
    """Estimate the token savings (rough heuristic: 1 line ≈ 10 tokens)."""
    return lines_count * 10


def format_report(file_path: str, blocks: list) -> str:
    """Format the detection report for one file.

    Args:
        file_path: Path shown in the report header.
        blocks: Block records as returned by ``detect_inline_blocks``.

    Returns:
        A single-line "nothing found" message when ``blocks`` is empty,
        otherwise a multi-line report with one suggestion per block followed
        by the total estimated savings.
    """
    if not blocks:
        return f"✅ {file_path}: 未发现冗余内容块\n"

    report = [f"📋 {file_path}"]
    total_lines = 0

    for i, block in enumerate(blocks, 1):
        report.append(f"\n ⚠️ 建议 {i}:")
        report.append(
            f" 位置:第 {block['start_line']}-{block['end_line']} 行 "
            f"({block['line_count']}行)"
        )
        report.append(
            f" 预估节省:{estimate_token_savings(block['line_count'])} tokens"
        )
        report.append(f" 预览:{block['preview']}...")
        total_lines += block['line_count']

    report.append(
        f"\n 📊 合计可节省:~{estimate_token_savings(total_lines)} tokens\n"
    )
    return '\n'.join(report)


def main() -> None:
    """Scan every file named on the command line and print a report.

    Exits with status 1 after printing usage when no file path is given.
    """
    if len(sys.argv) < 2:
        print("用法:python3 detect_redundancy.py [file_path...]")
        print("示例:python3 detect_redundancy.py AGENTS.md SOUL.md")
        sys.exit(1)

    banner = "=" * 60
    print(banner)
    print("配置文件冗余检测报告")
    print(banner)
    print()

    total_savings = 0
    total_blocks = 0

    for file_path in sys.argv[1:]:
        blocks = detect_inline_blocks(file_path)
        print(format_report(file_path, blocks))
        total_blocks += len(blocks)
        total_savings += sum(
            estimate_token_savings(block['line_count']) for block in blocks
        )

    print(banner)
    print(f"📊 汇总:发现 {total_blocks} 个冗余块,预估节省 {total_savings} tokens")
    print(banner)


if __name__ == '__main__':
    main()