0dee611e31
- 新增 scripts/detect_redundancy.py 冗余检测脚本 - 新增 examples/optimized-AGENTS.md 优化后配置示例 - 更新方案文档,添加补充材料和 OpenClaw cron 集成说明 - Token 节省:从 ~15,000/Agent 降低到 ~3,700(节省 75%) 待刘总审阅后实施。 Co-authored-by: multica-agent <github@multica.ai>
139 lines
4.3 KiB
Python
139 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
配置文件冗余检测脚本
|
||
检测 Markdown 文件中的内联冗余内容
|
||
|
||
用法:
|
||
python3 detect_redundancy.py <file_path> [file_path...]
|
||
"""
|
||
|
||
import sys
|
||
import os
|
||
import hashlib
|
||
|
||
def _summarize_block(block_lines):
    """Build the report entry for one non-empty run of ``(line_number, text)`` pairs."""
    return {
        'start_line': block_lines[0][0],
        'end_line': block_lines[-1][0],
        'line_count': len(block_lines),
        # Preview: the first three lines joined, trimmed, capped at 100 chars.
        'preview': ''.join(text for _, text in block_lines[:3]).strip()[:100],
    }


def detect_inline_blocks(file_path, threshold=20):
    """Find long runs of inline content in a Markdown file.

    A run is a sequence of non-blank lines that do not start with ``#``.
    A blank line or a ``#`` line ends the current run. Lines starting with
    a triple-backtick fence toggle "inside code" state; the fence lines and
    everything between them are ignored entirely, so they neither extend
    nor end a run. As a consequence a run may span a fenced block, in which
    case ``end_line - start_line + 1`` is larger than ``line_count``.

    Args:
        file_path: Path of the file to scan; it is read as UTF-8.
        threshold: Minimum number of lines a run needs in order to be
            reported. Empty runs are never reported, even when the
            threshold is zero or negative.

    Returns:
        A list of dicts, in file order, each with the keys ``start_line``
        and ``end_line`` (1-based line numbers of the first and last line
        of the run), ``line_count`` and ``preview``. If ``file_path`` does
        not exist, a message is printed and an empty list is returned.
    """
    if not os.path.exists(file_path):
        print(f"❌ 文件不存在:{file_path}")
        return []

    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    blocks = []
    current_block = []
    in_code_block = False

    for line_number, line in enumerate(lines, start=1):
        stripped = line.strip()

        # A fence line flips the state and is itself skipped.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            continue

        # Fenced content is invisible to run tracking.
        if in_code_block:
            continue

        if stripped and not stripped.startswith('#'):
            current_block.append((line_number, line))
            continue

        # Blank line or heading: close the current run. The emptiness guard
        # keeps threshold <= 0 from indexing into an empty run.
        if current_block and len(current_block) >= threshold:
            blocks.append(_summarize_block(current_block))
        current_block = []

    # The file may end while a run is still open.
    if current_block and len(current_block) >= threshold:
        blocks.append(_summarize_block(current_block))

    return blocks
|
||
|
||
|
||
def estimate_token_savings(lines_count):
    """Return a rough token estimate for ``lines_count`` lines.

    Uses a flat heuristic of 10 tokens per line.
    """
    tokens_per_line = 10
    return lines_count * tokens_per_line
|
||
|
||
|
||
def format_report(file_path, blocks):
    """Render the detection result for one file as printable text.

    Args:
        file_path: Path shown in the report header.
        blocks: List of dicts with the keys ``start_line``, ``end_line``,
            ``line_count`` and ``preview``.

    Returns:
        A single "nothing found" line when ``blocks`` is empty; otherwise
        a header, one numbered suggestion per block (location, estimated
        savings, preview) and a closing total, joined with newlines.
    """
    if not blocks:
        return f"✅ {file_path}: 未发现冗余内容块\n"

    sections = [f"📋 {file_path}"]
    for index, entry in enumerate(blocks, 1):
        sections.extend([
            f"\n ⚠️ 建议 {index}:",
            f" 位置:第 {entry['start_line']}-{entry['end_line']} 行 ({entry['line_count']}行)",
            f" 预估节省:{estimate_token_savings(entry['line_count'])} tokens",
            f" 预览:{entry['preview']}...",
        ])

    # The closing total is estimated from the summed line counts.
    combined_lines = sum(entry['line_count'] for entry in blocks)
    sections.append(f"\n 📊 合计可节省:~{estimate_token_savings(combined_lines)} tokens\n")

    return '\n'.join(sections)
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Scans every path given on the command line, prints one report per
    file, then prints a summary with the total number of blocks found and
    the total estimated token savings. With no path arguments it prints
    usage help and exits with status 1.
    """
    targets = sys.argv[1:]
    if not targets:
        print("用法:python3 detect_redundancy.py <file_path> [file_path...]")
        print("示例:python3 detect_redundancy.py AGENTS.md SOUL.md")
        sys.exit(1)

    banner = "=" * 60
    print(banner)
    print("配置文件冗余检测报告")
    print(banner)
    print()

    total_blocks = 0
    total_savings = 0
    for file_path in targets:
        found = detect_inline_blocks(file_path)
        print(format_report(file_path, found))

        total_blocks += len(found)
        total_savings += sum(
            estimate_token_savings(entry['line_count']) for entry in found
        )

    print(banner)
    print(f"📊 汇总:发现 {total_blocks} 个冗余块,预估节省 {total_savings} tokens")
    print(banner)
|
||
|
||
|
||
# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()