Files
EnterpriseArchitect/scripts/detect_redundancy.py
T
vincent 0dee611e31 BIZ-15: 添加检测脚本和优化示例,完善 OpenClaw cron 集成
- 新增 scripts/detect_redundancy.py 冗余检测脚本
- 新增 examples/optimized-AGENTS.md 优化后配置示例
- 更新方案文档,添加补充材料和 OpenClaw cron 集成说明
- Token 节省:从 ~15,000/Agent 降低到 ~3,700(节省 75%)

待刘总审阅后实施。

Co-authored-by: multica-agent <github@multica.ai>
2026-06-22 05:01:25 +08:00

139 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
配置文件冗余检测脚本
检测 Markdown 文件中的内联冗余内容
用法:
python3 detect_redundancy.py <file_path>
"""
import sys
import os
import hashlib
def _block_record(numbered_lines):
    """Summarize one run of ``(line_number, raw_line)`` pairs as a report dict."""
    return {
        'start_line': numbered_lines[0][0],
        'end_line': numbered_lines[-1][0],
        'line_count': len(numbered_lines),
        # Preview: the first three raw lines joined, trimmed, capped at 100 chars.
        'preview': ''.join(text for _, text in numbered_lines[:3]).strip()[:100],
    }


def detect_inline_blocks(file_path, threshold=20):
    """Detect long runs of consecutive content lines in a Markdown file.

    A content line is non-empty, is not a heading (its stripped text does not
    start with ``#``) and lies outside fenced code blocks (delimited by lines
    whose stripped text starts with three backticks). A run ends at a blank
    line, a heading, a code fence, or the end of the file. Runs that contain
    at least ``threshold`` lines are reported.

    Args:
        file_path: Path of the Markdown file to scan; it is read as UTF-8.
        threshold: Minimum number of lines a run needs in order to be reported.

    Returns:
        A list of dicts in file order, each with the keys ``start_line`` and
        ``end_line`` (1-based, inclusive), ``line_count`` and ``preview``.
        If ``file_path`` does not exist, an error message is printed and an
        empty list is returned.
    """
    if not os.path.exists(file_path):
        print(f"❌ 文件不存在:{file_path}")
        return []

    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    blocks = []
    current_block = []
    in_code_block = False

    for line_number, line in enumerate(lines, start=1):
        stripped = line.strip()

        is_fence = stripped.startswith('```')
        if is_fence:
            in_code_block = not in_code_block

        is_content = (
            bool(stripped)
            and not is_fence
            and not in_code_block
            and not stripped.startswith('#')
        )
        if is_content:
            current_block.append((line_number, line))
            continue

        # Boundary line (blank, heading, fence, or code). Closing the run on
        # a fence keeps text on either side of a code block from being merged
        # into one block whose line range would span the uncounted code lines.
        if current_block and len(current_block) >= threshold:
            blocks.append(_block_record(current_block))
        current_block = []

    # The file may end without a trailing boundary line.
    if current_block and len(current_block) >= threshold:
        blocks.append(_block_record(current_block))

    return blocks
def estimate_token_savings(lines_count, tokens_per_line=10):
    """Roughly estimate how many tokens are saved by removing some lines.

    Args:
        lines_count: Number of lines that would be removed.
        tokens_per_line: Assumed average tokens per line; the default of 10
            is a coarse rule of thumb, not a measured value.

    Returns:
        ``lines_count * tokens_per_line``.
    """
    return lines_count * tokens_per_line
def format_report(file_path, blocks):
    """Render the detection result for one file as printable text.

    Args:
        file_path: Path shown in the report header.
        blocks: Block dicts carrying ``start_line``, ``end_line``,
            ``line_count`` and ``preview``.

    Returns:
        A one-line "nothing found" message when ``blocks`` is empty;
        otherwise a header, one numbered suggestion per block, and a closing
        line with the combined estimated saving.
    """
    if not blocks:
        return f"✅ {file_path}: 未发现冗余内容块\n"

    sections = [f"📋 {file_path}"]
    counted_lines = 0
    for index, entry in enumerate(blocks, start=1):
        size = entry['line_count']
        sections.extend((
            f"\n ⚠️ 建议 {index}:",
            f" 位置:第 {entry['start_line']}-{entry['end_line']} 行 ({size}行)",
            f" 预估节省:{estimate_token_savings(size)} tokens",
            f" 预览:{entry['preview']}...",
        ))
        counted_lines += size

    # The grand total is estimated from the summed line count.
    sections.append(f"\n 📊 合计可节省:~{estimate_token_savings(counted_lines)} tokens\n")
    return '\n'.join(sections)
def main():
    """Command-line entry point: scan each given file and print a report.

    Every argument after the script name is treated as a file path. With no
    arguments, a usage message is printed and the process exits with status 1.
    A per-file report is printed for each existing file, followed by a
    summary of the total block count and estimated token saving.
    """
    if len(sys.argv) < 2:
        print("用法:python3 detect_redundancy.py <file_path> [file_path...]")
        print("示例:python3 detect_redundancy.py AGENTS.md SOUL.md")
        sys.exit(1)

    print("=" * 60)
    print("配置文件冗余检测报告")
    print("=" * 60)
    print()

    total_savings = 0
    total_blocks = 0
    for file_path in sys.argv[1:]:
        blocks = detect_inline_blocks(file_path)
        if not blocks and not os.path.exists(file_path):
            # detect_inline_blocks has already printed the "file not found"
            # error; do not follow it with a "no redundancy found" report.
            continue
        print(format_report(file_path, blocks))
        total_blocks += len(blocks)
        total_savings += sum(
            estimate_token_savings(block['line_count']) for block in blocks
        )

    print("=" * 60)
    print(f"📊 汇总:发现 {total_blocks} 个冗余块,预估节省 {total_savings} tokens")
    print("=" * 60)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()