feat: add AI-powered directory analysis via Claude API
Adds an --ai flag that sends the directory tree, file categories, and sampled file contents to Claude for analysis. Produces a brief summary at the top of the report and a detailed breakdown at the end. Requires the ANTHROPIC_API_KEY environment variable; without it, the AI analysis is skipped with a warning and the normal report is still produced. Uses only the standard library (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d6f36ecea5
commit
bcf7d12b4a
3 changed files with 209 additions and 0 deletions
|
|
@ -13,6 +13,7 @@ from luminos_lib.recency import find_recent_files
|
||||||
from luminos_lib.disk import get_disk_usage, top_directories
|
from luminos_lib.disk import get_disk_usage, top_directories
|
||||||
from luminos_lib.watch import watch_loop
|
from luminos_lib.watch import watch_loop
|
||||||
from luminos_lib.report import format_report
|
from luminos_lib.report import format_report
|
||||||
|
from luminos_lib.ai import analyze_directory
|
||||||
|
|
||||||
|
|
||||||
def scan(target, depth=3, show_hidden=False):
|
def scan(target, depth=3, show_hidden=False):
|
||||||
|
|
@ -56,6 +57,9 @@ def main():
|
||||||
help="Output report as JSON")
|
help="Output report as JSON")
|
||||||
parser.add_argument("-o", "--output", metavar="FILE",
|
parser.add_argument("-o", "--output", metavar="FILE",
|
||||||
help="Write report to a file")
|
help="Write report to a file")
|
||||||
|
parser.add_argument("--ai", action="store_true",
|
||||||
|
help="Use Claude AI to analyze directory purpose "
|
||||||
|
"(requires ANTHROPIC_API_KEY)")
|
||||||
parser.add_argument("--watch", action="store_true",
|
parser.add_argument("--watch", action="store_true",
|
||||||
help="Re-scan every 30 seconds and show diffs")
|
help="Re-scan every 30 seconds and show diffs")
|
||||||
|
|
||||||
|
|
@ -74,6 +78,11 @@ def main():
|
||||||
|
|
||||||
report = scan(target, depth=args.depth, show_hidden=args.all)
|
report = scan(target, depth=args.depth, show_hidden=args.all)
|
||||||
|
|
||||||
|
if args.ai:
|
||||||
|
brief, detailed = analyze_directory(report, target)
|
||||||
|
report["ai_brief"] = brief
|
||||||
|
report["ai_detailed"] = detailed
|
||||||
|
|
||||||
if args.json_output:
|
if args.json_output:
|
||||||
output = json.dumps(report, indent=2, default=str)
|
output = json.dumps(report, indent=2, default=str)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
182
luminos_lib/ai.py
Normal file
182
luminos_lib/ai.py
Normal file
|
|
@ -0,0 +1,182 @@
|
||||||
|
"""AI-powered directory analysis using the Claude API (stdlib only)."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
|
# Messages endpoint that _call_claude sends its POST request to.
API_URL = "https://api.anthropic.com/v1/messages"
|
||||||
|
# Model identifier sent as the "model" field of the request body.
MODEL = "claude-sonnet-4-20250514"
|
||||||
|
# Default read limit that _sample_file applies to each sampled file.
MAX_FILE_SAMPLE_BYTES = 2048
|
||||||
|
# Upper bound on how many file samples _build_context includes in the prompt.
MAX_FILES_TO_SAMPLE = 30
|
||||||
|
|
||||||
|
|
||||||
|
def _get_api_key():
    """Return the value of ANTHROPIC_API_KEY, or "" when it is unset or empty.

    When no key is available a warning is written to stderr so the caller can
    simply skip the AI analysis.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if api_key:
        return api_key

    # Missing key is not fatal: warn and hand back the empty value.
    print("Warning: ANTHROPIC_API_KEY not set. Skipping AI analysis.",
          file=sys.stderr)
    return api_key
|
||||||
|
|
||||||
|
|
||||||
|
def _sample_file(path, max_bytes=MAX_FILE_SAMPLE_BYTES):
    """Return the first ``max_bytes`` bytes of a text file, decoded as UTF-8.

    The file is read in binary mode so that the limit is counted in bytes and
    the result does not depend on the platform's default text encoding.

    Args:
        path: Path of the file to sample.
        max_bytes: Maximum number of bytes to read from the start of the file.

    Returns:
        The decoded sample (undecodable bytes become U+FFFD), or ``None`` when
        the file cannot be opened/read or the sample contains a NUL byte,
        which is treated as the marker of binary content.
    """
    try:
        with open(path, "rb") as f:
            raw = f.read(max_bytes)
    except OSError:
        return None

    # A text-mode read with errors="replace" never raises UnicodeDecodeError,
    # so binary files used to come back as replacement-character noise.
    # Detect them explicitly instead; NUL bytes do not occur in ordinary
    # UTF-8/ASCII text.
    if b"\x00" in raw:
        return None

    return raw.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_context(report, target):
    """Assemble the plain-text context that accompanies the AI prompt.

    Sections are emitted in this order, and any section without data is
    omitted: the target directory, the rendered tree, file counts per
    category (largest first), lines of code per language (largest first),
    and the sampled content of at most MAX_FILES_TO_SAMPLE files.

    Args:
        report: Scan report dict; read via the keys "tree_rendered",
            "file_categories", "languages", "lines_of_code" and
            "classified_files".
        target: Directory that was scanned; sample paths are shown relative
            to it.

    Returns:
        The sections joined with newlines as a single string.
    """
    lines = [f"Directory: {target}", ""]

    rendered_tree = report.get("tree_rendered", "")
    if rendered_tree:
        lines += ["=== Directory tree ===", rendered_tree, ""]

    category_counts = report.get("file_categories", {})
    if category_counts:
        lines.append("=== File categories ===")
        by_count = sorted(category_counts.items(), key=lambda item: -item[1])
        lines.extend(f" {name}: {total}" for name, total in by_count)
        lines.append("")

    languages = report.get("languages", [])
    line_counts = report.get("lines_of_code", {})
    if languages:
        lines.append("=== Languages detected ===")
        ranked = sorted(line_counts, key=line_counts.get, reverse=True)
        lines.extend(f" {name}: {line_counts[name]} lines" for name in ranked)
        lines.append("")

    # Source and config files are read first; unknown categories go last.
    rank = {"source": 0, "config": 1, "document": 2, "data": 3}
    candidates = sorted(
        report.get("classified_files", []),
        key=lambda entry: rank.get(entry["category"], 99),
    )

    excerpts = []
    for entry in candidates:
        if len(excerpts) >= MAX_FILES_TO_SAMPLE:
            break
        text = _sample_file(entry["path"])
        # Unreadable files (None) and blank files do not count toward the cap.
        if not text or not text.strip():
            continue
        rel_path = os.path.relpath(entry["path"], target)
        excerpts.append(f"--- {rel_path} ---\n{text}")

    if excerpts:
        lines.append("=== File samples (first ~2KB each) ===")
        lines.append("\n\n".join(excerpts))

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _call_claude(api_key, context):
    """Send the analysis prompt plus ``context`` to the Claude Messages API.

    Args:
        api_key: Anthropic API key, sent in the ``x-api-key`` header.
        context: Text built from the scan report; appended after the prompt.

    Returns:
        The concatenated text of every ``text`` content block in the reply,
        or ``""`` when the request fails or the reply does not have the
        expected shape. Failures are reported as warnings on stderr; network,
        HTTP, decoding and malformed-response errors are not raised.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list; needed for http.client.HTTPException below.
    import http.client

    prompt = (
        "You are analyzing a directory on a file system. Based on the tree "
        "structure, file types, languages, and file content samples below, "
        "produce two sections:\n\n"
        "1. **BRIEF SUMMARY** (2-4 sentences): What is this directory? What is "
        "its purpose? What kind of project or data does it contain?\n\n"
        "2. **DETAILED BREAKDOWN**: A thorough analysis covering:\n"
        " - The overall purpose and architecture of the project/directory\n"
        " - Key components and what they do\n"
        " - Technologies and frameworks in use\n"
        " - Notable patterns, conventions, or design decisions\n"
        " - Any potential concerns (e.g., missing tests, large binaries, "
        "stale files)\n\n"
        "Format your response exactly as:\n"
        "BRIEF: <your brief summary>\n\n"
        "DETAILED:\n<your detailed breakdown>\n\n"
        "Be specific and concrete — reference actual filenames and directories. "
        "Do not hedge or use filler phrases."
    )

    payload = json.dumps({
        "model": MODEL,
        "max_tokens": 2048,
        "messages": [
            {"role": "user", "content": f"{prompt}\n\n{context}"},
        ],
    }).encode("utf-8")

    req = urllib.request.Request(
        API_URL,
        data=payload,
        headers={
            "Content-Type": "application/json",
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            data = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        # errors="replace" keeps a non-UTF-8 error page from raising here.
        detail = e.read().decode("utf-8", errors="replace")
        print(f"Warning: Claude API error {e.code}: {detail}", file=sys.stderr)
        return ""
    except (urllib.error.URLError, OSError,
            http.client.HTTPException, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # HTTPException covers truncated or otherwise broken HTTP replies.
        print(f"Warning: Claude API request failed: {e}", file=sys.stderr)
        return ""

    # Validate the shape before indexing so an unexpected payload produces a
    # warning instead of an uncaught AttributeError/KeyError.
    content = data.get("content") if isinstance(data, dict) else None
    if not isinstance(content, list):
        print("Warning: Claude API returned an unexpected response.",
              file=sys.stderr)
        return ""

    # A reply may carry several content blocks; keep all of the text ones.
    return "".join(
        block["text"]
        for block in content
        if isinstance(block, dict)
        and block.get("type") == "text"
        and isinstance(block.get("text"), str)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_response(text):
    """Split the model reply into ``(brief, detailed)`` strings.

    The reply is expected to contain a "BRIEF:" marker followed by a
    "DETAILED:" marker. Text between the two markers is the brief summary and
    text after "DETAILED:" is the detailed breakdown. With only "BRIEF:" the
    remainder is the brief; with only "DETAILED:" the brief is empty; with
    neither marker the whole reply is used as the brief. Both parts are
    whitespace-stripped.
    """
    _, brief_marker, after_brief = text.partition("BRIEF:")
    if brief_marker:
        # partition leaves the tail empty when "DETAILED:" is absent, which
        # matches the "brief only" case.
        summary, _, breakdown = after_brief.partition("DETAILED:")
        return summary.strip(), breakdown.strip()

    _, detailed_marker, breakdown = text.partition("DETAILED:")
    if detailed_marker:
        return "", breakdown.strip()

    # Fallback: no markers at all, treat the entire reply as the brief.
    return text.strip(), ""
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_directory(report, target):
    """Ask Claude to describe the scanned directory.

    Args:
        report: Scan report dict used to build the prompt context.
        target: Directory that was scanned.

    Returns:
        A ``(brief, detailed)`` pair of strings. Both are "" when no API key
        is configured or the API call yields no text.
    """
    key = _get_api_key()
    if key:
        # Progress note goes to stderr so it never mixes with the report.
        print(" [AI] Analyzing directory with Claude...", file=sys.stderr)
        reply = _call_claude(key, _build_context(report, target))
        if reply:
            return _parse_response(reply)

    return "", ""
|
||||||
|
|
@ -11,6 +11,15 @@ def format_report(report, target):
|
||||||
lines.append(f" Target: {target}")
|
lines.append(f" Target: {target}")
|
||||||
lines.append(sep)
|
lines.append(sep)
|
||||||
|
|
||||||
|
# AI brief summary (top of report)
|
||||||
|
ai_brief = report.get("ai_brief", "")
|
||||||
|
if ai_brief:
|
||||||
|
lines.append("")
|
||||||
|
lines.append(">> SUMMARY (AI)")
|
||||||
|
lines.append("-" * 40)
|
||||||
|
for paragraph in ai_brief.split("\n"):
|
||||||
|
lines.append(f" {paragraph}")
|
||||||
|
|
||||||
# Directory tree
|
# Directory tree
|
||||||
lines.append("")
|
lines.append("")
|
||||||
lines.append(">> DIRECTORY TREE")
|
lines.append(">> DIRECTORY TREE")
|
||||||
|
|
@ -78,6 +87,15 @@ def format_report(report, target):
|
||||||
else:
|
else:
|
||||||
lines.append(" No usage data available.")
|
lines.append(" No usage data available.")
|
||||||
|
|
||||||
|
# AI detailed breakdown (end of report)
|
||||||
|
ai_detailed = report.get("ai_detailed", "")
|
||||||
|
if ai_detailed:
|
||||||
|
lines.append("")
|
||||||
|
lines.append(">> DETAILED AI ANALYSIS")
|
||||||
|
lines.append("-" * 40)
|
||||||
|
for paragraph in ai_detailed.split("\n"):
|
||||||
|
lines.append(f" {paragraph}")
|
||||||
|
|
||||||
lines.append("")
|
lines.append("")
|
||||||
lines.append(sep)
|
lines.append(sep)
|
||||||
lines.append(" End of report.")
|
lines.append(" End of report.")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue