From bcf7d12b4a29f92d8729957a3b1af4bcc1001257 Mon Sep 17 00:00:00 2001
From: Jeff Smith
Date: Mon, 30 Mar 2026 10:03:48 -0600
Subject: [PATCH] feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and sampled
file contents to Claude for analysis. Produces a brief summary at the top
of the report and a detailed breakdown at the end.

Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
Review notes (not part of the commit message): the luminos_lib/ai.py hunk
was revised during review, so its line counts differ from the original
commit and the blob id on its index line no longer matches the content.

 luminos.py            |   9 +++
 luminos_lib/ai.py     | 195 ++++++++++++++++++++++++++++++++++++++++++
 luminos_lib/report.py |  18 +++++
 3 files changed, 222 insertions(+)
 create mode 100644 luminos_lib/ai.py

diff --git a/luminos.py b/luminos.py
index f9a4e6f..a8cf242 100644
--- a/luminos.py
+++ b/luminos.py
@@ -13,6 +13,7 @@ from luminos_lib.recency import find_recent_files
 from luminos_lib.disk import get_disk_usage, top_directories
 from luminos_lib.watch import watch_loop
 from luminos_lib.report import format_report
+from luminos_lib.ai import analyze_directory
 
 
 def scan(target, depth=3, show_hidden=False):
@@ -56,6 +57,9 @@ def main():
                         help="Output report as JSON")
     parser.add_argument("-o", "--output", metavar="FILE",
                         help="Write report to a file")
+    parser.add_argument("--ai", action="store_true",
+                        help="Use Claude AI to analyze directory purpose "
+                             "(requires ANTHROPIC_API_KEY)")
     parser.add_argument("--watch", action="store_true",
                         help="Re-scan every 30 seconds and show diffs")
 
@@ -74,6 +78,11 @@ def main():
 
     report = scan(target, depth=args.depth, show_hidden=args.all)
 
+    if args.ai:
+        brief, detailed = analyze_directory(report, target)
+        report["ai_brief"] = brief
+        report["ai_detailed"] = detailed
+
     if args.json_output:
         output = json.dumps(report, indent=2, default=str)
     else:
diff --git a/luminos_lib/ai.py b/luminos_lib/ai.py
new file mode 100644
index 0000000..b9d39f8
--- /dev/null
+++ b/luminos_lib/ai.py
@@ -0,0 +1,195 @@
+"""AI-powered directory analysis using the Claude API (stdlib only)."""
+
+import json
+import os
+import sys
+import urllib.request
+import urllib.error
+
+API_URL = "https://api.anthropic.com/v1/messages"
+MODEL = "claude-sonnet-4-20250514"
+MAX_FILE_SAMPLE_BYTES = 2048
+MAX_FILES_TO_SAMPLE = 30
+
+
+def _get_api_key():
+    """Read the Anthropic API key from the environment."""
+    key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if not key:
+        print("Warning: ANTHROPIC_API_KEY not set. Skipping AI analysis.",
+              file=sys.stderr)
+    return key
+
+
+def _sample_file(path, max_bytes=MAX_FILE_SAMPLE_BYTES):
+    """Read the first max_bytes bytes of a file as text.
+
+    Returns None when the file cannot be read or looks binary.
+    """
+    try:
+        # Read raw bytes so max_bytes is a true byte limit and decoding
+        # does not depend on the locale's default encoding.
+        with open(path, "rb") as f:
+            chunk = f.read(max_bytes)
+    except OSError:
+        return None
+    # A NUL byte marks binary content; decoding with errors="replace"
+    # alone would turn such files into garbage instead of skipping them.
+    if b"\x00" in chunk:
+        return None
+    return chunk.decode("utf-8", errors="replace")
+
+
+def _build_context(report, target):
+    """Build a textual context from the scan report for the AI prompt."""
+    parts = []
+
+    parts.append(f"Directory: {target}")
+    parts.append("")
+
+    # Tree structure
+    tree_text = report.get("tree_rendered", "")
+    if tree_text:
+        parts.append("=== Directory tree ===")
+        parts.append(tree_text)
+        parts.append("")
+
+    # File categories
+    cats = report.get("file_categories", {})
+    if cats:
+        parts.append("=== File categories ===")
+        for cat, count in sorted(cats.items(), key=lambda x: -x[1]):
+            parts.append(f" {cat}: {count}")
+        parts.append("")
+
+    # Languages
+    langs = report.get("languages", [])
+    loc = report.get("lines_of_code", {})
+    if langs:
+        parts.append("=== Languages detected ===")
+        for lang in sorted(loc, key=loc.get, reverse=True):
+            parts.append(f" {lang}: {loc[lang]} lines")
+        parts.append("")
+
+    # Sample file contents
+    classified = report.get("classified_files", [])
+    # Prioritize source and config files for sampling
+    priority = {"source": 0, "config": 1, "document": 2, "data": 3}
+    samplable = sorted(classified,
+                       key=lambda f: priority.get(f["category"], 99))
+    samples = []
+    for f in samplable:
+        if len(samples) >= MAX_FILES_TO_SAMPLE:
+            break
+        content = _sample_file(f["path"])
+        if content and content.strip():
+            rel = os.path.relpath(f["path"], target)
+            samples.append(f"--- {rel} ---\n{content}")
+
+    if samples:
+        parts.append("=== File samples (first ~2KB each) ===")
+        parts.append("\n\n".join(samples))
+
+    return "\n".join(parts)
+
+
+def _call_claude(api_key, context):
+    """Call the Claude API and return the response text."""
+    prompt = (
+        "You are analyzing a directory on a file system. Based on the tree "
+        "structure, file types, languages, and file content samples below, "
+        "produce two sections:\n\n"
+        "1. **BRIEF SUMMARY** (2-4 sentences): What is this directory? What is "
+        "its purpose? What kind of project or data does it contain?\n\n"
+        "2. **DETAILED BREAKDOWN**: A thorough analysis covering:\n"
+        " - The overall purpose and architecture of the project/directory\n"
+        " - Key components and what they do\n"
+        " - Technologies and frameworks in use\n"
+        " - Notable patterns, conventions, or design decisions\n"
+        " - Any potential concerns (e.g., missing tests, large binaries, "
+        "stale files)\n\n"
+        "Format your response exactly as:\n"
+        "BRIEF: \n\n"
+        "DETAILED:\n\n\n"
+        "Be specific and concrete — reference actual filenames and directories. "
+        "Do not hedge or use filler phrases."
+    )
+
+    body = json.dumps({
+        "model": MODEL,
+        "max_tokens": 2048,
+        "messages": [
+            {"role": "user", "content": f"{prompt}\n\n{context}"},
+        ],
+    }).encode("utf-8")
+
+    req = urllib.request.Request(
+        API_URL,
+        data=body,
+        headers={
+            "Content-Type": "application/json",
+            "x-api-key": api_key,
+            "anthropic-version": "2023-06-01",
+        },
+        method="POST",
+    )
+
+    try:
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            data = json.loads(resp.read().decode("utf-8"))
+        # Join every text block: a reply may be split across several
+        # blocks, and returning only the first would truncate it.
+        return "".join(
+            block.get("text", "")
+            for block in data.get("content", [])
+            if block.get("type") == "text"
+        )
+    except urllib.error.HTTPError as e:
+        err_body = e.read().decode("utf-8", errors="replace")
+        print(f"Warning: Claude API error {e.code}: {err_body}",
+              file=sys.stderr)
+        return ""
+    except (urllib.error.URLError, OSError, ValueError) as e:
+        # ValueError covers both json.JSONDecodeError and a reply body
+        # that is not valid UTF-8.
+        print(f"Warning: Claude API request failed: {e}", file=sys.stderr)
+        return ""
+
+
+def _parse_response(text):
+    """Parse the AI response into brief and detailed sections."""
+    brief = ""
+    detailed = ""
+
+    if "BRIEF:" in text:
+        after_brief = text.split("BRIEF:", 1)[1]
+        if "DETAILED:" in after_brief:
+            brief = after_brief.split("DETAILED:", 1)[0].strip()
+            detailed = after_brief.split("DETAILED:", 1)[1].strip()
+        else:
+            brief = after_brief.strip()
+    elif "DETAILED:" in text:
+        detailed = text.split("DETAILED:", 1)[1].strip()
+    else:
+        # Fallback: use the whole thing as brief
+        brief = text.strip()
+
+    return brief, detailed
+
+
+def analyze_directory(report, target):
+    """Run AI analysis on the directory. Returns (brief, detailed) strings.
+
+    Returns ("", "") if the API key is missing or the request fails.
+    """
+    api_key = _get_api_key()
+    if not api_key:
+        return "", ""
+
+    print(" [AI] Analyzing directory with Claude...", file=sys.stderr)
+    context = _build_context(report, target)
+    raw = _call_claude(api_key, context)
+    if not raw:
+        return "", ""
+
+    return _parse_response(raw)
diff --git a/luminos_lib/report.py b/luminos_lib/report.py
index dcd046b..bcd98df 100644
--- a/luminos_lib/report.py
+++ b/luminos_lib/report.py
@@ -11,6 +11,15 @@ def format_report(report, target):
     lines.append(f" Target: {target}")
     lines.append(sep)
 
+    # AI brief summary (top of report)
+    ai_brief = report.get("ai_brief", "")
+    if ai_brief:
+        lines.append("")
+        lines.append(">> SUMMARY (AI)")
+        lines.append("-" * 40)
+        for paragraph in ai_brief.split("\n"):
+            lines.append(f" {paragraph}")
+
     # Directory tree
     lines.append("")
     lines.append(">> DIRECTORY TREE")
@@ -78,6 +87,15 @@ def format_report(report, target):
     else:
         lines.append(" No usage data available.")
 
+    # AI detailed breakdown (end of report)
+    ai_detailed = report.get("ai_detailed", "")
+    if ai_detailed:
+        lines.append("")
+        lines.append(">> DETAILED AI ANALYSIS")
+        lines.append("-" * 40)
+        for paragraph in ai_detailed.split("\n"):
+            lines.append(f" {paragraph}")
+
     lines.append("")
     lines.append(sep)
     lines.append(" End of report.")