From bcf7d12b4a29f92d8729957a3b1af4bcc1001257 Mon Sep 17 00:00:00 2001
From: Jeff Smith <jeff@unbiasedgeek.com>
Date: Mon, 30 Mar 2026 10:03:48 -0600
Subject: [PATCH] feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 luminos.py            |   9 +++
 luminos_lib/ai.py     | 182 ++++++++++++++++++++++++++++++++++++++++++
 luminos_lib/report.py |  18 +++++
 3 files changed, 209 insertions(+)
 create mode 100644 luminos_lib/ai.py

diff --git a/luminos.py b/luminos.py
index f9a4e6f..a8cf242 100644
--- a/luminos.py
+++ b/luminos.py
@@ -13,6 +13,7 @@ from luminos_lib.recency import find_recent_files
 from luminos_lib.disk import get_disk_usage, top_directories
 from luminos_lib.watch import watch_loop
 from luminos_lib.report import format_report
+from luminos_lib.ai import analyze_directory
 
 
 def scan(target, depth=3, show_hidden=False):
@@ -56,6 +57,9 @@ def main():
                         help="Output report as JSON")
     parser.add_argument("-o", "--output", metavar="FILE",
                         help="Write report to a file")
+    parser.add_argument("--ai", action="store_true",
+                        help="Use Claude AI to analyze directory purpose "
+                             "(requires ANTHROPIC_API_KEY)")
     parser.add_argument("--watch", action="store_true",
                         help="Re-scan every 30 seconds and show diffs")
 
@@ -74,6 +78,11 @@ def main():
 
     report = scan(target, depth=args.depth, show_hidden=args.all)
 
+    if args.ai:
+        brief, detailed = analyze_directory(report, target)
+        report["ai_brief"] = brief
+        report["ai_detailed"] = detailed
+
     if args.json_output:
         output = json.dumps(report, indent=2, default=str)
     else:
diff --git a/luminos_lib/ai.py b/luminos_lib/ai.py
new file mode 100644
index 0000000..b9d39f8
--- /dev/null
+++ b/luminos_lib/ai.py
@@ -0,0 +1,182 @@
+"""AI-powered directory analysis using the Claude API (stdlib only)."""
+
+import json
+import os
+import sys
+import urllib.request
+import urllib.error
+
+API_URL = "https://api.anthropic.com/v1/messages"  # request endpoint
+MODEL = "claude-sonnet-4-20250514"  # model id sent in the request body
+MAX_FILE_SAMPLE_BYTES = 2048  # default read cap for one file sample
+MAX_FILES_TO_SAMPLE = 30  # most file samples included in one prompt
+
+
+def _get_api_key():
+    """Return the stripped ANTHROPIC_API_KEY; warn and return "" if unset."""
+    key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
+    if not key:
+        print("Warning: ANTHROPIC_API_KEY not set. Skipping AI analysis.",
+              file=sys.stderr)
+    return key
+
+
+def _sample_file(path, max_bytes=MAX_FILE_SAMPLE_BYTES):
+    """Read the first max_bytes of path as lossy UTF-8; None if unreadable."""
+    try:
+        with open(path, "rb") as f:
+            return f.read(max_bytes).decode("utf-8", errors="replace")
+    except OSError:
+        return None
+
+
+def _build_context(report, target):
+    """Build a textual context from the scan report for the AI prompt.
+
+    Emits, in order and only when the report has data for them: the rendered
+    tree, file-category counts, per-language line counts, and up to
+    MAX_FILES_TO_SAMPLE file samples (source and config files first).
+    Files whose sample is empty, unreadable, or contains NUL are skipped.
+    """
+    parts = [f"Directory: {target}", ""]
+
+    # Tree structure
+    tree_text = report.get("tree_rendered", "")
+    if tree_text:
+        parts += ["=== Directory tree ===", tree_text, ""]
+
+    # File categories, highest count first
+    cats = report.get("file_categories", {})
+    if cats:
+        parts.append("=== File categories ===")
+        for cat, count in sorted(cats.items(), key=lambda x: -x[1]):
+            parts.append(f"  {cat}: {count}")
+        parts.append("")
+
+    # Languages, highest line count first
+    langs = report.get("languages", [])
+    loc = report.get("lines_of_code", {})
+    if langs:
+        parts.append("=== Languages detected ===")
+        for lang in sorted(loc, key=loc.get, reverse=True):
+            parts.append(f"  {lang}: {loc[lang]} lines")
+        parts.append("")
+
+    # Sample file contents, prioritizing source and config files
+    classified = report.get("classified_files", [])
+    priority = {"source": 0, "config": 1, "document": 2, "data": 3}
+    samplable = sorted(classified,
+                       key=lambda f: priority.get(f["category"], 99))
+    samples = []
+    for f in samplable:
+        if len(samples) >= MAX_FILES_TO_SAMPLE:
+            break
+        content = _sample_file(f["path"])
+        # NUL marks binary data: noise for the model, so never sample it.
+        if not content or not content.strip() or "\0" in content:
+            continue
+        rel = os.path.relpath(f["path"], target)
+        samples.append(f"--- {rel} ---\n{content}")
+
+    if samples:
+        parts.append("=== File samples (first ~2KB each) ===")
+        parts.append("\n\n".join(samples))
+
+    return "\n".join(parts)
+
+
+def _call_claude(api_key, context):
+    """Call the Claude API and return the reply text ("" on any failure).
+
+    Text blocks are concatenated; HTTP, network, decoding and header
+    errors are reported on stderr instead of being raised.
+    """
+    import http.client  # HTTPException (e.g. IncompleteRead) is no OSError
+
+    prompt = (
+        "You are analyzing a directory on a file system. Based on the tree "
+        "structure, file types, languages, and file content samples below, "
+        "produce two sections:\n\n"
+        "1. **BRIEF SUMMARY** (2-4 sentences): What is this directory? What is "
+        "its purpose? What kind of project or data does it contain?\n\n"
+        "2. **DETAILED BREAKDOWN**: A thorough analysis covering:\n"
+        "   - The overall purpose and architecture of the project/directory\n"
+        "   - Key components and what they do\n"
+        "   - Technologies and frameworks in use\n"
+        "   - Notable patterns, conventions, or design decisions\n"
+        "   - Any potential concerns (e.g., missing tests, large binaries, "
+        "stale files)\n\n"
+        "Format your response exactly as:\n"
+        "BRIEF: <your brief summary>\n\n"
+        "DETAILED:\n<your detailed breakdown>\n\n"
+        "Be specific and concrete — reference actual filenames and directories. "
+        "Do not hedge or use filler phrases."
+    )
+
+    payload = json.dumps({
+        "model": MODEL,
+        "max_tokens": 2048,
+        "messages": [{"role": "user", "content": f"{prompt}\n\n{context}"}],
+    }).encode("utf-8")
+
+    req = urllib.request.Request(
+        API_URL, data=payload, method="POST",
+        headers={
+            "Content-Type": "application/json",
+            "x-api-key": api_key,
+            "anthropic-version": "2023-06-01",
+        },
+    )
+
+    try:
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            data = json.loads(resp.read().decode("utf-8"))
+    except urllib.error.HTTPError as e:
+        detail = e.read().decode("utf-8", errors="replace")
+        print(f"Warning: Claude API error {e.code}: {detail}", file=sys.stderr)
+        return ""
+    # URLError is an OSError; ValueError covers bad JSON, UTF-8 and headers.
+    except (OSError, ValueError, http.client.HTTPException) as e:
+        print(f"Warning: Claude API request failed: {e}", file=sys.stderr)
+        return ""
+    return "".join(block["text"] for block in data.get("content", [])
+                   if block.get("type") == "text")
+
+
+def _parse_response(text):
+    """Split the raw AI reply into its (brief, detailed) sections.
+
+    The reply is expected to carry a "BRIEF:" marker followed by a
+    "DETAILED:" marker. Either may be absent; a reply with neither marker
+    is returned whole as the brief.
+    """
+    _, has_brief, after_brief = text.partition("BRIEF:")
+    if has_brief:
+        # Whatever follows BRIEF: is split once more at the first DETAILED:.
+        brief, _, detailed = after_brief.partition("DETAILED:")
+        return brief.strip(), detailed.strip()
+
+    _, has_detailed, detailed = text.partition("DETAILED:")
+    if has_detailed:
+        return "", detailed.strip()
+
+    # Fallback: no markers at all, so the whole reply becomes the brief.
+    return text.strip(), ""
+
+
+def analyze_directory(report, target):
+    """Ask Claude to describe the scanned directory.
+
+    Returns a (brief, detailed) pair of strings; both are empty when the
+    API key is missing or the API call produces no text.
+    """
+    nothing = ("", "")
+    key = _get_api_key()
+    if not key:
+        return nothing
+
+    # Progress note goes to stderr.
+    print("  [AI] Analyzing directory with Claude...", file=sys.stderr)
+    # An empty reply is handled the same way as a missing key.
+    reply = _call_claude(key, _build_context(report, target))
+    return _parse_response(reply) if reply else nothing
diff --git a/luminos_lib/report.py b/luminos_lib/report.py
index dcd046b..bcd98df 100644
--- a/luminos_lib/report.py
+++ b/luminos_lib/report.py
@@ -11,6 +11,15 @@ def format_report(report, target):
     lines.append(f"  Target: {target}")
     lines.append(sep)
 
+    # AI brief summary (top of report)
+    ai_brief = report.get("ai_brief", "")
+    if ai_brief:
+        lines.append("")
+        lines.append(">> SUMMARY (AI)")
+        lines.append("-" * 40)
+        for paragraph in ai_brief.split("\n"):
+            lines.append(f"  {paragraph}")
+
     # Directory tree
     lines.append("")
     lines.append(">> DIRECTORY TREE")
@@ -78,6 +87,15 @@ def format_report(report, target):
     else:
         lines.append("  No usage data available.")
 
+    # AI detailed breakdown (end of report)
+    ai_detailed = report.get("ai_detailed", "")
+    if ai_detailed:
+        lines.append("")
+        lines.append(">> DETAILED AI ANALYSIS")
+        lines.append("-" * 40)
+        for paragraph in ai_detailed.split("\n"):
+            lines.append(f"  {paragraph}")
+
     lines.append("")
     lines.append(sep)
     lines.append("  End of report.")