luminos/luminos_lib/ai.py

"""AI-powered directory analysis using a multi-pass, cache-driven agent loop.

Architecture:
  1. Discover all directories under the target
  2. Sort leaves-first (deepest directories first)
  3. Run an isolated agent loop per directory (max 10 turns each)
  4. Cache every file and directory summary to disk
  5. Run a final synthesis pass reading only directory cache entries

Uses the Anthropic SDK for streaming, automatic retries, and token counting.
Uses tree-sitter for AST parsing and python-magic for file classification.
"""

import json
import os
import shlex
import subprocess
import sys
from collections import namedtuple
from datetime import datetime, timezone

import anthropic
import magic

from luminos_lib.ast_parser import parse_structure
from luminos_lib.cache import _CacheManager, _get_investigation_id
from luminos_lib.prompts import (
    _DIR_SYSTEM_PROMPT,
    _PLANNING_SYSTEM_PROMPT,
    _SURVEY_SYSTEM_PROMPT,
    _SYNTHESIS_SYSTEM_PROMPT,
)
from luminos_lib.tree import build_tree, render_tree

# Model identifier passed to every client.messages.stream() call in
# _call_api_streaming().
MODEL = "claude-sonnet-4-20250514"

# Context budget: trigger early exit when a single API call's input_tokens
# (the actual size of the context window in use, NOT the cumulative sum
# across turns) approaches the model's real context limit. Sonnet 4 has
# a 200k context window; we leave a 30% safety margin for the response
# and any tool result we're about to append.
MAX_CONTEXT = 200_000
# 70% of MAX_CONTEXT (140,000 tokens); compared against the most recent
# call's input_tokens by _TokenTracker.budget_exceeded().
CONTEXT_BUDGET = int(MAX_CONTEXT * 0.70)

# Pricing per 1M tokens (Claude Sonnet).
# Used only by _TokenTracker.summary() to print an approximate cost.
INPUT_PRICE_PER_M = 3.00
OUTPUT_PRICE_PER_M = 15.00

# Directories to always skip during investigation.
# Entries starting with "*" are suffix patterns (see _should_skip_dir());
# every other entry must equal the directory name exactly.
_SKIP_DIRS = {
    ".git", "__pycache__", "node_modules", ".tox", ".mypy_cache",
    ".pytest_cache", ".venv", "venv", ".env", "dist", "build",
    ".eggs", "*.egg-info", ".svn", ".hg",
}

# Commands the run_command tool is allowed to execute.
# Matched against the basename of the command's first word.
_COMMAND_WHITELIST = {"wc", "file", "grep", "head", "tail", "stat", "du", "find"}

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _get_api_key():
    """Return the value of ANTHROPIC_API_KEY, or "" when it is unset.

    A missing or empty key produces a warning on stderr; the empty string
    is still returned so the caller can decide how to proceed.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if api_key:
        return api_key
    print("Warning: ANTHROPIC_API_KEY not set. Skipping AI analysis.",
          file=sys.stderr)
    return api_key


def _path_is_safe(path, target):
    """Return True if *path* resolves to *target* or somewhere inside it.

    Both arguments are passed through os.path.realpath() first, so
    symlinks and ".." components are resolved before the comparison.
    The containment test is done on whole path components: "/a/bc" is
    not considered to be inside "/a/b".
    """
    real = os.path.realpath(path)
    target_real = os.path.realpath(target)
    if real == target_real:
        return True
    # A filesystem root ("/" or "C:\\") already ends with a separator.
    # Appending another one would yield "//", which no resolved path
    # starts with, so every path under a root target would be rejected.
    if target_real.endswith(os.sep):
        prefix = target_real
    else:
        prefix = target_real + os.sep
    return real.startswith(prefix)


def _now_iso():
    """Return the current UTC time as an ISO 8601 string with offset."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()


def _should_skip_dir(name):
    """Return True if directory *name* is covered by _SKIP_DIRS.

    A name matches either by being an exact member of the set or by
    ending with the tail of a "*"-prefixed entry ("*.egg-info" matches
    any name ending in ".egg-info").
    """
    if name in _SKIP_DIRS:
        return True
    # str.endswith accepts a tuple; an empty tuple never matches.
    suffixes = tuple(
        entry[1:] for entry in _SKIP_DIRS if entry.startswith("*")
    )
    return name.endswith(suffixes)


# ---------------------------------------------------------------------------
# Token tracker
# ---------------------------------------------------------------------------

class _TokenTracker:
    """Accumulate token usage reported by successive API calls.

    The tracker keeps two kinds of numbers:
      - running sums (total_* for the whole run, loop_* for the current
        directory loop), which are what cost reporting needs
      - last_input, the input_tokens of the most recent call only, which
        reflects how full the context window currently is

    The running input sum must not be used as a stand-in for context
    size: every turn's input_tokens already contains the whole message
    history, so adding turns together counts the same tokens repeatedly.
    Budget checks use last_input; billing uses the totals. (See #44.)
    """

    def __init__(self):
        # Whole-run sums; never reset.
        self.total_input = 0
        self.total_output = 0
        # Per-loop counters start from the same state reset_loop() restores.
        self.reset_loop()

    def record(self, usage):
        """Fold one API call's usage object into all counters.

        Attributes missing from *usage* are treated as 0.
        """
        tokens_in = getattr(usage, "input_tokens", 0)
        tokens_out = getattr(usage, "output_tokens", 0)
        self.total_input += tokens_in
        self.total_output += tokens_out
        self.loop_input += tokens_in
        self.loop_output += tokens_out
        self.last_input = tokens_in
        self._loop_turns += 1

    def reset_loop(self):
        """Zero the per-loop counters; the run totals are left untouched."""
        self.loop_input = self.loop_output = 0
        self.last_input = 0
        self._loop_turns = 0

    @property
    def loop_total(self):
        """Input plus output tokens recorded since the last reset_loop()."""
        return self.loop_input + self.loop_output

    def budget_exceeded(self):
        """Return True if the latest call's input exceeded CONTEXT_BUDGET."""
        return self.last_input > CONTEXT_BUDGET

    def summary(self):
        """Return a one-line usage report with an approximate cost."""
        input_cost = self.total_input * INPUT_PRICE_PER_M / 1_000_000
        output_cost = self.total_output * OUTPUT_PRICE_PER_M / 1_000_000
        total_cost = input_cost + output_cost
        return (f"{self.total_input:,} input / {self.total_output:,} output "
                f"(approx ${total_cost:.2f})")


# ---------------------------------------------------------------------------
# Tool definitions and registry
#
# Tools are declared once via register_tool() at the bottom of the tool
# implementations section. Each registration lands its schema in one or
# more scope lists (_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS /
# _PLANNING_TOOLS) and its handler, if it has one, in _TOOL_DISPATCH
# (used by _execute_tool()).
#
# Tools intercepted by the loop body rather than dispatched — submit_report,
# submit_survey, and submit_plan — register their schema only and have no
# handler entry.
# ---------------------------------------------------------------------------

# Per-scope lists of tool schema dicts ({"name", "description",
# "input_schema"}), appended to by register_tool() in registration order.
_DIR_TOOLS = []
_SYNTHESIS_TOOLS = []
_SURVEY_TOOLS = []
_PLANNING_TOOLS = []
# Tool name -> handler callable; looked up by _execute_tool(). Keyed by
# name only, so a handler is shared by every scope the tool appears in.
_TOOL_DISPATCH = {}

# Scope name (as used in register_tool's `scopes` argument) -> the
# corresponding schema list above.
_TOOL_REGISTRIES = {
    "dir": _DIR_TOOLS,
    "synthesis": _SYNTHESIS_TOOLS,
    "survey": _SURVEY_TOOLS,
    "planning": _PLANNING_TOOLS,
}


def register_tool(name, description, schema, scopes, handler=None):
    """Add a tool schema to each listed scope and optionally bind a handler.

    The same tool name may be registered more than once, with a different
    schema per scope (submit_report does this for the dir and synthesis
    loops). Handlers are keyed by name only, so supply handler= on one
    registration and leave it out on the others.

    An unknown scope name raises KeyError.
    """
    # One dict per call; every listed scope receives the same object.
    entry = dict(name=name, description=description, input_schema=schema)
    for scope_name in scopes:
        _TOOL_REGISTRIES[scope_name].append(entry)
    if handler is None:
        return
    _TOOL_DISPATCH[name] = handler


# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------

def _tool_read_file(args, target, _cache):
    """Return up to ``max_bytes`` bytes of a file inside *target* as text.

    args keys:
      path       file path; relative paths are joined onto *target*
      max_bytes  byte limit (default 4096); anything that is not a
                 positive integer falls back to the default

    The file is read in binary mode and decoded as UTF-8 with
    errors="replace", so the limit and the truncation note are both
    expressed in bytes, matching os.path.getsize(). A multi-byte character
    cut at the limit shows up as a replacement character.

    Returns the text, "(empty file)" for a zero-length read, or an
    "Error: ..." / "Error reading file: ..." string. Never raises OSError.
    """
    path = args.get("path", "")
    max_bytes = args.get("max_bytes", 4096)
    # read(-1) would return the whole file and read(0) would report a
    # non-empty file as empty, so only accept a positive integer.
    if (isinstance(max_bytes, bool) or not isinstance(max_bytes, int)
            or max_bytes <= 0):
        max_bytes = 4096
    if not os.path.isabs(path):
        path = os.path.join(target, path)
    if not _path_is_safe(path, target):
        return f"Error: path '{path}' is outside the target directory."
    try:
        file_size = os.path.getsize(path)
        with open(path, "rb") as f:
            raw = f.read(max_bytes)
    except OSError as e:
        return f"Error reading file: {e}"
    if not raw:
        return "(empty file)"
    content = raw.decode("utf-8", errors="replace")
    if file_size > max_bytes:
        content += (
            f"\n\n[TRUNCATED — showed {max_bytes} of {file_size} bytes. "
            f"Call again with a larger max_bytes or use "
            f"run_command('tail -n ... {os.path.relpath(path, target)}') "
            f"to see the rest.]"
        )
    return content


def _tool_list_directory(args, target, _cache):
    """List one directory inside *target*, one formatted line per entry.

    args keys:
      path         directory to list (defaults to *target*); relative
                   paths are joined onto *target*
      show_hidden  include dot-prefixed names (default False)

    Subdirectories are shown as "name/  (dir)"; other entries show their
    size in bytes and, when libmagic reports one, their MIME type. An
    entry whose stat/type lookup raises OSError is shown as "(stat
    failed)". Returns "(empty directory)" when nothing is listed, or an
    "Error ..." string for an unsafe path, a non-directory, or a failed
    listing.
    """
    dir_path = args.get("path", target)
    include_hidden = args.get("show_hidden", False)
    if not os.path.isabs(dir_path):
        dir_path = os.path.join(target, dir_path)
    if not _path_is_safe(dir_path, target):
        return f"Error: path '{dir_path}' is outside the target directory."
    if not os.path.isdir(dir_path):
        return f"Error: '{dir_path}' is not a directory."

    try:
        names = sorted(os.listdir(dir_path))
    except OSError as e:
        return f"Error listing directory: {e}"

    rows = []
    for name in names:
        if name.startswith(".") and not include_hidden:
            continue
        full = os.path.join(dir_path, name)
        try:
            # stat first: a broken symlink fails here and is reported as
            # "(stat failed)" before any type detection is attempted.
            info = os.stat(full)
            if os.path.isdir(full):
                row = f"  {name}/  (dir)"
            else:
                mime = magic.from_file(full, mime=True)
                suffix = f"  [{mime}]" if mime else ""
                row = f"  {name}  ({info.st_size} bytes){suffix}"
        except OSError:
            row = f"  {name}  (stat failed)"
        rows.append(row)
    return "\n".join(rows) if rows else "(empty directory)"


# Stand-alone shell control operators. Without a shell they would reach the
# binary as literal arguments, so they are rejected with a clear message.
_SHELL_OPERATORS = frozenset(
    {"|", "||", "&", "&&", ";", ">", ">>", "<", "<<"}
)

# find(1) actions that execute other programs or write/delete files.
_FIND_UNSAFE_ACTIONS = frozenset({
    "-delete", "-exec", "-execdir", "-ok", "-okdir",
    "-fprint", "-fprint0", "-fprintf", "-fls",
})


def _tool_run_command(args, target, _cache):
    """Run one whitelisted, read-only command with *target* as its cwd.

    args keys:
      command  command line, e.g. "grep -rn TODO src"

    The command string is produced by the model and may be influenced by
    the files under investigation, so it is treated as untrusted:

      - it is tokenized with shlex.split() and executed WITHOUT a shell,
        so ";", "&&", "|", "$(...)", backticks and redirection cannot
        start additional programs. Globs and environment variables are
        consequently not expanded either.
      - the first word's basename must be in _COMMAND_WHITELIST, and the
        bare name is what gets executed (resolved via PATH), so a
        path-qualified look-alike such as "./grep" is never run.
      - find actions that execute programs or write/delete files are
        refused.

    NOTE(review): arguments are not confined to *target*; a command can
    still read paths outside it (e.g. "head /etc/hostname").

    Returns the command's stdout (with stderr appended when the exit
    status is non-zero), "(no output)", or an "Error: ..." string.
    """
    command = args.get("command", "")
    if not isinstance(command, str):
        return "Error: command must be a string."
    try:
        argv = shlex.split(command)
    except ValueError as e:
        # Unbalanced quotes or a dangling escape.
        return f"Error: could not parse command: {e}"
    if not argv:
        return "Error: empty command."
    binary = os.path.basename(argv[0])
    if binary not in _COMMAND_WHITELIST:
        return (
            f"Error: '{binary}' is not allowed. "
            f"Whitelist: {', '.join(sorted(_COMMAND_WHITELIST))}"
        )
    if any(token in _SHELL_OPERATORS for token in argv[1:]):
        return (
            "Error: shell operators (pipes, redirection, chaining) are "
            "not supported. Run one whitelisted command at a time."
        )
    if binary == "find" and any(
        token in _FIND_UNSAFE_ACTIONS for token in argv[1:]
    ):
        return (
            "Error: find actions that execute commands or write files "
            "are not allowed."
        )
    # Execute the whitelisted name itself, not whatever path was supplied.
    argv[0] = binary
    try:
        result = subprocess.run(
            argv, capture_output=True, text=True,
            timeout=15, cwd=target,
        )
        output = result.stdout
        if result.returncode != 0 and result.stderr:
            output += f"\n(stderr: {result.stderr.strip()})"
        return output.strip() if output.strip() else "(no output)"
    except subprocess.TimeoutExpired:
        return "Error: command timed out after 15 seconds."
    except OSError as e:
        return f"Error running command: {e}"


def _tool_parse_structure(args, target, _cache):
    """Run parse_structure() on a file inside *target*.

    args keys:
      path  source file; relative paths are joined onto *target*

    Returns whatever parse_structure() returns, or an "Error: ..." string
    when the resolved path falls outside *target*.
    """
    requested = args.get("path", "")
    if os.path.isabs(requested):
        resolved = requested
    else:
        resolved = os.path.join(target, requested)
    if _path_is_safe(resolved, target):
        return parse_structure(resolved)
    return f"Error: path '{resolved}' is outside the target directory."


def _tool_write_cache(args, _target, cache):
    """Store a summary entry through cache.write_entry().

    args keys:
      cache_type  "file" or "dir"; anything else yields an error string
      path        path the entry describes (default "")
      data        entry payload (default {})

    Returns the value of cache.write_entry(), or the error string.
    """
    kind = args.get("cache_type", "")
    if kind != "file" and kind != "dir":
        return "Error: cache_type must be 'file' or 'dir'."
    return cache.write_entry(kind, args.get("path", ""), args.get("data", {}))


def _tool_read_cache(args, _target, cache):
    """Fetch a cached entry via cache.read_entry() and render it as JSON.

    args keys:
      cache_type  "file" or "dir"; anything else yields an error string
      path        path to look up (default "")

    Returns the entry as indented JSON, the literal string "null" when no
    entry exists, or the error string.
    """
    kind = args.get("cache_type", "")
    if kind in ("file", "dir"):
        found = cache.read_entry(kind, args.get("path", ""))
        return "null" if found is None else json.dumps(found, indent=2)
    return "Error: cache_type must be 'file' or 'dir'."


def _tool_list_cache(args, _target, cache):
    """List cached entry paths of one type, one per line.

    args keys:
      cache_type  "file" or "dir"; anything else yields an error string

    Returns the newline-joined result of cache.list_entries(),
    "(no cached entries)" when that result is empty, or the error string.
    """
    kind = args.get("cache_type", "")
    if kind != "file" and kind != "dir":
        return "Error: cache_type must be 'file' or 'dir'."
    cached_paths = cache.list_entries(kind)
    return "\n".join(cached_paths) if cached_paths else "(no cached entries)"


def _tool_think(args, _target, _cache):
    """Echo the agent's reasoning fields to stderr and return "ok".

    args keys (each defaults to ""): observation, hypothesis, next_action.
    Nothing is stored; the tool exists to surface the reasoning in the log.
    """
    report = [
        "  [AI] THINK",
        f"       observation: {args.get('observation', '')}",
        f"       hypothesis:  {args.get('hypothesis', '')}",
        f"       next_action: {args.get('next_action', '')}",
    ]
    for line in report:
        print(line, file=sys.stderr)
    return "ok"


def _tool_checkpoint(args, _target, _cache):
    """Echo the agent's progress summary to stderr and return "ok".

    args keys (each defaults to ""): learned, still_unknown, next_phase.
    Nothing is stored; the tool exists to surface progress in the log.
    """
    report = [
        "  [AI] CHECKPOINT",
        f"       learned:       {args.get('learned', '')}",
        f"       still_unknown: {args.get('still_unknown', '')}",
        f"       next_phase:    {args.get('next_phase', '')}",
    ]
    for line in report:
        print(line, file=sys.stderr)
    return "ok"


def _tool_flag(args, _target, cache):
    """Report a notable finding on stderr and append it to flags.jsonl.

    args keys:
      path      relative path the finding concerns (default "general")
      finding   description of what was found (default "")
      severity  severity label (default "info")

    The finding is appended as one JSON object per line to
    <cache.root>/flags.jsonl. Persisting is best-effort: if the file
    cannot be written the investigation continues, but a warning is
    printed so the lost flag does not go unnoticed.

    Always returns "ok".
    """
    path = args.get("path", "general")
    finding = args.get("finding", "")
    severity = args.get("severity", "info")
    # str() guards against a non-string severity (e.g. null) from the model.
    print(f"  [AI] FLAG [{str(severity).upper()}] {path}", file=sys.stderr)
    print(f"       {finding}", file=sys.stderr)
    flags_path = os.path.join(cache.root, "flags.jsonl")
    entry = {"path": path, "finding": finding, "severity": severity}
    try:
        with open(flags_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")
    except OSError as e:
        # The flag was already shown above; only persistence failed.
        print(f"  [AI] Warning: could not persist flag to {flags_path}: {e}",
              file=sys.stderr)
    return "ok"


# ---------------------------------------------------------------------------
# Tool registrations
#
# Order within each scope is preserved to keep the agent-visible tool list
# stable. Tools that appear in two scopes (flag) and tools whose schema
# differs by scope (submit_report) are registered once per scope.
# ---------------------------------------------------------------------------

# Description and schema shared by both registrations of the "flag" tool
# (dir scope and synthesis scope), so the two stay identical.
_FLAG_DESCRIPTION = (
    "Mark a file, directory, or finding as notable or anomalous. "
    "Call this immediately when you discover something surprising, "
    "concerning, or important — do not save it for the report."
)
_FLAG_SCHEMA = {
    "type": "object",
    "properties": {
        "path": {
            "type": "string",
            "description": "Relative path, or 'general'.",
        },
        "finding": {
            "type": "string",
            "description": "What you found.",
        },
        # _tool_flag() upper-cases this value for its stderr line and
        # stores it unchanged in flags.jsonl.
        "severity": {
            "type": "string",
            "enum": ["info", "concern", "critical"],
            "description": "info | concern | critical",
        },
    },
    "required": ["path", "finding", "severity"],
}


# --- Dir loop tools ---
# Everything below is appended to _DIR_TOOLS in this order.

# Handler: _tool_read_file (reads at most max_bytes and appends a
# truncation note when the file is larger).
register_tool(
    name="read_file",
    description=(
        "Read and return the contents of a file. Path must be inside "
        "the target directory."
    ),
    schema={
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Absolute or relative path to the file.",
            },
            "max_bytes": {
                "type": "integer",
                "description": "Maximum bytes to read (default 4096).",
            },
        },
        "required": ["path"],
    },
    scopes=["dir"],
    handler=_tool_read_file,
)

# The schema marks path as required; _tool_list_directory itself falls
# back to the target root when the key is absent.
register_tool(
    name="list_directory",
    description=(
        "List the contents of a directory with file sizes and types."
    ),
    schema={
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Absolute or relative path to the directory.",
            },
            "show_hidden": {
                "type": "boolean",
                "description": "Include hidden files (default false).",
            },
        },
        "required": ["path"],
    },
    scopes=["dir"],
    handler=_tool_list_directory,
)

# The binary list in the description duplicates _COMMAND_WHITELIST; keep
# the two in sync when either changes.
register_tool(
    name="run_command",
    description=(
        "Run a read-only shell command. Allowed binaries: "
        "wc, file, grep, head, tail, stat, du, find."
    ),
    schema={
        "type": "object",
        "properties": {
            "command": {
                "type": "string",
                "description": "The shell command to execute.",
            },
        },
        "required": ["command"],
    },
    scopes=["dir"],
    handler=_tool_run_command,
)

# Handler: _tool_parse_structure, which delegates to
# luminos_lib.ast_parser.parse_structure after a path-safety check.
register_tool(
    name="parse_structure",
    description=(
        "Parse a source file using tree-sitter and return its structural "
        "skeleton: functions, classes, imports, and code metrics. "
        "Supported: Python, JavaScript, TypeScript, Rust, Go."
    ),
    schema={
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Path to the source file to parse.",
            },
        },
        "required": ["path"],
    },
    scopes=["dir"],
    handler=_tool_parse_structure,
)

# _tool_write_cache validates cache_type only; the field list in the
# "data" description is guidance for the model and is not checked by
# that handler itself.
register_tool(
    name="write_cache",
    description=(
        "Write a summary cache entry for a file or directory. The data "
        "must NOT contain raw file contents — summaries only."
    ),
    schema={
        "type": "object",
        "properties": {
            "cache_type": {
                "type": "string",
                "enum": ["file", "dir"],
                "description": "'file' or 'dir'.",
            },
            "path": {
                "type": "string",
                "description": "The path being cached.",
            },
            "data": {
                "type": "object",
                "description": (
                    "Cache entry. Files: {path, relative_path, size_bytes, "
                    "category, summary, notable, notable_reason, "
                    "confidence, confidence_reason, cached_at}. "
                    "Dirs: {path, relative_path, child_count, summary, "
                    "dominant_category, notable_files, "
                    "confidence, confidence_reason, cached_at}. "
                    "Always set confidence (0.0–1.0); see system prompt "
                    "for calibration. Set confidence_reason only when "
                    "confidence < 0.7."
                ),
            },
        },
        "required": ["cache_type", "path", "data"],
    },
    scopes=["dir"],
    handler=_tool_write_cache,
)

# _tool_think only echoes the three fields to stderr and returns "ok".
register_tool(
    name="think",
    description=(
        "Record your reasoning before choosing which file or directory "
        "to investigate next. Call this when deciding what to look at "
        "— not before every individual tool call."
    ),
    schema={
        "type": "object",
        "properties": {
            "observation": {
                "type": "string",
                "description": "What you have observed so far.",
            },
            "hypothesis": {
                "type": "string",
                "description": "Your hypothesis about the directory.",
            },
            "next_action": {
                "type": "string",
                "description": "What you plan to investigate next and why.",
            },
        },
        "required": ["observation", "hypothesis", "next_action"],
    },
    scopes=["dir"],
    handler=_tool_think,
)

# _tool_checkpoint only echoes the three fields to stderr and returns "ok".
register_tool(
    name="checkpoint",
    description=(
        "Summarize what you have learned so far about this directory "
        "and what you still need to determine. Call this after completing "
        "a significant cluster of files — not after every file."
    ),
    schema={
        "type": "object",
        "properties": {
            "learned": {
                "type": "string",
                "description": "What you have learned so far.",
            },
            "still_unknown": {
                "type": "string",
                "description": "What you still need to determine.",
            },
            "next_phase": {
                "type": "string",
                "description": "What you will investigate next.",
            },
        },
        "required": ["learned", "still_unknown", "next_phase"],
    },
    scopes=["dir"],
    handler=_tool_checkpoint,
)

# This registration binds the name-keyed "flag" handler; the
# synthesis-scope registration of "flag" passes no handler of its own.
register_tool(
    name="flag",
    description=_FLAG_DESCRIPTION,
    schema=_FLAG_SCHEMA,
    scopes=["dir"],
    handler=_tool_flag,
)

# Schema only, no handler: per the registry notes, submit_report is
# intercepted by the loop body instead of going through _TOOL_DISPATCH.
register_tool(
    name="submit_report",
    description=(
        "Submit the directory summary. This ends the investigation loop."
    ),
    schema={
        "type": "object",
        "properties": {
            "summary": {
                "type": "string",
                "description": "1-3 sentence summary of the directory.",
            },
            "completeness": {
                "type": "number",
                "description": (
                    "Self-rated investigation completeness (0.0-1.0). "
                    "1.0 = examined every relevant file thoroughly. "
                    "0.5 = examined about half, or skimmed most. "
                    "< 0.3 = barely scratched the surface."
                ),
            },
        },
        "required": ["summary", "completeness"],
    },
    scopes=["dir"],
)


# --- Synthesis tools ---
# Everything below is appended to _SYNTHESIS_TOOLS in this order.

# _tool_read_cache returns the entry as indented JSON, or the string
# "null" when nothing is cached for the path.
register_tool(
    name="read_cache",
    description="Read a previously cached summary for a file or directory.",
    schema={
        "type": "object",
        "properties": {
            "cache_type": {
                "type": "string",
                "enum": ["file", "dir"],
            },
            "path": {
                "type": "string",
                "description": "The path to look up.",
            },
        },
        "required": ["cache_type", "path"],
    },
    scopes=["synthesis"],
    handler=_tool_read_cache,
)

# _tool_list_cache returns one path per line, or "(no cached entries)".
register_tool(
    name="list_cache",
    description="List all cached entry paths of a given type.",
    schema={
        "type": "object",
        "properties": {
            "cache_type": {
                "type": "string",
                "enum": ["file", "dir"],
            },
        },
        "required": ["cache_type"],
    },
    scopes=["synthesis"],
    handler=_tool_list_cache,
)

# No handler here: _TOOL_DISPATCH is keyed by tool name, and "flag" was
# already bound to _tool_flag by the dir-scope registration.
register_tool(
    name="flag",
    description=_FLAG_DESCRIPTION,
    schema=_FLAG_SCHEMA,
    scopes=["synthesis"],
)

# Same tool name as the dir-scope submit_report but a different schema
# (brief/detailed instead of summary/completeness). Schema only, no handler.
register_tool(
    name="submit_report",
    description="Submit the final analysis report.",
    schema={
        "type": "object",
        "properties": {
            "brief": {
                "type": "string",
                "description": "2-4 sentence summary.",
            },
            "detailed": {
                "type": "string",
                "description": "Thorough breakdown.",
            },
        },
        "required": ["brief", "detailed"],
    },
    scopes=["synthesis"],
)


# --- Survey tools ---
# The survey scope has a single tool. It is schema-only (no handler); the
# submitted fields are the ones consumed by _format_survey_block() and
# _filter_dir_tools().

register_tool(
    name="submit_survey",
    description=(
        "Submit the reconnaissance survey. Call exactly once."
    ),
    schema={
        "type": "object",
        "properties": {
            "description": {
                "type": "string",
                "description": "Plain-language description of the target.",
            },
            "approach": {
                "type": "string",
                "description": "Recommended analytical approach.",
            },
            "relevant_tools": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Tool names the dir loop should lean on.",
            },
            # Only honoured by _filter_dir_tools() when confidence is at
            # least _SURVEY_CONFIDENCE_THRESHOLD.
            "skip_tools": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Tool names whose use would be wrong here.",
            },
            "domain_notes": {
                "type": "string",
                "description": "Short actionable hint, or empty string.",
            },
            "confidence": {
                "type": "number",
                "description": "0.0–1.0 confidence in this survey.",
            },
        },
        "required": [
            "description", "approach", "relevant_tools",
            "skip_tools", "domain_notes", "confidence",
        ],
    },
    scopes=["survey"],
)

# --- Planning tools ---
# The planning scope has a single tool. It is registered schema-only (no
# handler), so it is not reachable through _execute_tool().

register_tool(
    name="submit_plan",
    description=(
        "Submit the investigation plan. Call exactly once."
    ),
    schema={
        "type": "object",
        "properties": {
            # Three bands of directories, each item carrying a path and
            # a reason; only priority_dirs items carry a turn suggestion.
            "priority_dirs": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Relative directory path.",
                        },
                        "reason": {
                            "type": "string",
                            "description": "Why this dir deserves deep investigation.",
                        },
                        "suggested_turns": {
                            "type": "integer",
                            "description": "Suggested turns (15-20).",
                        },
                    },
                    "required": ["path", "reason", "suggested_turns"],
                },
                "description": "Directories to investigate deeply.",
            },
            "shallow_dirs": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Relative directory path.",
                        },
                        "reason": {
                            "type": "string",
                            "description": "Why a shallow pass is sufficient.",
                        },
                    },
                    "required": ["path", "reason"],
                },
                "description": "Directories needing only a quick pass.",
            },
            "skip_dirs": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Relative directory path.",
                        },
                        "reason": {
                            "type": "string",
                            "description": "Why this dir should be skipped.",
                        },
                    },
                    "required": ["path", "reason"],
                },
                "description": "Directories to skip entirely.",
            },
            "investigation_order": {
                "type": "string",
                "enum": ["leaf-first", "priority-first"],
                "description": "leaf-first or priority-first (leaf-first within bands).",
            },
            "notes": {
                "type": "string",
                "description": "Cross-cutting notes for per-directory agents, or empty.",
            },
        },
        "required": [
            "priority_dirs", "shallow_dirs", "skip_dirs",
            "investigation_order", "notes",
        ],
    },
    scopes=["planning"],
)


def _execute_tool(name, args, target, cache, dir_rel, turn, verbose=False):
    """Dispatch one tool call, log it, and return the handler's result.

    The handler registered under *name* in _TOOL_DISPATCH is called as
    handler(args, target, cache). The call is then recorded with
    cache.log_turn(), passing every argument except "data" along with the
    length of the result. With verbose=True a preview of at most 200
    characters is printed to stderr.

    Returns the handler's result, or an "Error: unknown tool ..." string
    (without logging) when no handler is registered for *name*.
    """
    handler = _TOOL_DISPATCH.get(name)
    if handler is None:
        return f"Error: unknown tool '{name}'."

    result = handler(args, target, cache)
    size = len(result)

    # "data" carries the write_cache payload; it is left out of the log.
    logged_args = {key: value for key, value in args.items() if key != "data"}
    cache.log_turn(dir_rel, turn, name, logged_args, size)

    if verbose:
        preview = result if size <= 200 else result[:200] + "..."
        print(f"  [AI]     <- {size} chars: {preview}", file=sys.stderr)

    return result


# ---------------------------------------------------------------------------
# Streaming API caller
# ---------------------------------------------------------------------------

def _call_api_streaming(client, system, messages, tools, tracker):
    """Call Claude through the streaming API and return the full response.

    The request uses MODEL with max_tokens=4096. The stream is read to
    completion before returning; this function prints nothing itself.
    The response's usage is recorded on *tracker* via tracker.record().

    Returns (content_blocks, usage) where content_blocks is the list of
    content blocks from the response.
    """
    with client.messages.stream(
        model=MODEL,
        max_tokens=4096,
        system=system,
        messages=messages,
        tools=tools,
    ) as stream:
        # get_final_message() blocks until the stream has been fully read,
        # so no explicit event loop is needed to drain it.
        response = stream.get_final_message()

    tracker.record(response.usage)
    return response.content, response.usage


# ---------------------------------------------------------------------------
# Directory discovery
# ---------------------------------------------------------------------------

def _discover_directories(target, show_hidden=False, exclude=None):
    """Return every directory under *target*, deepest first.

    *target* is resolved with os.path.realpath() and is itself included
    in the result. A subdirectory is pruned, along with everything below
    it, when its name matches _should_skip_dir(), appears in *exclude*,
    or starts with "." while show_hidden is False.

    The result is ordered by descending number of path separators, with
    ties broken alphabetically.
    """
    excluded = set(exclude or [])

    def keep(dirname):
        if _should_skip_dir(dirname) or dirname in excluded:
            return False
        return show_hidden or not dirname.startswith(".")

    found = []
    for root, subdirs, _files in os.walk(os.path.realpath(target),
                                         topdown=True):
        # In-place slice assignment is what makes os.walk prune.
        subdirs[:] = [d for d in subdirs if keep(d)]
        found.append(root)
    return sorted(found, key=lambda d: (-d.count(os.sep), d))


# ---------------------------------------------------------------------------
# Per-directory agent loop
# ---------------------------------------------------------------------------

def _build_dir_context(dir_path):
    """Describe the visible entries of *dir_path* as a text block.

    Dot-prefixed names are omitted. Subdirectories appear as
    "name/  (dir)"; other entries show their size in bytes and the MIME
    type reported by libmagic. An entry whose lookup raises OSError is
    shown as "(stat failed)".

    Returns "Directory contents:" followed by one line per entry, or
    "(empty)" when there is nothing to show. A directory that cannot be
    listed yields a single "(could not list directory)" line.
    """
    try:
        names = sorted(os.listdir(dir_path))
    except OSError:
        names = None

    rows = []
    if names is None:
        rows.append("  (could not list directory)")
    else:
        for name in names:
            if name.startswith("."):
                continue
            full = os.path.join(dir_path, name)
            try:
                size = os.stat(full).st_size
                if os.path.isdir(full):
                    row = f"  {name}/  (dir)"
                else:
                    mime = magic.from_file(full, mime=True)
                    row = f"  {name}  ({size} bytes)  [{mime}]"
            except OSError:
                row = f"  {name}  (stat failed)"
            rows.append(row)

    if not rows:
        return "(empty)"
    return "Directory contents:\n" + "\n".join(rows)


def _get_child_summaries(dir_path, cache):
    """Return cached summaries of *dir_path*'s immediate subdirectories.

    Each subdirectory with a truthy cache entry (cache.read_entry("dir",
    child_path)) contributes one "- <relative_path>/: <summary>" line.

    When no summaries are available, a placeholder says why:
      - "(child directories exist but have not been investigated yet)"
        if there is at least one subdirectory that is neither hidden nor
        matched by _should_skip_dir()
      - "(none: this is a leaf directory)" otherwise

    Always-skipped directories (".git", "__pycache__", ...) are ignored
    for the placeholder because _discover_directories() never visits
    them; counting them would make a real leaf look like a parent with
    pending children.
    """
    # List once and reuse the result for both passes.
    try:
        names = sorted(os.listdir(dir_path))
    except OSError:
        names = []
    subdirs = [
        name for name in names
        if os.path.isdir(os.path.join(dir_path, name))
    ]

    parts = []
    try:
        for name in subdirs:
            entry = cache.read_entry("dir", os.path.join(dir_path, name))
            if entry:
                rel = entry.get("relative_path", name)
                summary = entry.get("summary", "(no summary)")
                parts.append(f"- {rel}/: {summary}")
    except OSError:
        # Kept from the original: should a cache read surface an OSError,
        # collection stops but the summaries gathered so far are used.
        pass
    if parts:
        return "\n".join(parts)

    # Distinguish actual leaves from parents whose children haven't been
    # investigated yet, so the parent's context is not silently degraded.
    has_pending = any(
        not name.startswith(".") and not _should_skip_dir(name)
        for name in subdirs
    )
    if has_pending:
        return "(child directories exist but have not been investigated yet)"
    return "(none: this is a leaf directory)"


# Minimum survey confidence at which _filter_dir_tools() honours the
# survey's skip_tools; below it the dir loop keeps its full toolbox.
_SURVEY_CONFIDENCE_THRESHOLD = 0.5
# Tool names _filter_dir_tools() never removes, even if a survey lists
# them in skip_tools.
_PROTECTED_DIR_TOOLS = {"submit_report"}

# Survey-skip thresholds. Skip the survey only when BOTH are below.
# See #46 for the plan to revisit these with empirical data.
_SURVEY_MIN_FILES = 5
_SURVEY_MIN_DIRS = 2


def _default_survey():
    """Build the stand-in survey used when the survey call is skipped.

    The result has the same keys as a submit_survey payload. Its
    confidence is 0.0, which keeps _filter_dir_tools() from acting on
    skip_tools, so the dir loop retains every tool. A new dict with new
    lists is returned on each call.
    """
    approach = (
        "The target is small enough to investigate exhaustively. "
        "Read every file directly."
    )
    return dict(
        description="Small target — survey skipped.",
        approach=approach,
        relevant_tools=[],
        skip_tools=[],
        domain_notes="",
        confidence=0.0,
    )


def _format_survey_block(survey):
    """Turn a survey dict into the labeled text block used in prompts.

    Description and approach are always emitted; domain notes and the two
    tool lists appear only when non-empty. A falsy survey yields the
    "(no survey available)" placeholder.
    """
    if not survey:
        return "(no survey available)"

    description = survey.get("description", "")
    approach = survey.get("approach", "")
    rendered = [f"Description: {description}", f"Approach: {approach}"]

    domain_notes = survey.get("domain_notes", "")
    if domain_notes:
        rendered.append(f"Domain notes: {domain_notes}")

    # Optional tool lists, in the order they appear in the block.
    optional_tool_lines = (
        ("relevant_tools", "Relevant tools (lean on these): "),
        ("skip_tools", "Skip tools (already removed from your toolbox): "),
    )
    for key, prefix in optional_tool_lines:
        tool_names = survey.get(key) or []
        if tool_names:
            rendered.append(prefix + ", ".join(tool_names))

    return "\n".join(rendered)


def _filter_dir_tools(survey):
    """Return a copy of _DIR_TOOLS minus the survey's skip_tools.

    The full list comes back unchanged when there is no survey, when the
    survey's confidence is missing, non-numeric, or below
    _SURVEY_CONFIDENCE_THRESHOLD, or when nothing removable was requested.
    Names in _PROTECTED_DIR_TOOLS are never removed, and skip_tools names
    that match no tool have no effect.
    """
    full_toolbox = list(_DIR_TOOLS)
    if not survey:
        return full_toolbox

    raw_confidence = survey.get("confidence", 0.0)
    try:
        confidence = float(raw_confidence or 0.0)
    except (TypeError, ValueError):
        confidence = 0.0
    if confidence < _SURVEY_CONFIDENCE_THRESHOLD:
        return full_toolbox

    requested = survey.get("skip_tools") or []
    removable = {
        name for name in requested if name not in _PROTECTED_DIR_TOOLS
    }
    if removable:
        return [tool for tool in full_toolbox if tool["name"] not in removable]
    return full_toolbox


# Bundle returned by _build_dir_loop_context(): the directory's display
# path, the formatted system prompt, the (possibly filtered) tool list, and
# the seed message list that the dir loop appends to turn by turn.
_DirLoopContext = namedtuple(
    "_DirLoopContext", ["dir_rel", "system", "dir_tools", "messages"],
)


def _build_dir_loop_context(dir_path, target, cache, survey, max_turns):
    """Gather everything the dir loop needs before its first API call.

    Computes the directory's display path (the target's basename when the
    directory is the target itself), fills in the dir system prompt from
    the directory context, cached child summaries and survey block,
    filters the tool list by the survey, and seeds the conversation with
    the opening user message. Returns a _DirLoopContext.
    """
    rel_label = os.path.relpath(dir_path, target)
    if rel_label == ".":
        rel_label = os.path.basename(target)

    prompt_fields = {
        "dir_path": dir_path,
        "dir_rel": rel_label,
        "max_turns": max_turns,
        "context": _build_dir_context(dir_path),
        "child_summaries": _get_child_summaries(dir_path, cache),
        "survey_context": _format_survey_block(survey),
    }
    toolbox = _filter_dir_tools(survey)
    system_prompt = _DIR_SYSTEM_PROMPT.format(**prompt_fields)

    opening_request = (
        "Investigate this directory now. Use parse_structure for "
        "source files, read_file for others, cache summaries, and "
        "call submit_report. Batch tool calls for efficiency."
    )
    seed_messages = [{"role": "user", "content": opening_request}]

    return _DirLoopContext(
        dir_rel=rel_label,
        system=system_prompt,
        dir_tools=toolbox,
        messages=seed_messages,
    )


def _flush_partial_dir_entry(dir_path, target, cache):
    """Write a partial dir cache entry from any already-cached file entries.

    Called when the per-loop context budget is exceeded before the agent
    reaches submit_report. Idempotent: returns "" without writing if a dir
    entry already exists.

    A cached file entry is attributed to this directory when its "path"
    resolves to somewhere below the directory, or when its "relative_path"
    (joined onto `target`) sits directly inside it. Entries that carry
    neither field are ignored rather than being matched by accident.

    Returns the partial summary string: the matching file summaries joined
    by spaces, or "" when no file entries matched (a placeholder dir entry
    is still written in that case).
    """
    if cache.has_entry("dir", dir_path):
        return ""

    dir_real = os.path.realpath(dir_path)
    dir_prefix = dir_real + os.sep

    def _belongs_to_dir(entry):
        """Tell whether a cached file entry lives in/under this directory."""
        path = entry.get("path")
        # An empty path must be skipped: realpath("") is the current
        # working directory, which may happen to sit under dir_real.
        if isinstance(path, str) and path:
            if os.path.realpath(path).startswith(dir_prefix):
                return True
        rel = entry.get("relative_path")
        # An empty relative_path would resolve to `target` itself.
        if isinstance(rel, str) and rel:
            parent = os.path.dirname(os.path.join(target, rel))
            # Resolve the parent so "./x" forms and a symlinked target
            # compare equal to the already-resolved dir_real.
            return os.path.realpath(parent) == dir_real
        return False

    file_entries = [
        e for e in cache.read_all_entries("file") if _belongs_to_dir(e)
    ]
    dir_rel = os.path.relpath(dir_path, target)

    if file_entries:
        file_summaries = [
            str(e["summary"]) for e in file_entries if e.get("summary")
        ]
        notable = [
            e.get("relative_path", e.get("path", ""))
            for e in file_entries if e.get("notable")
        ]
        # Count non-hidden children; a missing or unreadable directory
        # counts as 0 instead of aborting the flush.
        try:
            child_count = sum(
                1 for n in os.listdir(dir_path) if not n.startswith(".")
            )
        except OSError:
            child_count = 0

        partial_summary = " ".join(file_summaries)
        cache.write_entry("dir", dir_path, {
            "path": dir_path,
            "relative_path": dir_rel,
            "child_count": child_count,
            "summary": partial_summary,
            "dominant_category": "unknown",
            "notable_files": notable,
            "partial": True,
            "partial_reason": "context budget reached",
            "cached_at": _now_iso(),
        })
        return partial_summary

    # No file entries to build from: record a placeholder so the dir
    # still has a cache entry.
    cache.write_entry("dir", dir_path, {
        "path": dir_path,
        "relative_path": dir_rel,
        "child_count": 0,
        "summary": ("Investigation incomplete — context budget "
                    "reached before any files were processed."),
        "dominant_category": "unknown",
        "notable_files": [],
        "partial": True,
        "partial_reason": (
            "context budget reached before files processed"),
        "cached_at": _now_iso(),
    })
    return ""


def _handle_turn_response(content_blocks, messages, target, cache, dir_rel,
                          turn, verbose):
    """Process one turn's response: print, append, dispatch tools.

    Mutates `messages` in place: appends the assistant message, then either
    a "please call submit_report" nudge (no tool_uses) or the tool_results
    user message. Recognizes submit_report as the loop's done signal and
    extracts its summary and self-rated completeness; every other tool call
    is dispatched through _execute_tool.

    Returns:
        (done, summary, completeness). `done` is True once submit_report
        was called this turn. `summary` is the submitted summary, or None
        if nothing was submitted. `completeness` is the submitted rating
        as a float, or None when it was not reported or is not numeric.
    """
    for b in content_blocks:
        if b.type == "text" and b.text.strip():
            for line in b.text.strip().split("\n"):
                print(f"  [AI]   {line}", file=sys.stderr)

    tool_uses = [b for b in content_blocks if b.type == "tool_use"]
    for tu in tool_uses:
        # "data" payloads are omitted from the log line.
        arg_summary = ", ".join(
            f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
        ) if tu.input else ""
        print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)

    messages.append({
        "role": "assistant",
        "content": [_block_to_dict(b) for b in content_blocks],
    })

    if not tool_uses:
        messages.append({
            "role": "user",
            "content": "Please call submit_report with your summary.",
        })
        return False, None, None

    tool_results = []
    done = False
    summary = None
    completeness = None
    for tu in tool_uses:
        if tu.name == "submit_report":
            report_args = tu.input or {}
            summary = report_args.get("summary", "")
            # Keep "not reported" (None) distinct from an explicit 0 so
            # callers do not mistake a missing rating for a zero rating.
            raw_completeness = report_args.get("completeness")
            if raw_completeness is None:
                completeness = None
            else:
                try:
                    completeness = float(raw_completeness)
                except (TypeError, ValueError):
                    completeness = None
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tu.id,
                "content": "Summary submitted.",
            })
            done = True
        else:
            result_text = _execute_tool(
                tu.name, tu.input, target, cache, dir_rel,
                turn + 1, verbose=verbose,
            )
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tu.id,
                "content": result_text,
            })

    messages.append({"role": "user", "content": tool_results})
    return done, summary, completeness


def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
                  verbose=False, survey=None):
    """Drive the agent conversation for one directory.

    Each turn first checks the tracker's context budget (flushing a
    partial cache entry and stopping if it is exceeded), then makes one
    streaming API call and hands the response to _handle_turn_response.
    The loop stops on submit_report, on an API error, on budget
    exhaustion, or after `max_turns` turns.

    Returns (summary, completeness); either may be None if the loop ended
    without the corresponding value being produced.
    """
    loop_ctx = _build_dir_loop_context(
        dir_path, target, cache, survey, max_turns,
    )
    final_summary = None
    final_completeness = None
    stopped_early = False

    for turn_index in range(max_turns):
        if tracker.budget_exceeded():
            print(f"  [AI]   Context budget reached — exiting early "
                  f"(context size {tracker.last_input:,} > "
                  f"{CONTEXT_BUDGET:,} budget; "
                  f"loop spend {tracker.loop_total:,} tokens)",
                  file=sys.stderr)
            flushed = _flush_partial_dir_entry(dir_path, target, cache)
            if flushed and not final_summary:
                final_summary = flushed
            stopped_early = True
            break

        try:
            reply_blocks, _usage = _call_api_streaming(
                client, loop_ctx.system, loop_ctx.messages,
                loop_ctx.dir_tools, tracker,
            )
        except anthropic.APIError as e:
            print(f"  [AI]   API error: {e}", file=sys.stderr)
            stopped_early = True
            break

        outcome = _handle_turn_response(
            reply_blocks, loop_ctx.messages, target, cache,
            loop_ctx.dir_rel, turn_index, verbose,
        )
        finished, reported_summary, reported_completeness = outcome
        if reported_summary is not None:
            final_summary = reported_summary
        if reported_completeness is not None:
            final_completeness = reported_completeness
        if finished:
            stopped_early = True
            break

    # Only warn when every allotted turn was consumed without a stop.
    if not stopped_early:
        print(f"  [AI]   Warning: max turns reached for {loop_ctx.dir_rel}",
              file=sys.stderr)

    return final_summary, final_completeness


def _block_to_dict(block):
    """Serialize an SDK content block into a plain dict for the history.

    Text blocks keep their text; tool_use blocks keep id, name and input;
    any other block type is reduced to just its type.
    """
    kind = block.type
    if kind == "tool_use":
        return {
            "type": "tool_use",
            "id": block.id,
            "name": block.name,
            "input": block.input,
        }
    if kind == "text":
        return {"type": "text", "text": block.text}
    return {"type": kind}


# ---------------------------------------------------------------------------
# Survey pass
# ---------------------------------------------------------------------------

def _format_survey_signals(signals):
    """Format the survey_signals dict as labeled text for a prompt.

    Emits the total file count, then (each only when non-empty) the
    extension histogram, the `file --brief` description counts, and the
    filename samples. Returns "(no files classified)" when there are no
    signals or the total file count is falsy.
    """
    if not signals or not signals.get("total_files"):
        return "(no files classified)"

    out = [f"Total files: {signals.get('total_files', 0)}", ""]

    # The two count tables share one layout: heading, indented rows, blank.
    count_sections = (
        ("extension_histogram", "Extensions (top, by count):"),
        ("file_descriptions", "file --brief output (top, by count):"),
    )
    for key, heading in count_sections:
        counts = signals.get(key) or {}
        if counts:
            out.append(heading)
            out.extend(f"  {label}: {count}" for label, count in counts.items())
            out.append("")

    sample_names = signals.get("filename_samples") or []
    if sample_names:
        out.append("Filename samples (evenly drawn):")
        out.extend(f"  {sample}" for sample in sample_names)

    return "\n".join(out).rstrip()


def _coerce_survey_confidence(value):
    """Return `value` as a float, or 0.0 when missing or not numeric."""
    try:
        return float(value or 0.0)
    except (TypeError, ValueError):
        return 0.0


def _coerce_tool_names(value):
    """Return `value` as a list of tool-name strings ([] when unusable)."""
    if isinstance(value, str):
        return [value] if value else []
    if isinstance(value, (list, tuple)):
        return [str(name) for name in value]
    return []


def _run_survey(client, target, report, tracker, max_turns=3, verbose=False):
    """Run the reconnaissance survey pass.

    Builds the survey system prompt from the report's survey signals, a
    depth-2 tree preview, and the dir tool names (minus submit_report),
    then loops until the model calls submit_survey or `max_turns` is used
    up. Any other tool call is answered with an error tool_result.

    The submitted fields are model-generated, so confidence and the two
    tool lists are normalized here: a non-numeric confidence becomes 0.0
    instead of raising, and the tool lists are always lists of strings.

    Returns a survey dict on success, or None on failure / out-of-turns.
    Survey is advisory — callers must treat None as "no survey context".
    """
    signals = report.get("survey_signals") or {}
    survey_signals_text = _format_survey_signals(signals)

    # The tree preview is best-effort; the survey can proceed without it.
    try:
        tree_node = build_tree(target, max_depth=2)
        tree_preview = render_tree(tree_node)
    except Exception:
        tree_preview = "(tree unavailable)"

    tool_names = [t["name"] for t in _DIR_TOOLS if t["name"] != "submit_report"]
    available_tools = ", ".join(tool_names)

    system = _SURVEY_SYSTEM_PROMPT.format(
        target=target,
        survey_signals=survey_signals_text,
        tree_preview=tree_preview,
        available_tools=available_tools,
    )

    messages = [
        {
            "role": "user",
            "content": (
                "All inputs are in the system prompt above. Call "
                "submit_survey now — no other tool calls needed."
            ),
        },
    ]

    survey = None

    for _turn in range(max_turns):
        try:
            content_blocks, _usage = _call_api_streaming(
                client, system, messages, _SURVEY_TOOLS, tracker,
            )
        except anthropic.APIError as e:
            print(f"  [AI]   API error: {e}", file=sys.stderr)
            return None

        for b in content_blocks:
            if b.type == "text" and b.text.strip():
                for line in b.text.strip().split("\n"):
                    print(f"  [AI]   {line}", file=sys.stderr)

        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
        for tu in tool_uses:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in tu.input.items()
            ) if tu.input else ""
            print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)

        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(b) for b in content_blocks],
        })

        if not tool_uses:
            messages.append({
                "role": "user",
                "content": "Please call submit_survey.",
            })
            continue

        tool_results = []
        done = False
        for tu in tool_uses:
            if tu.name == "submit_survey":
                args = tu.input or {}
                survey = {
                    "description": args.get("description", ""),
                    "approach": args.get("approach", ""),
                    "relevant_tools": _coerce_tool_names(
                        args.get("relevant_tools")),
                    "skip_tools": _coerce_tool_names(args.get("skip_tools")),
                    "domain_notes": args.get("domain_notes", ""),
                    "confidence": _coerce_survey_confidence(
                        args.get("confidence")),
                }
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Survey received. Thank you.",
                })
                done = True
            else:
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Unknown tool. Call submit_survey.",
                    "is_error": True,
                })

        messages.append({"role": "user", "content": tool_results})

        if done:
            break
    else:
        print("  [AI] Warning: survey ran out of turns.", file=sys.stderr)

    return survey


# ---------------------------------------------------------------------------
# Planning pass
# ---------------------------------------------------------------------------

# Turn allocation defaults.
# Per-directory default turn count reported to the planner in its prompt.
_DEFAULT_TURNS = 10
# Turn budget given to directories the plan lists as shallow.
_SHALLOW_TURNS = 5
# Upper bound applied to the planner's suggested_turns for priority dirs.
_MAX_TURNS_CEILING = 25
# Multiplied by the directory count to form the planner's global budget.
_BASE_TURNS_PER_DIR = 10


def _default_plan():
    """Return the neutral plan used when planning is skipped or fails.

    No directory is prioritized, made shallow, or skipped, and the
    investigation order stays leaf-first.
    """
    plan = {
        tier: [] for tier in ("priority_dirs", "shallow_dirs", "skip_dirs")
    }
    plan["investigation_order"] = "leaf-first"
    plan["notes"] = ""
    return plan


def _run_planning(client, target, survey, report, all_dirs, tracker,
                  cached_dirs=None, max_turns=3, verbose=False):
    """Ask the model for an investigation plan; return it, or None.

    Runs after the survey and before the per-directory loops. The system
    prompt carries the survey block, a depth-6 tree rendering, the file
    signals, the already-cached directories, and the turn budget figures;
    the model is expected to answer with a submit_plan tool call. Any
    other tool call is answered with an error tool_result.

    Returns the plan dict built from submit_plan's input, or None when an
    API error occurs or submit_plan is never called within `max_turns`.
    """
    cached_dirs = cached_dirs or []
    dir_count = len(all_dirs)
    global_budget = _BASE_TURNS_PER_DIR * dir_count

    if survey:
        survey_context = _format_survey_block(survey)
    else:
        survey_context = "(no survey available)"

    # The tree rendering is best-effort; planning proceeds without it.
    try:
        tree_text = render_tree(build_tree(target, max_depth=6))
    except Exception:
        tree_text = "(tree unavailable)"

    file_signals = _format_survey_signals(report.get("survey_signals") or {})

    cached_rel = [os.path.relpath(d, target) for d in cached_dirs]
    cached_text = ", ".join(cached_rel) if cached_rel else "(none)"

    prompt_fields = dict(
        target=target,
        survey_context=survey_context,
        tree_text=tree_text,
        file_signals=file_signals,
        dir_count=dir_count,
        cached_dirs=cached_text,
        default_turns=_DEFAULT_TURNS,
        global_budget=global_budget,
    )
    system = _PLANNING_SYSTEM_PROMPT.format(**prompt_fields)

    opening_request = (
        "All inputs are in the system prompt above. Call "
        "submit_plan now."
    )
    messages = [{"role": "user", "content": opening_request}]

    plan = None

    for _turn in range(max_turns):
        try:
            reply_blocks, _usage = _call_api_streaming(
                client, system, messages, _PLANNING_TOOLS, tracker,
            )
        except anthropic.APIError as e:
            print(f"  [AI]   API error: {e}", file=sys.stderr)
            return None

        # Echo any free-form text the model produced.
        for blk in reply_blocks:
            if blk.type != "text":
                continue
            stripped = blk.text.strip()
            if not stripped:
                continue
            for line in stripped.split("\n"):
                print(f"  [AI]   {line}", file=sys.stderr)

        tool_calls = [blk for blk in reply_blocks if blk.type == "tool_use"]
        for call in tool_calls:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in call.input.items()
            ) if call.input else ""
            print(f"  [AI]   -> {call.name}({arg_summary})", file=sys.stderr)

        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(blk) for blk in reply_blocks],
        })

        if not tool_calls:
            messages.append({
                "role": "user",
                "content": "Please call submit_plan.",
            })
            continue

        results = []
        submitted = False
        for call in tool_calls:
            if call.name != "submit_plan":
                results.append({
                    "type": "tool_result",
                    "tool_use_id": call.id,
                    "content": "Unknown tool. Call submit_plan.",
                    "is_error": True,
                })
                continue

            args = call.input
            plan = {
                "priority_dirs": args.get("priority_dirs", []) or [],
                "shallow_dirs": args.get("shallow_dirs", []) or [],
                "skip_dirs": args.get("skip_dirs", []) or [],
                "investigation_order": args.get(
                    "investigation_order", "leaf-first"
                ),
                "notes": args.get("notes", ""),
            }
            results.append({
                "type": "tool_result",
                "tool_use_id": call.id,
                "content": "Plan received. Thank you.",
            })
            submitted = True

        messages.append({"role": "user", "content": results})

        if submitted:
            break
    else:
        print("  [AI] Warning: planning ran out of turns.", file=sys.stderr)

    return plan


def _clamp_suggested_turns(value, default=15):
    """Coerce a planner-supplied turn count to an int in [1, ceiling].

    Missing or non-numeric values fall back to `default`; the result is
    then bounded by 1 and _MAX_TURNS_CEILING.
    """
    try:
        turns = int(value)
    except (TypeError, ValueError, OverflowError):
        turns = default
    return max(1, min(turns, _MAX_TURNS_CEILING))


def _apply_plan(all_dirs, to_investigate, plan, target):
    """Apply the plan to produce an ordered dir list and turn map.

    Plan entries are model-generated, so they are read defensively: an
    entry may be a dict with a "path" key or a bare path string, paths are
    matched exactly first and then in normalized form (so "src/" and
    "./src" match "src"), and suggested_turns is coerced to an int between
    1 and _MAX_TURNS_CEILING.

    Returns (ordered_dirs, turn_map) where:
      - ordered_dirs: list of absolute dir paths in investigation order
      - turn_map: dict of {abs_dir_path: max_turns}

    No cache access and no API calls. The only side effect is a warning on
    stderr when the plan references directories that are not in all_dirs.
    """
    if plan is None:
        return list(to_investigate), {}

    # Build lookup from relative path to absolute path.
    # The target root maps to "." via relpath, but the planner sees
    # basename(target) in the tree output and uses that as the path.
    # Register both so either form matches (#76).
    rel_to_abs = {}
    for d in all_dirs:
        rel = os.path.relpath(d, target)
        rel_to_abs[rel] = d
        if rel == ".":
            root_name = os.path.basename(os.path.normpath(d))
            # Never register "" — it would match entries with no path.
            if root_name:
                rel_to_abs[root_name] = d

    unmatched = []

    def _resolve(entry):
        """Map one plan entry to its absolute dir, or None if unknown."""
        raw = entry.get("path", "") if isinstance(entry, dict) else entry
        rel = "" if raw is None else str(raw)
        abs_path = rel_to_abs.get(rel)
        if abs_path is None and rel:
            abs_path = rel_to_abs.get(os.path.normpath(rel))
        if abs_path is None:
            unmatched.append(rel)
        return abs_path

    # Classify directories by tier.
    skip_set = set()
    priority_set = set()
    shallow_set = set()
    turn_map = {}

    for entry in plan.get("skip_dirs") or []:
        abs_path = _resolve(entry)
        if abs_path is not None:
            skip_set.add(abs_path)

    for entry in plan.get("priority_dirs") or []:
        suggested = (
            entry.get("suggested_turns", 15) if isinstance(entry, dict)
            else 15
        )
        abs_path = _resolve(entry)
        if abs_path is not None:
            priority_set.add(abs_path)
            turn_map[abs_path] = _clamp_suggested_turns(suggested)

    for entry in plan.get("shallow_dirs") or []:
        abs_path = _resolve(entry)
        if abs_path is not None:
            shallow_set.add(abs_path)
            turn_map[abs_path] = _SHALLOW_TURNS

    if unmatched:
        print(
            f"  [AI] Warning: plan referenced unknown dirs: "
            f"{', '.join(unmatched)}",
            file=sys.stderr,
        )

    # Remove skipped dirs from the investigation list.
    remaining = [d for d in to_investigate if d not in skip_set]

    # Order by bands. Both strategies preserve leaf-first within bands.
    order = plan.get("investigation_order", "leaf-first")

    if order == "priority-first":
        priority_band = [d for d in remaining if d in priority_set]
        shallow_band = [d for d in remaining if d in shallow_set]
        default_band = [
            d for d in remaining
            if d not in priority_set and d not in shallow_set
        ]
        ordered = priority_band + default_band + shallow_band
    else:
        # leaf-first: keep the original order (already leaf-first from
        # _discover_directories), just remove skipped dirs.
        ordered = remaining

    return ordered, turn_map


def _write_plan_evaluation(cache, plan, turn_utilization):
    """Write plan_evaluation.json comparing plan predictions to actual results.

    This is the planning pass's report card: did we allocate turns well?

    Args:
        cache: cache manager; `cache.target` is joined with each record's
            dir to look up its cached "dir" entry, and the report is
            written under `cache.root`.
        plan: plan dict, or None (treated as an empty plan). Entries
            without a usable "path" are ignored.
        turn_utilization: iterable of records with "dir",
            "turns_allocated", "turns_used" and optionally "completeness".

    Writing is best-effort: an OSError is reported on stderr and the
    function returns without raising.
    """
    plan = plan or {}

    # (plan key, tier label, fixed turn count or None to read the entry).
    # Later tiers overwrite earlier ones for a path listed more than once.
    tiers = (
        ("priority_dirs", "priority", None),
        ("shallow_dirs", "shallow", _SHALLOW_TURNS),
        ("skip_dirs", "skip", 0),
    )

    # Build a lookup of what the plan predicted per dir.
    predicted = {}
    for plan_key, tier, fixed_turns in tiers:
        for entry in plan.get(plan_key) or []:
            path = entry.get("path") if isinstance(entry, dict) else None
            if not isinstance(path, str) or not path:
                # A path-less entry cannot be matched to any record.
                continue
            if fixed_turns is None:
                suggested = entry.get("suggested_turns", 15)
            else:
                suggested = fixed_turns
            predicted[path] = {"tier": tier, "suggested_turns": suggested}

    # Compare predictions to actual turn utilization.
    per_dir = []
    total_allocated = 0
    total_used = 0
    for record in turn_utilization:
        dir_rel = record["dir"]
        allocated = record["turns_allocated"]
        used = record["turns_used"]
        total_allocated += allocated
        total_used += used

        pred = predicted.get(dir_rel, {})
        entry = {
            "dir": dir_rel,
            "planned_tier": pred.get("tier", "default"),
            "turns_allocated": allocated,
            "turns_used": used,
            "utilization": round(used / allocated, 2) if allocated else 0,
        }

        # Include completeness from turn utilization record (#74).
        record_completeness = record.get("completeness")
        if record_completeness is not None:
            entry["completeness"] = record_completeness

        # Read confidence from the cached dir entry if available.
        dir_entry = cache.read_entry("dir", os.path.join(
            cache.target, dir_rel,
        ))
        if dir_entry:
            entry["confidence"] = dir_entry.get("confidence")

        per_dir.append(entry)

    evaluation = {
        "plan_order": plan.get("investigation_order", "leaf-first"),
        "total_dirs_investigated": len(turn_utilization),
        "total_turns_allocated": total_allocated,
        "total_turns_used": total_used,
        "overall_utilization": (
            round(total_used / total_allocated, 2) if total_allocated else 0
        ),
        "per_directory": per_dir,
        "evaluated_at": _now_iso(),
    }

    eval_path = os.path.join(cache.root, "plan_evaluation.json")
    try:
        with open(eval_path, "w", encoding="utf-8") as f:
            json.dump(evaluation, f, indent=2)
    except OSError as e:
        # The evaluation is diagnostic only; never fail the run over it.
        print(
            f"  [AI] Warning: could not write plan evaluation: {e}",
            file=sys.stderr,
        )
        return

    print(
        f"  [AI] Plan evaluation: {total_used}/{total_allocated} turns used "
        f"({evaluation['overall_utilization']:.0%} utilization)",
        file=sys.stderr,
    )


def _run_synthesis(client, target, cache, tracker, max_turns=5, verbose=False):
    """Run the final synthesis pass. Returns (brief, detailed).

    Reads every cached "dir" entry, renders them into the synthesis system
    prompt, and loops until the model calls submit_report. Other tool
    calls are dispatched through _execute_tool.

    If the loop ends without a submitted report — either because the turns
    ran out or because an API error interrupted it — the report is built
    from the cached directory summaries via _synthesize_from_cache, so
    already-cached work is not discarded.
    """
    dir_entries = cache.read_all_entries("dir")

    summary_lines = []
    for entry in dir_entries:
        rel = entry.get("relative_path", "?")
        summary = entry.get("summary", "(no summary)")
        dominant = entry.get("dominant_category", "?")
        # Cached values originate from model tool calls; tolerate a bare
        # string or non-string items instead of failing in join().
        notable = entry.get("notable_files") or []
        if not isinstance(notable, (list, tuple)):
            notable = [notable]
        summary_lines.append(f"### {rel}/")
        summary_lines.append(f"Category: {dominant}")
        summary_lines.append(f"Summary: {summary}")
        if notable:
            summary_lines.append(
                f"Notable files: {', '.join(str(n) for n in notable)}")
        summary_lines.append("")

    summaries_text = "\n".join(summary_lines) if summary_lines else "(none)"

    system = _SYNTHESIS_SYSTEM_PROMPT.format(
        target=target,
        summaries_text=summaries_text,
    )

    messages = [
        {
            "role": "user",
            "content": (
                "All directory summaries are in the system prompt above. "
                "Synthesize them into a cohesive report and call "
                "submit_report immediately — no other tool calls needed."
            ),
        },
    ]

    brief, detailed = "", ""
    submitted = False

    for turn in range(max_turns):
        try:
            content_blocks, _usage = _call_api_streaming(
                client, system, messages, _SYNTHESIS_TOOLS, tracker,
            )
        except anthropic.APIError as e:
            print(f"  [AI]   API error: {e}", file=sys.stderr)
            break

        # Print text blocks to stderr
        for b in content_blocks:
            if b.type == "text" and b.text.strip():
                for line in b.text.strip().split("\n"):
                    print(f"  [AI]   {line}", file=sys.stderr)

        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
        for tu in tool_uses:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
            ) if tu.input else ""
            print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)

        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(b) for b in content_blocks],
        })

        if not tool_uses:
            messages.append({
                "role": "user",
                "content": "Please call submit_report with your analysis.",
            })
            continue

        tool_results = []
        for tu in tool_uses:
            if tu.name == "submit_report":
                brief = tu.input.get("brief", "")
                detailed = tu.input.get("detailed", "")
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Report submitted. Thank you.",
                })
                submitted = True
            else:
                result_text = _execute_tool(
                    tu.name, tu.input, target, cache, "(synthesis)",
                    turn + 1, verbose=verbose,
                )
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": result_text,
                })

        messages.append({"role": "user", "content": tool_results})

        if submitted:
            break
    else:
        print("  [AI] Warning: synthesis ran out of turns.", file=sys.stderr)

    # No report was submitted (turns exhausted or API error): fall back to
    # a best-effort report assembled from the cache.
    if not submitted:
        brief, detailed = _synthesize_from_cache(cache)

    return brief, detailed


def _synthesize_from_cache(cache):
    """Assemble a fallback (brief, detailed) report from cached dir entries.

    The brief is the first non-empty directory summary; the detailed text
    lists every non-empty summary as a bold ``**<rel>/**`` paragraph.
    Placeholder text is returned when nothing usable is cached.
    """
    cached_dirs = cache.read_all_entries("dir")
    if not cached_dirs:
        return ("(AI analysis incomplete — no data was cached)", "")

    # Keep only entries that actually carry a summary, in cache order.
    summarized = [
        (entry.get("relative_path", "?"), entry.get("summary", ""))
        for entry in cached_dirs
    ]
    summarized = [(rel, text) for rel, text in summarized if text]
    if not summarized:
        return "(AI analysis incomplete)", ""

    brief = summarized[0][1]
    detailed = "\n\n".join(f"**{rel}/**: {text}" for rel, text in summarized)
    return brief, detailed


# ---------------------------------------------------------------------------
# Main orchestrator
# ---------------------------------------------------------------------------

def _run_investigation(client, target, report, show_hidden=False,
                       fresh=False, verbose=False, exclude=None):
    """Orchestrate the multi-pass investigation.

    Runs, in order: directory discovery, the survey pass, the planning
    pass, one agent loop per directory that has no cache entry yet, and
    the final synthesis pass. Survey and planning are both replaced by
    their defaults when the target is below the _SURVEY_MIN_FILES and
    _SURVEY_MIN_DIRS thresholds.

    Args:
        client: API client handed to the survey, planning, directory and
            synthesis passes.
        target: Root directory being investigated.
        report: Base-scan report dict. Its "file_categories" mapping is
            summed to get the total file count; the dict is also passed
            to the survey and planning passes.
        show_hidden: Forwarded to _discover_directories.
        fresh: Forwarded to _get_investigation_id; when true, a cached
            plan.json is ignored and planning runs again.
        verbose: Forwarded to every pass.
        exclude: Forwarded to _discover_directories.

    Returns:
        (brief, detailed, flags): the synthesis output plus the records
        read back from flags.jsonl in the cache directory.
    """
    investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
    cache = _CacheManager(investigation_id, target)
    tracker = _TokenTracker()

    if is_new:
        cache.write_meta(MODEL, _now_iso())

    print(f"  [AI] Investigation ID: {investigation_id}"
          f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
    print(f"  [AI] Cache: {cache.root}/", file=sys.stderr)

    all_dirs = _discover_directories(target, show_hidden=show_hidden,
                                     exclude=exclude)

    total_files = sum((report.get("file_categories") or {}).values())
    total_dirs = len(all_dirs)
    # One threshold test gates both the survey and the planning pass.
    small_target = (total_files < _SURVEY_MIN_FILES
                    and total_dirs < _SURVEY_MIN_DIRS)

    if small_target:
        print(
            f"  [AI] Survey skipped — {total_files} files, {total_dirs} dirs "
            f"(below threshold).",
            file=sys.stderr,
        )
        survey = _default_survey()
    else:
        print("  [AI] Survey pass...", file=sys.stderr)
        survey = _run_survey(client, target, report, tracker, verbose=verbose)
    _log_survey_summary(survey)

    # Split discovered directories into already-cached and still-to-do.
    to_investigate = []
    cached_dirs = []
    for d in all_dirs:
        if cache.has_entry("dir", d):
            cached_dirs.append(d)
            rel = os.path.relpath(d, target)
            print(f"  [AI] Skipping (cached): {rel}/", file=sys.stderr)
        else:
            to_investigate.append(d)

    cached_count = len(cached_dirs)
    if cached_count:
        print(f"  [AI] Directories cached: {cached_count}", file=sys.stderr)
    print(f"  [AI] Directories to investigate: {len(to_investigate)}",
          file=sys.stderr)

    # Planning pass: decide where to invest depth.
    if small_target:
        print("  [AI] Planning skipped (small target).", file=sys.stderr)
        plan = _default_plan()
    else:
        plan_path = os.path.join(cache.root, "plan.json")
        plan = None if fresh else _load_cached_plan(plan_path)
        if plan is not None:
            print("  [AI] Plan loaded from cache.", file=sys.stderr)
        else:
            print("  [AI] Planning pass...", file=sys.stderr)
            plan = _run_planning(
                client, target, survey, report, all_dirs, tracker,
                cached_dirs=cached_dirs, verbose=verbose,
            )
            if plan is None:
                print("  [AI] Planning failed, using defaults.",
                      file=sys.stderr)
                plan = _default_plan()
            else:
                # Save plan to cache (#11). Best-effort: a failed write
                # only means the plan is recomputed on the next run.
                try:
                    with open(plan_path, "w") as f:
                        json.dump(plan, f, indent=2)
                except OSError:
                    pass

    ordered, turn_map = _apply_plan(all_dirs, to_investigate, plan, target)

    # Log plan summary.
    skip_count = len(to_investigate) - len(ordered)
    priority_count = sum(
        1 for d in ordered if turn_map.get(d, _DEFAULT_TURNS) > _DEFAULT_TURNS
    )
    if skip_count or priority_count:
        print(
            f"  [AI] Plan: {priority_count} priority, "
            f"{skip_count} skipped, "
            f"{len(ordered) - priority_count} default/shallow",
            file=sys.stderr,
        )
    if plan.get("notes"):
        print(f"  [AI] Plan notes: {plan['notes']}", file=sys.stderr)

    total = len(ordered)
    turn_utilization = []

    for i, dir_path in enumerate(ordered, 1):
        dir_rel = os.path.relpath(dir_path, target)
        if dir_rel == ".":
            # Show the target's own name instead of "." in logs/metrics.
            dir_rel = os.path.basename(target)
        max_turns = turn_map.get(dir_path, _DEFAULT_TURNS)
        print(
            f"  [AI] Investigating: {dir_rel}/ ({i}/{total}, "
            f"{max_turns} turns)",
            file=sys.stderr,
        )

        tracker.reset_loop()
        summary, completeness = _run_dir_loop(
            client, target, cache, tracker, dir_path,
            max_turns=max_turns, verbose=verbose, survey=survey,
        )

        # Track turn utilization for quality metrics (#74).
        turn_utilization.append({
            "dir": dir_rel,
            "turns_allocated": max_turns,
            "turns_used": tracker._loop_turns,
            "completeness": completeness,
        })

        # Write a minimal entry only when the loop returned a summary but
        # left no directory entry in the cache itself.
        if summary and not cache.has_entry("dir", dir_path):
            entry = {
                "path": dir_path,
                "relative_path": os.path.relpath(dir_path, target),
                "child_count": _count_visible_children(dir_path),
                "summary": summary,
                "dominant_category": "unknown",
                "notable_files": [],
                "cached_at": _now_iso(),
            }
            if completeness is not None:
                entry["completeness"] = completeness
            cache.write_entry("dir", dir_path, entry)

    cache.update_meta(
        directories_investigated=total + cached_count,
        end_time=_now_iso(),
    )

    # Emit plan evaluation (#74).
    _write_plan_evaluation(cache, plan, turn_utilization)

    print("  [AI] Synthesis pass...", file=sys.stderr)
    brief, detailed = _run_synthesis(
        client, target, cache, tracker, verbose=verbose,
    )

    flags = _read_flag_records(os.path.join(cache.root, "flags.jsonl"))

    print(f"  [AI] Total tokens used: {tracker.summary()}", file=sys.stderr)

    return brief, detailed, flags


def _log_survey_summary(survey):
    """Print the survey result, or a notice that none is available, to stderr."""
    if not survey:
        print("  [AI] Survey unavailable — proceeding without it.", file=sys.stderr)
        return

    print(
        f"  [AI] Survey: {survey['description']} "
        f"(confidence {survey['confidence']:.2f})",
        file=sys.stderr,
    )
    if survey.get("domain_notes"):
        print(f"  [AI] Survey notes: {survey['domain_notes']}", file=sys.stderr)
    if survey.get("relevant_tools"):
        print(
            f"  [AI] Survey relevant_tools: {', '.join(survey['relevant_tools'])}",
            file=sys.stderr,
        )
    if survey.get("skip_tools"):
        print(
            f"  [AI] Survey skip_tools: {', '.join(survey['skip_tools'])}",
            file=sys.stderr,
        )


def _load_cached_plan(plan_path):
    """Return the plan dict stored at *plan_path*, or None if it is unusable.

    A missing or unreadable file, invalid JSON, and JSON that is not an
    object all yield None, so the caller falls back to a new planning pass.
    """
    try:
        with open(plan_path) as f:
            plan = json.load(f)
    except (OSError, json.JSONDecodeError):
        return None
    # The orchestrator calls plan.get(...), so anything but a dict is
    # treated as a corrupt cache file.
    return plan if isinstance(plan, dict) else None


def _count_visible_children(dir_path):
    """Count non-dot entries directly inside *dir_path*; 0 if it cannot be listed."""
    try:
        names = os.listdir(dir_path)
    except OSError:
        # Covers a missing path, a non-directory, and permission errors;
        # the count is informational and must not abort the investigation.
        return 0
    return sum(1 for name in names if not name.startswith("."))


def _read_flag_records(flags_path):
    """Read the JSON-lines flag file at *flags_path* into a list.

    Best-effort: an unreadable file yields the records collected so far
    (an empty list if it could not be opened). Blank lines are ignored.
    Lines that are not valid JSON are skipped one at a time, so a single
    corrupt line no longer discards the flags that follow it; the number
    of skipped lines is reported on stderr.
    """
    flags = []
    malformed = 0
    try:
        with open(flags_path) as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    flags.append(json.loads(raw))
                except json.JSONDecodeError:
                    malformed += 1
    except (OSError, UnicodeError):
        pass
    if malformed:
        print(
            f"  [AI] Warning: skipped {malformed} malformed flag line(s).",
            file=sys.stderr,
        )
    return flags


# ---------------------------------------------------------------------------
# Public interface
# ---------------------------------------------------------------------------

def analyze_directory(report, target, verbose_tools=False, fresh=False,
                      exclude=None):
    """Run AI analysis on the directory. Returns (brief, detailed, flags).

    Args:
        report: Base-scan report dict, passed to the investigation.
        target: Directory to analyze.
        verbose_tools: Passed to the investigation as its ``verbose`` flag.
        fresh: Passed to the investigation unchanged.
        exclude: Passed to the investigation unchanged.

    Returns:
        (brief, detailed, flags) from the investigation. Returns
        ("", "", []) if the API key is missing, or if creating the client
        or running the investigation raises; in that case a warning naming
        the exception is written to stderr.
    """
    api_key = _get_api_key()
    if not api_key:
        return "", "", []

    print("  [AI] Starting multi-pass investigation...", file=sys.stderr)

    try:
        # Client construction is inside the try so that a failure here
        # degrades to "no AI output" like any other failure.
        client = anthropic.Anthropic(api_key=api_key)
        brief, detailed, flags = _run_investigation(
            client, target, report, fresh=fresh, verbose=verbose_tools,
            exclude=exclude,
        )
    except Exception as e:
        # Top-level boundary: AI analysis is optional, so any failure is
        # reported and swallowed. The exception type is included because
        # str(e) alone can be empty or ambiguous (e.g. a bare KeyError key).
        print(f"Warning: AI analysis failed: {type(e).__name__}: {e}",
              file=sys.stderr)
        return "", "", []

    if not brief and not detailed:
        print("  [AI] Warning: agent produced no output.", file=sys.stderr)

    print("  [AI] Investigation complete.", file=sys.stderr)
    return brief, detailed, flags
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								"""AI-powered directory analysis using a multi-pass, cache-driven agent loop.
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								Architecture:
  1. Discover all directories under the target
  2. Sort leaves-first (deepest directories first)
  3. Run an isolated agent loop per directory (max 10 turns each)
  4. Cache every file and directory summary to disk
  5. Run a final synthesis pass reading only directory cache entries
 								Uses the Anthropic SDK for streaming, automatic retries, and token counting.
 								Uses tree-sitter for AST parsing and python-magic for file classification.
 								"""
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								import json
 								import os
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								import subprocess
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								import sys
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								from collections import namedtuple
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								from datetime import datetime, timezone
 								import anthropic
 								import magic
-												refactor: extract AST parsing into luminos_lib/ast_parser.py

Moves all tree-sitter parsing logic from ai.py into a dedicated module.
Replaces the if/elif language chain with a _LANGUAGE_HANDLERS registry
mapping language names to handler functions.

Extracted: _tool_parse_structure body, _get_ts_parser, _child_by_type,
_text, and all per-language helpers (_py_func_sig, _py_class, etc.).
ai.py retains a thin wrapper for path validation.

Public API: parse_structure(path) -> JSON string

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 14:34:02 -06:00
+								from luminos_lib.ast_parser import parse_structure
-												chore: remove dead clear_cache from ai.py, deduplicate CACHE_ROOT

- Delete unused clear_cache() from ai.py (luminos.py imports it from
  capabilities.py)
- Remove CACHE_ROOT import from ai.py (was only used by dead function)
- Replace local CACHE_ROOT constant in capabilities.py with import
  from cache.py (single source of truth)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:52:38 -06:00
+								from luminos_lib.cache import _CacheManager, _get_investigation_id
-												feat(ai): add _run_survey() and submit_survey tool (#5)

Adds the reconnaissance survey pass: a fast, ≤3-turn LLM call that
characterizes the target before any directory investigation begins.
The survey receives the file-type distribution (from the base scan),
a top-2-level tree preview, and the list of available dir-loop tools,
and returns description / approach / relevant_tools / skip_tools /
domain_notes / confidence via a single submit_survey tool call.

Wired into _run_investigation() before the directory loop. Output is
logged but not yet consumed — that wiring is #6. Survey failure is
non-fatal: if the call errors or runs out of turns, the investigation
proceeds without survey context.

Also adds a Band-Aid to _SURVEY_SYSTEM_PROMPT warning the LLM that
the file-type histogram is biased toward source code (the underlying
classifier has no concept of mail, notebooks, ledgers, etc.) and to
trust the tree preview when they conflict. The proper fix is #42.

											
										
										
											2026-04-06 21:49:59 -06:00
+								from luminos_lib.prompts import (
 								    _DIR_SYSTEM_PROMPT,
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    _PLANNING_SYSTEM_PROMPT,
-												feat(ai): add _run_survey() and submit_survey tool (#5)

Adds the reconnaissance survey pass: a fast, ≤3-turn LLM call that
characterizes the target before any directory investigation begins.
The survey receives the file-type distribution (from the base scan),
a top-2-level tree preview, and the list of available dir-loop tools,
and returns description / approach / relevant_tools / skip_tools /
domain_notes / confidence via a single submit_survey tool call.

Wired into _run_investigation() before the directory loop. Output is
logged but not yet consumed — that wiring is #6. Survey failure is
non-fatal: if the call errors or runs out of turns, the investigation
proceeds without survey context.

Also adds a Band-Aid to _SURVEY_SYSTEM_PROMPT warning the LLM that
the file-type histogram is biased toward source code (the underlying
classifier has no concept of mail, notebooks, ledgers, etc.) and to
trust the tree preview when they conflict. The proper fix is #42.

											
										
										
											2026-04-06 21:49:59 -06:00
+								    _SURVEY_SYSTEM_PROMPT,
 								    _SYNTHESIS_SYSTEM_PROMPT,
 								)
 								from luminos_lib.tree import build_tree, render_tree
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
 								MODEL = "claude-sonnet-4-20250514"
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
-												fix(ai): correct context budget metric — track per-call, not sum (#44)

The dir loop was exiting early on small targets (a 13-file Python lib
hit the budget at 92k–139k cumulative tokens) because _TokenTracker
compared the SUM of input_tokens across all turns to the context
window size. input_tokens from each API response is the size of the
full prompt sent on that turn (system + every prior message + new
tool results), so summing across turns multi-counts everything. The
real per-call context size never approached the limit.

Verified empirically: on luminos_lib pre-fix, the loop bailed when
the most recent call's input_tokens was 20,535 (~10% of Sonnet's
200k window) but the cumulative sum was 134,983.

Changes:
- _TokenTracker now tracks last_input (the most recent call's
  input_tokens), separate from the cumulative loop_input/total_input
  used for cost reporting.
- budget_exceeded() returns last_input > CONTEXT_BUDGET, not the
  cumulative sum.
- MAX_CONTEXT bumped from 180_000 to 200_000 (Sonnet 4's real
  context window). CONTEXT_BUDGET stays at 70% = 140,000.
- Early-exit message now shows context size, threshold, AND
  cumulative spend separately so future debugging is unambiguous.

Smoke test on luminos_lib: investigation completes without early
exit (~$0.37). 6 unit tests added covering the new semantics,
including the key regression: a sequence of small calls whose sum
exceeds the budget must NOT trip the check.

Wiki Architecture page updated.

#51 filed for the separate message-history-growth issue.

											
										
										
											2026-04-06 22:49:25 -06:00
+								# Context budget: trigger early exit when a single API call's input_tokens
 								# (the actual size of the context window in use, NOT the cumulative sum
 								# across turns) approaches the model's real context limit. Sonnet 4 has
 								# a 200k context window; we leave a 30% safety margin for the response
 								# and any tool result we're about to append.
 								MAX_CONTEXT = 200_000
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								CONTEXT_BUDGET = int(MAX_CONTEXT * 0.70)
 								# Pricing per 1M tokens (Claude Sonnet).
 								INPUT_PRICE_PER_M = 3.00
 								OUTPUT_PRICE_PER_M = 15.00
 								# Directories to always skip during investigation.
 								_SKIP_DIRS = {
 								    ".git", "__pycache__", "node_modules", ".tox", ".mypy_cache",
 								    ".pytest_cache", ".venv", "venv", ".env", "dist", "build",
 								    ".eggs", "*.egg-info", ".svn", ".hg",
 								}
 								# Commands the run_command tool is allowed to execute.
 								_COMMAND_WHITELIST = {"wc", "file", "grep", "head", "tail", "stat", "du", "find"}
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								# ---------------------------------------------------------------------------
 								# Helpers
 								# ---------------------------------------------------------------------------
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
 								def _get_api_key():
 								    """Read the Anthropic API key from the environment."""
 								    key = os.environ.get("ANTHROPIC_API_KEY", "")
 								    if not key:
 								        print("Warning: ANTHROPIC_API_KEY not set. Skipping AI analysis.",
 								              file=sys.stderr)
 								    return key
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								def _path_is_safe(path, target):
 								    """Return True if *path* resolves to somewhere inside *target*."""
 								    real = os.path.realpath(path)
 								    target_real = os.path.realpath(target)
 								    return real == target_real or real.startswith(target_real + os.sep)
 								def _now_iso():
 								    return datetime.now(timezone.utc).isoformat()
 								def _should_skip_dir(name):
 								    """Return True if a directory name matches the skip list."""
 								    if name in _SKIP_DIRS:
 								        return True
 								    for pattern in _SKIP_DIRS:
 								        if pattern.startswith("*") and name.endswith(pattern[1:]):
 								            return True
 								    return False
 								# ---------------------------------------------------------------------------
 								# Token tracker
 								# ---------------------------------------------------------------------------
 								class _TokenTracker:
 								    """Track token usage across API calls.

 								    Two distinct quantities are tracked:
 								      - cumulative totals (total_*, loop_*) — for cost reporting
 								      - last_input — the size of the context window on the most recent
 								        call, used to detect approaching the model's context limit

 								    Cumulative input is NOT a meaningful proxy for context size: each
 								    turn's input_tokens already includes the full message history, so
 								    summing across turns double-counts everything. Use last_input for
 								    budget decisions, totals for billing. (See #44.)
 								    """

 								    def __init__(self):
 								        # Running totals; never cleared by reset_loop(). summary() prices these.
 								        self.total_input = 0
 								        self.total_output = 0
 								        # Per-loop totals; cleared by reset_loop().
 								        self.loop_input = 0
 								        self.loop_output = 0
 								        # input_tokens of the most recent record() call (not a sum).
 								        self.last_input = 0
 								        # Number of record() calls since the last reset_loop().
 								        self._loop_turns = 0

 								    def record(self, usage):
 								        """Record usage from a single API call.

 								        Args:
 								            usage: Object exposing ``input_tokens`` and ``output_tokens``
 								                attributes. A count that is missing or ``None`` is
 								                treated as 0 instead of raising.
 								        """
 								        # getattr's default only covers a missing attribute; ``or 0`` also
 								        # covers an attribute that is present but None.
 								        inp = getattr(usage, "input_tokens", 0) or 0
 								        out = getattr(usage, "output_tokens", 0) or 0
 								        self.total_input += inp
 								        self.total_output += out
 								        self.loop_input += inp
 								        self.loop_output += out
 								        # Overwrite rather than accumulate: this is the per-call context
 								        # size that budget_exceeded() compares against CONTEXT_BUDGET.
 								        self.last_input = inp
 								        self._loop_turns += 1

 								    def reset_loop(self):
 								        """Reset per-loop counters (called between directory loops).

 								        The running totals (total_input / total_output) are left untouched.
 								        """
 								        self.loop_input = 0
 								        self.loop_output = 0
 								        self.last_input = 0
 								        self._loop_turns = 0

 								    @property
 								    def loop_total(self):
 								        """Input plus output tokens recorded since the last reset_loop()."""
 								        return self.loop_input + self.loop_output

 								    def budget_exceeded(self):
 								        """True when the most recent call's context exceeded the budget."""
 								        return self.last_input > CONTEXT_BUDGET

 								    def summary(self):
 								        """Return a one-line string with total tokens and approximate cost.

 								        Cost is computed from the running totals using the module-level
 								        per-million-token prices.
 								        """
 								        cost_in = self.total_input * INPUT_PRICE_PER_M / 1_000_000
 								        cost_out = self.total_output * OUTPUT_PRICE_PER_M / 1_000_000
 								        cost = cost_in + cost_out
 								        return (f"{self.total_input:,} input / {self.total_output:,} output "
 								                f"(approx ${cost:.2f})")
 								# ---------------------------------------------------------------------------
 								# Tool definitions
 								# ---------------------------------------------------------------------------
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
+								# ---------------------------------------------------------------------------
 								# Tool registry
 								#
 								# Tools are declared once via register_tool() at the bottom of the tool
 								# implementations section. Each registration lands its schema in one or
 								# more scope lists (_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS /
 								# _PLANNING_TOOLS) and its handler in _TOOL_DISPATCH (used by
 								# _execute_tool()).
 								#
 								# Tools intercepted by the loop body — submit_report and submit_survey —
 								# register their schema only and have no handler entry.
 								# ---------------------------------------------------------------------------
 								# Per-scope tool schema lists. Each starts empty at import time and is
 								# exposed by scope name through _TOOL_REGISTRIES.
 								# Schemas for the "dir" scope.
 								_DIR_TOOLS = []
 								# Schemas for the "synthesis" scope.
 								_SYNTHESIS_TOOLS = []
 								# Schemas for the "survey" scope.
 								_SURVEY_TOOLS = []
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								_PLANNING_TOOLS = []
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
+								_TOOL_DISPATCH = {}
 								# Scope name -> the module-level list that holds that scope's tool
 								# schemas. The values are the list objects themselves, not copies, so
 								# appending through this mapping mutates the named lists.
 								_TOOL_REGISTRIES = {
 								    "dir": _DIR_TOOLS,
 								    "synthesis": _SYNTHESIS_TOOLS,
 								    "survey": _SURVEY_TOOLS,
 								    "planning": _PLANNING_TOOLS,
 								}
 								def register_tool(name, description, schema, scopes, handler=None):
 								    """Register a tool's schema in one or more loop scopes and its handler.
 								    A single tool can be registered multiple times with different schemas
 								    in different scopes (submit_report has different schemas for the dir
 								    and synthesis loops). The handler is global — pass handler= once and
 								    omit it on subsequent registrations under the same name.
 								    Args:
 								        name: Tool name; stored in the schema entry and used as the
 								            _TOOL_DISPATCH key.
 								        description: Description stored in the schema entry.
 								        schema: Dict stored under the "input_schema" key.
 								        scopes: Iterable of scope names, each a key of _TOOL_REGISTRIES.
 								        handler: Optional callable stored in _TOOL_DISPATCH[name]; a
 								            non-None handler replaces any earlier one for that name.
 								    Raises:
 								        KeyError: If any scope is unknown. Nothing is registered in
 								            that case.
 								    """
 								    scopes = list(scopes)
 								    # Validate every scope before mutating anything, so a typo cannot
 								    # leave the tool registered in only some of the requested scopes.
 								    unknown = [scope for scope in scopes if scope not in _TOOL_REGISTRIES]
 								    if unknown:
 								        raise KeyError(
 								            f"unknown tool scope(s) {unknown!r} for tool {name!r}; "
 								            f"known scopes: {sorted(_TOOL_REGISTRIES)}"
 								        )
 								    schema_entry = {
 								        "name": name,
 								        "description": description,
 								        "input_schema": schema,
 								    }
 								    for scope in scopes:
 								        _TOOL_REGISTRIES[scope].append(schema_entry)
 								    if handler is not None:
 								        _TOOL_DISPATCH[name] = handler
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
 								# ---------------------------------------------------------------------------
 								# Tool implementations
 								# ---------------------------------------------------------------------------
 								def _tool_read_file(args, target, _cache):
 								    path = args.get("path", "")
 								    max_bytes = args.get("max_bytes", 4096)
 								    if not os.path.isabs(path):
 								        path = os.path.join(target, path)
 								    if not _path_is_safe(path, target):
 								        return f"Error: path '{path}' is outside the target directory."
 								    try:
 								        file_size = os.path.getsize(path)
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								        with open(path, "r", errors="replace") as f:
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								            content = f.read(max_bytes)
 								        if not content:
 								            return "(empty file)"
 								        if file_size > max_bytes:
 								            content += (
 								                f"\n\n[TRUNCATED — showed {max_bytes} of {file_size} bytes. "
 								                f"Call again with a larger max_bytes or use "
 								                f"run_command('tail -n ... {os.path.relpath(path, target)}') "
 								                f"to see the rest.]"
 								            )
 								        return content
 								    except OSError as e:
 								        return f"Error reading file: {e}"
 								def _tool_list_directory(args, target, _cache):
 								    """List a directory as one indented line per entry.
 								    Directories render as "name/  (dir)"; other entries render with their
 								    size in bytes and, when libmagic reports one, a MIME type. Entries
 								    whose stat fails are listed as "(stat failed)". Dot-prefixed names
 								    are skipped unless args["show_hidden"] is truthy.
 								    Args:
 								        args: Tool input; "path" defaults to the target itself and is
 								            joined onto target when relative.
 								        target: Root directory the path must stay inside.
 								        _cache: Unused; present for the common handler signature.
 								    Returns:
 								        The listing, "(empty directory)" when nothing is shown, or an
 								        "Error ..." string.
 								    """
 								    include_hidden = args.get("show_hidden", False)
 								    dir_path = args.get("path", target)
 								    if not os.path.isabs(dir_path):
 								        dir_path = os.path.join(target, dir_path)
 								    if not _path_is_safe(dir_path, target):
 								        return f"Error: path '{dir_path}' is outside the target directory."
 								    if not os.path.isdir(dir_path):
 								        return f"Error: '{dir_path}' is not a directory."
 								    try:
 								        names = sorted(os.listdir(dir_path))
 								    except OSError as e:
 								        return f"Error listing directory: {e}"
 								    rows = []
 								    for entry in names:
 								        if entry.startswith(".") and not include_hidden:
 								            continue
 								        entry_path = os.path.join(dir_path, entry)
 								        try:
 								            info = os.stat(entry_path)
 								            if os.path.isdir(entry_path):
 								                row = f"  {entry}/  (dir)"
 								            else:
 								                mime = magic.from_file(entry_path, mime=True)
 								                suffix = f"  [{mime}]" if mime else ""
 								                row = f"  {entry}  ({info.st_size} bytes){suffix}"
 								        except OSError:
 								            row = f"  {entry}  (stat failed)"
 								        rows.append(row)
 								    return "\n".join(rows) if rows else "(empty directory)"
 								def _tool_run_command(args, target, _cache):
 								    """Run one whitelisted read-only command with the target as cwd.
 								    The command string is tokenized with shlex and executed without a
 								    shell, so pipes, ';', '&&', redirection, substitutions and globs
 								    reach the binary as literal arguments instead of being interpreted.
 								    Only the first token is checked against _COMMAND_WHITELIST, and that
 								    check is meaningless if a shell can chain further commands after it.
 								    Args:
 								        args: Tool input; "command" holds the command line.
 								        target: Directory used as the working directory.
 								        _cache: Unused; present for the common handler signature.
 								    Returns:
 								        The command's stdout (with stderr appended in parentheses when
 								        the exit status is non-zero and stderr is non-empty),
 								        "(no output)" when that is empty, or an "Error: ..." string.
 								    """
 								    import shlex  # function-scope: the module does not import shlex
 								    command = args.get("command", "")
 								    if not isinstance(command, str):
 								        return "Error: command must be a string."
 								    try:
 								        argv = shlex.split(command)
 								    except ValueError as e:
 								        # Unbalanced quotes and similar tokenizer failures.
 								        return f"Error: could not parse command: {e}"
 								    if not argv:
 								        return "Error: empty command."
 								    binary = os.path.basename(argv[0])
 								    if binary not in _COMMAND_WHITELIST:
 								        return (
 								            f"Error: '{binary}' is not allowed. "
 								            f"Whitelist: {', '.join(sorted(_COMMAND_WHITELIST))}"
 								        )
 								    # Resolve the binary through PATH instead of running the supplied
 								    # path: a file named e.g. "grep" inside the analysed tree must not
 								    # pass the whitelist as "./grep".
 								    argv[0] = binary
 								    if binary == "find":
 								        # find can run programs and modify the filesystem on its own.
 								        unsafe_actions = {
 								            "-exec", "-execdir", "-ok", "-okdir", "-delete",
 								            "-fprint", "-fprint0", "-fprintf", "-fls",
 								        }
 								        blocked = sorted(unsafe_actions.intersection(argv[1:]))
 								        if blocked:
 								            return (
 								                "Error: find action(s) not allowed: "
 								                f"{', '.join(blocked)}"
 								            )
 								    try:
 								        result = subprocess.run(
 								            argv, capture_output=True, text=True, errors="replace",
 								            timeout=15, cwd=target,
 								        )
 								        output = result.stdout
 								        if result.returncode != 0 and result.stderr:
 								            output += f"\n(stderr: {result.stderr.strip()})"
 								        return output.strip() if output.strip() else "(no output)"
 								    except subprocess.TimeoutExpired:
 								        return "Error: command timed out after 15 seconds."
 								    except OSError as e:
 								        return f"Error running command: {e}"
 								def _tool_parse_structure(args, target, _cache):
 								    path = args.get("path", "")
 								    if not os.path.isabs(path):
 								        path = os.path.join(target, path)
 								    if not _path_is_safe(path, target):
 								        return f"Error: path '{path}' is outside the target directory."
-												refactor: extract AST parsing into luminos_lib/ast_parser.py

Moves all tree-sitter parsing logic from ai.py into a dedicated module.
Replaces the if/elif language chain with a _LANGUAGE_HANDLERS registry
mapping language names to handler functions.

Extracted: _tool_parse_structure body, _get_ts_parser, _child_by_type,
_text, and all per-language helpers (_py_func_sig, _py_class, etc.).
ai.py retains a thin wrapper for path validation.

Public API: parse_structure(path) -> JSON string

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 14:34:02 -06:00
+								    return parse_structure(path)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
 								def _tool_write_cache(args, _target, cache):
 								    """Validate the cache type, then delegate to cache.write_entry.
 								    Args:
 								        args: Tool input with "cache_type", "path" and "data"; missing
 								            keys default to "", "" and {} respectively.
 								        _target: Unused; present for the common handler signature.
 								        cache: Object providing write_entry(cache_type, path, data).
 								    Returns:
 								        Whatever cache.write_entry returns, or an error string when
 								        cache_type is neither "file" nor "dir".
 								    """
 								    kind = args.get("cache_type", "")
 								    if kind in ("file", "dir"):
 								        entry_path = args.get("path", "")
 								        payload = args.get("data", {})
 								        return cache.write_entry(kind, entry_path, payload)
 								    return "Error: cache_type must be 'file' or 'dir'."
 								def _tool_read_cache(args, _target, cache):
 								    """Fetch one cache entry and return it as indented JSON text.
 								    Args:
 								        args: Tool input with "cache_type" and "path" (both default "").
 								        _target: Unused; present for the common handler signature.
 								        cache: Object providing read_entry(cache_type, path).
 								    Returns:
 								        The entry serialized with json.dumps(indent=2), the string
 								        "null" when read_entry returns None, or an error string when
 								        cache_type is neither "file" nor "dir".
 								    """
 								    kind = args.get("cache_type", "")
 								    if kind not in ("file", "dir"):
 								        return "Error: cache_type must be 'file' or 'dir'."
 								    cached = cache.read_entry(kind, args.get("path", ""))
 								    return "null" if cached is None else json.dumps(cached, indent=2)
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								def _tool_list_cache(args, _target, cache):
 								    cache_type = args.get("cache_type", "")
 								    if cache_type not in ("file", "dir"):
 								        return "Error: cache_type must be 'file' or 'dir'."
 								    paths = cache.list_entries(cache_type)
 								    if not paths:
 								        return "(no cached entries)"
 								    return "\n".join(paths)
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								def _tool_think(args, _target, _cache):
 								    obs = args.get("observation", "")
 								    hyp = args.get("hypothesis", "")
 								    nxt = args.get("next_action", "")
 								    print(f"  [AI] THINK", file=sys.stderr)
 								    print(f"       observation: {obs}", file=sys.stderr)
 								    print(f"       hypothesis:  {hyp}", file=sys.stderr)
 								    print(f"       next_action: {nxt}", file=sys.stderr)
 								    return "ok"
 								def _tool_checkpoint(args, _target, _cache):
 								    """Echo a progress checkpoint to stderr.
 								    Prints a header followed by the "learned", "still_unknown" and
 								    "next_phase" inputs (each defaulting to ""), then returns "ok".
 								    The target and cache arguments are unused.
 								    """
 								    rows = (
 								        ("       learned:       ", "learned"),
 								        ("       still_unknown: ", "still_unknown"),
 								        ("       next_phase:    ", "next_phase"),
 								    )
 								    print("  [AI] CHECKPOINT", file=sys.stderr)
 								    for label, key in rows:
 								        print(f"{label}{args.get(key, '')}", file=sys.stderr)
 								    return "ok"
 								def _tool_flag(args, _target, cache):
 								    """Report a notable finding on stderr and append it to flags.jsonl.
 								    Args:
 								        args: Tool input; "path" defaults to "general", "finding" to ""
 								            and "severity" to "info".
 								        _target: Unused; present for the common handler signature.
 								        cache: Object whose root attribute is the directory that holds
 								            flags.jsonl.
 								    Returns:
 								        The string "ok", including when the flag could not be written:
 								        persistence is best-effort, but a failure is reported on
 								        stderr instead of being dropped silently.
 								    """
 								    path = args.get("path", "general")
 								    finding = args.get("finding", "")
 								    severity = args.get("severity", "info")
 								    # str() keeps a non-string severity from crashing on .upper().
 								    print(f"  [AI] FLAG [{str(severity).upper()}] {path}", file=sys.stderr)
 								    print(f"       {finding}", file=sys.stderr)
 								    flags_path = os.path.join(cache.root, "flags.jsonl")
 								    entry = {"path": path, "finding": finding, "severity": severity}
 								    try:
 								        with open(flags_path, "a", encoding="utf-8") as f:
 								            f.write(json.dumps(entry) + "\n")
 								    except OSError as e:
 								        print(
 								            f"  [AI] WARNING: could not persist flag to {flags_path}: {e}",
 								            file=sys.stderr,
 								        )
 								    return "ok"
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
+								# ---------------------------------------------------------------------------
 								# Tool registrations
 								#
 								# Order within each scope is preserved to keep the agent-visible tool list
 								# stable. Tools that appear in two scopes (flag) and tools whose schema
 								# differs by scope (submit_report) are registered once per scope.
 								# ---------------------------------------------------------------------------
 								# Agent-facing description of the flag tool, kept as a named constant
 								# so a flag registration can pass it by name (see description= in the
 								# dir-scope register_tool call for "flag").
 								_FLAG_DESCRIPTION = (
 								    "Mark a file, directory, or finding as notable or anomalous. "
 								    "Call this immediately when you discover something surprising, "
 								    "concerning, or important — do not save it for the report."
 								)
 								_FLAG_SCHEMA = {
 								    "type": "object",
 								    "properties": {
 								        "path": {
 								            "type": "string",
 								            "description": "Relative path, or 'general'.",
 								        },
 								        "finding": {
 								            "type": "string",
 								            "description": "What you found.",
 								        },
 								        "severity": {
 								            "type": "string",
 								            "enum": ["info", "concern", "critical"],
 								            "description": "info | concern | critical",
 								        },
 								    },
 								    "required": ["path", "finding", "severity"],
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								}
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
+								# --- Dir loop tools ---
 								# read_file: dir-scope tool dispatched to _tool_read_file. Only "path"
 								# is required; "max_bytes" is optional and advertised as defaulting
 								# to 4096.
 								register_tool(
 								    name="read_file",
 								    description=(
 								        "Read and return the contents of a file. Path must be inside "
 								        "the target directory."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "path": {
 								                "type": "string",
 								                "description": "Absolute or relative path to the file.",
 								            },
 								            "max_bytes": {
 								                "type": "integer",
 								                "description": "Maximum bytes to read (default 4096).",
 								            },
 								        },
 								        "required": ["path"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_read_file,
 								)
 								# list_directory: dir-scope tool dispatched to _tool_list_directory.
 								# The schema marks "path" as required, although the handler itself
 								# falls back to the target directory when the key is absent.
 								register_tool(
 								    name="list_directory",
 								    description=(
 								        "List the contents of a directory with file sizes and types."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "path": {
 								                "type": "string",
 								                "description": "Absolute or relative path to the directory.",
 								            },
 								            "show_hidden": {
 								                "type": "boolean",
 								                "description": "Include hidden files (default false).",
 								            },
 								        },
 								        "required": ["path"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_list_directory,
 								)
 								# run_command: dir-scope tool dispatched to _tool_run_command. The
 								# list of allowed binaries is rendered from _COMMAND_WHITELIST (sorted,
 								# as in the handler's rejection message) so the description cannot
 								# drift from the set the handler actually enforces.
 								register_tool(
 								    name="run_command",
 								    description=(
 								        "Run a read-only shell command. Allowed binaries: "
 								        f"{', '.join(sorted(_COMMAND_WHITELIST))}."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "command": {
 								                "type": "string",
 								                "description": "The shell command to execute.",
 								            },
 								        },
 								        "required": ["command"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_run_command,
 								)
 								# parse_structure: dir-scope tool dispatched to _tool_parse_structure;
 								# takes a single required "path".
 								register_tool(
 								    name="parse_structure",
 								    description=(
 								        "Parse a source file using tree-sitter and return its structural "
 								        "skeleton: functions, classes, imports, and code metrics. "
 								        "Supported: Python, JavaScript, TypeScript, Rust, Go."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "path": {
 								                "type": "string",
 								                "description": "Path to the source file to parse.",
 								            },
 								        },
 								        "required": ["path"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_parse_structure,
 								)
 								# write_cache: dir-scope tool dispatched to _tool_write_cache. The
 								# expected fields of "data" are listed only in its description text;
 								# the schema types it as a plain object, so this schema does not
 								# constrain the field names.
 								register_tool(
 								    name="write_cache",
 								    description=(
 								        "Write a summary cache entry for a file or directory. The data "
 								        "must NOT contain raw file contents — summaries only."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "cache_type": {
 								                "type": "string",
 								                "enum": ["file", "dir"],
 								                "description": "'file' or 'dir'.",
 								            },
 								            "path": {
 								                "type": "string",
 								                "description": "The path being cached.",
 								            },
 								            "data": {
 								                "type": "object",
 								                "description": (
 								                    "Cache entry. Files: {path, relative_path, size_bytes, "
 								                    "category, summary, notable, notable_reason, "
 								                    "confidence, confidence_reason, cached_at}. "
 								                    "Dirs: {path, relative_path, child_count, summary, "
 								                    "dominant_category, notable_files, "
 								                    "confidence, confidence_reason, cached_at}. "
 								                    "Always set confidence (0.0–1.0); see system prompt "
 								                    "for calibration. Set confidence_reason only when "
 								                    "confidence < 0.7."
 								                ),
 								            },
 								        },
 								        "required": ["cache_type", "path", "data"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_write_cache,
 								)
 								# think: dir-scope tool dispatched to _tool_think, which prints the
 								# three required fields to stderr and returns "ok".
 								register_tool(
 								    name="think",
 								    description=(
 								        "Record your reasoning before choosing which file or directory "
 								        "to investigate next. Call this when deciding what to look at "
 								        "— not before every individual tool call."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "observation": {
 								                "type": "string",
 								                "description": "What you have observed so far.",
 								            },
 								            "hypothesis": {
 								                "type": "string",
 								                "description": "Your hypothesis about the directory.",
 								            },
 								            "next_action": {
 								                "type": "string",
 								                "description": "What you plan to investigate next and why.",
 								            },
 								        },
 								        "required": ["observation", "hypothesis", "next_action"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_think,
 								)
 								# checkpoint: dir-scope tool dispatched to _tool_checkpoint, which
 								# prints the three required fields to stderr and returns "ok".
 								register_tool(
 								    name="checkpoint",
 								    description=(
 								        "Summarize what you have learned so far about this directory "
 								        "and what you still need to determine. Call this after completing "
 								        "a significant cluster of files — not after every file."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "learned": {
 								                "type": "string",
 								                "description": "What you have learned so far.",
 								            },
 								            "still_unknown": {
 								                "type": "string",
 								                "description": "What you still need to determine.",
 								            },
 								            "next_phase": {
 								                "type": "string",
 								                "description": "What you will investigate next.",
 								            },
 								        },
 								        "required": ["learned", "still_unknown", "next_phase"],
 								    },
 								    scopes=["dir"],
 								    handler=_tool_checkpoint,
 								)
 								# flag (dir scope): takes its description and schema from the
 								# _FLAG_DESCRIPTION / _FLAG_SCHEMA constants and dispatches to
 								# _tool_flag.
 								register_tool(
 								    name="flag",
 								    description=_FLAG_DESCRIPTION,
 								    schema=_FLAG_SCHEMA,
 								    scopes=["dir"],
 								    handler=_tool_flag,
 								)
 								register_tool(
 								    name="submit_report",
 								    description=(
 								        "Submit the directory summary. This ends the investigation loop."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "summary": {
 								                "type": "string",
 								                "description": "1-3 sentence summary of the directory.",
 								            },
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								            "completeness": {
 								                "type": "number",
 								                "description": (
 								                    "Self-rated investigation completeness (0.0-1.0). "
 								                    "1.0 = examined every relevant file thoroughly. "
 								                    "0.5 = examined about half, or skimmed most. "
 								                    "< 0.3 = barely scratched the surface."
 								                ),
 								            },
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
+								        },
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        "required": ["summary", "completeness"],
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
+								    },
 								    scopes=["dir"],
 								)
 								# --- Synthesis tools ---
 								# Every call in this group registers under the "synthesis" scope only.
 								#
 								# read_cache: look up one cached summary, addressed by entry type
 								# ("file" or "dir") and path. Both fields are required.
 								register_tool(
 								    name="read_cache",
 								    description="Read a previously cached summary for a file or directory.",
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "cache_type": {
 								                "type": "string",
 								                "enum": ["file", "dir"],
 								            },
 								            "path": {
 								                "type": "string",
 								                "description": "The path to look up.",
 								            },
 								        },
 								        "required": ["cache_type", "path"],
 								    },
 								    scopes=["synthesis"],
 								    handler=_tool_read_cache,
 								)
 								# list_cache: list the cached entry paths for one entry type.
 								register_tool(
 								    name="list_cache",
 								    description="List all cached entry paths of a given type.",
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "cache_type": {
 								                "type": "string",
 								                "enum": ["file", "dir"],
 								            },
 								        },
 								        "required": ["cache_type"],
 								    },
 								    scopes=["synthesis"],
 								    handler=_tool_list_cache,
 								)
 								# flag: reuses the shared _FLAG_DESCRIPTION / _FLAG_SCHEMA constants.
 								# No handler argument is passed in this call; the dir-scope
 								# registration of "flag" is the one that passes _tool_flag.
 								register_tool(
 								    name="flag",
 								    description=_FLAG_DESCRIPTION,
 								    schema=_FLAG_SCHEMA,
 								    scopes=["synthesis"],
 								)
 								# submit_report (synthesis shape): a short "brief" plus a "detailed"
 								# breakdown, both required. No handler argument is passed.
 								# NOTE(review): presumably the synthesis loop intercepts this call
 								# itself rather than dispatching it -- confirm against the loop body.
 								register_tool(
 								    name="submit_report",
 								    description="Submit the final analysis report.",
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "brief": {
 								                "type": "string",
 								                "description": "2-4 sentence summary.",
 								            },
 								            "detailed": {
 								                "type": "string",
 								                "description": "Thorough breakdown.",
 								            },
 								        },
 								        "required": ["brief", "detailed"],
 								    },
 								    scopes=["synthesis"],
 								)
 								# --- Survey tools ---
 								# submit_survey: the single tool registered under the "survey" scope.
 								# All six fields are required (see "required" below) and no handler
 								# argument is passed.
 								register_tool(
 								    name="submit_survey",
 								    description=(
 								        "Submit the reconnaissance survey. Call exactly once."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            "description": {
 								                "type": "string",
 								                "description": "Plain-language description of the target.",
 								            },
 								            "approach": {
 								                "type": "string",
 								                "description": "Recommended analytical approach.",
 								            },
 								            # relevant_tools / skip_tools are lists of tool names.
 								            "relevant_tools": {
 								                "type": "array",
 								                "items": {"type": "string"},
 								                "description": "Tool names the dir loop should lean on.",
 								            },
 								            "skip_tools": {
 								                "type": "array",
 								                "items": {"type": "string"},
 								                "description": "Tool names whose use would be wrong here.",
 								            },
 								            # domain_notes is required, so "no notes" is sent as "".
 								            "domain_notes": {
 								                "type": "string",
 								                "description": "Short actionable hint, or empty string.",
 								            },
 								            "confidence": {
 								                "type": "number",
 								                "description": "0.0–1.0 confidence in this survey.",
 								            },
 								        },
 								        "required": [
 								            "description", "approach", "relevant_tools",
 								            "skip_tools", "domain_notes", "confidence",
 								        ],
 								    },
 								    scopes=["survey"],
 								)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								# --- Planning tools ---
 								# submit_plan: the single tool registered under the "planning" scope.
 								# The plan sorts directories into three lists (priority / shallow /
 								# skip), picks an ordering, and carries free-form notes. All five
 								# top-level fields are required and no handler argument is passed.
 								register_tool(
 								    name="submit_plan",
 								    description=(
 								        "Submit the investigation plan. Call exactly once."
 								    ),
 								    schema={
 								        "type": "object",
 								        "properties": {
 								            # Priority entries are the only ones that carry a
 								            # suggested_turns value; all three of their fields are required.
 								            "priority_dirs": {
 								                "type": "array",
 								                "items": {
 								                    "type": "object",
 								                    "properties": {
 								                        "path": {
 								                            "type": "string",
 								                            "description": "Relative directory path.",
 								                        },
 								                        "reason": {
 								                            "type": "string",
 								                            "description": "Why this dir deserves deep investigation.",
 								                        },
 								                        "suggested_turns": {
 								                            "type": "integer",
 								                            "description": "Suggested turns (15-20).",
 								                        },
 								                    },
 								                    "required": ["path", "reason", "suggested_turns"],
 								                },
 								                "description": "Directories to investigate deeply.",
 								            },
 								            # Shallow and skip entries share one shape: path + reason.
 								            "shallow_dirs": {
 								                "type": "array",
 								                "items": {
 								                    "type": "object",
 								                    "properties": {
 								                        "path": {
 								                            "type": "string",
 								                            "description": "Relative directory path.",
 								                        },
 								                        "reason": {
 								                            "type": "string",
 								                            "description": "Why a shallow pass is sufficient.",
 								                        },
 								                    },
 								                    "required": ["path", "reason"],
 								                },
 								                "description": "Directories needing only a quick pass.",
 								            },
 								            "skip_dirs": {
 								                "type": "array",
 								                "items": {
 								                    "type": "object",
 								                    "properties": {
 								                        "path": {
 								                            "type": "string",
 								                            "description": "Relative directory path.",
 								                        },
 								                        "reason": {
 								                            "type": "string",
 								                            "description": "Why this dir should be skipped.",
 								                        },
 								                    },
 								                    "required": ["path", "reason"],
 								                },
 								                "description": "Directories to skip entirely.",
 								            },
 								            # Closed choice between the two supported orderings.
 								            "investigation_order": {
 								                "type": "string",
 								                "enum": ["leaf-first", "priority-first"],
 								                "description": "leaf-first or priority-first (leaf-first within bands).",
 								            },
 								            # notes is required, so "no notes" is sent as an empty string.
 								            "notes": {
 								                "type": "string",
 								                "description": "Cross-cutting notes for per-directory agents, or empty.",
 								            },
 								        },
 								        "required": [
 								            "priority_dirs", "shallow_dirs", "skip_dirs",
 								            "investigation_order", "notes",
 								        ],
 								    },
 								    scopes=["planning"],
 								)
-												refactor(ai): single-source tool registration via register_tool() (#56)

Adding a tool used to require updating two parallel structures in
ai.py: a name->handler entry in _TOOL_DISPATCH and a schema dict in
_DIR_TOOLS (or _SYNTHESIS_TOOLS or _SURVEY_TOOLS). Forgetting one half
was silent. Internals.md §9.1 documented this as a 5-step process.

Replaced both with a single register_tool() call per (tool, scope):

    register_tool(
        name="read_file",
        description="...",
        schema={...},
        scopes=["dir"],
        handler=_tool_read_file,
    )

The function appends the schema to one or more scope lists
(_DIR_TOOLS / _SYNTHESIS_TOOLS / _SURVEY_TOOLS) and lands the handler
in _TOOL_DISPATCH. Tools intercepted by the loop body (submit_report,
submit_survey) register schema only with handler=None.

Tools whose schema differs by scope (submit_report has different shapes
in dir vs synthesis loops) get one register_tool() call per scope.
flag is also registered twice because it appears in dir + synthesis at
different positions in each list — the order is preserved with two
calls rather than reordered for fewer calls.

Verification:
- _DIR_TOOLS, _SYNTHESIS_TOOLS, _SURVEY_TOOLS contain the same names
  in the same order as before.
- _TOOL_DISPATCH contains the same 10 handlers as before.
- 164 tests pass.

No behavior change. Phase 3.5 (#39) MCP backend will eventually replace
this with dynamic discovery from the connected MCP server, at which
point register_tool() collapses to a one-line forward.

											
										
										
											2026-04-11 10:18:40 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								def _execute_tool(name, args, target, cache, dir_rel, turn, verbose=False):
 								    """Execute a tool by name and return the result string."""
 								    handler = _TOOL_DISPATCH.get(name)
 								    if handler is None:
 								        return f"Error: unknown tool '{name}'."
 								    result = handler(args, target, cache)
 								    cache.log_turn(dir_rel, turn, name,
 								                   {k: v for k, v in args.items() if k != "data"},
 								                   len(result))
 								    if verbose:
 								        preview = result[:200] + "..." if len(result) > 200 else result
 								        print(f"  [AI]     <- {len(result)} chars: {preview}", file=sys.stderr)
 								    return result
 								# ---------------------------------------------------------------------------
 								# Streaming API caller
 								# ---------------------------------------------------------------------------
 								def _call_api_streaming(client, system, messages, tools, tracker):
 								    """Call Claude via the streaming API and return the finished response.
 								
 								    The event stream is consumed to completion and the assembled final
 								    message is returned. Nothing is printed here: the earlier version
 								    claimed to print tool decisions in real time, but it only assigned an
 								    unused local, so any display of tool calls has to be done by the
 								    caller.
 								
 								    Args:
 								        client: Object exposing ``messages.stream(...)`` as a context
 								            manager (the Anthropic SDK client).
 								        system: System prompt, passed through as ``system``.
 								        messages: Conversation so far, passed through as ``messages``.
 								        tools: Tool schemas, passed through as ``tools``.
 								        tracker: Object whose ``record(usage)`` is called once with the
 								            response usage.
 								
 								    Returns:
 								        ``(content_blocks, usage)``: ``response.content`` and
 								        ``response.usage`` of the final message.
 								    """
 								    with client.messages.stream(
 								        model=MODEL,
 								        max_tokens=4096,
 								        system=system,
 								        messages=messages,
 								        tools=tools,
 								    ) as stream:
 								        # Drain every event before asking for the assembled message, as
 								        # the original loop did; the events themselves are not inspected.
 								        for _event in stream:
 								            pass
 								        response = stream.get_final_message()
 								    tracker.record(response.usage)
 								    return response.content, response.usage
 								# ---------------------------------------------------------------------------
 								# Directory discovery
 								# ---------------------------------------------------------------------------
-												feat: add -x/--exclude flag to exclude directories from scan and AI analysis

											
										
										
											2026-04-06 14:32:12 -06:00
+								def _discover_directories(target, show_hidden=False, exclude=None):
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    """Walk the target and return all directories sorted leaves-first."""
-												feat: add -x/--exclude flag to exclude directories from scan and AI analysis

											
										
										
											2026-04-06 14:32:12 -06:00
+								    extra = set(exclude or [])
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    dirs = []
 								    target_real = os.path.realpath(target)
 								    for root, subdirs, _files in os.walk(target_real, topdown=True):
 								        subdirs[:] = [
 								            d for d in subdirs
 								            if not _should_skip_dir(d)
-												feat: add -x/--exclude flag to exclude directories from scan and AI analysis

											
										
										
											2026-04-06 14:32:12 -06:00
+								            and d not in extra
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								            and (show_hidden or not d.startswith("."))
 								        ]
 								        dirs.append(root)
 								    dirs.sort(key=lambda d: (-d.count(os.sep), d))
 								    return dirs
 								# ---------------------------------------------------------------------------
 								# Per-directory agent loop
 								# ---------------------------------------------------------------------------
 								def _build_dir_context(dir_path):
 								    lines = []
 								    try:
 								        entries = sorted(os.listdir(dir_path))
 								        for name in entries:
 								            if name.startswith("."):
 								                continue
 								            full = os.path.join(dir_path, name)
 								            try:
 								                st = os.stat(full)
 								                if os.path.isdir(full):
 								                    lines.append(f"  {name}/  (dir)")
 								                else:
 								                    mime = magic.from_file(full, mime=True)
 								                    lines.append(f"  {name}  ({st.st_size} bytes)  [{mime}]")
 								            except OSError:
 								                lines.append(f"  {name}  (stat failed)")
 								    except OSError:
 								        lines.append("  (could not list directory)")
 								    return "Directory contents:\n" + "\n".join(lines) if lines else "(empty)"
 								def _get_child_summaries(dir_path, cache):
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								    parts = []
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    try:
 								        for name in sorted(os.listdir(dir_path)):
 								            child = os.path.join(dir_path, name)
 								            if not os.path.isdir(child):
 								                continue
 								            entry = cache.read_entry("dir", child)
 								            if entry:
 								                rel = entry.get("relative_path", name)
 								                summary = entry.get("summary", "(no summary)")
 								                parts.append(f"- {rel}/: {summary}")
 								    except OSError:
 								        pass
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    if parts:
 								        return "\n".join(parts)
 								    # Distinguish actual leaves from parents whose children haven't been
 								    # investigated yet. The old placeholder claimed "leaf directory" even
 								    # when children existed but were not yet cached, which silently
 								    # degraded parent context.
 								    try:
 								        has_subdirs = any(
 								            os.path.isdir(os.path.join(dir_path, name))
 								            for name in os.listdir(dir_path)
 								            if not name.startswith(".")
 								        )
 								    except OSError:
 								        has_subdirs = False
 								    if has_subdirs:
 								        return "(child directories exist but have not been investigated yet)"
 								    return "(none: this is a leaf directory)"
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
-												feat(ai): wire survey output into dir loop (#6)

The survey pass now actually steers dir loop behavior, in two ways:

1. Prompt injection: a new {survey_context} placeholder in
   _DIR_SYSTEM_PROMPT receives the survey description, approach,
   domain_notes, relevant_tools, and skip_tools so the dir-loop agent
   has investigation context before its first turn.

2. Tool schema filtering: _filter_dir_tools() removes any tool listed
   in skip_tools from the schema passed to the API, gated on
   survey confidence >= 0.5. Control-flow tools (submit_report) are
   always preserved. This is hard enforcement — the agent literally
   cannot call a filtered tool, which the smoke test for #5 showed
   was necessary (prompt-only guidance was ignored).

Smoke test on luminos_lib: zero run_command invocations (vs 2 before),
context budget no longer exhausted (87k vs 133k), cost ~$0.34 (vs
$0.46), investigation completes instead of early-exiting.

Adds tests/test_ai_filter.py with 14 tests covering _filter_dir_tools
and _format_survey_block — both pure helpers, no live API needed.

											
										
										
											2026-04-06 22:07:12 -06:00
 								# Survey confidence cutoff for enforcing the survey's skip_tools.
 								# NOTE(review): presumably compared with ">=" inside
 								# _filter_dir_tools() -- confirm against that function.
+								_SURVEY_CONFIDENCE_THRESHOLD = 0.5
 								# Tool names that must stay available to the dir loop.
 								# NOTE(review): presumably _filter_dir_tools() never removes these even
 								# when the survey lists them in skip_tools -- confirm.
 								_PROTECTED_DIR_TOOLS = {"submit_report"}
-												feat(ai): skip survey pass for tiny targets (#7)

Adds a gate in _run_investigation that skips the survey API call when
a target has both fewer than _SURVEY_MIN_FILES (5) files AND fewer
than _SURVEY_MIN_DIRS (2) directories. AND semantics handle the
deep-narrow edge case correctly: a target with 4 files spread across
50 directories still gets a survey because dir count amortizes the
cost across 50 dir loops.

When skipped, _default_survey() supplies a synthetic dict with
confidence=0.0 — chosen specifically so _filter_dir_tools() never
enforces skip_tools from a synthetic value. The dir loop receives
a generic "small target, read everything" framing in its prompt and
keeps its full toolbox.

Reorders _discover_directories() to run before the survey gate so
total_dirs is available without a second walk.

#46 tracks revisiting the threshold values with empirical data after
Phase 2 ships and we've run --ai on a variety of real targets.

Smoke tested on a 2-file target: gate triggers, default survey
substituted, dir loop completes normally. Adds 4 unit tests for
_default_survey() covering schema, confidence guard, filter
interaction, and empty skip_tools.

											
										
										
											2026-04-06 22:19:25 -06:00
+								# Survey-skip thresholds. The survey is skipped only when the target
 								# has fewer files than _SURVEY_MIN_FILES AND fewer directories than
 								# _SURVEY_MIN_DIRS; reaching either threshold keeps the survey.
 								# See #46 for the plan to revisit these with empirical data.
 								_SURVEY_MIN_FILES = 5
 								_SURVEY_MIN_DIRS = 2
 								def _default_survey():
 								    """Synthetic survey for targets too small to justify the API call.
 								    confidence=0.0 ensures _filter_dir_tools() never enforces skip_tools
 								    based on this synthetic value — the dir loop keeps its full toolbox.
 								    """
 								    return {
 								        "description": "Small target — survey skipped.",
 								        "approach": (
 								            "The target is small enough to investigate exhaustively. "
 								            "Read every file directly."
 								        ),
 								        "relevant_tools": [],
 								        "skip_tools": [],
 								        "domain_notes": "",
 								        "confidence": 0.0,
 								    }
-												feat(ai): wire survey output into dir loop (#6)

The survey pass now actually steers dir loop behavior, in two ways:

1. Prompt injection: a new {survey_context} placeholder in
   _DIR_SYSTEM_PROMPT receives the survey description, approach,
   domain_notes, relevant_tools, and skip_tools so the dir-loop agent
   has investigation context before its first turn.

2. Tool schema filtering: _filter_dir_tools() removes any tool listed
   in skip_tools from the schema passed to the API, gated on
   survey confidence >= 0.5. Control-flow tools (submit_report) are
   always preserved. This is hard enforcement — the agent literally
   cannot call a filtered tool, which the smoke test for #5 showed
   was necessary (prompt-only guidance was ignored).

Smoke test on luminos_lib: zero run_command invocations (vs 2 before),
context budget no longer exhausted (87k vs 133k), cost ~$0.34 (vs
$0.46), investigation completes instead of early-exiting.

Adds tests/test_ai_filter.py with 14 tests covering _filter_dir_tools
and _format_survey_block — both pure helpers, no live API needed.

											
										
										
											2026-04-06 22:07:12 -06:00
 								def _format_survey_block(survey):
 								    """Render survey output as a labeled text block for the dir prompt."""
 								    if not survey:
 								        return "(no survey available)"
 								    lines = [
 								        f"Description: {survey.get('description', '')}",
 								        f"Approach: {survey.get('approach', '')}",
 								    ]
 								    notes = survey.get("domain_notes", "")
 								    if notes:
 								        lines.append(f"Domain notes: {notes}")
 								    relevant = survey.get("relevant_tools") or []
 								    if relevant:
 								        lines.append(f"Relevant tools (lean on these): {', '.join(relevant)}")
 								    skip = survey.get("skip_tools") or []
 								    if skip:
 								        lines.append(f"Skip tools (already removed from your toolbox): "
 								                     f"{', '.join(skip)}")
 								    return "\n".join(lines)
 								def _filter_dir_tools(survey):
 								    """Return _DIR_TOOLS with skip_tools removed, gated on confidence.
 								    - Returns the full list if survey is None/empty or its confidence is
 								      not at least _SURVEY_CONFIDENCE_THRESHOLD (this includes values
 								      that cannot be parsed as a number, and NaN).
 								    - Always preserves control-flow tools in _PROTECTED_DIR_TOOLS.
 								    - Entries in skip_tools that don't name a tool are silently ignored.
 								    Always returns a new list, never _DIR_TOOLS itself.
 								    """
 								    if not survey:
 								        return list(_DIR_TOOLS)
 								    try:
 								        confidence = float(survey.get("confidence", 0.0) or 0.0)
 								    except (TypeError, ValueError, OverflowError):
 								        confidence = 0.0
 								    # Spelled "not >=" instead of "<" so a NaN confidence fails closed:
 								    # every ordering comparison against NaN is False, so "<" would let
 								    # NaN through the gate and enforce skip_tools.
 								    if not confidence >= _SURVEY_CONFIDENCE_THRESHOLD:
 								        return list(_DIR_TOOLS)
 								    # Only strings are kept: a non-string entry can never equal a tool
 								    # name, and an unhashable one would make the set constructor raise.
 								    skip = {
 								        name for name in (survey.get("skip_tools") or [])
 								        if isinstance(name, str)
 								    } - _PROTECTED_DIR_TOOLS
 								    if not skip:
 								        return list(_DIR_TOOLS)
 								    return [t for t in _DIR_TOOLS if t["name"] not in skip]
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
 								# Record returned by _build_dir_loop_context():
 								#   dir_rel   - dir path relative to the target (target basename for the root)
 								#   system    - formatted _DIR_SYSTEM_PROMPT text
 								#   dir_tools - tool list produced by _filter_dir_tools()
 								#   messages  - conversation list holding the opening user message
+								_DirLoopContext = namedtuple(
 								    "_DirLoopContext", ["dir_rel", "system", "dir_tools", "messages"],
 								)
 								def _build_dir_loop_context(dir_path, target, cache, survey, max_turns):
 								    """Assemble the static inputs the dir loop needs before its first turn.
 								    Data assembly only; this function itself writes nothing. It builds
 								    the directory context, reads child summaries through the cache,
 								    renders the survey block, filters the tool list, formats the system
 								    prompt and seeds the conversation with the opening user message.
 								    Returns a _DirLoopContext(dir_rel, system, dir_tools, messages).
 								    """
 								    dir_rel = os.path.relpath(dir_path, target)
 								    if dir_rel == ".":
 								        # The target root is labelled by its own name. normpath drops a
 								        # trailing separator, which would make basename() return "".
 								        dir_rel = os.path.basename(os.path.normpath(target))
 								    context = _build_dir_context(dir_path)
 								    child_summaries = _get_child_summaries(dir_path, cache)
 								    survey_context = _format_survey_block(survey)
 								    dir_tools = _filter_dir_tools(survey)
 								    system = _DIR_SYSTEM_PROMPT.format(
 								        dir_path=dir_path,
 								        dir_rel=dir_rel,
 								        max_turns=max_turns,
 								        context=context,
 								        child_summaries=child_summaries,
 								        survey_context=survey_context,
 								    )
 								    messages = [
 								        {
 								            "role": "user",
 								            "content": (
 								                "Investigate this directory now. Use parse_structure for "
 								                "source files, read_file for others, cache summaries, and "
 								                "call submit_report. Batch tool calls for efficiency."
 								            ),
 								        },
 								    ]
 								    return _DirLoopContext(
 								        dir_rel=dir_rel, system=system, dir_tools=dir_tools, messages=messages,
 								    )
 								def _flush_partial_dir_entry(dir_path, target, cache):
 								    """Write a partial dir cache entry from any already-cached file entries.
 								    Called when the per-loop context budget is exceeded before the agent
 								    reaches submit_report. Idempotent: returns "" without writing if a dir
 								    entry already exists. Returns the partial summary string (empty if no
 								    file entries were available to synthesize from).
 								    """
 								    if cache.has_entry("dir", dir_path):
 								        return ""
 								    dir_real = os.path.realpath(dir_path)
 								    file_entries = [
 								        e for e in cache.read_all_entries("file")
 								        if os.path.realpath(e.get("path", "")).startswith(dir_real + os.sep)
 								        or os.path.dirname(
 								            os.path.join(target, e.get("relative_path", ""))
 								        ) == dir_real
 								    ]
 								    if file_entries:
 								        file_summaries = [
 								            e["summary"] for e in file_entries if e.get("summary")
 								        ]
 								        notable = [
 								            e.get("relative_path", e.get("path", ""))
 								            for e in file_entries if e.get("notable")
 								        ]
 								        partial_summary = " ".join(file_summaries)
 								        cache.write_entry("dir", dir_path, {
 								            "path": dir_path,
 								            "relative_path": os.path.relpath(dir_path, target),
 								            "child_count": len([
 								                n for n in os.listdir(dir_path)
 								                if not n.startswith(".")
 								            ]) if os.path.isdir(dir_path) else 0,
 								            "summary": partial_summary,
 								            "dominant_category": "unknown",
 								            "notable_files": notable,
 								            "partial": True,
 								            "partial_reason": "context budget reached",
 								            "cached_at": _now_iso(),
 								        })
 								        return partial_summary
 								    cache.write_entry("dir", dir_path, {
 								        "path": dir_path,
 								        "relative_path": os.path.relpath(dir_path, target),
 								        "child_count": 0,
 								        "summary": ("Investigation incomplete — context budget "
 								                    "reached before any files were processed."),
 								        "dominant_category": "unknown",
 								        "notable_files": [],
 								        "partial": True,
 								        "partial_reason": (
 								            "context budget reached before files processed"),
 								        "cached_at": _now_iso(),
 								    })
 								    return ""
 								def _handle_turn_response(content_blocks, messages, target, cache, dir_rel,
 								                          turn, verbose):
 								    """Process one turn's response: print, append, dispatch tools.
 								    Mutates `messages` in place: appends the assistant message, then either
 								    a "please call submit_report" nudge (no tool_uses) or the tool_results
 								    user message. Recognizes submit_report as the loop's done signal and
 								    extracts its summary. Returns (done, summary).
 								    """
 								    for b in content_blocks:
 								        if b.type == "text" and b.text.strip():
 								            for line in b.text.strip().split("\n"):
 								                print(f"  [AI]   {line}", file=sys.stderr)
 								    tool_uses = [b for b in content_blocks if b.type == "tool_use"]
 								    for tu in tool_uses:
 								        arg_summary = ", ".join(
 								            f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
 								        ) if tu.input else ""
 								        print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)
 								    messages.append({
 								        "role": "assistant",
 								        "content": [_block_to_dict(b) for b in content_blocks],
 								    })
 								    if not tool_uses:
 								        messages.append({
 								            "role": "user",
 								            "content": "Please call submit_report with your summary.",
 								        })
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        return False, None, None
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
 								    tool_results = []
 								    done = False
 								    summary = None
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    completeness = None
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								    for tu in tool_uses:
 								        if tu.name == "submit_report":
 								            summary = tu.input.get("summary", "")
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								            try:
 								                completeness = float(tu.input.get("completeness", 0) or 0)
 								            except (TypeError, ValueError):
 								                completeness = None
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								            tool_results.append({
 								                "type": "tool_result",
 								                "tool_use_id": tu.id,
 								                "content": "Summary submitted.",
 								            })
 								            done = True
 								        else:
 								            result_text = _execute_tool(
 								                tu.name, tu.input, target, cache, dir_rel,
 								                turn + 1, verbose=verbose,
 								            )
 								            tool_results.append({
 								                "type": "tool_result",
 								                "tool_use_id": tu.id,
 								                "content": result_text,
 								            })
 								    messages.append({"role": "user", "content": tool_results})
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    return done, summary, completeness
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
 								def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
 								                  verbose=False, survey=None):
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    """Run an isolated agent loop for a single directory.
 								    Returns (summary, completeness) where completeness is the agent's
 								    self-rated investigation thoroughness (0.0-1.0), or None if not reported.
 								    """
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								    ctx = _build_dir_loop_context(
 								        dir_path, target, cache, survey, max_turns,
 								    )
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    summary = None
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    completeness = None
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
 								    for turn in range(max_turns):
 								        if tracker.budget_exceeded():
 								            print(f"  [AI]   Context budget reached — exiting early "
-												fix(ai): correct context budget metric — track per-call, not sum (#44)

The dir loop was exiting early on small targets (a 13-file Python lib
hit the budget at 92k–139k cumulative tokens) because _TokenTracker
compared the SUM of input_tokens across all turns to the context
window size. input_tokens from each API response is the size of the
full prompt sent on that turn (system + every prior message + new
tool results), so summing across turns multi-counts everything. The
real per-call context size never approached the limit.

Verified empirically: on luminos_lib pre-fix, the loop bailed when
the most recent call's input_tokens was 20,535 (~10% of Sonnet's
200k window) but the cumulative sum was 134,983.

Changes:
- _TokenTracker now tracks last_input (the most recent call's
  input_tokens), separate from the cumulative loop_input/total_input
  used for cost reporting.
- budget_exceeded() returns last_input > CONTEXT_BUDGET, not the
  cumulative sum.
- MAX_CONTEXT bumped from 180_000 to 200_000 (Sonnet 4's real
  context window). CONTEXT_BUDGET stays at 70% = 140,000.
- Early-exit message now shows context size, threshold, AND
  cumulative spend separately so future debugging is unambiguous.

Smoke test on luminos_lib: investigation completes without early
exit (~$0.37). 6 unit tests added covering the new semantics,
including the key regression: a sequence of small calls whose sum
exceeds the budget must NOT trip the check.

Wiki Architecture page updated.

#51 filed for the separate message-history-growth issue.

											
										
										
											2026-04-06 22:49:25 -06:00
+								                  f"(context size {tracker.last_input:,} > "
 								                  f"{CONTEXT_BUDGET:,} budget; "
 								                  f"loop spend {tracker.loop_total:,} tokens)",
 								                  file=sys.stderr)
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								            partial = _flush_partial_dir_entry(dir_path, target, cache)
 								            if partial and not summary:
 								                summary = partial
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								            break
 								        try:
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								            content_blocks, _usage = _call_api_streaming(
 								                client, ctx.system, ctx.messages, ctx.dir_tools, tracker,
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								            )
 								        except anthropic.APIError as e:
 								            print(f"  [AI]   API error: {e}", file=sys.stderr)
 								            break
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        done, turn_summary, turn_completeness = _handle_turn_response(
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								            content_blocks, ctx.messages, target, cache,
 								            ctx.dir_rel, turn, verbose,
 								        )
 								        if turn_summary is not None:
 								            summary = turn_summary
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        if turn_completeness is not None:
 								            completeness = turn_completeness
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        if done:
 								            break
 								    else:
-												refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one
function: pre-loop setup, budget check + partial-flush, API call +
response printing, and tool dispatch + done detection. Phase 3 dynamic
turn allocation will inject more state into the same code path, so
this debt is paid before that lands.

Three new helpers above _run_dir_loop:

- _build_dir_loop_context(): pure setup. Builds the dir context, child
  summaries, survey block, filtered tool list, system prompt, and seed
  user message. Returns a _DirLoopContext namedtuple.
- _flush_partial_dir_entry(): idempotent partial-cache writer for the
  budget-exceeded path. Returns the partial summary string. Idempotent
  via cache.has_entry() guard, so callers can call it without checking.
- _handle_turn_response(): per-turn response processing. Prints text
  blocks and tool decisions, appends the assistant message, dispatches
  tools (or nudges the agent to call submit_report), appends
  tool_results. Returns (done, summary).

_run_dir_loop is now a ~25-line coordinator: build context, then
for-loop calls budget check, API, and turn handler in sequence.

No behavior change. 164 tests pass. Internals.md §4 updated for the
new structure and the file:line refs that drifted.

											
										
										
											2026-04-11 10:02:21 -06:00
+								        print(f"  [AI]   Warning: max turns reached for {ctx.dir_rel}",
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								              file=sys.stderr)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    return summary, completeness
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
 								def _block_to_dict(block):
 								    """Convert an SDK content block to a plain dict for message history."""
 								    if block.type == "text":
 								        return {"type": "text", "text": block.text}
 								    elif block.type == "tool_use":
 								        return {"type": "tool_use", "id": block.id,
 								                "name": block.name, "input": block.input}
 								    return {"type": block.type}
 								# ---------------------------------------------------------------------------
 								# Synthesis pass
 								# ---------------------------------------------------------------------------
-												feat(filetypes): expose raw signals to survey, remove classifier bias (#42)

The survey pass no longer receives the bucketed file_categories
histogram, which was biased toward source-code targets and would
mislabel mail, notebooks, ledgers, and other non-code domains as
"source" via the file --brief "text" pattern fallback.

Adds filetypes.survey_signals(), which assembles raw signals from
the same `classified` data the bucketer already processes — no new
walks, no new dependencies:
  total_files       — total count
  extension_histogram — top 20 extensions, raw, no taxonomy
  file_descriptions   — top 20 `file --brief` outputs, by count
  filename_samples    — 20 names, evenly drawn (not first-20)

`file --brief` descriptions are truncated at 80 chars before
counting so prefixes group correctly without exploding key cardinality.

The Band-Aid in _SURVEY_SYSTEM_PROMPT (warning the LLM that the
histogram was biased toward source code) is removed and replaced
with neutral guidance on how to read the raw signals together.
The {file_type_distribution} placeholder is renamed to
{survey_signals} to reflect the broader content.

luminos.py base scan computes survey_signals once and stores it on
report["survey_signals"]; AI consumers read from there.

summarize_categories() and report["file_categories"] are unchanged
— the terminal report still uses the bucketed view (#49 tracks
fixing that follow-up).

Smoke tested on two targets:
- luminos_lib: identical-quality survey ("Python library package",
  confidence 0.85), unchanged behavior on code targets.
- A synthetic Maildir of 8 messages with `:2,S` flag suffixes:
  survey now correctly identifies it as "A Maildir-format mailbox
  containing 8 email messages" with confidence 0.90, names the
  Maildir naming convention in domain_notes, and correctly marks
  parse_structure as a skip tool. Before #42 this would have been
  "8 source files."

Adds 8 unit tests for survey_signals covering empty input, extension
histogram, description aggregation/truncation, top-N cap, and
even-stride filename sampling.

#48 tracks the unit-of-analysis limitation (file is the wrong unit
for mbox, SQLite, archives, notebooks) — explicitly out of scope
for #42 and documented in survey_signals' docstring.

											
										
										
											2026-04-06 22:36:14 -06:00
+								def _format_survey_signals(signals):
 								    """Render the survey_signals dict as a labeled text block."""
 								    if not signals or not signals.get("total_files"):
 								        return "(no files classified)"
 								    lines = [f"Total files: {signals.get('total_files', 0)}", ""]
 								    ext_hist = signals.get("extension_histogram") or {}
 								    if ext_hist:
 								        lines.append("Extensions (top, by count):")
 								        for ext, n in ext_hist.items():
 								            lines.append(f"  {ext}: {n}")
 								        lines.append("")
 								    descs = signals.get("file_descriptions") or {}
 								    if descs:
 								        lines.append("file --brief output (top, by count):")
 								        for desc, n in descs.items():
 								            lines.append(f"  {desc}: {n}")
 								        lines.append("")
 								    samples = signals.get("filename_samples") or []
 								    if samples:
 								        lines.append("Filename samples (evenly drawn):")
 								        for name in samples:
 								            lines.append(f"  {name}")
 								    return "\n".join(lines).rstrip()
-												feat(ai): add _run_survey() and submit_survey tool (#5)

Adds the reconnaissance survey pass: a fast, ≤3-turn LLM call that
characterizes the target before any directory investigation begins.
The survey receives the file-type distribution (from the base scan),
a top-2-level tree preview, and the list of available dir-loop tools,
and returns description / approach / relevant_tools / skip_tools /
domain_notes / confidence via a single submit_survey tool call.

Wired into _run_investigation() before the directory loop. Output is
logged but not yet consumed — that wiring is #6. Survey failure is
non-fatal: if the call errors or runs out of turns, the investigation
proceeds without survey context.

Also adds a Band-Aid to _SURVEY_SYSTEM_PROMPT warning the LLM that
the file-type histogram is biased toward source code (the underlying
classifier has no concept of mail, notebooks, ledgers, etc.) and to
trust the tree preview when they conflict. The proper fix is #42.

											
										
										
											2026-04-06 21:49:59 -06:00
def _run_survey(client, target, report, tracker, max_turns=3, verbose=False):
    """Run the reconnaissance survey pass.

    Builds the survey system prompt from the base-scan signals, a depth-2
    tree preview and the names of the directory-loop tools, then asks the
    model to answer through a single ``submit_survey`` tool call.

    Args:
        client: API client handed to ``_call_api_streaming``.
        target: Path of the directory under investigation.
        report: Base-scan report dict; only ``report["survey_signals"]``
            is read here.
        tracker: Token tracker handed to ``_call_api_streaming``.
        max_turns: Maximum number of API round-trips before giving up.
        verbose: Currently unused by this function.

    Returns:
        A survey dict with keys ``description``, ``approach``,
        ``relevant_tools``, ``skip_tools``, ``domain_notes`` and
        ``confidence`` on success, or None on API error / out-of-turns.
        Survey is advisory — callers must treat None as "no survey context".
    """
    signals = report.get("survey_signals") or {}
    survey_signals_text = _format_survey_signals(signals)
    try:
        tree_node = build_tree(target, max_depth=2)
        tree_preview = render_tree(tree_node)
    except Exception:
        # Best-effort: the preview is only prompt context, so any failure
        # degrades to a placeholder instead of aborting the survey.
        tree_preview = "(tree unavailable)"
    # submit_report belongs to the dir loop's own protocol, so it is not
    # advertised to the survey as an investigative tool.
    tool_names = [t["name"] for t in _DIR_TOOLS if t["name"] != "submit_report"]
    available_tools = ", ".join(tool_names)
    system = _SURVEY_SYSTEM_PROMPT.format(
        target=target,
        survey_signals=survey_signals_text,
        tree_preview=tree_preview,
        available_tools=available_tools,
    )
    messages = [
        {
            "role": "user",
            "content": (
                "All inputs are in the system prompt above. Call "
                "submit_survey now — no other tool calls needed."
            ),
        },
    ]
    survey = None
    for _turn in range(max_turns):
        try:
            content_blocks, _usage = _call_api_streaming(
                client, system, messages, _SURVEY_TOOLS, tracker,
            )
        except anthropic.APIError as e:
            print(f"  [AI]   API error: {e}", file=sys.stderr)
            return None
        # Echo any free-text commentary from the model to stderr.
        for b in content_blocks:
            if b.type == "text" and b.text.strip():
                for line in b.text.strip().split("\n"):
                    print(f"  [AI]   {line}", file=sys.stderr)
        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
        for tu in tool_uses:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in tu.input.items()
            ) if tu.input else ""
            print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)
        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(b) for b in content_blocks],
        })
        if not tool_uses:
            # The model answered in prose only; nudge it and spend a turn.
            messages.append({
                "role": "user",
                "content": "Please call submit_survey.",
            })
            continue
        tool_results = []
        done = False
        for tu in tool_uses:
            if tu.name == "submit_survey":
                # The survey is advisory, so a malformed confidence value
                # (e.g. "high" or a list) must not raise out of this pass;
                # fall back to 0.0 instead.
                try:
                    confidence = float(tu.input.get("confidence", 0.0) or 0.0)
                except (TypeError, ValueError):
                    confidence = 0.0
                survey = {
                    "description": tu.input.get("description", ""),
                    "approach": tu.input.get("approach", ""),
                    "relevant_tools": tu.input.get("relevant_tools", []) or [],
                    "skip_tools": tu.input.get("skip_tools", []) or [],
                    "domain_notes": tu.input.get("domain_notes", ""),
                    "confidence": confidence,
                }
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Survey received. Thank you.",
                })
                done = True
            else:
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Unknown tool. Call submit_survey.",
                    "is_error": True,
                })
        messages.append({"role": "user", "content": tool_results})
        if done:
            break
    else:
        # Loop exhausted without a submit_survey call; survey stays None.
        print("  [AI] Warning: survey ran out of turns.", file=sys.stderr)
    return survey
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								# ---------------------------------------------------------------------------
 								# Planning pass
 								# ---------------------------------------------------------------------------
 								# Turn allocation defaults.
 								# _DEFAULT_TURNS is interpolated into the planning prompt as default_turns.
 								_DEFAULT_TURNS = 10
 								# NOTE(review): presumably the per-directory allocation for the shallow
 								# tier; its use is outside this section — confirm against _apply_plan().
 								_SHALLOW_TURNS = 5
 								# Upper bound applied to a priority dir's suggested_turns in _apply_plan().
 								_MAX_TURNS_CEILING = 25
 								# Multiplied by the directory count in _run_planning() to form the
 								# global_budget value given to the planner.
 								_BASE_TURNS_PER_DIR = 10
 								def _default_plan():
 								    """Fallback plan when planning is skipped or fails.
 								    All directories get default turns, leaf-first order, no overrides.
 								    """
 								    return {
 								        "priority_dirs": [],
 								        "shallow_dirs": [],
 								        "skip_dirs": [],
 								        "investigation_order": "leaf-first",
 								        "notes": "",
 								    }
 								def _run_planning(client, target, survey, report, all_dirs, tracker,
 								                  cached_dirs=None, max_turns=3, verbose=False):
 								    """Ask the model for an investigation plan via the submit_plan tool.
 								    The system prompt is assembled from the survey block (when a survey
 								    exists), a depth-6 tree rendering, the formatted survey signals, the
 								    directory count, the already-cached directories (relative to
 								    ``target``) and the turn budget. The conversation then runs for at
 								    most ``max_turns`` API calls.
 								    Returns a dict with priority_dirs, shallow_dirs, skip_dirs,
 								    investigation_order and notes, or None on API error or when the
 								    turns run out without a submit_plan call. ``verbose`` is not used here.
 								    """
 								    cached_dirs = cached_dirs or []
 								    dir_count = len(all_dirs)
 								    global_budget = _BASE_TURNS_PER_DIR * dir_count
 								    if survey:
 								        survey_context = _format_survey_block(survey)
 								    else:
 								        survey_context = "(no survey available)"
 								    try:
 								        tree_text = render_tree(build_tree(target, max_depth=6))
 								    except Exception:
 								        # The tree is prompt context only; degrade to a placeholder.
 								        tree_text = "(tree unavailable)"
 								    file_signals = _format_survey_signals(report.get("survey_signals") or {})
 								    cached_rel = [os.path.relpath(path, target) for path in cached_dirs]
 								    cached_text = "(none)" if not cached_rel else ", ".join(cached_rel)
 								    system = _PLANNING_SYSTEM_PROMPT.format(
 								        target=target,
 								        survey_context=survey_context,
 								        tree_text=tree_text,
 								        file_signals=file_signals,
 								        dir_count=dir_count,
 								        cached_dirs=cached_text,
 								        default_turns=_DEFAULT_TURNS,
 								        global_budget=global_budget,
 								    )
 								    opening_prompt = (
 								        "All inputs are in the system prompt above. Call "
 								        "submit_plan now."
 								    )
 								    messages = [{"role": "user", "content": opening_prompt}]
 								    plan = None
 								    for _ in range(max_turns):
 								        try:
 								            content_blocks, _usage = _call_api_streaming(
 								                client, system, messages, _PLANNING_TOOLS, tracker,
 								            )
 								        except anthropic.APIError as e:
 								            print(f"  [AI]   API error: {e}", file=sys.stderr)
 								            return None
 								        # Echo any free-text commentary from the model to stderr.
 								        for block in content_blocks:
 								            if block.type != "text":
 								                continue
 								            text = block.text.strip()
 								            if text:
 								                for line in text.split("\n"):
 								                    print(f"  [AI]   {line}", file=sys.stderr)
 								        tool_uses = [blk for blk in content_blocks if blk.type == "tool_use"]
 								        for call in tool_uses:
 								            if call.input:
 								                arg_summary = ", ".join(
 								                    f"{k}={v!r}" for k, v in call.input.items()
 								                )
 								            else:
 								                arg_summary = ""
 								            print(f"  [AI]   -> {call.name}({arg_summary})", file=sys.stderr)
 								        assistant_content = [_block_to_dict(blk) for blk in content_blocks]
 								        messages.append({"role": "assistant", "content": assistant_content})
 								        if not tool_uses:
 								            # Prose-only reply: nudge the model and spend another turn.
 								            messages.append({
 								                "role": "user",
 								                "content": "Please call submit_plan.",
 								            })
 								            continue
 								        tool_results = []
 								        done = False
 								        for call in tool_uses:
 								            if call.name != "submit_plan":
 								                tool_results.append({
 								                    "type": "tool_result",
 								                    "tool_use_id": call.id,
 								                    "content": "Unknown tool. Call submit_plan.",
 								                    "is_error": True,
 								                })
 								                continue
 								            args = call.input
 								            # Missing or null tier lists are normalized to empty lists.
 								            submitted = {
 								                tier: args.get(tier, []) or []
 								                for tier in ("priority_dirs", "shallow_dirs", "skip_dirs")
 								            }
 								            submitted["investigation_order"] = args.get(
 								                "investigation_order", "leaf-first"
 								            )
 								            submitted["notes"] = args.get("notes", "")
 								            plan = submitted
 								            tool_results.append({
 								                "type": "tool_result",
 								                "tool_use_id": call.id,
 								                "content": "Plan received. Thank you.",
 								            })
 								            done = True
 								        messages.append({"role": "user", "content": tool_results})
 								        if done:
 								            break
 								    else:
 								        # Loop exhausted without a submit_plan call; plan stays None.
 								        print("  [AI] Warning: planning ran out of turns.", file=sys.stderr)
 								    return plan
 								def _apply_plan(all_dirs, to_investigate, plan, target):
 								    """Apply the plan to produce an ordered dir list and turn map.
 								    Returns (ordered_dirs, turn_map) where:
 								      - ordered_dirs: list of absolute dir paths in investigation order
 								      - turn_map: dict of {abs_dir_path: max_turns}
 								    Pure function: no I/O, no cache, no API calls.
 								    """
 								    if plan is None:
 								        return list(to_investigate), {}
 								    # Build lookup from relative path to absolute path.
-												fix(ai): match target root dir by basename in _apply_plan() (#76)

The planner sees basename(target) in the tree output (e.g. "luminos_lib")
and uses that as the path in its plan. But _apply_plan() mapped the
target root to "." via os.path.relpath(), so the planner's path never
matched and the allocation was silently dropped.

Fix: register both "." and basename(target) as aliases for the target
root in the lookup table. Also log a warning when plan paths don't
match any known directory, so future mismatches are visible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:38:55 -06:00
+								    # The target root maps to "." via relpath, but the planner sees
 								    # basename(target) in the tree output and uses that as the path.
 								    # Register both so either form matches (#76).
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    rel_to_abs = {}
 								    for d in all_dirs:
 								        rel = os.path.relpath(d, target)
 								        rel_to_abs[rel] = d
-												fix(ai): match target root dir by basename in _apply_plan() (#76)

The planner sees basename(target) in the tree output (e.g. "luminos_lib")
and uses that as the path in its plan. But _apply_plan() mapped the
target root to "." via os.path.relpath(), so the planner's path never
matched and the allocation was silently dropped.

Fix: register both "." and basename(target) as aliases for the target
root in the lookup table. Also log a warning when plan paths don't
match any known directory, so future mismatches are visible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:38:55 -06:00
+								        if rel == ".":
 								            rel_to_abs[os.path.basename(d)] = d
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
 								    # Classify directories by tier.
 								    skip_set = set()
 								    priority_set = set()
 								    shallow_set = set()
 								    turn_map = {}
-												fix(ai): match target root dir by basename in _apply_plan() (#76)

The planner sees basename(target) in the tree output (e.g. "luminos_lib")
and uses that as the path in its plan. But _apply_plan() mapped the
target root to "." via os.path.relpath(), so the planner's path never
matched and the allocation was silently dropped.

Fix: register both "." and basename(target) as aliases for the target
root in the lookup table. Also log a warning when plan paths don't
match any known directory, so future mismatches are visible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:38:55 -06:00
+								    unmatched = []
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
 								    for entry in plan.get("skip_dirs", []):
 								        rel = entry.get("path", "")
 								        if rel in rel_to_abs:
 								            skip_set.add(rel_to_abs[rel])
-												fix(ai): match target root dir by basename in _apply_plan() (#76)

The planner sees basename(target) in the tree output (e.g. "luminos_lib")
and uses that as the path in its plan. But _apply_plan() mapped the
target root to "." via os.path.relpath(), so the planner's path never
matched and the allocation was silently dropped.

Fix: register both "." and basename(target) as aliases for the target
root in the lookup table. Also log a warning when plan paths don't
match any known directory, so future mismatches are visible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:38:55 -06:00
+								        else:
 								            unmatched.append(rel)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
 								    for entry in plan.get("priority_dirs", []):
 								        rel = entry.get("path", "")
 								        suggested = entry.get("suggested_turns", 15)
 								        capped = min(suggested, _MAX_TURNS_CEILING)
 								        if rel in rel_to_abs:
 								            abs_path = rel_to_abs[rel]
 								            priority_set.add(abs_path)
 								            turn_map[abs_path] = capped
-												fix(ai): match target root dir by basename in _apply_plan() (#76)

The planner sees basename(target) in the tree output (e.g. "luminos_lib")
and uses that as the path in its plan. But _apply_plan() mapped the
target root to "." via os.path.relpath(), so the planner's path never
matched and the allocation was silently dropped.

Fix: register both "." and basename(target) as aliases for the target
root in the lookup table. Also log a warning when plan paths don't
match any known directory, so future mismatches are visible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:38:55 -06:00
+								        else:
 								            unmatched.append(rel)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
 								    for entry in plan.get("shallow_dirs", []):
 								        rel = entry.get("path", "")
 								        if rel in rel_to_abs:
 								            abs_path = rel_to_abs[rel]
 								            shallow_set.add(abs_path)
 								            turn_map[abs_path] = _SHALLOW_TURNS
-												fix(ai): match target root dir by basename in _apply_plan() (#76)

The planner sees basename(target) in the tree output (e.g. "luminos_lib")
and uses that as the path in its plan. But _apply_plan() mapped the
target root to "." via os.path.relpath(), so the planner's path never
matched and the allocation was silently dropped.

Fix: register both "." and basename(target) as aliases for the target
root in the lookup table. Also log a warning when plan paths don't
match any known directory, so future mismatches are visible.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:38:55 -06:00
+								        else:
 								            unmatched.append(rel)
 								    if unmatched:
 								        print(
 								            f"  [AI] Warning: plan referenced unknown dirs: "
 								            f"{', '.join(unmatched)}",
 								            file=sys.stderr,
 								        )
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
 								    # Remove skipped dirs from the investigation list.
 								    remaining = [d for d in to_investigate if d not in skip_set]
 								    # Order by bands. Both strategies preserve leaf-first within bands.
 								    order = plan.get("investigation_order", "leaf-first")
 								    if order == "priority-first":
 								        priority_band = [d for d in remaining if d in priority_set]
 								        shallow_band = [d for d in remaining if d in shallow_set]
 								        default_band = [
 								            d for d in remaining
 								            if d not in priority_set and d not in shallow_set
 								        ]
 								        ordered = priority_band + default_band + shallow_band
 								    else:
 								        # leaf-first: keep the original order (already leaf-first from
 								        # _discover_directories), just remove skipped dirs.
 								        ordered = remaining
 								    return ordered, turn_map
 								def _write_plan_evaluation(cache, plan, turn_utilization):
 								    """Write plan_evaluation.json comparing plan predictions to actual results.
 								    This is the planning pass's report card: did we allocate turns well?
 								    Args:
 								        cache: cache manager. Only ``cache.target``, ``cache.root`` and
 								            ``cache.read_entry("dir", path)`` are used here.
 								        plan: plan dict with optional "priority_dirs", "shallow_dirs",
 								            "skip_dirs" and "investigation_order" keys, or None.
 								        turn_utilization: list of per-directory records carrying "dir",
 								            "turns_allocated", "turns_used" and optionally "completeness".
 								    Returns:
 								        None. The write is best-effort: an OSError is reported on stderr
 								        and otherwise ignored so the investigation itself is not aborted.
 								    """
 								    plan = plan or {}
 								    # Build a lookup of what the plan predicted per dir. Entries without a
 								    # usable "path" are ignored instead of raising KeyError, matching the
 								    # tolerant entry.get("path", "") lookups used when the plan is applied.
 								    predicted = {}
 								    tier_keys = (
 								        ("priority_dirs", "priority"),
 								        ("shallow_dirs", "shallow"),
 								        ("skip_dirs", "skip"),
 								    )
 								    for plan_key, tier in tier_keys:
 								        # "or []" also covers a plan that carries an explicit null list.
 								        for entry in plan.get(plan_key) or []:
 								            path = entry.get("path")
 								            if not path:
 								                continue
 								            if tier == "priority":
 								                suggested = entry.get("suggested_turns", 15)
 								            elif tier == "shallow":
 								                suggested = _SHALLOW_TURNS
 								            else:
 								                suggested = 0
 								            predicted[path] = {"tier": tier, "suggested_turns": suggested}
 								    # Compare predictions to actual turn utilization.
 								    # NOTE(review): the lookup below is by exact plan path; if the plan
 								    # spells the target root differently from record["dir"], that record
 								    # is reported with the "default" tier — confirm against the caller.
 								    per_dir = []
 								    total_allocated = 0
 								    total_used = 0
 								    for record in turn_utilization:
 								        dir_rel = record["dir"]
 								        allocated = record["turns_allocated"]
 								        used = record["turns_used"]
 								        total_allocated += allocated
 								        total_used += used
 								        pred = predicted.get(dir_rel, {})
 								        entry = {
 								            "dir": dir_rel,
 								            "planned_tier": pred.get("tier", "default"),
 								            "turns_allocated": allocated,
 								            "turns_used": used,
 								            # Guard against a zero allocation to avoid ZeroDivisionError.
 								            "utilization": round(used / allocated, 2) if allocated else 0,
 								        }
 								        # Include completeness from turn utilization record (#74).
 								        record_completeness = record.get("completeness")
 								        if record_completeness is not None:
 								            entry["completeness"] = record_completeness
 								        # Read confidence from the cached dir entry if available.
 								        dir_entry = cache.read_entry("dir", os.path.join(
 								            cache.target, dir_rel,
 								        ))
 								        if dir_entry:
 								            entry["confidence"] = dir_entry.get("confidence")
 								        per_dir.append(entry)
 								    evaluation = {
 								        "plan_order": plan.get("investigation_order", "leaf-first"),
 								        "total_dirs_investigated": len(turn_utilization),
 								        "total_turns_allocated": total_allocated,
 								        "total_turns_used": total_used,
 								        "overall_utilization": (
 								            round(total_used / total_allocated, 2) if total_allocated else 0
 								        ),
 								        "per_directory": per_dir,
 								        "evaluated_at": _now_iso(),
 								    }
 								    try:
 								        eval_path = os.path.join(cache.root, "plan_evaluation.json")
 								        with open(eval_path, "w", encoding="utf-8") as f:
 								            json.dump(evaluation, f, indent=2)
 								        print(
 								            f"  [AI] Plan evaluation: {total_used}/{total_allocated} turns used "
 								            f"({evaluation['overall_utilization']:.0%} utilization)",
 								            file=sys.stderr,
 								        )
 								    except OSError as e:
 								        # Still best-effort, but make the failure visible instead of silent.
 								        print(
 								            f"  [AI] Warning: could not write plan evaluation: {e}",
 								            file=sys.stderr,
 								        )
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								def _run_synthesis(client, target, cache, tracker, max_turns=5, verbose=False):
 								    """Run the final synthesis pass. Returns (brief, detailed)."""
 								    dir_entries = cache.read_all_entries("dir")
 								    summary_lines = []
 								    for entry in dir_entries:
 								        rel = entry.get("relative_path", "?")
 								        summary = entry.get("summary", "(no summary)")
 								        dominant = entry.get("dominant_category", "?")
 								        notable = entry.get("notable_files", [])
 								        summary_lines.append(f"### {rel}/")
 								        summary_lines.append(f"Category: {dominant}")
 								        summary_lines.append(f"Summary: {summary}")
 								        if notable:
 								            summary_lines.append(f"Notable files: {', '.join(notable)}")
 								        summary_lines.append("")
 								    summaries_text = "\n".join(summary_lines) if summary_lines else "(none)"
 								    system = _SYNTHESIS_SYSTEM_PROMPT.format(
 								        target=target,
 								        summaries_text=summaries_text,
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								    )
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    messages = [
 								        {
 								            "role": "user",
 								            "content": (
 								                "All directory summaries are in the system prompt above. "
 								                "Synthesize them into a cohesive report and call "
 								                "submit_report immediately — no other tool calls needed."
 								            ),
 								        },
 								    ]
 								    brief, detailed = "", ""
 								    for turn in range(max_turns):
 								        try:
 								            content_blocks, usage = _call_api_streaming(
 								                client, system, messages, _SYNTHESIS_TOOLS, tracker,
 								            )
 								        except anthropic.APIError as e:
 								            print(f"  [AI]   API error: {e}", file=sys.stderr)
 								            break
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								        # Print text blocks to stderr
 								        for b in content_blocks:
 								            if b.type == "text" and b.text.strip():
 								                for line in b.text.strip().split("\n"):
 								                    print(f"  [AI]   {line}", file=sys.stderr)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
 								        for tu in tool_uses:
 								            arg_summary = ", ".join(
 								                f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
 								            ) if tu.input else ""
 								            print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)
 								        messages.append({
 								            "role": "assistant",
 								            "content": [_block_to_dict(b) for b in content_blocks],
 								        })
 								        if not tool_uses:
 								            messages.append({
 								                "role": "user",
 								                "content": "Please call submit_report with your analysis.",
 								            })
 								            continue
 								        tool_results = []
 								        done = False
 								        for tu in tool_uses:
 								            if tu.name == "submit_report":
 								                brief = tu.input.get("brief", "")
 								                detailed = tu.input.get("detailed", "")
 								                tool_results.append({
 								                    "type": "tool_result",
 								                    "tool_use_id": tu.id,
 								                    "content": "Report submitted. Thank you.",
 								                })
 								                done = True
 								            else:
 								                result_text = _execute_tool(
 								                    tu.name, tu.input, target, cache, "(synthesis)",
 								                    turn + 1, verbose=verbose,
 								                )
 								                tool_results.append({
 								                    "type": "tool_result",
 								                    "tool_use_id": tu.id,
 								                    "content": result_text,
 								                })
 								        messages.append({"role": "user", "content": tool_results})
 								        if done:
 								            break
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								    else:
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        print("  [AI] Warning: synthesis ran out of turns.", file=sys.stderr)
 								        brief, detailed = _synthesize_from_cache(cache)
 								    return brief, detailed
 								def _synthesize_from_cache(cache):
 								    """Assemble a fallback (brief, detailed) report from cached dir entries.
 								    Only entries with a non-empty "summary" contribute. The brief is the
 								    first such summary; the detailed text lists every one of them, each
 								    labelled with its relative path, separated by blank lines.
 								    """
 								    entries = cache.read_all_entries("dir")
 								    if not entries:
 								        return ("(AI analysis incomplete — no data was cached)", "")
 								    # Pair each usable summary with its path label ("?" when missing).
 								    summarized = [
 								        (item.get("relative_path", "?"), item.get("summary", ""))
 								        for item in entries
 								    ]
 								    summarized = [(label, text) for label, text in summarized if text]
 								    if not summarized:
 								        return "(AI analysis incomplete)", ""
 								    first_summary = summarized[0][1]
 								    body = "\n\n".join(
 								        f"**{label}/**: {text}" for label, text in summarized
 								    )
 								    return first_summary, body
 								# ---------------------------------------------------------------------------
 								# Main orchestrator
 								# ---------------------------------------------------------------------------
 								def _run_investigation(client, target, report, show_hidden=False,
-												feat: add -x/--exclude flag to exclude directories from scan and AI analysis

											
										
										
											2026-04-06 14:32:12 -06:00
+								                       fresh=False, verbose=False, exclude=None):
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								    """Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
 								    cache = _CacheManager(investigation_id, target)
 								    tracker = _TokenTracker()
 								    if is_new:
 								        cache.write_meta(MODEL, _now_iso())
 								    print(f"  [AI] Investigation ID: {investigation_id}"
 								          f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
 								    print(f"  [AI] Cache: {cache.root}/", file=sys.stderr)
-												feat(ai): skip survey pass for tiny targets (#7)

Adds a gate in _run_investigation that skips the survey API call when
a target has both fewer than _SURVEY_MIN_FILES (5) files AND fewer
than _SURVEY_MIN_DIRS (2) directories. AND semantics handle the
deep-narrow edge case correctly: a target with 4 files spread across
50 directories still gets a survey because dir count amortizes the
cost across 50 dir loops.

When skipped, _default_survey() supplies a synthetic dict with
confidence=0.0 — chosen specifically so _filter_dir_tools() never
enforces skip_tools from a synthetic value. The dir loop receives
a generic "small target, read everything" framing in its prompt and
keeps its full toolbox.

Reorders _discover_directories() to run before the survey gate so
total_dirs is available without a second walk.

#46 tracks revisiting the threshold values with empirical data after
Phase 2 ships and we've run --ai on a variety of real targets.

Smoke tested on a 2-file target: gate triggers, default survey
substituted, dir loop completes normally. Adds 4 unit tests for
_default_survey() covering schema, confidence guard, filter
interaction, and empty skip_tools.

											
										
										
											2026-04-06 22:19:25 -06:00
+								    all_dirs = _discover_directories(target, show_hidden=show_hidden,
 								                                     exclude=exclude)
 								    total_files = sum((report.get("file_categories") or {}).values())
 								    total_dirs = len(all_dirs)
 								    if total_files < _SURVEY_MIN_FILES and total_dirs < _SURVEY_MIN_DIRS:
 								        print(
 								            f"  [AI] Survey skipped — {total_files} files, {total_dirs} dirs "
 								            f"(below threshold).",
 								            file=sys.stderr,
 								        )
 								        survey = _default_survey()
 								    else:
 								        print("  [AI] Survey pass...", file=sys.stderr)
 								        survey = _run_survey(client, target, report, tracker, verbose=verbose)
-												feat(ai): add _run_survey() and submit_survey tool (#5)

Adds the reconnaissance survey pass: a fast, ≤3-turn LLM call that
characterizes the target before any directory investigation begins.
The survey receives the file-type distribution (from the base scan),
a top-2-level tree preview, and the list of available dir-loop tools,
and returns description / approach / relevant_tools / skip_tools /
domain_notes / confidence via a single submit_survey tool call.

Wired into _run_investigation() before the directory loop. Output is
logged but not yet consumed — that wiring is #6. Survey failure is
non-fatal: if the call errors or runs out of turns, the investigation
proceeds without survey context.

Also adds a Band-Aid to _SURVEY_SYSTEM_PROMPT warning the LLM that
the file-type histogram is biased toward source code (the underlying
classifier has no concept of mail, notebooks, ledgers, etc.) and to
trust the tree preview when they conflict. The proper fix is #42.

											
										
										
											2026-04-06 21:49:59 -06:00
+								    if survey:
 								        print(
 								            f"  [AI] Survey: {survey['description']} "
 								            f"(confidence {survey['confidence']:.2f})",
 								            file=sys.stderr,
 								        )
 								        if survey.get("domain_notes"):
 								            print(f"  [AI] Survey notes: {survey['domain_notes']}", file=sys.stderr)
 								        if survey.get("relevant_tools"):
 								            print(
 								                f"  [AI] Survey relevant_tools: {', '.join(survey['relevant_tools'])}",
 								                file=sys.stderr,
 								            )
 								        if survey.get("skip_tools"):
 								            print(
 								                f"  [AI] Survey skip_tools: {', '.join(survey['skip_tools'])}",
 								                file=sys.stderr,
 								            )
 								    else:
 								        print("  [AI] Survey unavailable — proceeding without it.", file=sys.stderr)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    to_investigate = []
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    cached_dirs = []
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    for d in all_dirs:
 								        if cache.has_entry("dir", d):
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								            cached_dirs.append(d)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								            rel = os.path.relpath(d, target)
 								            print(f"  [AI] Skipping (cached): {rel}/", file=sys.stderr)
 								        else:
 								            to_investigate.append(d)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    cached_count = len(cached_dirs)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    if cached_count:
 								        print(f"  [AI] Directories cached: {cached_count}", file=sys.stderr)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    print(f"  [AI] Directories to investigate: {len(to_investigate)}",
 								          file=sys.stderr)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    # Planning pass: decide where to invest depth.
 								    if total_files < _SURVEY_MIN_FILES and total_dirs < _SURVEY_MIN_DIRS:
 								        print("  [AI] Planning skipped (small target).", file=sys.stderr)
 								        plan = _default_plan()
 								    else:
 								        plan_path = os.path.join(cache.root, "plan.json")
 								        if not fresh and os.path.exists(plan_path):
 								            try:
 								                with open(plan_path) as f:
 								                    plan = json.load(f)
 								                print("  [AI] Plan loaded from cache.", file=sys.stderr)
 								            except (OSError, json.JSONDecodeError):
 								                plan = None
 								        else:
 								            plan = None
 								        if plan is None:
 								            print("  [AI] Planning pass...", file=sys.stderr)
 								            plan = _run_planning(
 								                client, target, survey, report, all_dirs, tracker,
 								                cached_dirs=cached_dirs, verbose=verbose,
 								            )
 								            if plan is None:
 								                print("  [AI] Planning failed, using defaults.",
 								                      file=sys.stderr)
 								                plan = _default_plan()
 								            else:
 								                # Save plan to cache (#11).
 								                try:
 								                    with open(os.path.join(cache.root, "plan.json"), "w") as f:
 								                        json.dump(plan, f, indent=2)
 								                except OSError:
 								                    pass
 								    ordered, turn_map = _apply_plan(all_dirs, to_investigate, plan, target)
 								    # Log plan summary.
 								    skip_count = len(to_investigate) - len(ordered)
 								    priority_count = sum(
 								        1 for d in ordered if turn_map.get(d, _DEFAULT_TURNS) > _DEFAULT_TURNS
 								    )
 								    if skip_count or priority_count:
 								        print(
 								            f"  [AI] Plan: {priority_count} priority, "
 								            f"{skip_count} skipped, "
 								            f"{len(ordered) - priority_count} default/shallow",
 								            file=sys.stderr,
 								        )
 								    if plan.get("notes"):
 								        print(f"  [AI] Plan notes: {plan['notes']}", file=sys.stderr)
 								    total = len(ordered)
 								    turn_utilization = []
 								    for i, dir_path in enumerate(ordered, 1):
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        dir_rel = os.path.relpath(dir_path, target)
 								        if dir_rel == ".":
 								            dir_rel = os.path.basename(target)
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        max_turns = turn_map.get(dir_path, _DEFAULT_TURNS)
 								        print(
 								            f"  [AI] Investigating: {dir_rel}/ ({i}/{total}, "
 								            f"{max_turns} turns)",
 								            file=sys.stderr,
 								        )
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        tracker.reset_loop()
 								        summary, completeness = _run_dir_loop(
 								            client, target, cache, tracker, dir_path,
 								            max_turns=max_turns, verbose=verbose, survey=survey,
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        )
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								        # Track turn utilization for quality metrics (#74).
 								        turns_used = tracker._loop_turns
 								        turn_utilization.append({
 								            "dir": dir_rel,
 								            "turns_allocated": max_turns,
 								            "turns_used": turns_used,
 								            "completeness": completeness,
 								        })
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        if summary and not cache.has_entry("dir", dir_path):
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								            entry = {
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								                "path": dir_path,
 								                "relative_path": os.path.relpath(dir_path, target),
 								                "child_count": len([
 								                    n for n in os.listdir(dir_path)
 								                    if not n.startswith(".")
 								                ]) if os.path.isdir(dir_path) else 0,
 								                "summary": summary,
 								                "dominant_category": "unknown",
 								                "notable_files": [],
 								                "cached_at": _now_iso(),
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								            }
 								            if completeness is not None:
 								                entry["completeness"] = completeness
 								            cache.write_entry("dir", dir_path, entry)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
 								    cache.update_meta(
 								        directories_investigated=total + cached_count,
 								        end_time=_now_iso(),
 								    )
-												feat(ai): implement Phase 3 investigation planning (#8, #9, #10, #11, #74)

Add a planning pass that runs after survey and before dir loops. The
planner classifies directories into priority/shallow/skip tiers and
allocates turns accordingly, replacing the fixed max_turns=14 per
directory with dynamic allocation from a global budget.

Planning pass:
- _PLANNING_SYSTEM_PROMPT in prompts.py with submit_plan tool
- _run_planning() follows the same single-turn pattern as _run_survey()
- submit_plan tool registered in new "planning" scope
- _apply_plan() pure function: band-sorted ordering (leaf-first within
  bands), turn map, skip-dir removal
- _default_plan() fallback when planning is skipped or fails
- Plan cached as plan.json for resumed runs

Dynamic turn allocation:
- Priority dirs: 15-20 turns (capped at 25)
- Shallow dirs: 5 turns
- Default: 10 turns
- Skip dirs: excluded entirely
- Orchestrator passes per-dir max_turns to _run_dir_loop()

Quality instrumentation:
- _TokenTracker._loop_turns counts API calls per dir loop
- completeness field (0.0-1.0) added to dir-scope submit_report
- plan_evaluation.json emitted after dir loops comparing plan predictions
  to actual turn utilization, completeness, and confidence
- Turn utilization logged per directory during investigation

Also fixes _get_child_summaries() to distinguish actual leaf directories
from parents whose children have not been investigated yet, replacing
the misleading "this is a leaf directory" placeholder.

26 new tests (260 total, all passing).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-04-12 20:21:49 -06:00
+								    # Emit plan evaluation (#74).
 								    _write_plan_evaluation(cache, plan, turn_utilization)
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    print("  [AI] Synthesis pass...", file=sys.stderr)
 								    brief, detailed = _run_synthesis(
 								        client, target, cache, tracker, verbose=verbose,
 								    )
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								    # Read flags from flags.jsonl
 								    flags = []
 								    flags_path = os.path.join(cache.root, "flags.jsonl")
 								    try:
 								        with open(flags_path) as f:
 								            for line in f:
 								                line = line.strip()
 								                if line:
 								                    flags.append(json.loads(line))
 								    except (OSError, json.JSONDecodeError):
 								        pass
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    print(f"  [AI] Total tokens used: {tracker.summary()}", file=sys.stderr)
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								    return brief, detailed, flags
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								# ---------------------------------------------------------------------------
 								# Public interface
 								# ---------------------------------------------------------------------------
-												feat: add -x/--exclude flag to exclude directories from scan and AI analysis

											
										
										
											2026-04-06 14:32:12 -06:00
+								def analyze_directory(report, target, verbose_tools=False, fresh=False,
 								                      exclude=None):
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								    """Run AI analysis on the directory. Returns (brief, detailed, flags).
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												feat: AI investigation is the product, drop zero-dep constraint (#64)

Two original design constraints are dropped:

1. Zero-dependency Python CLI is no longer a goal. Luminos installs from
   requirements.txt like a normal Python project.
2. AI investigation is the headline. The base scan becomes the agent's
   first input pass, not a standalone product. There is no --ai flag and
   no --no-ai mode. AI runs unconditionally on every invocation.

Watch mode is deleted as part of the same change because a non-AI
filesystem-churn monitor conflicts with the new philosophy. If a live
update mode is wanted later, it gets rebuilt as incremental AI
re-investigation.

Code:
- Delete luminos_lib/watch.py
- Delete luminos_lib/capabilities.py and tests/test_capabilities.py
- Move clear_cache() into luminos_lib/cache.py
- luminos.py: remove --watch, --ai, --install-extras flags. AI runs
  unconditionally after the base scan. If ANTHROPIC_API_KEY is unset,
  exit 0 with a one-line hint before running the base scan.
- ai.py: drop the check_ai_dependencies() call and import.
- New requirements.txt: anthropic, tree-sitter + grammars, python-magic.
- setup_env.sh installs from requirements.txt.

Docs:
- README.md rewritten to lead with AI investigation, drops the two-modes
  framing and the watch feature line.
- CLAUDE.md (project): rewrites Key Constraints, updates module map and
  Running Luminos commands.
- PLAN.md: strips zero-dep philosophy from the file map and reframes the
  watch+incremental note as a future live-mode feature.

Tests: 164 pass (down from 168 with the 4 removed capabilities tests).

											
										
										
											2026-04-11 09:43:47 -06:00
+								    Returns ("", "", []) if the API key is missing.
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
+								    """
 								    api_key = _get_api_key()
 								    if not api_key:
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								        return "", "", []
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    print("  [AI] Starting multi-pass investigation...", file=sys.stderr)
 								    client = anthropic.Anthropic(api_key=api_key)
 								    try:
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								        brief, detailed, flags = _run_investigation(
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								            client, target, report, fresh=fresh, verbose=verbose_tools,
-												feat: add -x/--exclude flag to exclude directories from scan and AI analysis

											
										
										
											2026-04-06 14:32:12 -06:00
+								            exclude=exclude,
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								        )
 								    except Exception as e:
 								        print(f"Warning: AI analysis failed: {e}", file=sys.stderr)
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								        return "", "", []
-												feat: add AI-powered directory analysis via Claude API

Adds --ai flag that sends the directory tree, file categories, and
sampled file contents to Claude for analysis. Produces a brief
summary at the top of the report and a detailed breakdown at the
end. Requires ANTHROPIC_API_KEY env var; degrades gracefully without it.
Uses only stdlib (urllib) to keep the zero-dependency constraint.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 10:03:48 -06:00
-												refactor: replace single-shot API with multi-pass agentic investigation

Rewrites ai.py from a single Claude API call into a multi-pass,
cache-driven agent architecture:

- Per-directory isolated agent loops (max 10 turns each) with context
  discarded between directories
- Leaves-first processing order so child summaries inform parents
- Disk cache (/tmp/luminos/{uuid}/) persists across runs for resumability
- Investigation ID persistence keyed by target realpath
- Separate synthesis pass reads only directory-level cache entries
- Replaces urllib with Anthropic SDK (streaming, automatic retries)
- Token counting with 70% context budget threshold for early exit
- parse_structure tool via tree-sitter (Python, JS, Rust, Go)
- python-magic integration for MIME-aware directory listings
- Cost tracking printed at end of investigation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 12:13:55 -06:00
+								    if not brief and not detailed:
 								        print("  [AI] Warning: agent produced no output.", file=sys.stderr)
 								    print("  [AI] Investigation complete.", file=sys.stderr)
-												feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

											
										
										
											2026-03-30 13:02:19 -06:00
+								    return brief, detailed, flags