feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jeff Smith 2026-03-30 13:02:19 -06:00
parent dd58a4fd3a
commit f324648c10
3 changed files with 238 additions and 12 deletions

View file

@ -99,16 +99,18 @@ def main():
report = scan(target, depth=args.depth, show_hidden=args.all)
flags = []
if args.ai:
from luminos_lib.ai import analyze_directory
brief, detailed = analyze_directory(report, target, fresh=args.fresh)
brief, detailed, flags = analyze_directory(report, target, fresh=args.fresh)
report["ai_brief"] = brief
report["ai_detailed"] = detailed
report["flags"] = flags
if args.json_output:
output = json.dumps(report, indent=2, default=str)
else:
output = format_report(report, target)
output = format_report(report, target, flags=flags)
if args.output:
try:

View file

@ -435,6 +435,85 @@ _DIR_TOOLS = [
"required": ["cache_type", "path", "data"],
},
},
{
"name": "think",
"description": (
"Record your reasoning before choosing which file or directory "
"to investigate next. Call this when deciding what to look at "
"— not before every individual tool call."
),
"input_schema": {
"type": "object",
"properties": {
"observation": {
"type": "string",
"description": "What you have observed so far.",
},
"hypothesis": {
"type": "string",
"description": "Your hypothesis about the directory.",
},
"next_action": {
"type": "string",
"description": "What you plan to investigate next and why.",
},
},
"required": ["observation", "hypothesis", "next_action"],
},
},
{
"name": "checkpoint",
"description": (
"Summarize what you have learned so far about this directory "
"and what you still need to determine. Call this after completing "
"a significant cluster of files — not after every file."
),
"input_schema": {
"type": "object",
"properties": {
"learned": {
"type": "string",
"description": "What you have learned so far.",
},
"still_unknown": {
"type": "string",
"description": "What you still need to determine.",
},
"next_phase": {
"type": "string",
"description": "What you will investigate next.",
},
},
"required": ["learned", "still_unknown", "next_phase"],
},
},
{
"name": "flag",
"description": (
"Mark a file, directory, or finding as notable or anomalous. "
"Call this immediately when you discover something surprising, "
"concerning, or important — do not save it for the report."
),
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path, or 'general'.",
},
"finding": {
"type": "string",
"description": "What you found.",
},
"severity": {
"type": "string",
"enum": ["info", "concern", "critical"],
"description": "info | concern | critical",
},
},
"required": ["path", "finding", "severity"],
},
},
{
"name": "submit_report",
"description": (
@ -486,6 +565,33 @@ _SYNTHESIS_TOOLS = [
"required": ["cache_type"],
},
},
{
"name": "flag",
"description": (
"Mark a file, directory, or finding as notable or anomalous. "
"Call this immediately when you discover something surprising, "
"concerning, or important — do not save it for the report."
),
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path, or 'general'.",
},
"finding": {
"type": "string",
"description": "What you found.",
},
"severity": {
"type": "string",
"enum": ["info", "concern", "critical"],
"description": "info | concern | critical",
},
},
"required": ["path", "finding", "severity"],
},
},
{
"name": "submit_report",
"description": "Submit the final analysis report.",
@ -806,6 +912,44 @@ def _tool_list_cache(args, _target, cache):
return "\n".join(paths)
def _tool_think(args, _target, _cache):
obs = args.get("observation", "")
hyp = args.get("hypothesis", "")
nxt = args.get("next_action", "")
print(f" [AI] THINK", file=sys.stderr)
print(f" observation: {obs}", file=sys.stderr)
print(f" hypothesis: {hyp}", file=sys.stderr)
print(f" next_action: {nxt}", file=sys.stderr)
return "ok"
def _tool_checkpoint(args, _target, _cache):
learned = args.get("learned", "")
unknown = args.get("still_unknown", "")
phase = args.get("next_phase", "")
print(f" [AI] CHECKPOINT", file=sys.stderr)
print(f" learned: {learned}", file=sys.stderr)
print(f" still_unknown: {unknown}", file=sys.stderr)
print(f" next_phase: {phase}", file=sys.stderr)
return "ok"
def _tool_flag(args, _target, cache):
path = args.get("path", "general")
finding = args.get("finding", "")
severity = args.get("severity", "info")
print(f" [AI] FLAG [{severity.upper()}] {path}", file=sys.stderr)
print(f" {finding}", file=sys.stderr)
flags_path = os.path.join(cache.root, "flags.jsonl")
entry = {"path": path, "finding": finding, "severity": severity}
try:
with open(flags_path, "a") as f:
f.write(json.dumps(entry) + "\n")
except OSError:
pass
return "ok"
_TOOL_DISPATCH = {
"read_file": _tool_read_file,
"list_directory": _tool_list_directory,
@ -814,6 +958,9 @@ _TOOL_DISPATCH = {
"write_cache": _tool_write_cache,
"read_cache": _tool_read_cache,
"list_cache": _tool_list_cache,
"think": _tool_think,
"checkpoint": _tool_checkpoint,
"flag": _tool_flag,
}
@ -915,6 +1062,26 @@ understand content that structure cannot capture (comments, documentation, \
data files, config values). A file where structure and content appear to \
contradict each other is always worth reading in full.
Use the think tool when choosing which file or directory to investigate \
next — before starting a new file or switching investigation direction. \
Do NOT call think before every individual tool call in a sequence.
Use the checkpoint tool after completing investigation of a meaningful \
cluster of files. Not after every file — once or twice per directory \
loop at most.
Use the flag tool immediately when you find something notable, \
surprising, or concerning. Severity guide:
info = interesting but not problematic
concern = worth addressing
critical = likely broken or dangerous
## Step Numbering
Number your investigation steps as you go. Before starting each new \
file cluster or phase transition, output:
Step N: <what you are doing and why>
Output this as plain text before tool calls, not as a tool call itself.
## Efficiency Rules
- Batch multiple tool calls in a single turn whenever possible
- Skip binary/compiled/generated files (.pyc, .class, .o, .min.js, etc.)
@ -979,7 +1146,7 @@ def _get_child_summaries(dir_path, cache):
return "\n".join(parts) if parts else "(none — this is a leaf directory)"
def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=10,
def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
verbose=False):
"""Run an isolated agent loop for a single directory."""
dir_rel = os.path.relpath(dir_path, target)
@ -1076,6 +1243,12 @@ def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=10,
print(f" [AI] API error: {e}", file=sys.stderr)
break
# Print text blocks (step numbering, reasoning) to stderr
for b in content_blocks:
if b.type == "text" and b.text.strip():
for line in b.text.strip().split("\n"):
print(f" [AI] {line}", file=sys.stderr)
# Print tool decisions now that we have the full response
tool_uses = [b for b in content_blocks if b.type == "tool_use"]
for tu in tool_uses:
@ -1215,6 +1388,12 @@ def _run_synthesis(client, target, cache, tracker, max_turns=5, verbose=False):
print(f" [AI] API error: {e}", file=sys.stderr)
break
# Print text blocks to stderr
for b in content_blocks:
if b.type == "text" and b.text.strip():
for line in b.text.strip().split("\n"):
print(f" [AI] {line}", file=sys.stderr)
tool_uses = [b for b in content_blocks if b.type == "tool_use"]
for tu in tool_uses:
arg_summary = ", ".join(
@ -1294,7 +1473,7 @@ def _synthesize_from_cache(cache):
def _run_investigation(client, target, report, show_hidden=False,
fresh=False, verbose=False):
"""Orchestrate the multi-pass investigation. Returns (brief, detailed)."""
"""Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
cache = _CacheManager(investigation_id, target)
tracker = _TokenTracker()
@ -1358,9 +1537,21 @@ def _run_investigation(client, target, report, show_hidden=False,
client, target, cache, tracker, verbose=verbose,
)
# Read flags from flags.jsonl
flags = []
flags_path = os.path.join(cache.root, "flags.jsonl")
try:
with open(flags_path) as f:
for line in f:
line = line.strip()
if line:
flags.append(json.loads(line))
except (OSError, json.JSONDecodeError):
pass
print(f" [AI] Total tokens used: {tracker.summary()}", file=sys.stderr)
return brief, detailed
return brief, detailed, flags
# ---------------------------------------------------------------------------
@ -1383,31 +1574,31 @@ def clear_cache():
# ---------------------------------------------------------------------------
def analyze_directory(report, target, verbose_tools=False, fresh=False):
"""Run AI analysis on the directory. Returns (brief, detailed) strings.
"""Run AI analysis on the directory. Returns (brief, detailed, flags).
Returns ("", "") if the API key is missing or dependencies are not met.
Returns ("", "", []) if the API key is missing or dependencies are not met.
"""
if not check_ai_dependencies():
sys.exit(1)
api_key = _get_api_key()
if not api_key:
return "", ""
return "", "", []
print(" [AI] Starting multi-pass investigation...", file=sys.stderr)
client = anthropic.Anthropic(api_key=api_key)
try:
brief, detailed = _run_investigation(
brief, detailed, flags = _run_investigation(
client, target, report, fresh=fresh, verbose=verbose_tools,
)
except Exception as e:
print(f"Warning: AI analysis failed: {e}", file=sys.stderr)
return "", ""
return "", "", []
if not brief and not detailed:
print(" [AI] Warning: agent produced no output.", file=sys.stderr)
print(" [AI] Investigation complete.", file=sys.stderr)
return brief, detailed
return brief, detailed, flags

View file

@ -1,7 +1,35 @@
"""Report formatting — human-readable terminal output."""
def format_report(report, target):
_SEVERITY_ORDER = {"critical": 0, "concern": 1, "info": 2}

def format_flags(flags):
    """Render a list of flag dicts as a ">> FLAGS" section, most severe
    first (stable order within a severity level).

    Entries with a missing or unrecognized severity sort last (rank 99).
    Returns the empty string when *flags* is empty or falsy.
    """
    if not flags:
        return ""

    def severity_rank(entry):
        return _SEVERITY_ORDER.get(entry.get("severity", "info"), 99)

    rendered = ["", ">> FLAGS", "-" * 40]
    for entry in sorted(flags, key=severity_rank):
        label = entry.get("severity", "info").upper()
        rendered.append(f"  [{label:<8s}] {entry.get('path', 'general')}")
        rendered.append(f"    {entry.get('finding', '')}")
    return "\n".join(rendered)
def format_report(report, target, flags=None):
"""Format the full report as a human-readable string."""
sep = "=" * 60
lines = []
@ -96,6 +124,11 @@ def format_report(report, target):
for paragraph in ai_detailed.split("\n"):
lines.append(f" {paragraph}")
# Flags
flags_text = format_flags(flags or [])
if flags_text:
lines.append(flags_text)
lines.append("")
lines.append(sep)
lines.append(" End of report.")