feat: add chain-of-thought observability tools

Adds think, checkpoint, and flag tools for agent reasoning visibility:
- think: records observation/hypothesis/next_action before investigation
- checkpoint: summarizes learned/unknown/next_phase after file clusters
- flag: marks notable findings to flags.jsonl with severity levels

Additional changes:
- Step numbering in investigation system prompt
- Text blocks from agent now printed to stderr (step labels visible)
- flag tool available in both investigation and synthesis passes
- analyze_directory() returns (brief, detailed, flags) three-tuple
- format_flags() in report.py renders flags sorted by severity
- Per-directory max_turns increased from 10 to 14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jeff Smith 2026-03-30 13:02:19 -06:00
parent dd58a4fd3a
commit f324648c10
3 changed files with 238 additions and 12 deletions

View file

@ -99,16 +99,18 @@ def main():
report = scan(target, depth=args.depth, show_hidden=args.all)
flags = []
if args.ai:
from luminos_lib.ai import analyze_directory
brief, detailed = analyze_directory(report, target, fresh=args.fresh)
brief, detailed, flags = analyze_directory(report, target, fresh=args.fresh)
report["ai_brief"] = brief
report["ai_detailed"] = detailed
report["flags"] = flags
if args.json_output:
output = json.dumps(report, indent=2, default=str)
else:
output = format_report(report, target)
output = format_report(report, target, flags=flags)
if args.output:
try:

View file

@ -435,6 +435,85 @@ _DIR_TOOLS = [
"required": ["cache_type", "path", "data"],
},
},
{
"name": "think",
"description": (
"Record your reasoning before choosing which file or directory "
"to investigate next. Call this when deciding what to look at "
"— not before every individual tool call."
),
"input_schema": {
"type": "object",
"properties": {
"observation": {
"type": "string",
"description": "What you have observed so far.",
},
"hypothesis": {
"type": "string",
"description": "Your hypothesis about the directory.",
},
"next_action": {
"type": "string",
"description": "What you plan to investigate next and why.",
},
},
"required": ["observation", "hypothesis", "next_action"],
},
},
{
"name": "checkpoint",
"description": (
"Summarize what you have learned so far about this directory "
"and what you still need to determine. Call this after completing "
"a significant cluster of files — not after every file."
),
"input_schema": {
"type": "object",
"properties": {
"learned": {
"type": "string",
"description": "What you have learned so far.",
},
"still_unknown": {
"type": "string",
"description": "What you still need to determine.",
},
"next_phase": {
"type": "string",
"description": "What you will investigate next.",
},
},
"required": ["learned", "still_unknown", "next_phase"],
},
},
{
"name": "flag",
"description": (
"Mark a file, directory, or finding as notable or anomalous. "
"Call this immediately when you discover something surprising, "
"concerning, or important — do not save it for the report."
),
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path, or 'general'.",
},
"finding": {
"type": "string",
"description": "What you found.",
},
"severity": {
"type": "string",
"enum": ["info", "concern", "critical"],
"description": "info | concern | critical",
},
},
"required": ["path", "finding", "severity"],
},
},
{
"name": "submit_report",
"description": (
@ -486,6 +565,33 @@ _SYNTHESIS_TOOLS = [
"required": ["cache_type"],
},
},
{
"name": "flag",
"description": (
"Mark a file, directory, or finding as notable or anomalous. "
"Call this immediately when you discover something surprising, "
"concerning, or important — do not save it for the report."
),
"input_schema": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Relative path, or 'general'.",
},
"finding": {
"type": "string",
"description": "What you found.",
},
"severity": {
"type": "string",
"enum": ["info", "concern", "critical"],
"description": "info | concern | critical",
},
},
"required": ["path", "finding", "severity"],
},
},
{
"name": "submit_report",
"description": "Submit the final analysis report.",
@ -806,6 +912,44 @@ def _tool_list_cache(args, _target, cache):
return "\n".join(paths)
def _tool_think(args, _target, _cache):
obs = args.get("observation", "")
hyp = args.get("hypothesis", "")
nxt = args.get("next_action", "")
print(f" [AI] THINK", file=sys.stderr)
print(f" observation: {obs}", file=sys.stderr)
print(f" hypothesis: {hyp}", file=sys.stderr)
print(f" next_action: {nxt}", file=sys.stderr)
return "ok"
def _tool_checkpoint(args, _target, _cache):
learned = args.get("learned", "")
unknown = args.get("still_unknown", "")
phase = args.get("next_phase", "")
print(f" [AI] CHECKPOINT", file=sys.stderr)
print(f" learned: {learned}", file=sys.stderr)
print(f" still_unknown: {unknown}", file=sys.stderr)
print(f" next_phase: {phase}", file=sys.stderr)
return "ok"
def _tool_flag(args, _target, cache):
path = args.get("path", "general")
finding = args.get("finding", "")
severity = args.get("severity", "info")
print(f" [AI] FLAG [{severity.upper()}] {path}", file=sys.stderr)
print(f" {finding}", file=sys.stderr)
flags_path = os.path.join(cache.root, "flags.jsonl")
entry = {"path": path, "finding": finding, "severity": severity}
try:
with open(flags_path, "a") as f:
f.write(json.dumps(entry) + "\n")
except OSError:
pass
return "ok"
_TOOL_DISPATCH = {
"read_file": _tool_read_file,
"list_directory": _tool_list_directory,
@ -814,6 +958,9 @@ _TOOL_DISPATCH = {
"write_cache": _tool_write_cache,
"read_cache": _tool_read_cache,
"list_cache": _tool_list_cache,
"think": _tool_think,
"checkpoint": _tool_checkpoint,
"flag": _tool_flag,
}
@ -915,6 +1062,26 @@ understand content that structure cannot capture (comments, documentation, \
data files, config values). A file where structure and content appear to \
contradict each other is always worth reading in full.
Use the think tool when choosing which file or directory to investigate \
next — before starting a new file or switching investigation direction. \
Do NOT call think before every individual tool call in a sequence.
Use the checkpoint tool after completing investigation of a meaningful \
cluster of files. Not after every file — once or twice per directory \
loop at most.
Use the flag tool immediately when you find something notable, \
surprising, or concerning. Severity guide:
info = interesting but not problematic
concern = worth addressing
critical = likely broken or dangerous
## Step Numbering
Number your investigation steps as you go. Before starting each new \
file cluster or phase transition, output:
Step N: <what you are doing and why>
Output this as plain text before tool calls, not as a tool call itself.
## Efficiency Rules
- Batch multiple tool calls in a single turn whenever possible
- Skip binary/compiled/generated files (.pyc, .class, .o, .min.js, etc.)
@ -979,7 +1146,7 @@ def _get_child_summaries(dir_path, cache):
return "\n".join(parts) if parts else "(none — this is a leaf directory)"
def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=10,
def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
verbose=False):
"""Run an isolated agent loop for a single directory."""
dir_rel = os.path.relpath(dir_path, target)
@ -1076,6 +1243,12 @@ def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=10,
print(f" [AI] API error: {e}", file=sys.stderr)
break
# Print text blocks (step numbering, reasoning) to stderr
for b in content_blocks:
if b.type == "text" and b.text.strip():
for line in b.text.strip().split("\n"):
print(f" [AI] {line}", file=sys.stderr)
# Print tool decisions now that we have the full response
tool_uses = [b for b in content_blocks if b.type == "tool_use"]
for tu in tool_uses:
@ -1215,6 +1388,12 @@ def _run_synthesis(client, target, cache, tracker, max_turns=5, verbose=False):
print(f" [AI] API error: {e}", file=sys.stderr)
break
# Print text blocks to stderr
for b in content_blocks:
if b.type == "text" and b.text.strip():
for line in b.text.strip().split("\n"):
print(f" [AI] {line}", file=sys.stderr)
tool_uses = [b for b in content_blocks if b.type == "tool_use"]
for tu in tool_uses:
arg_summary = ", ".join(
@ -1294,7 +1473,7 @@ def _synthesize_from_cache(cache):
def _run_investigation(client, target, report, show_hidden=False,
fresh=False, verbose=False):
"""Orchestrate the multi-pass investigation. Returns (brief, detailed)."""
"""Orchestrate the multi-pass investigation. Returns (brief, detailed, flags)."""
investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
cache = _CacheManager(investigation_id, target)
tracker = _TokenTracker()
@ -1358,9 +1537,21 @@ def _run_investigation(client, target, report, show_hidden=False,
client, target, cache, tracker, verbose=verbose,
)
# Read flags from flags.jsonl
flags = []
flags_path = os.path.join(cache.root, "flags.jsonl")
try:
with open(flags_path) as f:
for line in f:
line = line.strip()
if line:
flags.append(json.loads(line))
except (OSError, json.JSONDecodeError):
pass
print(f" [AI] Total tokens used: {tracker.summary()}", file=sys.stderr)
return brief, detailed
return brief, detailed, flags
# ---------------------------------------------------------------------------
@ -1383,31 +1574,31 @@ def clear_cache():
# ---------------------------------------------------------------------------
def analyze_directory(report, target, verbose_tools=False, fresh=False):
"""Run AI analysis on the directory. Returns (brief, detailed) strings.
"""Run AI analysis on the directory. Returns (brief, detailed, flags).
Returns ("", "") if the API key is missing or dependencies are not met.
Returns ("", "", []) if the API key is missing or dependencies are not met.
"""
if not check_ai_dependencies():
sys.exit(1)
api_key = _get_api_key()
if not api_key:
return "", ""
return "", "", []
print(" [AI] Starting multi-pass investigation...", file=sys.stderr)
client = anthropic.Anthropic(api_key=api_key)
try:
brief, detailed = _run_investigation(
brief, detailed, flags = _run_investigation(
client, target, report, fresh=fresh, verbose=verbose_tools,
)
except Exception as e:
print(f"Warning: AI analysis failed: {e}", file=sys.stderr)
return "", ""
return "", "", []
if not brief and not detailed:
print(" [AI] Warning: agent produced no output.", file=sys.stderr)
print(" [AI] Investigation complete.", file=sys.stderr)
return brief, detailed
return brief, detailed, flags

View file

@ -1,7 +1,35 @@
"""Report formatting — human-readable terminal output."""
def format_report(report, target):
_SEVERITY_ORDER = {"critical": 0, "concern": 1, "info": 2}

def format_flags(flags):
    """Render a list of flag dicts as a ">> FLAGS" section, most severe
    first (stable order within a severity level).

    Entries with a missing or unrecognized severity sort last (rank 99).
    Returns the empty string when *flags* is empty or falsy.
    """
    if not flags:
        return ""

    def severity_rank(entry):
        return _SEVERITY_ORDER.get(entry.get("severity", "info"), 99)

    rendered = ["", ">> FLAGS", "-" * 40]
    for entry in sorted(flags, key=severity_rank):
        label = entry.get("severity", "info").upper()
        rendered.append(f"  [{label:<8s}] {entry.get('path', 'general')}")
        rendered.append(f"    {entry.get('finding', '')}")
    return "\n".join(rendered)
def format_report(report, target, flags=None):
"""Format the full report as a human-readable string."""
sep = "=" * 60
lines = []
@ -96,6 +124,11 @@ def format_report(report, target):
for paragraph in ai_detailed.split("\n"):
lines.append(f" {paragraph}")
# Flags
flags_text = format_flags(flags or [])
if flags_text:
lines.append(flags_text)
lines.append("")
lines.append(sep)
lines.append(" End of report.")