2026-03-30 18:13:55 +00:00
|
|
|
"""AI-powered directory analysis using a multi-pass, cache-driven agent loop.
|
2026-03-30 16:03:48 +00:00
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
Architecture:
|
|
|
|
|
1. Discover all directories under the target
|
|
|
|
|
2. Sort leaves-first (deepest directories first)
|
|
|
|
|
3. Run an isolated agent loop per directory (max 10 turns each)
|
|
|
|
|
4. Cache every file and directory summary to disk
|
|
|
|
|
5. Run a final synthesis pass reading only directory cache entries
|
|
|
|
|
|
|
|
|
|
Uses the Anthropic SDK for streaming, automatic retries, and token counting.
|
|
|
|
|
Uses tree-sitter for AST parsing and python-magic for file classification.
|
|
|
|
|
"""
|
|
|
|
|
|
2026-03-30 16:03:48 +00:00
|
|
|
import json
|
|
|
|
|
import os
|
2026-03-30 18:13:55 +00:00
|
|
|
import subprocess
|
2026-03-30 16:03:48 +00:00
|
|
|
import sys
|
2026-03-30 18:13:55 +00:00
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
|
|
|
|
import anthropic
|
|
|
|
|
import magic
|
|
|
|
|
import tree_sitter
|
|
|
|
|
import tree_sitter_python
|
|
|
|
|
import tree_sitter_javascript
|
|
|
|
|
import tree_sitter_rust
|
|
|
|
|
import tree_sitter_go
|
|
|
|
|
|
2026-03-30 19:12:37 +00:00
|
|
|
from luminos_lib.cache import CACHE_ROOT, _CacheManager, _get_investigation_id
|
2026-03-30 18:13:55 +00:00
|
|
|
from luminos_lib.capabilities import check_ai_dependencies
|
2026-03-30 16:03:48 +00:00
|
|
|
|
|
|
|
|
# Model identifier passed to the Anthropic API.
MODEL = "claude-sonnet-4-20250514"

# Context budget: trigger early exit at 70% of MAX_CONTEXT.
# Integer arithmetic avoids float rounding: int(MAX_CONTEXT * 0.70) goes
# through a float product and can truncate to one below the intended 126_000.
MAX_CONTEXT = 180_000
CONTEXT_BUDGET = MAX_CONTEXT * 70 // 100

# Pricing per 1M tokens (Claude Sonnet), in US dollars.
INPUT_PRICE_PER_M = 3.00
OUTPUT_PRICE_PER_M = 15.00

# Directories to always skip during investigation.  An entry that starts
# with "*" is treated as a suffix pattern by _should_skip_dir().
_SKIP_DIRS = {
    ".git", "__pycache__", "node_modules", ".tox", ".mypy_cache",
    ".pytest_cache", ".venv", "venv", ".env", "dist", "build",
    ".eggs", "*.egg-info", ".svn", ".hg",
}

# Commands the run_command tool is allowed to execute.
_COMMAND_WHITELIST = {"wc", "file", "grep", "head", "tail", "stat", "du", "find"}
|
2026-03-30 16:03:48 +00:00
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
# tree-sitter grammar registry: file extension -> (grammar_module, language_name).
# Built from one row per language so that extensions sharing a grammar are
# listed together.
_TS_LANGUAGES = {
    extension: (grammar, language)
    for grammar, language, extensions in (
        (tree_sitter_python, "python", (".py",)),
        (tree_sitter_javascript, "javascript", (".js", ".jsx", ".mjs")),
        (tree_sitter_rust, "rust", (".rs",)),
        (tree_sitter_go, "go", (".go",)),
    )
    for extension in extensions
}

# tree_sitter.Language objects keyed by language name.  Starts empty and is
# filled on first use by _get_ts_parser().
_TS_LANG_CACHE = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_ts_parser(ext):
    """Build a tree-sitter parser for the language registered under *ext*.

    Returns a ``(Parser, language_name)`` tuple, or ``None`` when *ext* has
    no entry in ``_TS_LANGUAGES``.  The ``Language`` object is created once
    per language name and kept in ``_TS_LANG_CACHE``; a new ``Parser`` is
    constructed on every call.
    """
    registered = _TS_LANGUAGES.get(ext)
    if registered is None:
        return None
    grammar, lang_name = registered
    try:
        language = _TS_LANG_CACHE[lang_name]
    except KeyError:
        language = tree_sitter.Language(grammar.language())
        _TS_LANG_CACHE[lang_name] = language
    return tree_sitter.Parser(language), lang_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Helpers
|
|
|
|
|
# ---------------------------------------------------------------------------
|
2026-03-30 16:03:48 +00:00
|
|
|
|
|
|
|
|
def _get_api_key():
    """Return the value of ``ANTHROPIC_API_KEY`` from the environment.

    When the variable is unset or empty, a warning is written to stderr and
    the empty string is returned.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    if api_key:
        return api_key
    print("Warning: ANTHROPIC_API_KEY not set. Skipping AI analysis.",
          file=sys.stderr)
    return api_key
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
def _path_is_safe(path, target):
    """Return True if *path* resolves to *target* or to somewhere inside it.

    Both arguments are passed through ``os.path.realpath`` first, so
    symlinks and ``..`` components are resolved before the comparison.
    """
    real = os.path.realpath(path)
    target_real = os.path.realpath(target)
    try:
        # commonpath compares whole path components, so "/a/bc" is not
        # treated as being inside "/a/b".  Unlike a "target + os.sep" prefix
        # test it also works when the target is the filesystem root.
        return os.path.commonpath([real, target_real]) == target_real
    except ValueError:
        # Raised when the two paths cannot share a prefix at all
        # (e.g. different drives on Windows).
        return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _now_iso():
    """Return the current UTC time as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _should_skip_dir(name):
    """Tell whether directory *name* is excluded from investigation.

    A name is skipped when it equals an entry of ``_SKIP_DIRS``, or when it
    ends with the remainder of an entry written as ``*suffix``.
    """
    if name in _SKIP_DIRS:
        return True
    return any(
        entry.startswith("*") and name.endswith(entry[1:])
        for entry in _SKIP_DIRS
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Token tracker
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
class _TokenTracker:
    """Accumulate token usage, both overall and for the current loop."""

    def __init__(self):
        # Totals over every recorded call.
        self.total_input = 0
        self.total_output = 0
        # Totals since the last reset_loop().
        self.loop_input = 0
        self.loop_output = 0

    def record(self, usage):
        """Add one API call's usage to the overall and per-loop counters.

        ``input_tokens`` and ``output_tokens`` are read from *usage*; a
        missing attribute counts as 0.
        """
        consumed = getattr(usage, "input_tokens", 0)
        produced = getattr(usage, "output_tokens", 0)
        self.total_input += consumed
        self.loop_input += consumed
        self.total_output += produced
        self.loop_output += produced

    def reset_loop(self):
        """Zero the per-loop counters; the overall totals are kept."""
        self.loop_input = self.loop_output = 0

    @property
    def loop_total(self):
        """Input plus output tokens recorded since the last reset."""
        return self.loop_input + self.loop_output

    def budget_exceeded(self):
        """Return True once the per-loop total is above ``CONTEXT_BUDGET``."""
        return CONTEXT_BUDGET < self.loop_total

    def summary(self):
        """Return a one-line usage report with an approximate dollar cost."""
        dollars = (
            self.total_input * INPUT_PRICE_PER_M / 1_000_000
            + self.total_output * OUTPUT_PRICE_PER_M / 1_000_000
        )
        return (
            f"{self.total_input:,} input / {self.total_output:,} output "
            f"(approx ${dollars:.2f})"
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Tool definitions
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
# Tool schemas (name / description / JSON input_schema) for the per-directory
# investigation.  Every name here except submit_report has a handler in
# _TOOL_DISPATCH.
_DIR_TOOLS = [
    {
        "name": "read_file",
        "description": (
            "Read and return the contents of a file. Path must be inside "
            "the target directory."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Absolute or relative path to the file.",
                },
                "max_bytes": {
                    "type": "integer",
                    "description": "Maximum bytes to read (default 4096).",
                },
            },
            "required": ["path"],
        },
    },
    {
        "name": "list_directory",
        "description": (
            "List the contents of a directory with file sizes and types."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Absolute or relative path to the directory.",
                },
                "show_hidden": {
                    "type": "boolean",
                    "description": "Include hidden files (default false).",
                },
            },
            "required": ["path"],
        },
    },
    {
        "name": "run_command",
        "description": (
            "Run a read-only shell command. Allowed binaries: "
            "wc, file, grep, head, tail, stat, du, find."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The shell command to execute.",
                },
            },
            "required": ["command"],
        },
    },
    {
        "name": "parse_structure",
        # The language list mirrors the grammars registered in _TS_LANGUAGES;
        # advertising a language without a grammar only provokes failing calls.
        "description": (
            "Parse a source file using tree-sitter and return its structural "
            "skeleton: functions, classes, imports, and code metrics. "
            "Supported: Python, JavaScript, Rust, Go."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the source file to parse.",
                },
            },
            "required": ["path"],
        },
    },
    {
        "name": "write_cache",
        "description": (
            "Write a summary cache entry for a file or directory. The data "
            "must NOT contain raw file contents — summaries only."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "cache_type": {
                    "type": "string",
                    "enum": ["file", "dir"],
                    "description": "'file' or 'dir'.",
                },
                "path": {
                    "type": "string",
                    "description": "The path being cached.",
                },
                "data": {
                    "type": "object",
                    "description": (
                        "Cache entry. Files: {path, relative_path, size_bytes, "
                        "category, summary, notable, notable_reason, cached_at}. "
                        "Dirs: {path, relative_path, child_count, summary, "
                        "dominant_category, notable_files, cached_at}."
                    ),
                },
            },
            "required": ["cache_type", "path", "data"],
        },
    },
    {
        "name": "think",
        "description": (
            "Record your reasoning before choosing which file or directory "
            "to investigate next. Call this when deciding what to look at "
            "— not before every individual tool call."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "observation": {
                    "type": "string",
                    "description": "What you have observed so far.",
                },
                "hypothesis": {
                    "type": "string",
                    "description": "Your hypothesis about the directory.",
                },
                "next_action": {
                    "type": "string",
                    "description": "What you plan to investigate next and why.",
                },
            },
            "required": ["observation", "hypothesis", "next_action"],
        },
    },
    {
        "name": "checkpoint",
        "description": (
            "Summarize what you have learned so far about this directory "
            "and what you still need to determine. Call this after completing "
            "a significant cluster of files — not after every file."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "learned": {
                    "type": "string",
                    "description": "What you have learned so far.",
                },
                "still_unknown": {
                    "type": "string",
                    "description": "What you still need to determine.",
                },
                "next_phase": {
                    "type": "string",
                    "description": "What you will investigate next.",
                },
            },
            "required": ["learned", "still_unknown", "next_phase"],
        },
    },
    {
        "name": "flag",
        "description": (
            "Mark a file, directory, or finding as notable or anomalous. "
            "Call this immediately when you discover something surprising, "
            "concerning, or important — do not save it for the report."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Relative path, or 'general'.",
                },
                "finding": {
                    "type": "string",
                    "description": "What you found.",
                },
                "severity": {
                    "type": "string",
                    "enum": ["info", "concern", "critical"],
                    "description": "info | concern | critical",
                },
            },
            "required": ["path", "finding", "severity"],
        },
    },
    {
        "name": "submit_report",
        "description": (
            "Submit the directory summary. This ends the investigation loop."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "summary": {
                    "type": "string",
                    "description": "1-3 sentence summary of the directory.",
                },
            },
            "required": ["summary"],
        },
    },
]
|
|
|
|
|
|
|
|
|
|
# Tool schemas for the synthesis pass: two cache readers, the flag tool and
# the final report.  Small property schemas are written on one line.
_SYNTHESIS_TOOLS = [
    {
        "name": "read_cache",
        "description": "Read a previously cached summary for a file or directory.",
        "input_schema": {
            "type": "object",
            "properties": {
                "cache_type": {"type": "string", "enum": ["file", "dir"]},
                "path": {"type": "string", "description": "The path to look up."},
            },
            "required": ["cache_type", "path"],
        },
    },
    {
        "name": "list_cache",
        "description": "List all cached entry paths of a given type.",
        "input_schema": {
            "type": "object",
            "properties": {
                "cache_type": {"type": "string", "enum": ["file", "dir"]},
            },
            "required": ["cache_type"],
        },
    },
    {
        "name": "flag",
        "description": (
            "Mark a file, directory, or finding as notable or anomalous. "
            "Call this immediately when you discover something surprising, "
            "concerning, or important — do not save it for the report."
        ),
        "input_schema": {
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "Relative path, or 'general'."},
                "finding": {"type": "string", "description": "What you found."},
                "severity": {
                    "type": "string",
                    "enum": ["info", "concern", "critical"],
                    "description": "info | concern | critical",
                },
            },
            "required": ["path", "finding", "severity"],
        },
    },
    {
        "name": "submit_report",
        "description": "Submit the final analysis report.",
        "input_schema": {
            "type": "object",
            "properties": {
                "brief": {"type": "string", "description": "2-4 sentence summary."},
                "detailed": {"type": "string", "description": "Thorough breakdown."},
            },
            "required": ["brief", "detailed"],
        },
    },
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Tool implementations
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def _tool_read_file(args, target, _cache):
    """Return up to ``max_bytes`` bytes of a file under *target* as text.

    Args:
        args: Tool input with ``path`` (absolute, or relative to *target*)
            and an optional ``max_bytes`` (default 4096).
        target: Directory the resolved path must lie inside.
        _cache: Unused; present so all tool handlers share one signature.

    Returns:
        The file content decoded as UTF-8 (undecodable bytes replaced), with
        a truncation notice appended when the file is larger than the limit;
        ``"(empty file)"`` for an empty file; or a string starting with
        ``"Error"`` when the path is rejected or cannot be read.
    """
    path = args.get("path", "")
    # max_bytes comes from the model.  Anything that is not a positive
    # integer falls back to the default: read(-1) would return the whole
    # file and a non-numeric value would raise.
    try:
        max_bytes = int(args.get("max_bytes", 4096))
    except (TypeError, ValueError, OverflowError):
        max_bytes = 4096
    if max_bytes <= 0:
        max_bytes = 4096
    if not os.path.isabs(path):
        path = os.path.join(target, path)
    if not _path_is_safe(path, target):
        return f"Error: path '{path}' is outside the target directory."
    try:
        file_size = os.path.getsize(path)
        # Read raw bytes so the limit and the size quoted in the truncation
        # notice use the same unit (a text-mode read counts characters).
        with open(path, "rb") as f:
            raw = f.read(max_bytes)
    except OSError as e:
        return f"Error reading file: {e}"
    if not raw:
        return "(empty file)"
    content = raw.decode("utf-8", errors="replace")
    if file_size > max_bytes:
        content += (
            f"\n\n[TRUNCATED — showed {max_bytes} of {file_size} bytes. "
            f"Call again with a larger max_bytes or use "
            f"run_command('tail -n ... {os.path.relpath(path, target)}') "
            f"to see the rest.]"
        )
    return content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_list_directory(args, target, _cache):
    """List one directory under *target*, one line per entry.

    Args:
        args: Tool input with ``path`` (absolute, or relative to *target*;
            defaults to *target*) and optional ``show_hidden`` (default
            False; when false, names starting with "." are omitted).
        target: Directory the resolved path must lie inside.
        _cache: Unused; present so all tool handlers share one signature.

    Returns:
        Newline-joined entry lines sorted by name (directories are marked
        ``(dir)``, files show their size and, when it can be determined,
        their MIME type); ``"(empty directory)"`` when nothing is listed; or
        a string starting with ``"Error"``.
    """
    path = args.get("path", target)
    show_hidden = args.get("show_hidden", False)
    if not os.path.isabs(path):
        path = os.path.join(target, path)
    if not _path_is_safe(path, target):
        return f"Error: path '{path}' is outside the target directory."
    if not os.path.isdir(path):
        return f"Error: '{path}' is not a directory."
    try:
        entries = sorted(os.listdir(path))
    except OSError as e:
        return f"Error listing directory: {e}"

    lines = []
    for name in entries:
        if not show_hidden and name.startswith("."):
            continue
        full = os.path.join(path, name)
        try:
            st = os.stat(full)
        except OSError:
            # e.g. a dangling symlink or a permission problem
            lines.append(f" {name} (stat failed)")
            continue
        if os.path.isdir(full):
            lines.append(f" {name}/ (dir)")
            continue
        try:
            mime = magic.from_file(full, mime=True)
        except (OSError, magic.MagicException):
            # MIME detection is a nicety: a libmagic failure should not
            # hide the entry or abort the whole listing.
            mime = None
        mime_str = f" [{mime}]" if mime else ""
        lines.append(f" {name} ({st.st_size} bytes){mime_str}")
    return "\n".join(lines) if lines else "(empty directory)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_run_command(args, target, _cache):
    """Run one whitelisted, read-only command in *target* without a shell.

    The command string is tokenised with ``shlex.split`` and executed
    directly rather than through ``shell=True``.  Checking only the first
    word and then handing the whole string to a shell would let input such
    as ``grep x; <anything>`` or ``head $(...)`` run arbitrary programs.
    As a consequence pipes, redirects, command substitution and wildcard
    expansion are not available.

    Args:
        args: Tool input with ``command``, the command line to run.
        target: Working directory for the command.
        _cache: Unused; present so all tool handlers share one signature.

    Returns:
        The command's stdout (plus stderr in parentheses when it exits
        non-zero), ``"(no output)"``, or a string starting with ``"Error"``.
    """
    import shlex  # needed only by this handler

    command = args.get("command", "")
    if not isinstance(command, str):
        return "Error: command must be a string."
    try:
        parts = shlex.split(command)
    except ValueError as e:
        # e.g. unbalanced quotes
        return f"Error: could not parse command: {e}"
    if not parts:
        return "Error: empty command."

    binary = os.path.basename(parts[0])
    if binary not in _COMMAND_WHITELIST:
        return (
            f"Error: '{binary}' is not allowed. "
            f"Whitelist: {', '.join(sorted(_COMMAND_WHITELIST))}"
        )

    if binary == "find":
        # These find actions execute programs or modify the file system,
        # which contradicts the read-only contract of this tool.
        forbidden = {"-exec", "-execdir", "-ok", "-okdir", "-delete",
                     "-fprint", "-fprint0", "-fprintf", "-fls"}
        used = sorted(forbidden.intersection(parts[1:]))
        if used:
            return f"Error: find option(s) not allowed: {', '.join(used)}"

    # Without a shell these tokens would be passed to the program as plain
    # arguments; reject them with a clear message instead.
    if {"|", "||", "&", "&&", ";", ">", ">>", "<"}.intersection(parts[1:]):
        return ("Error: shell operators (pipes, redirects, ';', '&&') are "
                "not supported. Run a single command.")

    # Run the bare name so it is resolved through PATH: a path such as
    # "./grep" must not select an executable that lives inside the directory
    # under investigation.
    # NOTE(review): arguments are not confined to *target*, so a command can
    # still read files elsewhere (e.g. "head /etc/hosts").
    argv = [binary] + parts[1:]
    try:
        result = subprocess.run(
            argv, capture_output=True, text=True, errors="replace",
            timeout=15, cwd=target,
        )
    except subprocess.TimeoutExpired:
        return "Error: command timed out after 15 seconds."
    except OSError as e:
        return f"Error running command: {e}"

    output = result.stdout
    if result.returncode != 0 and result.stderr:
        output += f"\n(stderr: {result.stderr.strip()})"
    return output.strip() if output.strip() else "(no output)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_parse_structure(args, target, _cache):
    """Parse a source file with tree-sitter and return its skeleton as JSON.

    Args:
        args: Tool input with ``path`` (absolute, or relative to *target*).
        target: Directory the resolved path must lie inside.
        _cache: Unused; present so all tool handlers share one signature.

    Returns:
        A JSON string with ``language``, ``functions`` (at most 50),
        ``classes`` (at most 30), ``imports`` (at most 30), ``line_count``,
        ``has_docstrings``, ``has_comments`` and ``comment_to_code_ratio``;
        or a string starting with ``"Error"`` when the path is rejected, is
        not a file, has no registered grammar, or cannot be read.
    """
    path = args.get("path", "")
    if not os.path.isabs(path):
        path = os.path.join(target, path)
    if not _path_is_safe(path, target):
        return f"Error: path '{path}' is outside the target directory."
    if not os.path.isfile(path):
        return f"Error: '{path}' is not a file."

    ext = os.path.splitext(path)[1].lower()
    ts = _get_ts_parser(ext)
    if ts is None:
        return f"Error: no grammar for extension '{ext}'. Supported: {', '.join(sorted(_TS_LANGUAGES.keys()))}"

    parser, lang_name = ts

    try:
        with open(path, "rb") as f:
            source = f.read()
    except OSError as e:
        return f"Error reading file: {e}"

    tree = parser.parse(source)
    source_text = source.decode("utf-8", errors="replace")
    line_count = len(source_text.split("\n"))

    functions = []
    classes = []
    imports = []
    has_docstrings = False
    comment_lines = 0

    def _raw(node):
        return node.text.decode("utf-8", errors="replace")

    # Pre-order traversal with an explicit stack.  A recursive walk can
    # exceed Python's recursion limit on deeply nested syntax trees (long
    # operator chains, deeply nested literals).  Children are pushed in
    # reverse so nodes are visited in the same order as a recursive descent.
    pending = list(reversed(tree.root_node.children))
    while pending:
        child = pending.pop()
        ntype = child.type

        # Comments (node type names differ between grammars)
        if ntype in ("comment", "line_comment", "block_comment"):
            comment_lines += _raw(child).count("\n") + 1

        if lang_name == "python":
            if ntype == "function_definition":
                functions.append(_py_func_sig(child))
            elif ntype == "class_definition":
                classes.append(_py_class(child))
            elif ntype in ("import_statement", "import_from_statement"):
                imports.append(_raw(child).strip())
            elif ntype == "expression_statement":
                # Any statement consisting of a bare string counts as a
                # docstring, wherever it appears.
                first = child.children[0] if child.children else None
                if first and first.type == "string":
                    has_docstrings = True

        elif lang_name == "javascript":
            if ntype in ("function_declaration", "arrow_function",
                         "function"):
                functions.append(_js_func_sig(child))
            elif ntype == "class_declaration":
                classes.append(_js_class(child))
            elif ntype == "import_statement":
                imports.append(_raw(child).strip())

        elif lang_name == "rust":
            if ntype == "function_item":
                functions.append(_rust_func_sig(child))
            elif ntype in ("struct_item", "enum_item", "impl_item"):
                classes.append(_rust_struct(child))
            elif ntype == "use_declaration":
                imports.append(_raw(child).strip())

        elif lang_name == "go":
            if ntype == "function_declaration":
                functions.append(_go_func_sig(child))
            elif ntype == "type_declaration":
                classes.append(_go_type(child))
            elif ntype == "import_declaration":
                imports.append(_raw(child).strip())

        pending.extend(reversed(child.children))

    # max(1, ...) keeps the ratio defined for files that are all comments.
    code_lines = max(1, line_count - comment_lines)
    result = {
        "language": lang_name,
        "functions": functions[:50],
        "classes": classes[:30],
        "imports": imports[:30],
        "line_count": line_count,
        "has_docstrings": has_docstrings,
        "has_comments": comment_lines > 0,
        "comment_to_code_ratio": round(comment_lines / code_lines, 2),
    }
    return json.dumps(result, indent=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- tree-sitter extraction helpers ---
|
|
|
|
|
|
|
|
|
|
def _child_by_type(node, *types):
    """Return the first direct child of *node* whose ``type`` is in *types*.

    Returns ``None`` when no child matches.
    """
    return next((kid for kid in node.children if kid.type in types), None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _text(node):
    """Return *node*'s source text as ``str``, or ``""`` for a falsy node.

    The node's bytes are decoded as UTF-8 with undecodable bytes replaced.
    """
    if not node:
        return ""
    return node.text.decode("utf-8", errors="replace")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _py_func_sig(node):
    """Format a Python function node as ``name(params)`` or ``name(params) -> ret``.

    The return part is added only when the node has a direct ``type`` child.
    """
    pieces = [
        _text(_child_by_type(node, "identifier")),
        _text(_child_by_type(node, "parameters")),
    ]
    annotation = _child_by_type(node, "type")
    if annotation:
        pieces.append(f" -> {_text(annotation)}")
    return "".join(pieces)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _py_class(node):
    """Summarise a Python ``class_definition`` node.

    Returns ``{"name": <class name>, "methods": [<signature>, ...]}`` with at
    most 20 method signatures, taken from the direct children of the class
    body.
    """
    name = _text(_child_by_type(node, "identifier"))
    methods = []
    body = _child_by_type(node, "block")
    if body:
        for child in body.children:
            if child.type == "decorated_definition":
                # Decorated methods (@property, @staticmethod, ...) are
                # wrapped in a decorated_definition node; unwrap it so they
                # are listed too.
                child = _child_by_type(child, "function_definition")
                if child is None:
                    # The decorator wraps something other than a function.
                    continue
            if child.type == "function_definition":
                methods.append(_py_func_sig(child))
    return {"name": name, "methods": methods[:20]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _js_func_sig(node):
    """Format a JavaScript function node as ``name(params)``.

    A node without a direct ``identifier`` child is labelled ``(anonymous)``.
    """
    label = _text(_child_by_type(node, "identifier")) or "(anonymous)"
    return label + _text(_child_by_type(node, "formal_parameters"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _js_class(node):
    """Summarise a JavaScript ``class_declaration`` node.

    Returns ``{"name": <class name>, "methods": [...]}`` where each method is
    ``name(params)`` built from a ``method_definition`` child of the class
    body; at most 20 methods are kept.
    """
    class_name = _text(_child_by_type(node, "identifier"))
    body = _child_by_type(node, "class_body")
    members = body.children if body else []
    signatures = [
        _text(_child_by_type(member, "property_identifier"))
        + _text(_child_by_type(member, "formal_parameters"))
        for member in members
        if member.type == "method_definition"
    ]
    return {"name": class_name, "methods": signatures[:20]}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _rust_func_sig(node):
    """Format a Rust ``function_item`` node as ``name(params)[ -> ret]``.

    The return type is taken from the node's ``return_type`` field, so any
    kind of type is reported (primitive, unit, tuple, ``impl Trait``, ...)
    rather than only a fixed list of node types.
    """
    name = _text(_child_by_type(node, "identifier"))
    params = _text(_child_by_type(node, "parameters"))
    ret = node.child_by_field_name("return_type")
    if ret is None:
        # Fallback: the previous lookup by node type, in case the grammar in
        # use does not expose the field.
        ret = _child_by_type(node, "type_identifier", "generic_type",
                             "reference_type", "scoped_type_identifier")
    sig = f"{name}{params}"
    if ret is not None:
        sig += f" -> {_text(ret)}"
    return sig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _rust_struct(node):
    """Summarise a Rust struct, enum or impl node.

    The name is the first direct ``type_identifier`` child; when there is
    none, the first 60 characters of the node's text are used instead.  The
    ``methods`` list is always empty.
    """
    label = _text(_child_by_type(node, "type_identifier"))
    if not label:
        label = _text(node)[:60]
    return {"name": label, "methods": []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _go_func_sig(node):
    """Format a Go function node as ``name(params)``.

    Uses the first direct ``identifier`` and ``parameter_list`` children.
    """
    return "".join(
        _text(_child_by_type(node, kind))
        for kind in ("identifier", "parameter_list")
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _go_type(node):
    """Summarise a Go ``type_declaration`` node.

    The name comes from the ``type_identifier`` of the first ``type_spec``
    child; when that yields nothing, the first 60 characters of the node's
    text are used.  The ``methods`` list is always empty.
    """
    spec = _child_by_type(node, "type_spec")
    label = ""
    if spec:
        label = _text(_child_by_type(spec, "type_identifier"))
    if not label:
        label = _text(node)[:60]
    return {"name": label, "methods": []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_write_cache(args, _target, cache):
    """Store a summary entry through ``cache.write_entry``.

    Reads ``cache_type``, ``path`` and ``data`` from *args*.  Returns an
    error string when ``cache_type`` is neither ``"file"`` nor ``"dir"``;
    otherwise returns whatever ``cache.write_entry`` returns.
    """
    kind = args.get("cache_type", "")
    if not (kind == "file" or kind == "dir"):
        return "Error: cache_type must be 'file' or 'dir'."
    return cache.write_entry(kind, args.get("path", ""), args.get("data", {}))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_read_cache(args, _target, cache):
    """Look up a cached entry and return it as indented JSON.

    Returns an error string for an invalid ``cache_type`` and the string
    ``"null"`` when ``cache.read_entry`` returns ``None``.
    """
    kind = args.get("cache_type", "")
    if not (kind == "file" or kind == "dir"):
        return "Error: cache_type must be 'file' or 'dir'."
    cached = cache.read_entry(kind, args.get("path", ""))
    return "null" if cached is None else json.dumps(cached, indent=2)
|
|
|
|
|
|
2026-03-30 16:03:48 +00:00
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
def _tool_list_cache(args, _target, cache):
    """Return the cached entry paths of one type, one per line.

    Returns an error string for an invalid ``cache_type`` and
    ``"(no cached entries)"`` when ``cache.list_entries`` yields nothing.
    """
    kind = args.get("cache_type", "")
    if not (kind == "file" or kind == "dir"):
        return "Error: cache_type must be 'file' or 'dir'."
    entries = cache.list_entries(kind)
    return "\n".join(entries) if entries else "(no cached entries)"
|
2026-03-30 16:03:48 +00:00
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
|
2026-03-30 19:02:19 +00:00
|
|
|
def _tool_think(args, _target, _cache):
    """Echo the model's reasoning fields to stderr and return ``"ok"``.

    Prints ``observation``, ``hypothesis`` and ``next_action`` from *args*
    (each defaulting to an empty string) under a THINK header.
    """
    report = (
        " [AI] THINK",
        f" observation: {args.get('observation', '')}",
        f" hypothesis: {args.get('hypothesis', '')}",
        f" next_action: {args.get('next_action', '')}",
    )
    for line in report:
        print(line, file=sys.stderr)
    return "ok"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_checkpoint(args, _target, _cache):
    """Echo the model's checkpoint fields to stderr and return ``"ok"``.

    Prints ``learned``, ``still_unknown`` and ``next_phase`` from *args*
    (each defaulting to an empty string) under a CHECKPOINT header.
    """
    report = (
        " [AI] CHECKPOINT",
        f" learned: {args.get('learned', '')}",
        f" still_unknown: {args.get('still_unknown', '')}",
        f" next_phase: {args.get('next_phase', '')}",
    )
    for line in report:
        print(line, file=sys.stderr)
    return "ok"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _tool_flag(args, _target, cache):
    """Report a notable finding on stderr and append it to ``flags.jsonl``.

    Args:
        args: Tool input with ``path`` (default ``"general"``), ``finding``
            (default ``""``) and ``severity`` (default ``"info"``).
        _target: Unused; present so all tool handlers share one signature.
        cache: Object whose ``root`` attribute is the directory that holds
            ``flags.jsonl``.

    Returns:
        Always ``"ok"``.  Persisting the flag is best effort: if the file
        cannot be written, a warning goes to stderr and the call still
        succeeds.
    """
    path = args.get("path", "general")
    finding = args.get("finding", "")
    severity = args.get("severity", "info")
    # str() so a non-string severity from the model cannot raise here.
    print(f" [AI] FLAG [{str(severity).upper()}] {path}", file=sys.stderr)
    print(f" {finding}", file=sys.stderr)
    flags_path = os.path.join(cache.root, "flags.jsonl")
    entry = {"path": path, "finding": finding, "severity": severity}
    try:
        with open(flags_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")
    except OSError as e:
        # The flag was already shown above; make the lost write visible
        # instead of dropping it silently.
        print(f" [AI] Warning: could not write {flags_path}: {e}",
              file=sys.stderr)
    return "ok"
|
|
|
|
|
|
|
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
# Tool name -> handler.  Every handler is called as handler(args, target, cache).
_TOOL_DISPATCH = {
    # File-system inspection
    "read_file": _tool_read_file,
    "list_directory": _tool_list_directory,
    "run_command": _tool_run_command,
    "parse_structure": _tool_parse_structure,
    # Cache access
    "write_cache": _tool_write_cache,
    "read_cache": _tool_read_cache,
    "list_cache": _tool_list_cache,
    # Reasoning trace and findings
    "think": _tool_think,
    "checkpoint": _tool_checkpoint,
    "flag": _tool_flag,
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _execute_tool(name, args, target, cache, dir_rel, turn, verbose=False):
    """Dispatch one tool call, log it through the cache, and return its result.

    An unknown *name* yields an error string and is not logged.  The logged
    arguments leave out the ``data`` key, and only the length of the result
    is recorded.  With *verbose* set, a preview of at most 200 characters of
    the result is printed to stderr.
    """
    try:
        handler = _TOOL_DISPATCH[name]
    except KeyError:
        return f"Error: unknown tool '{name}'."

    result = handler(args, target, cache)
    size = len(result)

    logged_args = {key: value for key, value in args.items() if key != "data"}
    cache.log_turn(dir_rel, turn, name, logged_args, size)

    if verbose:
        preview = result if size <= 200 else result[:200] + "..."
        print(f" [AI] <- {size} chars: {preview}", file=sys.stderr)

    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Streaming API caller
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def _call_api_streaming(client, system, messages, tools, tracker):
    """Send one request to Claude over the streaming API and wait for it.

    Nothing is printed here; the stream is only read to completion.

    Args:
        client: Anthropic client; ``client.messages.stream`` is used.
        system: System prompt for the request.
        messages: Conversation so far.
        tools: Tool schemas offered to the model.
        tracker: Object whose ``record(usage)`` is called with the
            response's usage.

    Returns:
        ``(content_blocks, usage)`` where ``content_blocks`` is the list of
        content blocks from the final message.
    """
    with client.messages.stream(
        model=MODEL,
        max_tokens=4096,
        system=system,
        messages=messages,
        tools=tools,
    ) as stream:
        # get_final_message() reads the stream to completion and returns the
        # accumulated message, so no separate event loop is needed.  The
        # earlier loop only updated a variable that was never read.
        response = stream.get_final_message()

    tracker.record(response.usage)
    return response.content, response.usage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Directory discovery
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def _discover_directories(target, show_hidden=False):
    """Collect *target* and its sub-directories, deepest paths first.

    Directories rejected by ``_should_skip_dir`` are pruned together with
    everything below them; so are dot-directories unless *show_hidden* is
    true.  The result is ordered by descending number of path separators,
    then alphabetically.
    """
    def _keep(dirname):
        if _should_skip_dir(dirname):
            return False
        return show_hidden or not dirname.startswith(".")

    found = []
    for current, children, _ in os.walk(os.path.realpath(target), topdown=True):
        # Assign in place so os.walk does not descend into pruned entries.
        children[:] = filter(_keep, children)
        found.append(current)
    return sorted(found, key=lambda d: (-d.count(os.sep), d))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Per-directory agent loop
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
_DIR_SYSTEM_PROMPT = """\
|
|
|
|
|
You are an expert analyst investigating a SINGLE directory on a file system.
|
|
|
|
|
Do NOT assume the type of content before investigating. Discover what this
|
|
|
|
|
directory contains from what you find.
|
|
|
|
|
|
|
|
|
|
## Your Task
|
|
|
|
|
Investigate the directory: {dir_path}
|
|
|
|
|
(relative to target: {dir_rel})
|
|
|
|
|
|
|
|
|
|
You must:
|
|
|
|
|
1. Read the important files in THIS directory (not subdirectories)
|
|
|
|
|
2. For each file you read, call write_cache to save a summary
|
|
|
|
|
3. Call write_cache for the directory itself with a synthesis
|
|
|
|
|
4. Call submit_report with a 1-3 sentence summary
|
|
|
|
|
|
|
|
|
|
## Tools
|
|
|
|
|
parse_structure gives you the skeleton of a file. It does NOT replace \
|
|
|
|
|
reading the file. Use parse_structure first to understand structure, then \
|
|
|
|
|
use read_file if you need to verify intent, check for anomalies, or \
|
|
|
|
|
understand content that structure cannot capture (comments, documentation, \
|
|
|
|
|
data files, config values). A file where structure and content appear to \
|
|
|
|
|
contradict each other is always worth reading in full.
|
|
|
|
|
|
2026-03-30 19:02:19 +00:00
|
|
|
Use the think tool when choosing which file or directory to investigate \
|
|
|
|
|
next — before starting a new file or switching investigation direction. \
|
|
|
|
|
Do NOT call think before every individual tool call in a sequence.
|
|
|
|
|
|
|
|
|
|
Use the checkpoint tool after completing investigation of a meaningful \
|
|
|
|
|
cluster of files. Not after every file — once or twice per directory \
|
|
|
|
|
loop at most.
|
|
|
|
|
|
|
|
|
|
Use the flag tool immediately when you find something notable, \
|
|
|
|
|
surprising, or concerning. Severity guide:
|
|
|
|
|
info = interesting but not problematic
|
|
|
|
|
concern = worth addressing
|
|
|
|
|
critical = likely broken or dangerous
|
|
|
|
|
|
|
|
|
|
## Step Numbering
|
|
|
|
|
Number your investigation steps as you go. Before starting each new \
|
|
|
|
|
file cluster or phase transition, output:
|
|
|
|
|
Step N: <what you are doing and why>
|
|
|
|
|
Output this as plain text before tool calls, not as a tool call itself.
|
|
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
## Efficiency Rules
|
|
|
|
|
- Batch multiple tool calls in a single turn whenever possible
|
|
|
|
|
- Skip binary/compiled/generated files (.pyc, .class, .o, .min.js, etc.)
|
|
|
|
|
- Skip files >100KB unless uniquely important
|
|
|
|
|
- Prioritize: README, index, main, config, schema, manifest files
|
|
|
|
|
- For source files: try parse_structure first, then read_file if needed
|
|
|
|
|
- If read_file returns truncated content, use a larger max_bytes or
|
|
|
|
|
run_command('tail ...') — NEVER retry the identical call
|
|
|
|
|
- You have only {max_turns} turns — be efficient
|
|
|
|
|
|
|
|
|
|
## Cache Schemas
|
|
|
|
|
File: {{path, relative_path, size_bytes, category, summary, notable,
|
|
|
|
|
notable_reason, cached_at}}
|
|
|
|
|
Dir: {{path, relative_path, child_count, summary, dominant_category,
|
|
|
|
|
notable_files, cached_at}}
|
|
|
|
|
|
|
|
|
|
category values: source, config, data, document, media, archive, unknown
|
|
|
|
|
|
|
|
|
|
## Context
|
|
|
|
|
{context}
|
|
|
|
|
|
|
|
|
|
## Child Directory Summaries (already investigated)
|
|
|
|
|
{child_summaries}"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_dir_context(dir_path):
|
|
|
|
|
lines = []
|
|
|
|
|
try:
|
|
|
|
|
entries = sorted(os.listdir(dir_path))
|
|
|
|
|
for name in entries:
|
|
|
|
|
if name.startswith("."):
|
|
|
|
|
continue
|
|
|
|
|
full = os.path.join(dir_path, name)
|
|
|
|
|
try:
|
|
|
|
|
st = os.stat(full)
|
|
|
|
|
if os.path.isdir(full):
|
|
|
|
|
lines.append(f" {name}/ (dir)")
|
|
|
|
|
else:
|
|
|
|
|
mime = magic.from_file(full, mime=True)
|
|
|
|
|
lines.append(f" {name} ({st.st_size} bytes) [{mime}]")
|
|
|
|
|
except OSError:
|
|
|
|
|
lines.append(f" {name} (stat failed)")
|
|
|
|
|
except OSError:
|
|
|
|
|
lines.append(" (could not list directory)")
|
|
|
|
|
return "Directory contents:\n" + "\n".join(lines) if lines else "(empty)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_child_summaries(dir_path, cache):
|
2026-03-30 16:03:48 +00:00
|
|
|
parts = []
|
2026-03-30 18:13:55 +00:00
|
|
|
try:
|
|
|
|
|
for name in sorted(os.listdir(dir_path)):
|
|
|
|
|
child = os.path.join(dir_path, name)
|
|
|
|
|
if not os.path.isdir(child):
|
|
|
|
|
continue
|
|
|
|
|
entry = cache.read_entry("dir", child)
|
|
|
|
|
if entry:
|
|
|
|
|
rel = entry.get("relative_path", name)
|
|
|
|
|
summary = entry.get("summary", "(no summary)")
|
|
|
|
|
parts.append(f"- {rel}/: {summary}")
|
|
|
|
|
except OSError:
|
|
|
|
|
pass
|
|
|
|
|
return "\n".join(parts) if parts else "(none — this is a leaf directory)"
|
2026-03-30 16:03:48 +00:00
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
|
2026-03-30 19:02:19 +00:00
|
|
|
def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
                  verbose=False):
    """Run an isolated agent loop for a single directory.

    Builds a system prompt from the directory listing and the cached
    summaries of its subdirectories, then alternates model calls and tool
    executions until the model calls submit_report, the turn limit is hit,
    the context budget is exceeded, or the API raises.

    Args:
        client: API client handed to _call_api_streaming.
        target: Root of the investigation; used to compute relative paths
            and passed to _execute_tool.
        cache: Cache manager used for has_entry / read_all_entries /
            write_entry and passed to _execute_tool.
        tracker: Token tracker; consulted for the budget check and passed
            to _call_api_streaming.
        dir_path: Directory to investigate.
        max_turns: Maximum number of model calls for this directory.
        verbose: Forwarded to _execute_tool.

    Returns:
        The "summary" given to submit_report; or, when the context budget
        ran out first, the joined summaries of the cached file entries; or
        None when neither happened (API error, turn limit, or nothing was
        cached).
    """
    dir_rel = os.path.relpath(dir_path, target)
    if dir_rel == ".":
        # The target itself: show its directory name rather than ".".
        dir_rel = os.path.basename(target)

    context = _build_dir_context(dir_path)
    child_summaries = _get_child_summaries(dir_path, cache)

    system = _DIR_SYSTEM_PROMPT.format(
        dir_path=dir_path,
        dir_rel=dir_rel,
        max_turns=max_turns,
        context=context,
        child_summaries=child_summaries,
    )

    messages = [
        {
            "role": "user",
            "content": (
                "Investigate this directory now. Use parse_structure for "
                "source files, read_file for others, cache summaries, and "
                "call submit_report. Batch tool calls for efficiency."
            ),
        },
    ]

    # NOTE(review): presumably clears the per-loop token counters that
    # budget_exceeded() / loop_total report -- confirm in _TokenTracker.
    tracker.reset_loop()
    summary = None

    for turn in range(max_turns):
        # Check context budget before spending another API call.
        if tracker.budget_exceeded():
            print(f" [AI] Context budget reached — exiting early "
                  f"({tracker.loop_total:,} tokens used)", file=sys.stderr)
            # Flush a partial directory summary from cached file entries,
            # but only if the model has not already cached this directory.
            if not cache.has_entry("dir", dir_path):
                dir_real = os.path.realpath(dir_path)
                # Keep file entries whose resolved path lies anywhere under
                # this directory (nested files included), or whose
                # relative_path, joined onto target, has this directory as
                # its parent.
                file_entries = [
                    e for e in cache.read_all_entries("file")
                    if os.path.realpath(e.get("path", "")).startswith(
                        dir_real + os.sep)
                    or os.path.dirname(
                        os.path.join(target, e.get("relative_path", ""))
                    ) == dir_real
                ]
                if file_entries:
                    file_summaries = [
                        e["summary"] for e in file_entries if e.get("summary")
                    ]
                    notable = [
                        e.get("relative_path", e.get("path", ""))
                        for e in file_entries if e.get("notable")
                    ]
                    partial_summary = " ".join(file_summaries)
                    cache.write_entry("dir", dir_path, {
                        "path": dir_path,
                        "relative_path": os.path.relpath(dir_path, target),
                        # Counts non-hidden immediate children.
                        "child_count": len([
                            n for n in os.listdir(dir_path)
                            if not n.startswith(".")
                        ]) if os.path.isdir(dir_path) else 0,
                        "summary": partial_summary,
                        "dominant_category": "unknown",
                        "notable_files": notable,
                        "partial": True,
                        "partial_reason": "context budget reached",
                        "cached_at": _now_iso(),
                    })
                    if not summary:
                        summary = partial_summary
                else:
                    # Nothing was cached for this directory: record a
                    # placeholder entry marked as partial.
                    cache.write_entry("dir", dir_path, {
                        "path": dir_path,
                        "relative_path": os.path.relpath(dir_path, target),
                        "child_count": 0,
                        "summary": ("Investigation incomplete — context budget "
                                    "reached before any files were processed."),
                        "dominant_category": "unknown",
                        "notable_files": [],
                        "partial": True,
                        "partial_reason": (
                            "context budget reached before files processed"),
                        "cached_at": _now_iso(),
                    })
            break

        try:
            content_blocks, usage = _call_api_streaming(
                client, system, messages, _DIR_TOOLS, tracker,
            )
        except anthropic.APIError as e:
            # Give up on this directory; summary keeps whatever it had.
            print(f" [AI] API error: {e}", file=sys.stderr)
            break

        # Print text blocks (step numbering, reasoning) to stderr
        for b in content_blocks:
            if b.type == "text" and b.text.strip():
                for line in b.text.strip().split("\n"):
                    print(f" [AI] {line}", file=sys.stderr)

        # Print tool decisions now that we have the full response.  The
        # "data" argument is left out of the one-line summary.
        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
        for tu in tool_uses:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
            ) if tu.input else ""
            print(f" [AI] -> {tu.name}({arg_summary})", file=sys.stderr)

        # Record the assistant turn as plain dicts in the history.
        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(b) for b in content_blocks],
        })

        if not tool_uses:
            # The model answered in prose only: nudge it toward finishing.
            messages.append({
                "role": "user",
                "content": "Please call submit_report with your summary.",
            })
            continue

        tool_results = []
        done = False
        for tu in tool_uses:
            if tu.name == "submit_report":
                # Terminal tool: capture the summary.  Other tool calls in
                # the same turn are still executed below.
                summary = tu.input.get("summary", "")
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Summary submitted.",
                })
                done = True
            else:
                result_text = _execute_tool(
                    tu.name, tu.input, target, cache, dir_rel,
                    turn + 1, verbose=verbose,
                )
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": result_text,
                })

        # One user message carries a tool_result for every tool_use.
        messages.append({"role": "user", "content": tool_results})

        if done:
            break
    else:
        # for/else: reached only when the loop ran out of turns without
        # hitting any of the break statements above.
        print(f" [AI] Warning: max turns reached for {dir_rel}",
              file=sys.stderr)

    return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _block_to_dict(block):
|
|
|
|
|
"""Convert an SDK content block to a plain dict for message history."""
|
|
|
|
|
if block.type == "text":
|
|
|
|
|
return {"type": "text", "text": block.text}
|
|
|
|
|
elif block.type == "tool_use":
|
|
|
|
|
return {"type": "tool_use", "id": block.id,
|
|
|
|
|
"name": block.name, "input": block.input}
|
|
|
|
|
return {"type": block.type}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Synthesis pass
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
_SYNTHESIS_SYSTEM_PROMPT = """\
|
|
|
|
|
You are an expert analyst synthesizing a final report about a directory tree.
|
|
|
|
|
ALL directory summaries are provided below — you do NOT need to call
|
|
|
|
|
list_cache or read_cache. Just read the summaries and call submit_report
|
|
|
|
|
immediately in your first turn.
|
|
|
|
|
|
|
|
|
|
Do NOT assume the type of content. Let the summaries speak for themselves.
|
|
|
|
|
|
|
|
|
|
## Your Goal
|
|
|
|
|
Produce two outputs via the submit_report tool:
|
|
|
|
|
1. **brief**: A 2-4 sentence summary of what this directory tree is.
|
|
|
|
|
2. **detailed**: A thorough breakdown covering purpose, structure, key
|
|
|
|
|
components, technologies, notable patterns, and any concerns.
|
|
|
|
|
|
|
|
|
|
## Rules
|
|
|
|
|
- ALL summaries are below — call submit_report directly
|
|
|
|
|
- Be specific — reference actual directory and file names
|
|
|
|
|
- Do NOT call list_cache or read_cache
|
|
|
|
|
|
|
|
|
|
## Target
|
|
|
|
|
{target}
|
|
|
|
|
|
|
|
|
|
## Directory Summaries
|
|
|
|
|
{summaries_text}"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _run_synthesis(client, target, cache, tracker, max_turns=5, verbose=False):
    """Run the final synthesis pass. Returns (brief, detailed).

    Every cached "dir" entry is rendered into the system prompt and the
    model is asked to answer through the submit_report tool.  If no report
    is submitted -- because the turn limit is hit or the API call fails --
    the result falls back to _synthesize_from_cache() so the directory
    summaries already on disk are not thrown away.

    Args:
        client: API client handed to _call_api_streaming.
        target: Investigation root; shown in the prompt and passed to
            _execute_tool.
        cache: Cache manager providing read_all_entries("dir").
        tracker: Token tracker passed to _call_api_streaming.
        max_turns: Maximum number of model calls.
        verbose: Forwarded to _execute_tool.

    Returns:
        A (brief, detailed) tuple of strings.
    """
    dir_entries = cache.read_all_entries("dir")

    summary_lines = []
    for entry in dir_entries:
        rel = entry.get("relative_path", "?")
        summary = entry.get("summary", "(no summary)")
        dominant = entry.get("dominant_category", "?")
        notable = entry.get("notable_files", [])
        summary_lines.append(f"### {rel}/")
        summary_lines.append(f"Category: {dominant}")
        summary_lines.append(f"Summary: {summary}")
        if notable:
            # Entries come from the on-disk cache; stringify each item so a
            # non-string value cannot make join() raise.
            names = ", ".join(str(n) for n in notable)
            summary_lines.append(f"Notable files: {names}")
        summary_lines.append("")

    summaries_text = "\n".join(summary_lines) if summary_lines else "(none)"

    system = _SYNTHESIS_SYSTEM_PROMPT.format(
        target=target,
        summaries_text=summaries_text,
    )

    messages = [
        {
            "role": "user",
            "content": (
                "All directory summaries are in the system prompt above. "
                "Synthesize them into a cohesive report and call "
                "submit_report immediately — no other tool calls needed."
            ),
        },
    ]

    brief, detailed = "", ""
    # True once the model has actually called submit_report.
    submitted = False

    for turn in range(max_turns):
        try:
            content_blocks, _usage = _call_api_streaming(
                client, system, messages, _SYNTHESIS_TOOLS, tracker,
            )
        except anthropic.APIError as e:
            print(f" [AI] API error: {e}", file=sys.stderr)
            break

        # Print text blocks to stderr
        for b in content_blocks:
            if b.type == "text" and b.text.strip():
                for line in b.text.strip().split("\n"):
                    print(f" [AI] {line}", file=sys.stderr)

        # Log each tool call; the "data" argument is left out of the line.
        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
        for tu in tool_uses:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
            ) if tu.input else ""
            print(f" [AI] -> {tu.name}({arg_summary})", file=sys.stderr)

        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(b) for b in content_blocks],
        })

        if not tool_uses:
            # Prose-only answer: ask again for the tool call.
            messages.append({
                "role": "user",
                "content": "Please call submit_report with your analysis.",
            })
            continue

        tool_results = []
        for tu in tool_uses:
            if tu.name == "submit_report":
                brief = tu.input.get("brief", "")
                detailed = tu.input.get("detailed", "")
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Report submitted. Thank you.",
                })
                submitted = True
            else:
                result_text = _execute_tool(
                    tu.name, tu.input, target, cache, "(synthesis)",
                    turn + 1, verbose=verbose,
                )
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": result_text,
                })

        messages.append({"role": "user", "content": tool_results})

        if submitted:
            break
    else:
        # for/else: only reached when every turn was used up.
        print(" [AI] Warning: synthesis ran out of turns.", file=sys.stderr)

    if not submitted:
        # Covers both the turn limit and an API error: build a best-effort
        # report from the cache instead of returning two empty strings.
        brief, detailed = _synthesize_from_cache(cache)

    return brief, detailed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _synthesize_from_cache(cache):
|
|
|
|
|
"""Build a best-effort report from cached directory summaries."""
|
|
|
|
|
dir_entries = cache.read_all_entries("dir")
|
|
|
|
|
if not dir_entries:
|
|
|
|
|
return ("(AI analysis incomplete — no data was cached)", "")
|
|
|
|
|
|
|
|
|
|
brief_parts = []
|
|
|
|
|
detail_parts = []
|
|
|
|
|
for entry in dir_entries:
|
|
|
|
|
rel = entry.get("relative_path", "?")
|
|
|
|
|
summary = entry.get("summary", "")
|
|
|
|
|
if summary:
|
|
|
|
|
detail_parts.append(f"**{rel}/**: {summary}")
|
|
|
|
|
brief_parts.append(summary)
|
|
|
|
|
|
|
|
|
|
brief = brief_parts[0] if brief_parts else "(AI analysis incomplete)"
|
|
|
|
|
detailed = "\n\n".join(detail_parts) if detail_parts else ""
|
|
|
|
|
return brief, detailed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Main orchestrator
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def _read_flags_jsonl(flags_path):
    """Load flag records from a JSON-lines file, skipping unusable lines."""
    flags = []
    try:
        with open(flags_path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    flags.append(json.loads(line))
                except json.JSONDecodeError:
                    # One corrupt line must not hide the flags after it.
                    continue
    except OSError:
        # A missing or unreadable flags file is treated as "no flags".
        pass
    return flags


def _run_investigation(client, target, report, show_hidden=False,
                       fresh=False, verbose=False):
    """Orchestrate the multi-pass investigation. Returns (brief, detailed, flags).

    Steps: obtain the investigation id and cache, discover directories,
    run _run_dir_loop for every directory that has no cached "dir" entry,
    update the cache metadata, run the synthesis pass, then read the flags
    recorded in ``flags.jsonl`` under the cache root.

    Args:
        client: API client passed to the per-directory and synthesis loops.
        target: Root directory to investigate.
        report: Accepted for the caller's signature; not used in this
            function body.
        show_hidden: Forwarded to _discover_directories.
        fresh: Forwarded to _get_investigation_id.
        verbose: Forwarded to the agent loops.

    Returns:
        (brief, detailed, flags) where flags is a list of the JSON values
        parsed from flags.jsonl (empty if the file is absent).
    """
    investigation_id, is_new = _get_investigation_id(target, fresh=fresh)
    cache = _CacheManager(investigation_id, target)
    tracker = _TokenTracker()

    if is_new:
        cache.write_meta(MODEL, _now_iso())

    print(f" [AI] Investigation ID: {investigation_id}"
          f"{'' if is_new else ' (resumed)'}", file=sys.stderr)
    print(f" [AI] Cache: {cache.root}/", file=sys.stderr)

    all_dirs = _discover_directories(target, show_hidden=show_hidden)

    # Split into directories that already have a cache entry (a resumed
    # run) and those that still need an agent loop.
    to_investigate = []
    cached_count = 0
    for d in all_dirs:
        if cache.has_entry("dir", d):
            cached_count += 1
            rel = os.path.relpath(d, target)
            print(f" [AI] Skipping (cached): {rel}/", file=sys.stderr)
        else:
            to_investigate.append(d)

    total = len(to_investigate)
    if cached_count:
        print(f" [AI] Directories cached: {cached_count}", file=sys.stderr)
    print(f" [AI] Directories to investigate: {total}", file=sys.stderr)

    for i, dir_path in enumerate(to_investigate, 1):
        dir_rel = os.path.relpath(dir_path, target)
        if dir_rel == ".":
            dir_rel = os.path.basename(target)
        print(f" [AI] Investigating: {dir_rel}/ ({i}/{total})",
              file=sys.stderr)

        summary = _run_dir_loop(
            client, target, cache, tracker, dir_path, verbose=verbose,
        )

        # The agent normally caches the directory itself; if it only
        # returned a summary, store a minimal entry so later passes (parent
        # directories, synthesis) can still see it.
        if summary and not cache.has_entry("dir", dir_path):
            cache.write_entry("dir", dir_path, {
                "path": dir_path,
                "relative_path": os.path.relpath(dir_path, target),
                "child_count": len([
                    n for n in os.listdir(dir_path)
                    if not n.startswith(".")
                ]) if os.path.isdir(dir_path) else 0,
                "summary": summary,
                "dominant_category": "unknown",
                "notable_files": [],
                "cached_at": _now_iso(),
            })

    cache.update_meta(
        directories_investigated=total + cached_count,
        end_time=_now_iso(),
    )

    print(" [AI] Synthesis pass...", file=sys.stderr)
    brief, detailed = _run_synthesis(
        client, target, cache, tracker, verbose=verbose,
    )

    # Read flags from flags.jsonl
    flags = _read_flags_jsonl(os.path.join(cache.root, "flags.jsonl"))

    print(f" [AI] Total tokens used: {tracker.summary()}", file=sys.stderr)

    return brief, detailed, flags
|
2026-03-30 16:03:48 +00:00
|
|
|
|
|
|
|
|
|
2026-03-30 18:13:55 +00:00
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Cache cleanup
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def clear_cache():
    """Delete the whole cache tree rooted at CACHE_ROOT, if it exists.

    The outcome (cleared, or nothing to clear) is reported on stderr.
    """
    import shutil

    if not os.path.isdir(CACHE_ROOT):
        print(f"No cache to clear ({CACHE_ROOT} does not exist).",
              file=sys.stderr)
        return

    shutil.rmtree(CACHE_ROOT)
    print(f"Cleared cache: {CACHE_ROOT}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Public interface
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def analyze_directory(report, target, verbose_tools=False, fresh=False):
    """Run the AI investigation of *target*. Returns (brief, detailed, flags).

    Exits the process with status 1 when check_ai_dependencies() reports a
    failure.  Returns ("", "", []) when no API key is available or when the
    investigation raises; in the latter case a warning is printed to stderr.
    """
    if not check_ai_dependencies():
        sys.exit(1)

    key = _get_api_key()
    if not key:
        return "", "", []

    print(" [AI] Starting multi-pass investigation...", file=sys.stderr)
    client = anthropic.Anthropic(api_key=key)

    try:
        brief, detailed, flags = _run_investigation(
            client, target, report, fresh=fresh, verbose=verbose_tools,
        )
    except Exception as exc:
        # Top-level boundary: AI analysis is optional, so any failure is
        # downgraded to a warning and an empty result.
        print(f"Warning: AI analysis failed: {exc}", file=sys.stderr)
        return "", "", []

    if not (brief or detailed):
        print(" [AI] Warning: agent produced no output.", file=sys.stderr)

    print(" [AI] Investigation complete.", file=sys.stderr)
    return brief, detailed, flags
|