From 2e3d21f7742a3ead63e81441e8aea280d730974e Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Mon, 6 Apr 2026 22:07:12 -0600 Subject: [PATCH] feat(ai): wire survey output into dir loop (#6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The survey pass now actually steers dir-loop behavior, in two ways: 1. Prompt injection: a new {survey_context} placeholder in _DIR_SYSTEM_PROMPT receives the survey description, approach, domain_notes, relevant_tools, and skip_tools so the dir-loop agent has investigation context before its first turn. 2. Tool schema filtering: _filter_dir_tools() removes any tool listed in skip_tools from the schema passed to the API, gated on survey confidence >= 0.5. Control-flow tools (submit_report) are always preserved. This is hard enforcement — the agent literally cannot call a filtered tool, which the smoke test for #5 showed was necessary (prompt-only guidance was ignored). Smoke test on luminos_lib: zero run_command invocations (vs 2 before), context budget no longer exhausted (87k vs 133k), cost ~$0.34 (vs $0.46), investigation completes instead of early-exiting. Adds tests/test_ai_filter.py with 14 tests covering _filter_dir_tools and _format_survey_block — both pure helpers, no live API needed. 
--- luminos_lib/ai.py | 54 ++++++++++++++++++- luminos_lib/prompts.py | 3 ++ tests/test_ai_filter.py | 112 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 tests/test_ai_filter.py diff --git a/luminos_lib/ai.py b/luminos_lib/ai.py index 38c707c..bc330a1 100644 --- a/luminos_lib/ai.py +++ b/luminos_lib/ai.py @@ -753,8 +753,54 @@ def _get_child_summaries(dir_path, cache): return "\n".join(parts) if parts else "(none — this is a leaf directory)" +_SURVEY_CONFIDENCE_THRESHOLD = 0.5 +_PROTECTED_DIR_TOOLS = {"submit_report"} + + +def _format_survey_block(survey): + """Render survey output as a labeled text block for the dir prompt.""" + if not survey: + return "(no survey available)" + lines = [ + f"Description: {survey.get('description', '')}", + f"Approach: {survey.get('approach', '')}", + ] + notes = survey.get("domain_notes", "") + if notes: + lines.append(f"Domain notes: {notes}") + relevant = survey.get("relevant_tools") or [] + if relevant: + lines.append(f"Relevant tools (lean on these): {', '.join(relevant)}") + skip = survey.get("skip_tools") or [] + if skip: + lines.append(f"Skip tools (already removed from your toolbox): " + f"{', '.join(skip)}") + return "\n".join(lines) + + +def _filter_dir_tools(survey): + """Return _DIR_TOOLS with skip_tools removed, gated on confidence. + + - Returns full list if survey is None or confidence < threshold. + - Always preserves control-flow tools in _PROTECTED_DIR_TOOLS. + - Tool names in skip_tools that don't match anything are silently ignored. 
+ """ + if not survey: + return list(_DIR_TOOLS) + try: + confidence = float(survey.get("confidence", 0.0) or 0.0) + except (TypeError, ValueError): + confidence = 0.0 + if confidence < _SURVEY_CONFIDENCE_THRESHOLD: + return list(_DIR_TOOLS) + skip = set(survey.get("skip_tools") or []) - _PROTECTED_DIR_TOOLS + if not skip: + return list(_DIR_TOOLS) + return [t for t in _DIR_TOOLS if t["name"] not in skip] + + def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14, - verbose=False): + verbose=False, survey=None): """Run an isolated agent loop for a single directory.""" dir_rel = os.path.relpath(dir_path, target) if dir_rel == ".": @@ -762,6 +808,8 @@ def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14, context = _build_dir_context(dir_path) child_summaries = _get_child_summaries(dir_path, cache) + survey_context = _format_survey_block(survey) + dir_tools = _filter_dir_tools(survey) system = _DIR_SYSTEM_PROMPT.format( dir_path=dir_path, @@ -769,6 +817,7 @@ def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14, max_turns=max_turns, context=context, child_summaries=child_summaries, + survey_context=survey_context, ) messages = [ @@ -844,7 +893,7 @@ def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14, try: content_blocks, usage = _call_api_streaming( - client, system, messages, _DIR_TOOLS, tracker, + client, system, messages, dir_tools, tracker, ) except anthropic.APIError as e: print(f" [AI] API error: {e}", file=sys.stderr) @@ -1229,6 +1278,7 @@ def _run_investigation(client, target, report, show_hidden=False, summary = _run_dir_loop( client, target, cache, tracker, dir_path, verbose=verbose, + survey=survey, ) if summary and not cache.has_entry("dir", dir_path): diff --git a/luminos_lib/prompts.py b/luminos_lib/prompts.py index c695d3f..eb49100 100644 --- a/luminos_lib/prompts.py +++ b/luminos_lib/prompts.py @@ -73,6 +73,9 @@ why you are uncertain (e.g. 
"binary file, content not readable", "file truncated at max_bytes"). Do NOT set confidence_reason when confidence is 0.7 or above. +## Survey Context +{survey_context} + ## Context {context} diff --git a/tests/test_ai_filter.py b/tests/test_ai_filter.py new file mode 100644 index 0000000..b396710 --- /dev/null +++ b/tests/test_ai_filter.py @@ -0,0 +1,112 @@ +"""Tests for the pure helpers in ai.py that don't require a live API.""" + +import unittest +from unittest.mock import MagicMock +import sys + + +def _import_ai(): + # Stub heavy/optional deps so ai.py imports cleanly in unit tests. + for mod in ("anthropic", "magic"): + if mod not in sys.modules: + sys.modules[mod] = MagicMock() + if "luminos_lib.ast_parser" not in sys.modules: + stub = MagicMock() + stub.parse_structure = MagicMock() + sys.modules["luminos_lib.ast_parser"] = stub + from luminos_lib import ai + return ai + + +ai = _import_ai() + + +class FilterDirToolsTests(unittest.TestCase): + def setUp(self): + self.all_names = {t["name"] for t in ai._DIR_TOOLS} + + def _names(self, tools): + return {t["name"] for t in tools} + + def test_none_survey_returns_full_list(self): + self.assertEqual(self._names(ai._filter_dir_tools(None)), self.all_names) + + def test_low_confidence_returns_full_list(self): + survey = {"confidence": 0.3, "skip_tools": ["run_command"]} + self.assertEqual(self._names(ai._filter_dir_tools(survey)), self.all_names) + + def test_high_confidence_drops_skip_tools(self): + survey = {"confidence": 0.9, "skip_tools": ["run_command"]} + result = self._names(ai._filter_dir_tools(survey)) + self.assertNotIn("run_command", result) + self.assertEqual(result, self.all_names - {"run_command"}) + + def test_threshold_boundary_inclusive(self): + survey = {"confidence": 0.5, "skip_tools": ["run_command"]} + result = self._names(ai._filter_dir_tools(survey)) + self.assertNotIn("run_command", result) + + def test_protected_tool_never_dropped(self): + survey = {"confidence": 1.0, "skip_tools": 
["submit_report", "run_command"]} + result = self._names(ai._filter_dir_tools(survey)) + self.assertIn("submit_report", result) + self.assertNotIn("run_command", result) + + def test_unknown_tool_in_skip_is_ignored(self): + survey = {"confidence": 0.9, "skip_tools": ["nonexistent_tool"]} + self.assertEqual(self._names(ai._filter_dir_tools(survey)), self.all_names) + + def test_empty_skip_tools_returns_full_list(self): + survey = {"confidence": 0.9, "skip_tools": []} + self.assertEqual(self._names(ai._filter_dir_tools(survey)), self.all_names) + + def test_missing_confidence_treated_as_zero(self): + survey = {"skip_tools": ["run_command"]} + self.assertEqual(self._names(ai._filter_dir_tools(survey)), self.all_names) + + def test_garbage_confidence_treated_as_zero(self): + survey = {"confidence": "not a number", "skip_tools": ["run_command"]} + self.assertEqual(self._names(ai._filter_dir_tools(survey)), self.all_names) + + def test_multiple_skip_tools(self): + survey = { + "confidence": 0.9, + "skip_tools": ["run_command", "parse_structure"], + } + result = self._names(ai._filter_dir_tools(survey)) + self.assertNotIn("run_command", result) + self.assertNotIn("parse_structure", result) + + +class FormatSurveyBlockTests(unittest.TestCase): + def test_none_returns_placeholder(self): + self.assertIn("no survey", ai._format_survey_block(None).lower()) + + def test_includes_description_and_approach(self): + block = ai._format_survey_block({ + "description": "A Python lib", "approach": "read modules", + "confidence": 0.9, + }) + self.assertIn("A Python lib", block) + self.assertIn("read modules", block) + + def test_includes_skip_tools_when_present(self): + block = ai._format_survey_block({ + "description": "x", "approach": "y", + "skip_tools": ["run_command"], "confidence": 0.9, + }) + self.assertIn("run_command", block) + + def test_omits_empty_optional_fields(self): + block = ai._format_survey_block({ + "description": "x", "approach": "y", + "domain_notes": "", 
"relevant_tools": [], "skip_tools": [], + "confidence": 0.9, + }) + self.assertNotIn("Domain notes:", block) + self.assertNotIn("Relevant tools", block) + self.assertNotIn("Skip tools", block) + + +if __name__ == "__main__": + unittest.main()