From 8fb2f90678a3afbeb75eb7cd8f83f520a15f6367 Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Mon, 6 Apr 2026 22:19:25 -0600 Subject: [PATCH] feat(ai): skip survey pass for tiny targets (#7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a gate in _run_investigation that skips the survey API call when a target has both fewer than _SURVEY_MIN_FILES (5) files AND fewer than _SURVEY_MIN_DIRS (2) directories. AND semantics handle the deep-narrow edge case correctly: a target with 4 files spread across 50 directories still gets a survey because dir count amortizes the cost across 50 dir loops. When skipped, _default_survey() supplies a synthetic dict with confidence=0.0 — chosen specifically so _filter_dir_tools() never enforces skip_tools from a synthetic value. The dir loop receives a generic "small target, read everything" framing in its prompt and keeps its full toolbox. Reorders _discover_directories() to run before the survey gate so total_dirs is available without a second walk. #46 tracks revisiting the threshold values with empirical data after Phase 2 ships and we've run --ai on a variety of real targets. Smoke tested on a 2-file target: gate triggers, default survey substituted, dir loop completes normally. Adds 4 unit tests for _default_survey() covering schema, confidence guard, filter interaction, and empty skip_tools. --- luminos_lib/ai.py | 44 ++++++++++++++++++++++++++++++++++++----- tests/test_ai_filter.py | 24 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/luminos_lib/ai.py b/luminos_lib/ai.py index bc330a1..4115957 100644 --- a/luminos_lib/ai.py +++ b/luminos_lib/ai.py @@ -756,6 +756,30 @@ def _get_child_summaries(dir_path, cache): _SURVEY_CONFIDENCE_THRESHOLD = 0.5 _PROTECTED_DIR_TOOLS = {"submit_report"} +# Survey-skip thresholds. Skip the survey only when BOTH are below. +# See #46 for the plan to revisit these with empirical data. +_SURVEY_MIN_FILES = 5 +_SURVEY_MIN_DIRS = 2 + + +def _default_survey(): + """Synthetic survey for targets too small to justify the API call. + + confidence=0.0 ensures _filter_dir_tools() never enforces skip_tools + based on this synthetic value — the dir loop keeps its full toolbox. + """ + return { + "description": "Small target — survey skipped.", + "approach": ( + "The target is small enough to investigate exhaustively. " + "Read every file directly." + ), + "relevant_tools": [], + "skip_tools": [], + "domain_notes": "", + "confidence": 0.0, + } + def _format_survey_block(survey): """Render survey output as a labeled text block for the dir prompt.""" @@ -1228,8 +1252,21 @@ def _run_investigation(client, target, report, show_hidden=False, f"{'' if is_new else ' (resumed)'}", file=sys.stderr) print(f" [AI] Cache: {cache.root}/", file=sys.stderr) - print(" [AI] Survey pass...", file=sys.stderr) - survey = _run_survey(client, target, report, tracker, verbose=verbose) + all_dirs = _discover_directories(target, show_hidden=show_hidden, + exclude=exclude) + + total_files = sum((report.get("file_categories") or {}).values()) + total_dirs = len(all_dirs) + if total_files < _SURVEY_MIN_FILES and total_dirs < _SURVEY_MIN_DIRS: + print( + f" [AI] Survey skipped — {total_files} files, {total_dirs} dirs " + f"(below threshold).", + file=sys.stderr, + ) + survey = _default_survey() + else: + print(" [AI] Survey pass...", file=sys.stderr) + survey = _run_survey(client, target, report, tracker, verbose=verbose) if survey: print( f" [AI] Survey: {survey['description']} " @@ -1251,9 +1288,6 @@ def _run_investigation(client, target, report, show_hidden=False, else: print(" [AI] Survey unavailable — proceeding without it.", file=sys.stderr) - all_dirs = _discover_directories(target, show_hidden=show_hidden, - exclude=exclude) - to_investigate = [] cached_count = 0 for d in all_dirs: diff --git a/tests/test_ai_filter.py b/tests/test_ai_filter.py index b396710..466755d 100644 --- a/tests/test_ai_filter.py +++ b/tests/test_ai_filter.py @@ -108,5 +108,29 @@ class FormatSurveyBlockTests(unittest.TestCase): self.assertNotIn("Skip tools", block) +class DefaultSurveyTests(unittest.TestCase): + def test_has_all_required_keys(self): + survey = ai._default_survey() + for key in ("description", "approach", "relevant_tools", + "skip_tools", "domain_notes", "confidence"): + self.assertIn(key, survey) + + def test_confidence_below_filter_threshold(self): + # Must be < _SURVEY_CONFIDENCE_THRESHOLD so _filter_dir_tools() + # never enforces skip_tools from a synthetic survey. + self.assertLess( + ai._default_survey()["confidence"], + ai._SURVEY_CONFIDENCE_THRESHOLD, + ) + + def test_filter_returns_full_toolbox_for_default(self): + all_names = {t["name"] for t in ai._DIR_TOOLS} + result = {t["name"] for t in ai._filter_dir_tools(ai._default_survey())} + self.assertEqual(result, all_names) + + def test_skip_tools_is_empty(self): + self.assertEqual(ai._default_survey()["skip_tools"], []) + + if __name__ == "__main__": unittest.main() -- 2.45.2