From efaa2024d76d77bc6880728e7870eb4405c6ead4 Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Sat, 11 Apr 2026 10:41:16 -0600 Subject: [PATCH] test(ai): cover _TokenTracker, _synthesize_from_cache, _discover_directories (#70) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second wave of pre-Phase-3 test coverage. The #55 round picked off the easy decision-logic helpers; this round covers the three highest-impact helpers that escaped the first sweep. Three new test classes appended to tests/test_ai_pure.py: - TestTokenTracker (11 tests) Pins the load-bearing #44 fix: budget_exceeded() must use last_input (the most recent call's context size) NOT cumulative input, because each turn's input_tokens already includes the full message history. Tests assert: cumulative-input far above budget does NOT trip the gate when last_input stays small; reset_loop() preserves grand totals; the boundary is strict > not >=. - TestSynthesizeFromCache (5 tests) The synthesis fallback fires only when _run_synthesis exhausts its max_turns, which almost never happens in normal runs — exactly the kind of code that silently rots. Tests assert: empty cache returns the incomplete-message brief and empty detailed; single dir entry produces a markdown line; multi-entry detailed contains all entries; empty-summary entries are skipped; file entries alone do not satisfy (the function reads dir entries only). - TestDiscoverDirectories (9 tests) The leaves-first walk drives the entire dir-loop iteration order and is the foundation of the cache reuse story. Tests assert: empty target returns target only; nested trees come back leaves- first; .git / __pycache__ / node_modules / *.egg-info excluded; custom --exclude honored; hidden dirs excluded by default; show_ hidden=True includes them but does not override the skip list. PLAN.md: added Phase 2.7 (#56 ✅) and Phase 2.8 (#55 ✅, #70) entries to the implementation order, and removed the now-stale Phase 3.4 (#56) and Background chore (#55) sections that were displaced by the pre-Phase-3 cleanup pattern. Verification: 234 tests pass (209 prior + 25 new). --- PLAN.md | 69 +++++----- tests/test_ai_pure.py | 297 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 334 insertions(+), 32 deletions(-) diff --git a/PLAN.md b/PLAN.md index b402a0e..cabb149 100644 --- a/PLAN.md +++ b/PLAN.md @@ -576,21 +576,45 @@ without 9 phases of rework. output size, redundant reads) before picking a fix. - Add token-usage instrumentation so regressions are visible. -### Phase 2.6 — Pre-Phase-3 cleanup (#54, #57) +### Phase 2.6 — Pre-Phase-3 cleanup (#54, #57) ✅ shipped Two debts surfaced during the Session 5 documentation deep dive that -should be paid before Phase 3 adds more state to the same code paths: +were paid before Phase 3 adds more state to the same code paths: -- **#54 — Phase 1 confidence-write path is dormant.** The cache schema - accepts `confidence` and `low_confidence_entries()` works, but no - prompt instructs the agent to set the field. Phase 1 is half-shipped. - Wire the prompt before Phase 8 (refinement) tries to consume the - signal — preferably now while Phase 1 context is still fresh. -- **#57 — Refactor `_run_dir_loop` before #10 lands.** The function is - at its readability ceiling (~160 lines, four conceptual layers). - Phase 3 #10 (dynamic turn allocation) will inject per-dir `max_turns` - and possibly mid-loop renegotiation. Extract `_build_dir_loop_context`, - `_check_budget_and_flush_partial`, and `_handle_turn_response` into - focused helpers first; refactoring after #10 will be harder. +- **#54** — Phase 1 confidence-write path was dormant. Cache schema + accepted `confidence` and `low_confidence_entries()` worked, but no + prompt instructed the agent to set the field. Wired in Session 8. +- **#57** — `_run_dir_loop` was ~160 lines holding four conceptual + layers. Refactored in Session 9 into three focused helpers + (`_build_dir_loop_context`, `_flush_partial_dir_entry`, + `_handle_turn_response`) so Phase 3 dynamic turn allocation has a + thin coordinator to inject into. + +### Phase 2.7 — Tool registration cleanup (#56) ✅ shipped +Adding a tool used to require updating `_TOOL_DISPATCH` and `_DIR_TOOLS` +in two separate places. Forgetting one half was silent. Replaced both +with a single `register_tool()` call per (tool, scope) in Session 9. +Phase 3.5 MCP backend will eventually replace this with dynamic +discovery, at which point `register_tool()` collapses to a one-line +forward. + +### Phase 2.8 — Pre-Phase-3 test coverage (#55, #70) +Safety nets for the helpers Phase 3 will pile state on top of. Two +waves: + +- **#55** ✅ shipped — `tests/test_ai_pure.py` covers the easy + decision-logic helpers: `_filter_dir_tools`, `_format_survey_block`, + `_format_survey_signals`, `_default_survey`, `_should_skip_dir`, + `_path_is_safe`, `_block_to_dict`, plus `_flush_partial_dir_entry` + from #57. 45 tests added in Session 9. +- **#70** — second wave covering the highest-impact remaining helpers + that escaped the first sweep: + - `_TokenTracker` — pins the load-bearing #44 fix + (`last_input` vs cumulative for budget decisions) + - `_synthesize_from_cache` — last-resort fallback that fires almost + never in normal runs and is therefore the kind of code that silently + rots + - `_discover_directories` — leaves-first walk and skip-dir filter, + foundation of the cache reuse story ### Phase 3 — Investigation planning - Planning pass after survey, before dir loops @@ -598,17 +622,6 @@ should be paid before Phase 3 adds more state to the same code paths: - Dynamic turn allocation based on plan - Dir loop orchestrator updated to follow plan -### Phase 3.4 — Tool registration cleanup (#56) -Adding a tool currently requires updating `_TOOL_DISPATCH` and -`_DIR_TOOLS` in two separate places in `ai.py`. Forgetting one half is -silent. A small `@dir_tool` decorator collapses this into one -declaration. **Decision point:** Phase 3.5 (MCP) will replace -`_TOOL_DISPATCH` with dynamic discovery from an MCP server, which makes -this issue partially moot. Either fix now (easier to migrate one -well-structured registry to MCP than two parallel structures) or defer -and let it die with the Python dispatch table. Decide before starting -3.5. - ### Phase 3.5 — MCP backend abstraction (pivot point) See Part 10 for full design. This phase happens *after* Phase 3 is working and *before* Phase 4. The goal is to migrate the filesystem @@ -671,14 +684,6 @@ architecture. The migration pain is intentional and instructive. - Report formatter renders populated fields only - Domain-appropriate section headers -### Background chore — Unit tests for pure helpers in ai.py (#55) -`ai.py` is documented as exempt from unit testing because the dir loop -needs a live API. But several pure helpers (`_filter_dir_tools`, -`_format_survey_block`, `_format_survey_signals`, `_default_survey`, -`_should_skip_dir`, `_path_is_safe`, `_block_to_dict`) have no API -dependency and can be tested directly. Low priority, not phase-blocking, -fold into any session that touches these helpers. - ### End-of-project tuning - **Honest terminal report file-type view (#49)** — the report still shows the bucketed `summarize_categories()` view, which collapses diff --git a/tests/test_ai_pure.py b/tests/test_ai_pure.py index 455bee5..fbc64b8 100644 --- a/tests/test_ai_pure.py +++ b/tests/test_ai_pure.py @@ -13,17 +13,21 @@ import unittest from types import SimpleNamespace from luminos_lib.ai import ( + CONTEXT_BUDGET, _DIR_TOOLS, _PROTECTED_DIR_TOOLS, _SURVEY_CONFIDENCE_THRESHOLD, + _TokenTracker, _block_to_dict, _default_survey, + _discover_directories, _filter_dir_tools, _flush_partial_dir_entry, _format_survey_block, _format_survey_signals, _path_is_safe, _should_skip_dir, + _synthesize_from_cache, ) from luminos_lib.cache import _CacheManager @@ -420,5 +424,298 @@ class TestFlushPartialDirEntry(unittest.TestCase): self.assertIn("subdir/important.py", entry["notable_files"]) +# --------------------------------------------------------------------------- +# _TokenTracker (added by #70) +# --------------------------------------------------------------------------- + +def _usage(input_tokens=0, output_tokens=0): + """Build a fake Anthropic SDK usage object for the tracker tests.""" + return SimpleNamespace( + input_tokens=input_tokens, output_tokens=output_tokens, + ) + + +class TestTokenTracker(unittest.TestCase): + def test_initial_state_is_zero(self): + t = _TokenTracker() + self.assertEqual(t.total_input, 0) + self.assertEqual(t.total_output, 0) + self.assertEqual(t.loop_input, 0) + self.assertEqual(t.loop_output, 0) + self.assertEqual(t.last_input, 0) + self.assertEqual(t.loop_total, 0) + self.assertFalse(t.budget_exceeded()) + + def test_record_updates_all_counters(self): + t = _TokenTracker() + t.record(_usage(input_tokens=1000, output_tokens=200)) + self.assertEqual(t.total_input, 1000) + self.assertEqual(t.total_output, 200) + self.assertEqual(t.loop_input, 1000) + self.assertEqual(t.loop_output, 200) + self.assertEqual(t.last_input, 1000) + + def test_loop_total_property(self): + t = _TokenTracker() + t.record(_usage(input_tokens=300, output_tokens=70)) + self.assertEqual(t.loop_total, 370) + + def test_record_with_missing_attrs_defaults_to_zero(self): + t = _TokenTracker() + t.record(SimpleNamespace()) + self.assertEqual(t.total_input, 0) + self.assertEqual(t.total_output, 0) + self.assertEqual(t.last_input, 0) + + def test_multiple_records_accumulate(self): + t = _TokenTracker() + t.record(_usage(input_tokens=500, output_tokens=100)) + t.record(_usage(input_tokens=700, output_tokens=200)) + self.assertEqual(t.total_input, 1200) + self.assertEqual(t.total_output, 300) + # last_input is the most recent call, NOT the cumulative sum. + self.assertEqual(t.last_input, 700) + + def test_reset_loop_zeros_loop_counters_preserves_totals(self): + t = _TokenTracker() + t.record(_usage(input_tokens=500, output_tokens=100)) + t.record(_usage(input_tokens=300, output_tokens=50)) + t.reset_loop() + # Loop counters cleared. + self.assertEqual(t.loop_input, 0) + self.assertEqual(t.loop_output, 0) + self.assertEqual(t.last_input, 0) + # Cumulative totals preserved. + self.assertEqual(t.total_input, 800) + self.assertEqual(t.total_output, 150) + + def test_grand_totals_accumulate_across_loops(self): + t = _TokenTracker() + t.record(_usage(input_tokens=500, output_tokens=100)) + t.reset_loop() + t.record(_usage(input_tokens=400, output_tokens=80)) + t.reset_loop() + t.record(_usage(input_tokens=200, output_tokens=40)) + self.assertEqual(t.total_input, 1100) + self.assertEqual(t.total_output, 220) + + def test_budget_exceeded_uses_last_input_not_cumulative(self): + # The load-bearing #44 fix: cumulative is meaningless because + # each turn's input_tokens already includes prior history. + t = _TokenTracker() + # Record many small calls whose CUMULATIVE input would exceed + # the budget but whose individual last_input stays small. + for _ in range(10): + t.record(_usage(input_tokens=CONTEXT_BUDGET // 5, output_tokens=0)) + self.assertGreater(t.total_input, CONTEXT_BUDGET) + # last_input is just the most recent call — well under budget. + self.assertEqual(t.last_input, CONTEXT_BUDGET // 5) + self.assertFalse(t.budget_exceeded()) + + def test_budget_exceeded_strict_greater_than(self): + # The gate is `>`, not `>=`. last_input == CONTEXT_BUDGET + # should NOT trip the budget. + t = _TokenTracker() + t.record(_usage(input_tokens=CONTEXT_BUDGET, output_tokens=0)) + self.assertFalse(t.budget_exceeded()) + + def test_budget_exceeded_one_over_trips(self): + t = _TokenTracker() + t.record(_usage(input_tokens=CONTEXT_BUDGET + 1, output_tokens=0)) + self.assertTrue(t.budget_exceeded()) + + def test_summary_returns_nonempty_string(self): + t = _TokenTracker() + t.record(_usage(input_tokens=1000, output_tokens=500)) + out = t.summary() + self.assertIsInstance(out, str) + self.assertIn("1,000", out) + self.assertIn("500", out) + self.assertIn("$", out) + + +# --------------------------------------------------------------------------- +# _synthesize_from_cache (added by #70) +# --------------------------------------------------------------------------- + +class TestSynthesizeFromCache(unittest.TestCase): + def setUp(self): + self.target = tempfile.mkdtemp(prefix="luminos-test-target-") + self.cache = _make_manager(self.target) + + def tearDown(self): + shutil.rmtree(self.target, ignore_errors=True) + + def test_empty_cache_returns_incomplete_message(self): + brief, detailed = _synthesize_from_cache(self.cache) + self.assertIn("incomplete", brief) + self.assertEqual(detailed, "") + + def test_single_dir_entry(self): + self.cache.write_entry("dir", "/x/auth", { + "path": "/x/auth", + "relative_path": "auth", + "child_count": 3, + "summary": "Authentication module.", + "dominant_category": "code", + "cached_at": "2026-04-11T00:00:00+00:00", + }) + brief, detailed = _synthesize_from_cache(self.cache) + self.assertEqual(brief, "Authentication module.") + self.assertIn("**auth/**", detailed) + self.assertIn("Authentication module.", detailed) + + def test_multiple_dir_entries_brief_is_first(self): + for rel, summary in [ + ("auth", "Auth code."), + ("db", "Database layer."), + ("api", "HTTP API."), + ]: + self.cache.write_entry("dir", f"/x/{rel}", { + "path": f"/x/{rel}", + "relative_path": rel, + "child_count": 1, + "summary": summary, + "dominant_category": "code", + "cached_at": "2026-04-11T00:00:00+00:00", + }) + brief, detailed = _synthesize_from_cache(self.cache) + # Brief is the first dir entry's summary. + self.assertIn(brief, {"Auth code.", "Database layer.", "HTTP API."}) + # Detailed includes all three with markdown formatting. + self.assertIn("Auth code.", detailed) + self.assertIn("Database layer.", detailed) + self.assertIn("HTTP API.", detailed) + self.assertIn("**auth/**", detailed) + self.assertIn("**db/**", detailed) + self.assertIn("**api/**", detailed) + + def test_dir_entries_with_empty_summary_are_skipped(self): + self.cache.write_entry("dir", "/x/empty", { + "path": "/x/empty", + "relative_path": "empty", + "child_count": 0, + "summary": "", + "dominant_category": "unknown", + "cached_at": "2026-04-11T00:00:00+00:00", + }) + self.cache.write_entry("dir", "/x/real", { + "path": "/x/real", + "relative_path": "real", + "child_count": 1, + "summary": "Real content.", + "dominant_category": "code", + "cached_at": "2026-04-11T00:00:00+00:00", + }) + brief, detailed = _synthesize_from_cache(self.cache) + self.assertEqual(brief, "Real content.") + self.assertNotIn("**empty/**", detailed) + self.assertIn("**real/**", detailed) + + def test_file_entries_alone_do_not_satisfy(self): + # _synthesize_from_cache reads dir entries only. File entries + # alone should produce the "incomplete" fallback. + self.cache.write_entry("file", "/x/foo.py", { + "path": "/x/foo.py", + "relative_path": "foo.py", + "size_bytes": 10, + "category": "code", + "summary": "A file.", + "cached_at": "2026-04-11T00:00:00+00:00", + }) + brief, detailed = _synthesize_from_cache(self.cache) + self.assertIn("incomplete", brief) + self.assertEqual(detailed, "") + + +# --------------------------------------------------------------------------- +# _discover_directories (added by #70) +# --------------------------------------------------------------------------- + +class TestDiscoverDirectories(unittest.TestCase): + def setUp(self): + self.target = tempfile.mkdtemp(prefix="luminos-test-target-") + + def tearDown(self): + shutil.rmtree(self.target, ignore_errors=True) + + def _mkdirs(self, *rels): + for rel in rels: + os.makedirs(os.path.join(self.target, rel), exist_ok=True) + + def test_empty_target_returns_target_only(self): + result = _discover_directories(self.target) + self.assertEqual(len(result), 1) + self.assertEqual(os.path.realpath(result[0]), + os.path.realpath(self.target)) + + def test_single_subdir(self): + self._mkdirs("sub") + result = _discover_directories(self.target) + self.assertEqual(len(result), 2) + # Leaf-first: "sub" (deeper) comes before target. + self.assertTrue(result[0].endswith("sub")) + + def test_leaves_first_ordering(self): + self._mkdirs("a/b/c", "a/d", "a/b/e") + result = _discover_directories(self.target) + # Compute depth (sep count) of each result. + depths = [d.count(os.sep) for d in result] + # Each successive entry should have depth <= the previous one. + for i in range(len(depths) - 1): + self.assertGreaterEqual( + depths[i], depths[i + 1], + f"Not leaves-first: {result}", + ) + # Verify all expected dirs are present. + rels = [os.path.relpath(d, self.target) for d in result] + for expected in ["a/b/c", "a/b/e", "a/d", "a/b", "a", "."]: + self.assertIn(expected, rels, f"missing {expected} from {rels}") + + def test_skip_dirs_excluded(self): + self._mkdirs(".git", "__pycache__", "node_modules", "src") + result = _discover_directories(self.target) + rels = [os.path.relpath(d, self.target) for d in result] + self.assertIn("src", rels) + self.assertNotIn(".git", rels) + self.assertNotIn("__pycache__", rels) + self.assertNotIn("node_modules", rels) + + def test_egg_info_glob_excluded(self): + self._mkdirs("luminos.egg-info", "src") + result = _discover_directories(self.target) + rels = [os.path.relpath(d, self.target) for d in result] + self.assertNotIn("luminos.egg-info", rels) + self.assertIn("src", rels) + + def test_custom_exclude_honored(self): + self._mkdirs("vendor", "src") + result = _discover_directories(self.target, exclude=["vendor"]) + rels = [os.path.relpath(d, self.target) for d in result] + self.assertNotIn("vendor", rels) + self.assertIn("src", rels) + + def test_hidden_dirs_excluded_by_default(self): + self._mkdirs(".hidden_thing", "visible") + result = _discover_directories(self.target) + rels = [os.path.relpath(d, self.target) for d in result] + self.assertNotIn(".hidden_thing", rels) + self.assertIn("visible", rels) + + def test_show_hidden_includes_dotdirs(self): + self._mkdirs(".hidden_thing", "visible") + result = _discover_directories(self.target, show_hidden=True) + rels = [os.path.relpath(d, self.target) for d in result] + self.assertIn(".hidden_thing", rels) + self.assertIn("visible", rels) + + def test_show_hidden_does_not_override_skip_list(self): + # .git is in _SKIP_DIRS so even with show_hidden=True it stays out. + self._mkdirs(".git") + result = _discover_directories(self.target, show_hidden=True) + rels = [os.path.relpath(d, self.target) for d in result] + self.assertNotIn(".git", rels) + + if __name__ == "__main__": unittest.main()