refactor(ai): extract _run_dir_loop into three focused helpers (#57)

_run_dir_loop was ~160 lines holding four conceptual layers in one function: pre-loop setup, budget check + partial-flush, API call + response printing, and tool dispatch + done detection. Phase 3 dynamic turn allocation will inject more state into the same code path, so this debt is paid before that lands. Three new helpers above _run_dir_loop: - _build_dir_loop_context(): pure setup. Builds the dir context, child summaries, survey block, filtered tool list, system prompt, and seed user message. Returns a _DirLoopContext namedtuple. - _flush_partial_dir_entry(): idempotent partial-cache writer for the budget-exceeded path. Returns the partial summary string. Idempotent via cache.has_entry() guard, so callers can call it without checking. - _handle_turn_response(): per-turn response processing. Prints text blocks and tool decisions, appends the assistant message, dispatches tools (or nudges the agent to call submit_report), appends tool_results. Returns (done, summary). _run_dir_loop is now a ~25-line coordinator: build context, then for-loop calls budget check, API, and turn handler in sequence. No behavior change. 164 tests pass. Internals.md §4 updated for the new structure and the file:line refs that drifted.
2026-04-11 10:02:21 -06:00 · 2026-04-11 10:02:21 -06:00 · 427f66b488
commit 427f66b488
parent 68f327243c
1 changed files with 157 additions and 107 deletions
--- a/luminos_lib/ai.py
+++ b/luminos_lib/ai.py
@ -15,6 +15,7 @@ import json
 import os
 import subprocess
 import sys
 from collections import namedtuple
 from datetime import datetime, timezone
 import anthropic
@ -846,9 +847,18 @@ def _filter_dir_tools(survey):
    return [t for t in _DIR_TOOLS if t["name"] not in skip]
-def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
+_DirLoopContext = namedtuple(
-                  verbose=False, survey=None):
+    "_DirLoopContext", ["dir_rel", "system", "dir_tools", "messages"],
-    """Run an isolated agent loop for a single directory."""
+)
 def _build_dir_loop_context(dir_path, target, cache, survey, max_turns):
    """Assemble the static inputs the dir loop needs before its first turn.
    Pure data assembly: reads the cache for child summaries, builds the
    formatted system prompt, filters the tool list, and returns the seed
    user message. No writes.
    """
    dir_rel = os.path.relpath(dir_path, target)
    if dir_rel == ".":
        dir_rel = os.path.basename(target)
@ -878,131 +888,171 @@ def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
        },
    ]
    return _DirLoopContext(
        dir_rel=dir_rel, system=system, dir_tools=dir_tools, messages=messages,
    )
 def _flush_partial_dir_entry(dir_path, target, cache):
    """Write a partial dir cache entry from any already-cached file entries.
    Called when the per-loop context budget is exceeded before the agent
    reaches submit_report. Idempotent: returns "" without writing if a dir
    entry already exists. Returns the partial summary string (empty if no
    file entries were available to synthesize from).
    """
    if cache.has_entry("dir", dir_path):
        return ""
    dir_real = os.path.realpath(dir_path)
    file_entries = [
        e for e in cache.read_all_entries("file")
        if os.path.realpath(e.get("path", "")).startswith(dir_real + os.sep)
        or os.path.dirname(
            os.path.join(target, e.get("relative_path", ""))
        ) == dir_real
    ]
    if file_entries:
        file_summaries = [
            e["summary"] for e in file_entries if e.get("summary")
        ]
        notable = [
            e.get("relative_path", e.get("path", ""))
            for e in file_entries if e.get("notable")
        ]
        partial_summary = " ".join(file_summaries)
        cache.write_entry("dir", dir_path, {
            "path": dir_path,
            "relative_path": os.path.relpath(dir_path, target),
            "child_count": len([
                n for n in os.listdir(dir_path)
                if not n.startswith(".")
            ]) if os.path.isdir(dir_path) else 0,
            "summary": partial_summary,
            "dominant_category": "unknown",
            "notable_files": notable,
            "partial": True,
            "partial_reason": "context budget reached",
            "cached_at": _now_iso(),
        })
        return partial_summary
    cache.write_entry("dir", dir_path, {
        "path": dir_path,
        "relative_path": os.path.relpath(dir_path, target),
        "child_count": 0,
        "summary": ("Investigation incomplete — context budget "
                    "reached before any files were processed."),
        "dominant_category": "unknown",
        "notable_files": [],
        "partial": True,
        "partial_reason": (
            "context budget reached before files processed"),
        "cached_at": _now_iso(),
    })
    return ""
 def _handle_turn_response(content_blocks, messages, target, cache, dir_rel,
                          turn, verbose):
    """Process one turn's response: print, append, dispatch tools.
    Mutates `messages` in place: appends the assistant message, then either
    a "please call submit_report" nudge (no tool_uses) or the tool_results
    user message. Recognizes submit_report as the loop's done signal and
    extracts its summary. Returns (done, summary).
    """
    for b in content_blocks:
        if b.type == "text" and b.text.strip():
            for line in b.text.strip().split("\n"):
                print(f"  [AI]   {line}", file=sys.stderr)
    tool_uses = [b for b in content_blocks if b.type == "tool_use"]
    for tu in tool_uses:
        arg_summary = ", ".join(
            f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
        ) if tu.input else ""
        print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)
    messages.append({
        "role": "assistant",
        "content": [_block_to_dict(b) for b in content_blocks],
    })
    if not tool_uses:
        messages.append({
            "role": "user",
            "content": "Please call submit_report with your summary.",
        })
        return False, None
    tool_results = []
    done = False
    summary = None
    for tu in tool_uses:
        if tu.name == "submit_report":
            summary = tu.input.get("summary", "")
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tu.id,
                "content": "Summary submitted.",
            })
            done = True
        else:
            result_text = _execute_tool(
                tu.name, tu.input, target, cache, dir_rel,
                turn + 1, verbose=verbose,
            )
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": tu.id,
                "content": result_text,
            })
    messages.append({"role": "user", "content": tool_results})
    return done, summary
 def _run_dir_loop(client, target, cache, tracker, dir_path, max_turns=14,
                  verbose=False, survey=None):
    """Run an isolated agent loop for a single directory."""
    ctx = _build_dir_loop_context(
        dir_path, target, cache, survey, max_turns,
    )
    tracker.reset_loop()
    summary = None
    for turn in range(max_turns):
        # Check context budget
        if tracker.budget_exceeded():
            print(f"  [AI]   Context budget reached — exiting early "
                  f"(context size {tracker.last_input:,} > "
                  f"{CONTEXT_BUDGET:,} budget; "
                  f"loop spend {tracker.loop_total:,} tokens)",
                  file=sys.stderr)
-            # Flush a partial directory summary from cached file entries
+            partial = _flush_partial_dir_entry(dir_path, target, cache)
-            if not cache.has_entry("dir", dir_path):
+            if partial and not summary:
-                dir_real = os.path.realpath(dir_path)
+                summary = partial
                file_entries = [
                    e for e in cache.read_all_entries("file")
                    if os.path.realpath(e.get("path", "")).startswith(
                        dir_real + os.sep)
                    or os.path.dirname(
                        os.path.join(target, e.get("relative_path", ""))
                    ) == dir_real
                ]
                if file_entries:
                    file_summaries = [
                        e["summary"] for e in file_entries if e.get("summary")
                    ]
                    notable = [
                        e.get("relative_path", e.get("path", ""))
                        for e in file_entries if e.get("notable")
                    ]
                    partial_summary = " ".join(file_summaries)
                    cache.write_entry("dir", dir_path, {
                        "path": dir_path,
                        "relative_path": os.path.relpath(dir_path, target),
                        "child_count": len([
                            n for n in os.listdir(dir_path)
                            if not n.startswith(".")
                        ]) if os.path.isdir(dir_path) else 0,
                        "summary": partial_summary,
                        "dominant_category": "unknown",
                        "notable_files": notable,
                        "partial": True,
                        "partial_reason": "context budget reached",
                        "cached_at": _now_iso(),
                    })
                    if not summary:
                        summary = partial_summary
                else:
                    cache.write_entry("dir", dir_path, {
                        "path": dir_path,
                        "relative_path": os.path.relpath(dir_path, target),
                        "child_count": 0,
                        "summary": ("Investigation incomplete — context budget "
                                    "reached before any files were processed."),
                        "dominant_category": "unknown",
                        "notable_files": [],
                        "partial": True,
                        "partial_reason": (
                            "context budget reached before files processed"),
                        "cached_at": _now_iso(),
                    })
            break
        try:
-            content_blocks, usage = _call_api_streaming(
+            content_blocks, _usage = _call_api_streaming(
-                client, system, messages, dir_tools, tracker,
+                client, ctx.system, ctx.messages, ctx.dir_tools, tracker,
            )
        except anthropic.APIError as e:
            print(f"  [AI]   API error: {e}", file=sys.stderr)
            break
-        # Print text blocks (step numbering, reasoning) to stderr
+        done, turn_summary = _handle_turn_response(
-        for b in content_blocks:
+            content_blocks, ctx.messages, target, cache,
-            if b.type == "text" and b.text.strip():
+            ctx.dir_rel, turn, verbose,
-                for line in b.text.strip().split("\n"):
+        )
-                    print(f"  [AI]   {line}", file=sys.stderr)
+        if turn_summary is not None:
-
+            summary = turn_summary
        # Print tool decisions now that we have the full response
        tool_uses = [b for b in content_blocks if b.type == "tool_use"]
        for tu in tool_uses:
            arg_summary = ", ".join(
                f"{k}={v!r}" for k, v in tu.input.items() if k != "data"
            ) if tu.input else ""
            print(f"  [AI]   -> {tu.name}({arg_summary})", file=sys.stderr)
        messages.append({
            "role": "assistant",
            "content": [_block_to_dict(b) for b in content_blocks],
        })
        if not tool_uses:
            messages.append({
                "role": "user",
                "content": "Please call submit_report with your summary.",
            })
            continue
        tool_results = []
        done = False
        for tu in tool_uses:
            if tu.name == "submit_report":
                summary = tu.input.get("summary", "")
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": "Summary submitted.",
                })
                done = True
            else:
                result_text = _execute_tool(
                    tu.name, tu.input, target, cache, dir_rel,
                    turn + 1, verbose=verbose,
                )
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": tu.id,
                    "content": result_text,
                })
        messages.append({"role": "user", "content": tool_results})
        if done:
            break
    else:
-        print(f"  [AI]   Warning: max turns reached for {dir_rel}",
+        print(f"  [AI]   Warning: max turns reached for {ctx.dir_rel}",
              file=sys.stderr)
    return summary