From 6ff1a6af3d01bc68db66a33c128ab562d8aa29fa Mon Sep 17 00:00:00 2001
From: Jeff Smith <jeff@unbiasedgeek.com>
Date: Wed, 8 Apr 2026 15:29:22 -0600
Subject: [PATCH] Enforce token_budget before each iteration (#17)

The loop previously checked the token budget at the *bottom* of each
iteration, after the LLM call and tool work had already happened. By
the time the cap was detected, the budget had already been exceeded,
and the overshoot could be as large as that iteration's entire cost.

Move the check to the *top* of the loop so a new iteration is never
started once the budget has been reached. Document the policy
explicitly: token_budget is a soft cap on the tool-use loop only; the
synthesis call is always allowed to complete so callers get a
structured ResearchResult rather than a fallback stub. Capping
synthesis is a separate, larger design question (it would require
splitting the budget between the loop and synthesis up front).

Verified: a run with token_budget=5000 and max_iterations=10 now stops
after 2 iterations with budget_exhausted=True and still returns a
complete answer with 10 citations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 researchers/web/agent.py | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/researchers/web/agent.py b/researchers/web/agent.py
index b3c0d1b..284b175 100644
--- a/researchers/web/agent.py
+++ b/researchers/web/agent.py
@@ -219,7 +219,24 @@ class WebResearcher:
         messages = [{"role": "user", "content": user_message}]
 
         # --- Tool-use loop ---
+        # Budget policy: the loop honors token_budget as a soft cap. Before
+        # starting a new iteration we check whether we've already hit the
+        # budget; if so we stop and let synthesis run on whatever evidence
+        # we already have. Synthesis tokens are tracked but not capped here
+        # — the synthesis call is always allowed to complete so the caller
+        # gets a structured result rather than a stub.
         while iterations < constraints.max_iterations:
+            if total_tokens >= constraints.token_budget:
+                budget_exhausted = True
+                trace.log_step(
+                    "budget_exhausted",
+                    decision=(
+                        f"Token budget reached before iteration "
+                        f"{iterations + 1}: {total_tokens}/{constraints.token_budget}"
+                    ),
+                )
+                break
+
             iterations += 1
 
             trace.log_step(
@@ -275,15 +292,6 @@ class WebResearcher:
             messages.append({"role": "assistant", "content": response.content})
             messages.append({"role": "user", "content": tool_results})
 
-            # Check token budget
-            if total_tokens >= constraints.token_budget:
-                budget_exhausted = True
-                trace.log_step(
-                    "budget_exhausted",
-                    decision=f"Token budget reached: {total_tokens}/{constraints.token_budget}",
-                )
-                break
-
         # --- Synthesis step ---
         trace.log_step(
             "synthesis_start",
-- 
2.45.2