From 6ff1a6af3d01bc68db66a33c128ab562d8aa29fa Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Wed, 8 Apr 2026 15:29:22 -0600 Subject: [PATCH] Enforce token_budget before each iteration (#17) The loop previously checked the token budget at the *bottom* of each iteration, after the LLM call and tool work had already happened. By the time the cap was caught, the budget had already been exceeded, and the size of the overshoot was limited only by how much that iteration happened to cost. Move the check to the *top* of the loop so a new iteration is never started past the budget. Document the policy explicitly: token_budget is a soft cap on the tool-use loop only; the synthesis call is always allowed to complete so callers get a structured ResearchResult rather than a fallback stub. Capping synthesis is a separate, larger design question (would require splitting the budget between loop and synthesis up-front). Verified: token_budget=5000, max_iterations=10 now stops after 2 iterations with budget_exhausted=True and a complete answer with 10 citations. Co-Authored-By: Claude Opus 4.6 (1M context) --- researchers/web/agent.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/researchers/web/agent.py b/researchers/web/agent.py index b3c0d1b..284b175 100644 --- a/researchers/web/agent.py +++ b/researchers/web/agent.py @@ -219,7 +219,24 @@ class WebResearcher: messages = [{"role": "user", "content": user_message}] # --- Tool-use loop --- + # Budget policy: the loop honors token_budget as a soft cap. Before + # starting a new iteration we check whether we've already hit the + # budget; if so we stop and let synthesis run on whatever evidence + # we already have. Synthesis tokens are tracked but not capped here + # — the synthesis call is always allowed to complete so the caller + # gets a structured result rather than a stub. 
while iterations < constraints.max_iterations: + if total_tokens >= constraints.token_budget: + budget_exhausted = True + trace.log_step( + "budget_exhausted", + decision=( + f"Token budget reached before iteration " + f"{iterations + 1}: {total_tokens}/{constraints.token_budget}" + ), + ) + break + iterations += 1 trace.log_step( @@ -275,15 +292,6 @@ class WebResearcher: messages.append({"role": "assistant", "content": response.content}) messages.append({"role": "user", "content": tool_results}) - # Check token budget - if total_tokens >= constraints.token_budget: - budget_exhausted = True - trace.log_step( - "budget_exhausted", - decision=f"Token budget reached: {total_tokens}/{constraints.token_budget}", - ) - break - # --- Synthesis step --- trace.log_step( "synthesis_start", -- 2.45.2