Compare commits

..

2 commits

Author SHA1 Message Date
712638fe8c Merge pull request 'Enforce token_budget before each iteration' (#22) from fix/budget-enforcement into main
Reviewed-on: #22
Reviewed-by: archeious <archeious@unbiasedgeek.com>
2026-04-08 21:30:26 +00:00
Jeff Smith
6ff1a6af3d Enforce token_budget before each iteration (#17)
The loop previously checked the token budget at the *bottom* of each
iteration, after the LLM call and tool work had already happened. By
the time the cap was caught, the budget had already been exceeded, and
the overshoot could be as large as that iteration's full cost.

Move the check to the *top* of the loop so a new iteration is never
started past the budget. Document the policy explicitly: token_budget
is a soft cap on the tool-use loop only; the synthesis call is always
allowed to complete so callers get a structured ResearchResult rather
than a fallback stub. Capping synthesis is a separate, larger design
question (it would require splitting the budget between the loop and
synthesis up front).

Verified: token_budget=5000, max_iterations=10 now stops after 2
iterations with budget_exhausted=True and a complete answer with
10 citations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 15:29:22 -06:00

View file

@@ -219,7 +219,24 @@ class WebResearcher:
messages = [{"role": "user", "content": user_message}]
# --- Tool-use loop ---
# Budget policy: the loop honors token_budget as a soft cap. Before
# starting a new iteration we check whether we've already hit the
# budget; if so we stop and let synthesis run on whatever evidence
# we already have. Synthesis tokens are tracked but not capped here
# — the synthesis call is always allowed to complete so the caller
# gets a structured result rather than a stub.
while iterations < constraints.max_iterations:
if total_tokens >= constraints.token_budget:
budget_exhausted = True
trace.log_step(
"budget_exhausted",
decision=(
f"Token budget reached before iteration "
f"{iterations + 1}: {total_tokens}/{constraints.token_budget}"
),
)
break
iterations += 1
trace.log_step(
@@ -275,15 +292,6 @@ class WebResearcher:
messages.append({"role": "assistant", "content": response.content})
messages.append({"role": "user", "content": tool_results})
# Check token budget
if total_tokens >= constraints.token_budget:
budget_exhausted = True
trace.log_step(
"budget_exhausted",
decision=f"Token budget reached: {total_tokens}/{constraints.token_budget}",
)
break
# --- Synthesis step ---
trace.log_step(
"synthesis_start",