Enforce token_budget before each iteration #22
1 changed file with 17 additions and 9 deletions
|
|
@@ -219,7 +219,24 @@ class WebResearcher:
|
|||
messages = [{"role": "user", "content": user_message}]
|
||||
|
||||
# --- Tool-use loop ---
|
||||
# Budget policy: the loop honors token_budget as a soft cap. Before
|
||||
# starting a new iteration we check whether we've already hit the
|
||||
# budget; if so we stop and let synthesis run on whatever evidence
|
||||
# we already have. Synthesis tokens are tracked but not capped here
|
||||
# — the synthesis call is always allowed to complete so the caller
|
||||
# gets a structured result rather than a stub.
|
||||
while iterations < constraints.max_iterations:
|
||||
if total_tokens >= constraints.token_budget:
|
||||
budget_exhausted = True
|
||||
trace.log_step(
|
||||
"budget_exhausted",
|
||||
decision=(
|
||||
f"Token budget reached before iteration "
|
||||
f"{iterations + 1}: {total_tokens}/{constraints.token_budget}"
|
||||
),
|
||||
)
|
||||
break
|
||||
|
||||
iterations += 1
|
||||
|
||||
trace.log_step(
|
||||
|
|
@@ -275,15 +292,6 @@ class WebResearcher:
|
|||
messages.append({"role": "assistant", "content": response.content})
|
||||
messages.append({"role": "user", "content": tool_results})
|
||||
|
||||
# Check token budget
|
||||
if total_tokens >= constraints.token_budget:
|
||||
budget_exhausted = True
|
||||
trace.log_step(
|
||||
"budget_exhausted",
|
||||
decision=f"Token budget reached: {total_tokens}/{constraints.token_budget}",
|
||||
)
|
||||
break
|
||||
|
||||
# --- Synthesis step ---
|
||||
trace.log_step(
|
||||
"synthesis_start",
|
||||
|
|
|
|||
Loading…
Reference in a new issue