diff --git a/researchers/web/agent.py b/researchers/web/agent.py
index b3c0d1b..284b175 100644
--- a/researchers/web/agent.py
+++ b/researchers/web/agent.py
@@ -219,7 +219,24 @@ class WebResearcher:
         messages = [{"role": "user", "content": user_message}]
 
         # --- Tool-use loop ---
+        # Budget policy: token_budget is a soft cap. It is checked only at the
+        # top of each iteration, so one iteration may overshoot it; once the
+        # running total reaches the budget we stop looping and let synthesis
+        # run on the evidence gathered so far. Synthesis tokens are tracked
+        # but not capped here — the synthesis call is always allowed to
+        # complete so the caller gets a structured result, not a stub.
         while iterations < constraints.max_iterations:
+            if total_tokens >= constraints.token_budget:
+                budget_exhausted = True
+                trace.log_step(
+                    "budget_exhausted",
+                    decision=(
+                        f"Token budget reached before iteration "
+                        f"{iterations + 1}: {total_tokens}/{constraints.token_budget}"
+                    ),
+                )
+                break
+
             iterations += 1
 
             trace.log_step(
@@ -275,15 +292,6 @@ class WebResearcher:
             messages.append({"role": "assistant", "content": response.content})
             messages.append({"role": "user", "content": tool_results})
 
-            # Check token budget
-            if total_tokens >= constraints.token_budget:
-                budget_exhausted = True
-                trace.log_step(
-                    "budget_exhausted",
-                    decision=f"Token budget reached: {total_tokens}/{constraints.token_budget}",
-                )
-                break
-
         # --- Synthesis step ---
         trace.log_step(
             "synthesis_start",