From 7956bf4873604ec26df416fa00a05f883d10e1a5 Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Wed, 8 Apr 2026 15:23:03 -0600 Subject: [PATCH] Fix synthesis truncation and trace masking (#16, #19) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The synthesis step was passing max_tokens=4096 to Claude, which was not enough for a full ResearchResult JSON over a real evidence set (28 sources). The model's output got cut mid-string, json.loads failed, and the agent fell back to a stub answer with zero citations. The trace logger then truncated the raw_response to 1000 chars before recording it, hiding the actual reason for the parse failure (the truncated JSON suffix) and making the bug invisible from traces. Fixes: - Bump synthesis max_tokens to 16384 - Capture and log Claude's stop_reason on synthesis_error so future truncation cases are diagnosable from the trace alone - Log the parser exception text alongside the raw_response - Stop slicing raw_response — record the full string Verified end-to-end against the Utah crops question: - Before: 0 citations, confidence 0.10, fallback stub - After: 9 citations, confidence 0.88, real synthesized answer Co-Authored-By: Claude Opus 4.6 (1M context) --- researchers/web/agent.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/researchers/web/agent.py b/researchers/web/agent.py index e2944fc..391e106 100644 --- a/researchers/web/agent.py +++ b/researchers/web/agent.py @@ -448,7 +448,7 @@ class WebResearcher: response = self.client.messages.create( model=self.model_id, - max_tokens=4096, + max_tokens=16384, messages=[{"role": "user", "content": prompt}], ) @@ -457,6 +457,7 @@ class WebResearcher: # Parse the JSON response raw_text = response.content[0].text.strip() + stop_reason = response.stop_reason # Strip markdown fences if the model added them despite instructions if raw_text.startswith("```"): raw_text = raw_text.split("\n", 1)[1] if "\n" in raw_text else raw_text[3:] @@ -465,11 +466,16 @@ class WebResearcher: try: data = json.loads(raw_text) - except json.JSONDecodeError: + except json.JSONDecodeError as parse_err: trace.log_step( "synthesis_error", - decision="Failed to parse synthesis JSON, returning fallback", - raw_response=raw_text[:1000], + decision=( + f"Failed to parse synthesis JSON ({parse_err}); " + f"stop_reason={stop_reason}" + ), + stop_reason=stop_reason, + parse_error=str(parse_err), + raw_response=raw_text, ) return self._fallback_result( question, evidence, trace, total_tokens, iterations, -- 2.45.2