Compare commits

..

2 commits

Author SHA1 Message Date
c19a161a62 Merge pull request 'Fix synthesis truncation and trace masking' (#20) from fix/synthesis-truncation into main
Reviewed-on: #20
Reviewed-by: archeious <archeious@unbiasedgeek.com>
2026-04-08 21:24:41 +00:00
Jeff Smith
7956bf4873 Fix synthesis truncation and trace masking (#16, #19)
The synthesis step was passing max_tokens=4096 to Claude, which was
not enough for a full ResearchResult JSON over a real evidence set
(28 sources). The model's output got cut mid-string, json.loads
failed, and the agent fell back to a stub answer with zero citations.

The trace logger then truncated the raw_response to 1000 chars before
recording it, hiding the actual reason for the parse failure (the
truncated JSON suffix) and making the bug invisible from traces.

Fixes:
- Bump synthesis max_tokens to 16384
- Capture and log Claude's stop_reason on synthesis_error so future
  truncation cases are diagnosable from the trace alone
- Log the parser exception text alongside the raw_response
- Stop slicing raw_response — record the full string

Verified end-to-end against the Utah crops question:
- Before: 0 citations, confidence 0.10, fallback stub
- After:  9 citations, confidence 0.88, real synthesized answer

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 15:23:03 -06:00

View file

@@ -448,7 +448,7 @@ class WebResearcher:
 response = self.client.messages.create(
 model=self.model_id,
-max_tokens=4096,
+max_tokens=16384,
 messages=[{"role": "user", "content": prompt}],
 )
@@ -457,6 +457,7 @@ class WebResearcher:
 # Parse the JSON response
 raw_text = response.content[0].text.strip()
+stop_reason = response.stop_reason
 # Strip markdown fences if the model added them despite instructions
 if raw_text.startswith("```"):
 raw_text = raw_text.split("\n", 1)[1] if "\n" in raw_text else raw_text[3:]
@@ -465,11 +466,16 @@ class WebResearcher:
 try:
 data = json.loads(raw_text)
-except json.JSONDecodeError:
+except json.JSONDecodeError as parse_err:
 trace.log_step(
 "synthesis_error",
-decision="Failed to parse synthesis JSON, returning fallback",
-raw_response=raw_text[:1000],
+decision=(
+f"Failed to parse synthesis JSON ({parse_err}); "
+f"stop_reason={stop_reason}"
+),
+stop_reason=stop_reason,
+parse_error=str(parse_err),
+raw_response=raw_text,
 )
 return self._fallback_result(
 question, evidence, trace, total_tokens, iterations,