marchwarden/researchers/web/models.py
Jeff Smith ae48acd421 depth flag now drives constraint defaults (#30)
Previously the depth parameter (shallow/balanced/deep) was passed
only as a text hint inside the agent's user message, with no
mechanical effect on iterations, token budget, or source count.
The flag was effectively cosmetic — the LLM was expected to
"interpret" it.

Add DEPTH_PRESETS table and constraints_for_depth() helper in
researchers.web.models:

  shallow:  2 iters,  5,000 tokens,  5 sources
  balanced: 5 iters, 20,000 tokens, 10 sources  (= historical defaults)
  deep:     8 iters, 60,000 tokens, 20 sources

Wired through the stack:

- WebResearcher.research(): when constraints is None, builds from
  the depth preset instead of bare ResearchConstraints()
- MCP server `research` tool: max_iterations and token_budget now
  default to None; constraints are built via constraints_for_depth
  with explicit values overriding the preset
- CLI `ask` command: --max-iterations and --budget default to None;
  the CLI only forwards them to the MCP tool when set, so unset
  flags fall through to the depth preset

balanced is unchanged from the historical defaults so existing
callers see no behavior difference. Explicit --max-iterations /
--budget always win over the preset.

Tests cover each preset's values, balanced backward-compat,
unknown depth fallback, full override, and partial override.
116/116 tests passing. Live-verified: --depth shallow on a simple
question now caps at 2 iterations and stays under budget.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 16:27:38 -06:00

301 lines
9.9 KiB
Python

"""Marchwarden Research Contract v1 — Pydantic models.
These models define the stable contract between a researcher MCP server
and its caller (PI agent or CLI shim). Changes to required fields or
types require a contract version bump.
"""
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Input types
# ---------------------------------------------------------------------------
class ResearchConstraints(BaseModel):
    """Fine-grained control over researcher behavior."""

    # NOTE: the defaults below (5 iterations / 20_000 tokens / 10 sources)
    # match the "balanced" entry in DEPTH_PRESETS, so a bare
    # ResearchConstraints() behaves like the balanced depth preset.

    # Hard cap on research-loop iterations (1..20).
    max_iterations: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Stop after N iterations, regardless of progress.",
    )
    # Soft cap — the loop checks this between iterations, so a single
    # iteration may overshoot slightly (hence "soft limit").
    token_budget: int = Field(
        default=20_000,
        ge=1_000,
        description="Soft limit on total tokens consumed by the research loop.",
    )
    # Cap on distinct sources fetched/extracted; no upper bound enforced here.
    max_sources: int = Field(
        default=10,
        ge=1,
        description="Maximum number of sources to fetch and extract.",
    )
    # Reserved for V2 domain filtering; currently just carried on the contract.
    source_filter: Optional[str] = Field(
        default=None,
        description="Restrict search to specific domains (V2). E.g. '.gov,.edu'.",
    )
# Depth presets: a depth name selects default values for the tunable
# ResearchConstraints fields. Explicit overrides (--max-iterations,
# --budget, or an explicit ResearchConstraints) always beat the preset.
#
# "balanced" mirrors the historical defaults, so pre-existing callers see
# no behavior change. "shallow" targets quick lookups, "deep" targets
# thorough investigation; both are starting points pending calibration
# from Phase 3 stress testing.

# Field names, in the order the preset tuples below list their values.
_PRESET_FIELDS = ("max_iterations", "token_budget", "max_sources")

DEPTH_PRESETS: dict[str, dict[str, int]] = {
    name: dict(zip(_PRESET_FIELDS, values))
    for name, values in {
        "shallow": (2, 5_000, 5),
        "balanced": (5, 20_000, 10),
        "deep": (8, 60_000, 20),
    }.items()
}
def constraints_for_depth(
    depth: str,
    *,
    max_iterations: Optional[int] = None,
    token_budget: Optional[int] = None,
    max_sources: Optional[int] = None,
) -> ResearchConstraints:
    """Build a ResearchConstraints from a depth preset, with optional overrides.

    Any non-None override wins over the preset value. Unknown depths
    fall back to ``balanced``.
    """
    # Copy the preset defensively so the shared DEPTH_PRESETS table is
    # never mutated; unrecognized depth names resolve to "balanced".
    values = dict(DEPTH_PRESETS.get(depth, DEPTH_PRESETS["balanced"]))
    # Fold in only the overrides the caller actually supplied (non-None).
    overrides = {
        "max_iterations": max_iterations,
        "token_budget": token_budget,
        "max_sources": max_sources,
    }
    values.update({name: value for name, value in overrides.items() if value is not None})
    return ResearchConstraints(**values)
# ---------------------------------------------------------------------------
# Output types — Citation
# ---------------------------------------------------------------------------
class Citation(BaseModel):
    """A single source used by the researcher, with raw evidence."""

    # Broad source kind; `locator` below identifies the concrete target.
    source: str = Field(
        description="Source type: 'web', 'file', 'database', etc.",
    )
    locator: str = Field(
        description="URL, file path, row ID, or unique identifier.",
    )
    title: Optional[str] = Field(
        default=None,
        description="Human-readable title (for web sources).",
    )
    snippet: Optional[str] = Field(
        default=None,
        description="Researcher's summary of relevant content (50-200 chars).",
    )
    # Deliberately verbatim (not summarized) so the caller can audit claims
    # without trusting the researcher's own synthesis.
    raw_excerpt: str = Field(
        description=(
            "Verbatim text from the source (up to 500 chars). "
            "Bypasses researcher synthesis to prevent the Synthesis Paradox."
        ),
    )
    # Per-source confidence, distinct from ResearchResult.confidence.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Researcher's confidence in this source's accuracy.",
    )
# ---------------------------------------------------------------------------
# Output types — Gap
# ---------------------------------------------------------------------------
class GapCategory(str, Enum):
    """Categorized reason a gap exists. Drives PI decision-making."""

    # str-valued members so they serialize as their literal strings; these
    # values are part of the contract (see module docstring) — do not rename.
    SOURCE_NOT_FOUND = "source_not_found"
    ACCESS_DENIED = "access_denied"
    BUDGET_EXHAUSTED = "budget_exhausted"
    CONTRADICTORY_SOURCES = "contradictory_sources"
    SCOPE_EXCEEDED = "scope_exceeded"
class Gap(BaseModel):
    """An unresolved aspect of the research question."""

    topic: str = Field(
        description="What aspect wasn't resolved.",
    )
    # Machine-readable cause; `detail` carries the free-text explanation.
    category: GapCategory = Field(
        description="Structured reason category.",
    )
    detail: str = Field(
        description="Human-readable explanation of why this gap exists.",
    )
# ---------------------------------------------------------------------------
# Output types — DiscoveryEvent
# ---------------------------------------------------------------------------
class DiscoveryEvent(BaseModel):
    """A lateral finding relevant to another researcher's domain."""

    # Free-form type tag; the description lists the known values.
    type: str = Field(
        description="Event type: 'related_research', 'new_source', 'contradiction'.",
    )
    suggested_researcher: Optional[str] = Field(
        default=None,
        description="Target researcher type: 'arxiv', 'database', 'legal', etc.",
    )
    query: str = Field(
        description="Suggested query for the target researcher.",
    )
    reason: str = Field(
        description="Why this is relevant to the overall investigation.",
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="Where the discovery was found (URL, DOI, etc.).",
    )
# ---------------------------------------------------------------------------
# Output types — OpenQuestion
# ---------------------------------------------------------------------------
class OpenQuestion(BaseModel):
    """A follow-up question that emerged from the research.

    Distinct from gaps (what failed) and discovery events (what's lateral).
    Open questions look forward: "based on what I found, this needs deeper
    investigation." The PI uses these to decide whether to dispatch
    additional research calls.
    """

    question: str = Field(
        description="The follow-up question that emerged from the research.",
    )
    context: str = Field(
        description="What evidence or finding prompted this question.",
    )
    # Plain str rather than an enum; expected values are documented below.
    priority: str = Field(
        description="'high' (critical to answer quality), 'medium' (would improve answer), 'low' (nice to know).",
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="URL or source where this question arose from.",
    )
# ---------------------------------------------------------------------------
# Output types — Confidence
# ---------------------------------------------------------------------------
class ConfidenceFactors(BaseModel):
    """Inputs that fed the confidence score. Enables auditability and future calibration."""

    num_corroborating_sources: int = Field(
        ge=0,
        description="How many sources agree on the core claims.",
    )
    # Coarse three-level scale; levels are defined in the description.
    source_authority: str = Field(
        description="'high' (.gov, .edu, peer-reviewed), 'medium' (established orgs), 'low' (blogs, forums).",
    )
    contradiction_detected: bool = Field(
        description="Were conflicting claims found across sources?",
    )
    query_specificity_match: float = Field(
        ge=0.0,
        le=1.0,
        description="How well the results address the actual question (0.0-1.0).",
    )
    # Mirrors CostMetadata.budget_exhausted; kept here so confidence inputs
    # are self-contained for calibration.
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # None means the researcher could not determine source age.
    recency: Optional[str] = Field(
        default=None,
        description="'current' (< 1yr), 'recent' (1-3yr), 'dated' (> 3yr), None if unknown.",
    )
# ---------------------------------------------------------------------------
# Output types — CostMetadata
# ---------------------------------------------------------------------------
class CostMetadata(BaseModel):
    """Resource usage for a single research call."""

    tokens_used: int = Field(
        ge=0,
        description="Total tokens consumed (Claude + search API calls).",
    )
    iterations_run: int = Field(
        ge=0,
        description="Number of inner-loop iterations completed.",
    )
    wall_time_sec: float = Field(
        ge=0.0,
        description="Actual elapsed wall-clock time in seconds.",
    )
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    model_id: str = Field(
        description="Model used for the research loop (e.g. 'claude-sonnet-4-6').",
    )
# ---------------------------------------------------------------------------
# Top-level output
# ---------------------------------------------------------------------------
class ResearchResult(BaseModel):
    """Complete result from a single research() call. This is the contract."""

    answer: str = Field(
        description="The synthesized answer. Every claim must trace to a citation.",
    )
    # List fields use default_factory so an empty result is constructible
    # and instances never share a mutable default.
    citations: list[Citation] = Field(
        default_factory=list,
        description="Sources used, with raw evidence.",
    )
    gaps: list[Gap] = Field(
        default_factory=list,
        description="What couldn't be resolved, categorized by cause.",
    )
    discovery_events: list[DiscoveryEvent] = Field(
        default_factory=list,
        description="Lateral findings for other researchers.",
    )
    open_questions: list[OpenQuestion] = Field(
        default_factory=list,
        description="Follow-up questions that emerged from the research.",
    )
    # Aggregate score; the per-input breakdown lives in confidence_factors.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Overall confidence in the answer (0.0-1.0).",
    )
    confidence_factors: ConfidenceFactors = Field(
        description="What fed the confidence score.",
    )
    cost_metadata: CostMetadata = Field(
        description="Resource usage for this research call.",
    )
    trace_id: str = Field(
        description="UUID linking to the JSONL trace log.",
    )