"""Marchwarden Research Contract v1 — Pydantic models.

The models in this module are the stable contract between a researcher
MCP server and whatever calls it (a PI agent or the CLI shim). Changing a
required field or a field type requires a contract version bump.
"""

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field


# ===========================================================================
# Input types
# ===========================================================================

# NOTE: the class docstring and the Field descriptions are kept
# word-for-word; Pydantic can surface both in generated JSON schema.
class ResearchConstraints(BaseModel):
    """Fine-grained control over researcher behavior."""

    # Iteration cap: bounded to the range 1..20, defaults to 5.
    max_iterations: int = Field(
        5,
        ge=1,
        le=20,
        description="Stop after N iterations, regardless of progress.",
    )

    # Token allowance: at least 1,000, defaults to 20,000.
    token_budget: int = Field(
        20_000,
        ge=1_000,
        description="Soft limit on total tokens consumed by the research loop.",
    )

    # Source cap: at least 1, defaults to 10.
    max_sources: int = Field(
        10,
        ge=1,
        description="Maximum number of sources to fetch and extract.",
    )

    # Optional domain restriction; None means no filter was supplied.
    source_filter: Optional[str] = Field(
        None,
        description="Restrict search to specific domains (V2). E.g. '.gov,.edu'.",
    )


# Depth presets: selecting a depth supplies default values for the
# constraint fields. Anything passed explicitly (--max-iterations,
# --budget, or an explicit ResearchConstraints) takes precedence over
# the preset.
#
# `balanced` mirrors the historical defaults, so existing callers see no
# change in behavior. `shallow` is tuned for quick lookups and `deep` for
# thorough investigations. Treat these values as starting points;
# Phase 3 stress testing will inform calibration.
# Preset name -> constraint field values. Every preset supplies the same
# three keys, which are ResearchConstraints field names.
DEPTH_PRESETS: dict[str, dict[str, int]] = {
    "shallow": {
        "max_iterations": 2,
        "token_budget": 5_000,
        "max_sources": 5,
    },
    "balanced": {
        "max_iterations": 5,
        "token_budget": 20_000,
        "max_sources": 10,
    },
    "deep": {
        "max_iterations": 8,
        "token_budget": 60_000,
        "max_sources": 20,
    },
}


def constraints_for_depth(
    depth: str,
    *,
    max_iterations: Optional[int] = None,
    token_budget: Optional[int] = None,
    max_sources: Optional[int] = None,
) -> ResearchConstraints:
    """Return the ResearchConstraints for *depth*, applying explicit overrides.

    The starting values come from the matching ``DEPTH_PRESETS`` entry; a
    depth that is not a key there uses the ``balanced`` preset instead.
    Each keyword argument that is not None replaces the preset value of
    the field with the same name.
    """
    base = DEPTH_PRESETS.get(depth, DEPTH_PRESETS["balanced"])
    candidates = {
        "max_iterations": max_iterations,
        "token_budget": token_budget,
        "max_sources": max_sources,
    }
    # Only values the caller actually supplied may shadow the preset.
    explicit = {
        name: value for name, value in candidates.items() if value is not None
    }
    # Merging into a new dict leaves the shared preset table untouched.
    return ResearchConstraints(**{**base, **explicit})


# NOTE: throughout the models below, class docstrings and Field
# descriptions are kept word-for-word; Pydantic can surface both in
# generated JSON schema.

# ===========================================================================
# Output types: Citation
# ===========================================================================


class Citation(BaseModel):
    """A single source used by the researcher, with raw evidence."""

    # Required: kind of source and where to find it.
    source: str = Field(
        description="Source type: 'web', 'file', 'database', etc.",
    )
    locator: str = Field(
        description="URL, file path, row ID, or unique identifier.",
    )

    # Optional presentation fields; both default to None.
    title: Optional[str] = Field(
        None,
        description="Human-readable title (for web sources).",
    )
    snippet: Optional[str] = Field(
        None,
        description="Researcher's summary of relevant content (50-200 chars).",
    )

    # Required verbatim evidence and a score constrained to [0.0, 1.0].
    raw_excerpt: str = Field(
        description=(
            "Verbatim text from the source (up to 500 chars). Bypasses "
            "researcher synthesis to prevent the Synthesis Paradox."
        ),
    )
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Researcher's confidence in this source's accuracy.",
    )


# ===========================================================================
# Output types: Gap
# ===========================================================================


class GapCategory(str, Enum):
    """Categorized reason a gap exists. Drives PI decision-making."""

    # Subclassing str means each member is also a plain string value.
    SOURCE_NOT_FOUND = "source_not_found"
    ACCESS_DENIED = "access_denied"
    BUDGET_EXHAUSTED = "budget_exhausted"
    CONTRADICTORY_SOURCES = "contradictory_sources"
    SCOPE_EXCEEDED = "scope_exceeded"


class Gap(BaseModel):
    """An unresolved aspect of the research question."""

    # All three fields are required.
    topic: str = Field(description="What aspect wasn't resolved.")
    category: GapCategory = Field(description="Structured reason category.")
    detail: str = Field(
        description="Human-readable explanation of why this gap exists.",
    )


# ===========================================================================
# Output types: DiscoveryEvent
# ===========================================================================


class DiscoveryEvent(BaseModel):
    """A lateral finding relevant to another researcher's domain."""

    type: str = Field(
        description="Event type: 'related_research', 'new_source', 'contradiction'.",
    )
    # Optional routing hint; defaults to None.
    suggested_researcher: Optional[str] = Field(
        None,
        description="Target researcher type: 'arxiv', 'database', 'legal', etc.",
    )
    query: str = Field(
        description="Suggested query for the target researcher.",
    )
    reason: str = Field(
        description="Why this is relevant to the overall investigation.",
    )
    # Optional provenance; defaults to None.
    source_locator: Optional[str] = Field(
        None,
        description="Where the discovery was found (URL, DOI, etc.).",
    )


# ===========================================================================
# Output types: OpenQuestion
# ===========================================================================


class OpenQuestion(BaseModel):
    """A follow-up question that emerged from the research.

    Distinct from gaps (what failed) and discovery events (what's lateral).
    Open questions look forward: "based on what I found, this needs deeper
    investigation." The PI uses these to decide whether to dispatch
    additional research calls.
    """

    question: str = Field(
        description="The follow-up question that emerged from the research.",
    )
    context: str = Field(
        description="What evidence or finding prompted this question.",
    )
    # Free-form string; the description lists the expected labels.
    priority: str = Field(
        description=(
            "'high' (critical to answer quality), "
            "'medium' (would improve answer), "
            "'low' (nice to know)."
        ),
    )
    # Optional provenance; defaults to None.
    source_locator: Optional[str] = Field(
        None,
        description="URL or source where this question arose from.",
    )


# ===========================================================================
# Output types: Confidence
# ===========================================================================


class ConfidenceFactors(BaseModel):
    """Inputs that fed the confidence score. Enables auditability and future calibration."""

    # Non-negative count.
    num_corroborating_sources: int = Field(
        ge=0,
        description="How many sources agree on the core claims.",
    )
    # Free-form string; the description lists the expected labels.
    source_authority: str = Field(
        description=(
            "'high' (.gov, .edu, peer-reviewed), "
            "'medium' (established orgs), "
            "'low' (blogs, forums)."
        ),
    )
    contradiction_detected: bool = Field(
        description="Were conflicting claims found across sources?",
    )
    # Score constrained to [0.0, 1.0].
    query_specificity_match: float = Field(
        ge=0.0,
        le=1.0,
        description="How well the results address the actual question (0.0-1.0).",
    )
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # Optional; defaults to None.
    recency: Optional[str] = Field(
        None,
        description=(
            "'current' (< 1yr), 'recent' (1-3yr), "
            "'dated' (> 3yr), None if unknown."
        ),
    )


# ===========================================================================
# Output types: CostMetadata
# ===========================================================================


class CostMetadata(BaseModel):
    """Resource usage for a single research call."""

    # The three numeric fields are all constrained to be non-negative.
    tokens_used: int = Field(
        ge=0,
        description="Total tokens consumed (Claude + search API calls).",
    )
    iterations_run: int = Field(
        ge=0,
        description="Number of inner-loop iterations completed.",
    )
    wall_time_sec: float = Field(
        ge=0.0,
        description="Actual elapsed wall-clock time in seconds.",
    )
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    model_id: str = Field(
        description="Model used for the research loop (e.g. 'claude-sonnet-4-6').",
    )


# ===========================================================================
# Top-level output
# ===========================================================================


class ResearchResult(BaseModel):
    """Complete result from a single research() call. This is the contract."""

    answer: str = Field(
        description="The synthesized answer. Every claim must trace to a citation.",
    )

    # The four collections default to a fresh empty list per instance.
    citations: list[Citation] = Field(
        default_factory=list,
        description="Sources used, with raw evidence.",
    )
    gaps: list[Gap] = Field(
        default_factory=list,
        description="What couldn't be resolved, categorized by cause.",
    )
    discovery_events: list[DiscoveryEvent] = Field(
        default_factory=list,
        description="Lateral findings for other researchers.",
    )
    open_questions: list[OpenQuestion] = Field(
        default_factory=list,
        description="Follow-up questions that emerged from the research.",
    )

    # Required score constrained to [0.0, 1.0], plus its audit inputs.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Overall confidence in the answer (0.0-1.0).",
    )
    confidence_factors: ConfidenceFactors = Field(
        description="What fed the confidence score.",
    )

    # Required accounting and trace linkage.
    cost_metadata: CostMetadata = Field(
        description="Resource usage for this research call.",
    )
    trace_id: str = Field(
        description="UUID linking to the JSONL trace log.",
    )