marchwarden/researchers/web/models.py
Jeff Smith ae48acd421 depth flag now drives constraint defaults (#30)
Previously the depth parameter (shallow/balanced/deep) was passed
only as a text hint inside the agent's user message, with no
mechanical effect on iterations, token budget, or source count.
The flag was effectively cosmetic — the LLM was expected to
"interpret" it.

Add DEPTH_PRESETS table and constraints_for_depth() helper in
researchers.web.models:

  shallow:  2 iters,  5,000 tokens,  5 sources
  balanced: 5 iters, 20,000 tokens, 10 sources  (= historical defaults)
  deep:     8 iters, 60,000 tokens, 20 sources

Wired through the stack:

- WebResearcher.research(): when constraints is None, builds from
  the depth preset instead of bare ResearchConstraints()
- MCP server `research` tool: max_iterations and token_budget now
  default to None; constraints are built via constraints_for_depth
  with explicit values overriding the preset
- CLI `ask` command: --max-iterations and --budget default to None;
  the CLI only forwards them to the MCP tool when set, so unset
  flags fall through to the depth preset

balanced is unchanged from the historical defaults so existing
callers see no behavior difference. Explicit --max-iterations /
--budget always win over the preset.

Tests cover each preset's values, balanced backward-compat,
unknown depth fallback, full override, and partial override.
116/116 tests passing. Live-verified: --depth shallow on a simple
question now caps at 2 iterations and stays under budget.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 16:27:38 -06:00

301 lines
9.9 KiB
Python

"""Marchwarden Research Contract v1 — Pydantic models.
These models define the stable contract between a researcher MCP server
and its caller (PI agent or CLI shim). Changes to required fields or
types require a contract version bump.
"""
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Input types
# ---------------------------------------------------------------------------
class ResearchConstraints(BaseModel):
    """Fine-grained control over researcher behavior."""

    # NOTE: the defaults below (5 iterations / 20_000 tokens / 10 sources)
    # match the "balanced" entry in DEPTH_PRESETS, so a bare
    # ResearchConstraints() behaves like the balanced depth preset.

    # Hard cap on research-loop iterations (1..20).
    max_iterations: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Stop after N iterations, regardless of progress.",
    )
    # Soft cap — the loop checks this between iterations, so a single
    # iteration may overshoot slightly (hence "soft limit").
    token_budget: int = Field(
        default=20_000,
        ge=1_000,
        description="Soft limit on total tokens consumed by the research loop.",
    )
    # Cap on distinct sources fetched/extracted; no upper bound enforced here.
    max_sources: int = Field(
        default=10,
        ge=1,
        description="Maximum number of sources to fetch and extract.",
    )
    # Reserved for V2 domain filtering; currently just carried on the contract.
    source_filter: Optional[str] = Field(
        default=None,
        description="Restrict search to specific domains (V2). E.g. '.gov,.edu'.",
    )
# Depth presets: a depth name selects default values for the tunable
# ResearchConstraints fields. Explicit overrides (--max-iterations,
# --budget, or an explicit ResearchConstraints) always beat the preset.
#
# "balanced" mirrors the historical defaults, so pre-existing callers see
# no behavior change. "shallow" targets quick lookups, "deep" targets
# thorough investigation; both are starting points pending calibration
# from Phase 3 stress testing.

# Field names, in the order the preset tuples below list their values.
_PRESET_FIELDS = ("max_iterations", "token_budget", "max_sources")

DEPTH_PRESETS: dict[str, dict[str, int]] = {
    name: dict(zip(_PRESET_FIELDS, values))
    for name, values in {
        "shallow": (2, 5_000, 5),
        "balanced": (5, 20_000, 10),
        "deep": (8, 60_000, 20),
    }.items()
}
def constraints_for_depth(
    depth: str,
    *,
    max_iterations: Optional[int] = None,
    token_budget: Optional[int] = None,
    max_sources: Optional[int] = None,
) -> ResearchConstraints:
    """Build a ResearchConstraints from a depth preset, with optional overrides.

    Any non-None override wins over the preset value. Unknown depths
    fall back to ``balanced``.
    """
    # Copy the preset defensively so the shared DEPTH_PRESETS table is
    # never mutated; unrecognized depth names resolve to "balanced".
    values = dict(DEPTH_PRESETS.get(depth, DEPTH_PRESETS["balanced"]))
    # Fold in only the overrides the caller actually supplied (non-None).
    overrides = {
        "max_iterations": max_iterations,
        "token_budget": token_budget,
        "max_sources": max_sources,
    }
    values.update({name: value for name, value in overrides.items() if value is not None})
    return ResearchConstraints(**values)
# ---------------------------------------------------------------------------
# Output types — Citation
# ---------------------------------------------------------------------------
class Citation(BaseModel):
    """A single source used by the researcher, with raw evidence."""

    # Broad source kind; `locator` below identifies the concrete target.
    source: str = Field(
        description="Source type: 'web', 'file', 'database', etc.",
    )
    locator: str = Field(
        description="URL, file path, row ID, or unique identifier.",
    )
    title: Optional[str] = Field(
        default=None,
        description="Human-readable title (for web sources).",
    )
    snippet: Optional[str] = Field(
        default=None,
        description="Researcher's summary of relevant content (50-200 chars).",
    )
    # Deliberately verbatim (not summarized) so the caller can audit claims
    # without trusting the researcher's own synthesis.
    raw_excerpt: str = Field(
        description=(
            "Verbatim text from the source (up to 500 chars). "
            "Bypasses researcher synthesis to prevent the Synthesis Paradox."
        ),
    )
    # Per-source confidence, distinct from ResearchResult.confidence.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Researcher's confidence in this source's accuracy.",
    )
# ---------------------------------------------------------------------------
# Output types — Gap
# ---------------------------------------------------------------------------
class GapCategory(str, Enum):
    """Categorized reason a gap exists. Drives PI decision-making."""

    # str-valued members so they serialize as their literal strings; these
    # values are part of the contract (see module docstring) — do not rename.
    SOURCE_NOT_FOUND = "source_not_found"
    ACCESS_DENIED = "access_denied"
    BUDGET_EXHAUSTED = "budget_exhausted"
    CONTRADICTORY_SOURCES = "contradictory_sources"
    SCOPE_EXCEEDED = "scope_exceeded"
class Gap(BaseModel):
    """An unresolved aspect of the research question."""

    topic: str = Field(
        description="What aspect wasn't resolved.",
    )
    # Machine-readable cause; `detail` carries the free-text explanation.
    category: GapCategory = Field(
        description="Structured reason category.",
    )
    detail: str = Field(
        description="Human-readable explanation of why this gap exists.",
    )
# ---------------------------------------------------------------------------
# Output types — DiscoveryEvent
# ---------------------------------------------------------------------------
class DiscoveryEvent(BaseModel):
    """A lateral finding relevant to another researcher's domain."""

    # Free-form type tag; the description lists the known values.
    type: str = Field(
        description="Event type: 'related_research', 'new_source', 'contradiction'.",
    )
    suggested_researcher: Optional[str] = Field(
        default=None,
        description="Target researcher type: 'arxiv', 'database', 'legal', etc.",
    )
    query: str = Field(
        description="Suggested query for the target researcher.",
    )
    reason: str = Field(
        description="Why this is relevant to the overall investigation.",
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="Where the discovery was found (URL, DOI, etc.).",
    )
# ---------------------------------------------------------------------------
# Output types — OpenQuestion
# ---------------------------------------------------------------------------
class OpenQuestion(BaseModel):
    """A follow-up question that emerged from the research.

    Distinct from gaps (what failed) and discovery events (what's lateral).
    Open questions look forward: "based on what I found, this needs deeper
    investigation." The PI uses these to decide whether to dispatch
    additional research calls.
    """

    question: str = Field(
        description="The follow-up question that emerged from the research.",
    )
    context: str = Field(
        description="What evidence or finding prompted this question.",
    )
    # Plain str rather than an enum; expected values are documented below.
    priority: str = Field(
        description="'high' (critical to answer quality), 'medium' (would improve answer), 'low' (nice to know).",
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="URL or source where this question arose from.",
    )
# ---------------------------------------------------------------------------
# Output types — Confidence
# ---------------------------------------------------------------------------
class ConfidenceFactors(BaseModel):
    """Inputs that fed the confidence score. Enables auditability and future calibration."""

    num_corroborating_sources: int = Field(
        ge=0,
        description="How many sources agree on the core claims.",
    )
    # Coarse three-level scale; levels are defined in the description.
    source_authority: str = Field(
        description="'high' (.gov, .edu, peer-reviewed), 'medium' (established orgs), 'low' (blogs, forums).",
    )
    contradiction_detected: bool = Field(
        description="Were conflicting claims found across sources?",
    )
    query_specificity_match: float = Field(
        ge=0.0,
        le=1.0,
        description="How well the results address the actual question (0.0-1.0).",
    )
    # Mirrors CostMetadata.budget_exhausted; kept here so confidence inputs
    # are self-contained for calibration.
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # None means the researcher could not determine source age.
    recency: Optional[str] = Field(
        default=None,
        description="'current' (< 1yr), 'recent' (1-3yr), 'dated' (> 3yr), None if unknown.",
    )
# ---------------------------------------------------------------------------
# Output types — CostMetadata
# ---------------------------------------------------------------------------
class CostMetadata(BaseModel):
    """Resource usage for a single research call."""

    tokens_used: int = Field(
        ge=0,
        description="Total tokens consumed (Claude + search API calls).",
    )
    iterations_run: int = Field(
        ge=0,
        description="Number of inner-loop iterations completed.",
    )
    wall_time_sec: float = Field(
        ge=0.0,
        description="Actual elapsed wall-clock time in seconds.",
    )
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    model_id: str = Field(
        description="Model used for the research loop (e.g. 'claude-sonnet-4-6').",
    )
# ---------------------------------------------------------------------------
# Top-level output
# ---------------------------------------------------------------------------
class ResearchResult(BaseModel):
    """Complete result from a single research() call. This is the contract."""

    answer: str = Field(
        description="The synthesized answer. Every claim must trace to a citation.",
    )
    # List fields use default_factory so an empty result is constructible
    # and instances never share a mutable default.
    citations: list[Citation] = Field(
        default_factory=list,
        description="Sources used, with raw evidence.",
    )
    gaps: list[Gap] = Field(
        default_factory=list,
        description="What couldn't be resolved, categorized by cause.",
    )
    discovery_events: list[DiscoveryEvent] = Field(
        default_factory=list,
        description="Lateral findings for other researchers.",
    )
    open_questions: list[OpenQuestion] = Field(
        default_factory=list,
        description="Follow-up questions that emerged from the research.",
    )
    # Aggregate score; the per-input breakdown lives in confidence_factors.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Overall confidence in the answer (0.0-1.0).",
    )
    confidence_factors: ConfidenceFactors = Field(
        description="What fed the confidence score.",
    )
    cost_metadata: CostMetadata = Field(
        description="Resource usage for this research call.",
    )
    trace_id: str = Field(
        description="UUID linking to the JSONL trace log.",
    )