marchwarden/researchers/web/models.py

"""Marchwarden Research Contract v1 — Pydantic models.

These models define the stable contract between a researcher MCP server
and its caller (PI agent or CLI shim). Changes to required fields or
types require a contract version bump.
"""

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# Input types
# ---------------------------------------------------------------------------


class ResearchConstraints(BaseModel):
    """Fine-grained control over researcher behavior."""

    # Every field below declares a default, so the model can be constructed
    # with no arguments. Only the numeric bounds (ge/le) are validated here;
    # how the research loop applies these limits is not visible in this file.

    # Iteration cap: defaults to 5, validated to the inclusive range 1..20.
    max_iterations: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Stop after N iterations, regardless of progress.",
    )
    # Token budget: defaults to 20_000 with a floor of 1_000 and no declared
    # upper bound. The description calls it a soft limit.
    token_budget: int = Field(
        default=20_000,
        ge=1_000,
        description="Soft limit on total tokens consumed by the research loop.",
    )
    # Source cap: defaults to 10, must be at least 1, no declared upper bound.
    max_sources: int = Field(
        default=10,
        ge=1,
        description="Maximum number of sources to fetch and extract.",
    )
    # Optional free-form string; no format validation is declared for the
    # comma-separated domain list shown in the description.
    source_filter: Optional[str] = Field(
        default=None,
        description="Restrict search to specific domains (V2). E.g. '.gov,.edu'.",
    )


# ---------------------------------------------------------------------------
# Output types — Citation
# ---------------------------------------------------------------------------


class Citation(BaseModel):
    """A single source used by the researcher, with raw evidence."""

    # Required fields: source, locator, raw_excerpt, confidence.
    # Optional fields (default None): title, snippet.

    # Free-form string; the source types listed in the description are
    # examples, not a validated set.
    source: str = Field(
        description="Source type: 'web', 'file', 'database', etc.",
    )
    # Free-form string; no URL/path validation is declared.
    locator: str = Field(
        description="URL, file path, row ID, or unique identifier.",
    )
    title: Optional[str] = Field(
        default=None,
        description="Human-readable title (for web sources).",
    )
    # The 50-200 character range is documentation only; no min/max length
    # constraint is declared on this field.
    snippet: Optional[str] = Field(
        default=None,
        description="Researcher's summary of relevant content (50-200 chars).",
    )
    # Required. The 500-character limit in the description is not enforced
    # by a max_length constraint here.
    raw_excerpt: str = Field(
        description=(
            "Verbatim text from the source (up to 500 chars). "
            "Bypasses researcher synthesis to prevent the Synthesis Paradox."
        ),
    )
    # Required; validated to the inclusive range 0.0..1.0.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Researcher's confidence in this source's accuracy.",
    )


# ---------------------------------------------------------------------------
# Output types — Gap
# ---------------------------------------------------------------------------


class GapCategory(str, Enum):
    """Categorized reason a gap exists. Drives PI decision-making."""

    # The str mixin makes every member a str instance as well, so a member
    # compares equal to its value (GapCategory.ACCESS_DENIED == "access_denied").
    # Each value is the lower-case form of its member name.
    SOURCE_NOT_FOUND = "source_not_found"
    ACCESS_DENIED = "access_denied"
    BUDGET_EXHAUSTED = "budget_exhausted"
    CONTRADICTORY_SOURCES = "contradictory_sources"
    SCOPE_EXCEEDED = "scope_exceeded"


class Gap(BaseModel):
    """An unresolved aspect of the research question."""

    # All three fields are required (no defaults are declared).

    topic: str = Field(
        description="What aspect wasn't resolved.",
    )
    # Restricted to the members of GapCategory, unlike the free-form string
    # fields around it.
    category: GapCategory = Field(
        description="Structured reason category.",
    )
    detail: str = Field(
        description="Human-readable explanation of why this gap exists.",
    )


# ---------------------------------------------------------------------------
# Output types — DiscoveryEvent
# ---------------------------------------------------------------------------


class DiscoveryEvent(BaseModel):
    """A lateral finding relevant to another researcher's domain."""

    # Required fields: type, query, reason.
    # Optional fields (default None): suggested_researcher, source_locator.

    # Free-form string: the three event types named in the description are
    # not validated. The field name shadows the builtin `type` within this
    # class body only.
    type: str = Field(
        description="Event type: 'related_research', 'new_source', 'contradiction'.",
    )
    # Free-form string; the researcher types in the description are examples.
    suggested_researcher: Optional[str] = Field(
        default=None,
        description="Target researcher type: 'arxiv', 'database', 'legal', etc.",
    )
    query: str = Field(
        description="Suggested query for the target researcher.",
    )
    reason: str = Field(
        description="Why this is relevant to the overall investigation.",
    )
    # Free-form string; no URL/DOI validation is declared.
    source_locator: Optional[str] = Field(
        default=None,
        description="Where the discovery was found (URL, DOI, etc.).",
    )


# ---------------------------------------------------------------------------
# Output types — OpenQuestion
# ---------------------------------------------------------------------------


class OpenQuestion(BaseModel):
    """A follow-up question that emerged from the research.

    Distinct from gaps (what failed) and discovery events (what's lateral).
    Open questions look forward: "based on what I found, this needs deeper
    investigation." The PI uses these to decide whether to dispatch
    additional research calls.
    """

    # Required fields: question, context, priority.
    # Optional field (default None): source_locator.

    question: str = Field(
        description="The follow-up question that emerged from the research.",
    )
    context: str = Field(
        description="What evidence or finding prompted this question.",
    )
    # Typed as a plain str: the 'high' / 'medium' / 'low' levels appear only
    # in the description and are not enforced by validation.
    priority: str = Field(
        description="'high' (critical to answer quality), 'medium' (would improve answer), 'low' (nice to know).",
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="URL or source where this question arose from.",
    )


# ---------------------------------------------------------------------------
# Output types — Confidence
# ---------------------------------------------------------------------------


class ConfidenceFactors(BaseModel):
    """Inputs that fed the confidence score. Enables auditability and future calibration."""

    # Every field except `recency` is required. This model only records the
    # factors; it does not compute a confidence score from them.

    # Non-negative count (ge=0); no upper bound is declared.
    num_corroborating_sources: int = Field(
        ge=0,
        description="How many sources agree on the core claims.",
    )
    # Typed as a plain str: the 'high' / 'medium' / 'low' tiers appear only
    # in the description and are not enforced by validation.
    source_authority: str = Field(
        description="'high' (.gov, .edu, peer-reviewed), 'medium' (established orgs), 'low' (blogs, forums).",
    )
    contradiction_detected: bool = Field(
        description="Were conflicting claims found across sources?",
    )
    # Validated to the inclusive range 0.0..1.0.
    query_specificity_match: float = Field(
        ge=0.0,
        le=1.0,
        description="How well the results address the actual question (0.0-1.0).",
    )
    # CostMetadata declares a field with the same name and description;
    # nothing in these models keeps the two values in sync.
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # Optional free-form string; the 'current' / 'recent' / 'dated' labels
    # are not validated.
    recency: Optional[str] = Field(
        default=None,
        description="'current' (< 1yr), 'recent' (1-3yr), 'dated' (> 3yr), None if unknown.",
    )


# ---------------------------------------------------------------------------
# Output types — CostMetadata
# ---------------------------------------------------------------------------


class CostMetadata(BaseModel):
    """Resource usage for a single research call."""

    # All five fields are required (no defaults are declared).

    # Non-negative (ge=0); no upper bound is declared.
    tokens_used: int = Field(
        ge=0,
        description="Total tokens consumed (Claude + search API calls).",
    )
    # Non-negative (ge=0), so zero completed iterations is a valid value.
    iterations_run: int = Field(
        ge=0,
        description="Number of inner-loop iterations completed.",
    )
    # Non-negative float, in seconds per the description.
    wall_time_sec: float = Field(
        ge=0.0,
        description="Actual elapsed wall-clock time in seconds.",
    )
    # ConfidenceFactors declares a field with the same name and description;
    # nothing in these models keeps the two values in sync.
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # Free-form string; the model identifier is not checked against any list.
    model_id: str = Field(
        description="Model used for the research loop (e.g. 'claude-sonnet-4-6').",
    )


# ---------------------------------------------------------------------------
# Top-level output
# ---------------------------------------------------------------------------


class ResearchResult(BaseModel):
    """Complete result from a single research() call. This is the contract."""

    # Required fields: answer, confidence, confidence_factors, cost_metadata,
    # trace_id. The four list fields use default_factory=list, so each
    # instance gets its own empty list when the field is omitted.

    # The "every claim must trace to a citation" rule is stated in the
    # description only; this model does not check it, and `citations` may be
    # empty.
    answer: str = Field(
        description="The synthesized answer. Every claim must trace to a citation.",
    )
    citations: list[Citation] = Field(
        default_factory=list,
        description="Sources used, with raw evidence.",
    )
    gaps: list[Gap] = Field(
        default_factory=list,
        description="What couldn't be resolved, categorized by cause.",
    )
    discovery_events: list[DiscoveryEvent] = Field(
        default_factory=list,
        description="Lateral findings for other researchers.",
    )
    open_questions: list[OpenQuestion] = Field(
        default_factory=list,
        description="Follow-up questions that emerged from the research.",
    )
    # Validated to the inclusive range 0.0..1.0. It is supplied by the caller
    # and is not derived from `confidence_factors` by this model.
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Overall confidence in the answer (0.0-1.0).",
    )
    confidence_factors: ConfidenceFactors = Field(
        description="What fed the confidence score.",
    )
    cost_metadata: CostMetadata = Field(
        description="Resource usage for this research call.",
    )
    # Typed as a plain str: the UUID format mentioned in the description is
    # not validated.
    trace_id: str = Field(
        description="UUID linking to the JSONL trace log.",
    )
M0.3: Implement contract v1 Pydantic models with tests All Research Contract types as Pydantic models: - ResearchConstraints (input) - Citation with raw_excerpt (output) - GapCategory enum (5 categories) - Gap with structured category (output) - DiscoveryEvent (lateral findings) - ConfidenceFactors (auditable scoring inputs) - CostMetadata with model_id (resource tracking) - ResearchResult (top-level contract) 32 tests: validation, bounds checking, serialization roundtrips, JSON structure verification against contract spec. Refs: archeious/marchwarden#1 Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com> 2026-04-08 20:00:45 +00:00			`"""Marchwarden Research Contract v1 — Pydantic models.`

			`These models define the stable contract between a researcher MCP server`
			`and its caller (PI agent or CLI shim). Changes to required fields or`
			`types require a contract version bump.`
			`"""`

			`from enum import Enum`
			`from typing import Optional`

			`from pydantic import BaseModel, Field`


			`# ---------------------------------------------------------------------------`
			`# Input types`
			`# ---------------------------------------------------------------------------`


			`class ResearchConstraints(BaseModel):`
			`"""Fine-grained control over researcher behavior."""`

			`max_iterations: int = Field(`
			`default=5,`
			`ge=1,`
			`le=20,`
			`description="Stop after N iterations, regardless of progress.",`
			`)`
			`token_budget: int = Field(`
			`default=20_000,`
			`ge=1_000,`
			`description="Soft limit on total tokens consumed by the research loop.",`
			`)`
			`max_sources: int = Field(`
			`default=10,`
			`ge=1,`
			`description="Maximum number of sources to fetch and extract.",`
			`)`
			`source_filter: Optional[str] = Field(`
			`default=None,`
			`description="Restrict search to specific domains (V2). E.g. '.gov,.edu'.",`
			`)`


			`# ---------------------------------------------------------------------------`
			`# Output types — Citation`
			`# ---------------------------------------------------------------------------`


			`class Citation(BaseModel):`
			`"""A single source used by the researcher, with raw evidence."""`

			`source: str = Field(`
			`description="Source type: 'web', 'file', 'database', etc.",`
			`)`
			`locator: str = Field(`
			`description="URL, file path, row ID, or unique identifier.",`
			`)`
			`title: Optional[str] = Field(`
			`default=None,`
			`description="Human-readable title (for web sources).",`
			`)`
			`snippet: Optional[str] = Field(`
			`default=None,`
			`description="Researcher's summary of relevant content (50-200 chars).",`
			`)`
			`raw_excerpt: str = Field(`
			`description=(`
			`"Verbatim text from the source (up to 500 chars). "`
			`"Bypasses researcher synthesis to prevent the Synthesis Paradox."`
			`),`
			`)`
			`confidence: float = Field(`
			`ge=0.0,`
			`le=1.0,`
			`description="Researcher's confidence in this source's accuracy.",`
			`)`


			`# ---------------------------------------------------------------------------`
			`# Output types — Gap`
			`# ---------------------------------------------------------------------------`


			`class GapCategory(str, Enum):`
			`"""Categorized reason a gap exists. Drives PI decision-making."""`

			`SOURCE_NOT_FOUND = "source_not_found"`
			`ACCESS_DENIED = "access_denied"`
			`BUDGET_EXHAUSTED = "budget_exhausted"`
			`CONTRADICTORY_SOURCES = "contradictory_sources"`
			`SCOPE_EXCEEDED = "scope_exceeded"`


			`class Gap(BaseModel):`
			`"""An unresolved aspect of the research question."""`

			`topic: str = Field(`
			`description="What aspect wasn't resolved.",`
			`)`
			`category: GapCategory = Field(`
			`description="Structured reason category.",`
			`)`
			`detail: str = Field(`
			`description="Human-readable explanation of why this gap exists.",`
			`)`


			`# ---------------------------------------------------------------------------`
			`# Output types — DiscoveryEvent`
			`# ---------------------------------------------------------------------------`


			`class DiscoveryEvent(BaseModel):`
			`"""A lateral finding relevant to another researcher's domain."""`

			`type: str = Field(`
			`description="Event type: 'related_research', 'new_source', 'contradiction'.",`
			`)`
			`suggested_researcher: Optional[str] = Field(`
			`default=None,`
			`description="Target researcher type: 'arxiv', 'database', 'legal', etc.",`
			`)`
			`query: str = Field(`
			`description="Suggested query for the target researcher.",`
			`)`
			`reason: str = Field(`
			`description="Why this is relevant to the overall investigation.",`
			`)`
			`source_locator: Optional[str] = Field(`
			`default=None,`
			`description="Where the discovery was found (URL, DOI, etc.).",`
			`)`


Add OpenQuestion to research contract New field on ResearchResult: open_questions — follow-up questions that emerged from the research itself. Distinct from gaps (backward: what failed) and discovery_events (sideways: what's lateral). Open questions look forward: 'based on what I found, this needs deeper investigation.' - OpenQuestion model: question, context, priority (high/medium/low), source_locator - Updated agent synthesis prompt to produce open_questions - Updated agent result builder to parse open_questions from JSON - 3 new tests for OpenQuestion model - Updated existing tests for new field 77 tests passing. Refs: archeious/marchwarden#1 Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com> 2026-04-08 20:37:30 +00:00			`# ---------------------------------------------------------------------------`
			`# Output types — OpenQuestion`
			`# ---------------------------------------------------------------------------`


			`class OpenQuestion(BaseModel):`
			`"""A follow-up question that emerged from the research.`

			`Distinct from gaps (what failed) and discovery events (what's lateral).`
			`Open questions look forward: "based on what I found, this needs deeper`
			`investigation." The PI uses these to decide whether to dispatch`
			`additional research calls.`
			`"""`

			`question: str = Field(`
			`description="The follow-up question that emerged from the research.",`
			`)`
			`context: str = Field(`
			`description="What evidence or finding prompted this question.",`
			`)`
			`priority: str = Field(`
			`description="'high' (critical to answer quality), 'medium' (would improve answer), 'low' (nice to know).",`
			`)`
			`source_locator: Optional[str] = Field(`
			`default=None,`
			`description="URL or source where this question arose from.",`
			`)`


M0.3: Implement contract v1 Pydantic models with tests All Research Contract types as Pydantic models: - ResearchConstraints (input) - Citation with raw_excerpt (output) - GapCategory enum (5 categories) - Gap with structured category (output) - DiscoveryEvent (lateral findings) - ConfidenceFactors (auditable scoring inputs) - CostMetadata with model_id (resource tracking) - ResearchResult (top-level contract) 32 tests: validation, bounds checking, serialization roundtrips, JSON structure verification against contract spec. Refs: archeious/marchwarden#1 Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com> 2026-04-08 20:00:45 +00:00			`# ---------------------------------------------------------------------------`
			`# Output types — Confidence`
			`# ---------------------------------------------------------------------------`


			`class ConfidenceFactors(BaseModel):`
			`"""Inputs that fed the confidence score. Enables auditability and future calibration."""`

			`num_corroborating_sources: int = Field(`
			`ge=0,`
			`description="How many sources agree on the core claims.",`
			`)`
			`source_authority: str = Field(`
			`description="'high' (.gov, .edu, peer-reviewed), 'medium' (established orgs), 'low' (blogs, forums).",`
			`)`
			`contradiction_detected: bool = Field(`
			`description="Were conflicting claims found across sources?",`
			`)`
			`query_specificity_match: float = Field(`
			`ge=0.0,`
			`le=1.0,`
			`description="How well the results address the actual question (0.0-1.0).",`
			`)`
			`budget_exhausted: bool = Field(`
			`description="True if the researcher hit its iteration or token cap.",`
			`)`
			`recency: Optional[str] = Field(`
			`default=None,`
			`description="'current' (< 1yr), 'recent' (1-3yr), 'dated' (> 3yr), None if unknown.",`
			`)`


			`# ---------------------------------------------------------------------------`
			`# Output types — CostMetadata`
			`# ---------------------------------------------------------------------------`


			`class CostMetadata(BaseModel):`
			`"""Resource usage for a single research call."""`

			`tokens_used: int = Field(`
			`ge=0,`
			`description="Total tokens consumed (Claude + search API calls).",`
			`)`
			`iterations_run: int = Field(`
			`ge=0,`
			`description="Number of inner-loop iterations completed.",`
			`)`
			`wall_time_sec: float = Field(`
			`ge=0.0,`
			`description="Actual elapsed wall-clock time in seconds.",`
			`)`
			`budget_exhausted: bool = Field(`
			`description="True if the researcher hit its iteration or token cap.",`
			`)`
			`model_id: str = Field(`
			`description="Model used for the research loop (e.g. 'claude-sonnet-4-6').",`
			`)`


			`# ---------------------------------------------------------------------------`
			`# Top-level output`
			`# ---------------------------------------------------------------------------`


			`class ResearchResult(BaseModel):`
			`"""Complete result from a single research() call. This is the contract."""`

			`answer: str = Field(`
			`description="The synthesized answer. Every claim must trace to a citation.",`
			`)`
			`citations: list[Citation] = Field(`
			`default_factory=list,`
			`description="Sources used, with raw evidence.",`
			`)`
			`gaps: list[Gap] = Field(`
			`default_factory=list,`
			`description="What couldn't be resolved, categorized by cause.",`
			`)`
			`discovery_events: list[DiscoveryEvent] = Field(`
			`default_factory=list,`
			`description="Lateral findings for other researchers.",`
			`)`
Add OpenQuestion to research contract New field on ResearchResult: open_questions — follow-up questions that emerged from the research itself. Distinct from gaps (backward: what failed) and discovery_events (sideways: what's lateral). Open questions look forward: 'based on what I found, this needs deeper investigation.' - OpenQuestion model: question, context, priority (high/medium/low), source_locator - Updated agent synthesis prompt to produce open_questions - Updated agent result builder to parse open_questions from JSON - 3 new tests for OpenQuestion model - Updated existing tests for new field 77 tests passing. Refs: archeious/marchwarden#1 Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com> 2026-04-08 20:37:30 +00:00			`open_questions: list[OpenQuestion] = Field(`
			`default_factory=list,`
			`description="Follow-up questions that emerged from the research.",`
			`)`
M0.3: Implement contract v1 Pydantic models with tests All Research Contract types as Pydantic models: - ResearchConstraints (input) - Citation with raw_excerpt (output) - GapCategory enum (5 categories) - Gap with structured category (output) - DiscoveryEvent (lateral findings) - ConfidenceFactors (auditable scoring inputs) - CostMetadata with model_id (resource tracking) - ResearchResult (top-level contract) 32 tests: validation, bounds checking, serialization roundtrips, JSON structure verification against contract spec. Refs: archeious/marchwarden#1 Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com> 2026-04-08 20:00:45 +00:00			`confidence: float = Field(`
			`ge=0.0,`
			`le=1.0,`
			`description="Overall confidence in the answer (0.0-1.0).",`
			`)`
			`confidence_factors: ConfidenceFactors = Field(`
			`description="What fed the confidence score.",`
			`)`
			`cost_metadata: CostMetadata = Field(`
			`description="Resource usage for this research call.",`
			`)`
			`trace_id: str = Field(`
			`description="UUID linking to the JSONL trace log.",`
			`)`