# File: marchwarden/researchers/web/models.py (Python; 265 lines, 8.4 KiB per the repository viewer)

"""Marchwarden Research Contract v1 — Pydantic models.
These models define the stable contract between a researcher MCP server
and its caller (PI agent or CLI shim). Changes to required fields or
types require a contract version bump.
"""
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Input types
# ---------------------------------------------------------------------------
class ResearchConstraints(BaseModel):
    """Fine-grained control over researcher behavior."""

    # Every field below declares a default, so ``ResearchConstraints()`` with
    # no arguments is a valid instance.

    # Iteration cap: validated to lie in the closed range [1, 20].
    max_iterations: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Stop after N iterations, regardless of progress.",
    )
    # Token budget: only a lower bound (1,000) is declared; no upper bound.
    token_budget: int = Field(
        default=20_000,
        ge=1_000,
        description="Soft limit on total tokens consumed by the research loop.",
    )
    # Source cap: must be at least 1; no upper bound is declared.
    max_sources: int = Field(
        default=10,
        ge=1,
        description="Maximum number of sources to fetch and extract.",
    )
    # Free-form string: this model does not parse or validate the domain list.
    source_filter: Optional[str] = Field(
        default=None,
        description="Restrict search to specific domains (V2). E.g. '.gov,.edu'.",
    )


# ---------------------------------------------------------------------------
# Output types — Citation
# ---------------------------------------------------------------------------
class Citation(BaseModel):
    """A single source used by the researcher, with raw evidence."""

    # Required (no default): source, locator, raw_excerpt, confidence.
    # Optional (default None): title, snippet.

    source: str = Field(
        description="Source type: 'web', 'file', 'database', etc.",
    )
    locator: str = Field(
        description="URL, file path, row ID, or unique identifier.",
    )
    title: Optional[str] = Field(
        default=None,
        description="Human-readable title (for web sources).",
    )
    # NOTE(review): the 50-200 char range appears in the description only;
    # no length constraint is declared on this field.
    snippet: Optional[str] = Field(
        default=None,
        description="Researcher's summary of relevant content (50-200 chars).",
    )
    # NOTE(review): the 500-char cap is likewise descriptive -- no max_length
    # is declared here, so longer excerpts still validate.
    raw_excerpt: str = Field(
        description=(
            "Verbatim text from the source (up to 500 chars). "
            "Bypasses researcher synthesis to prevent the Synthesis Paradox."
        ),
    )
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Researcher's confidence in this source's accuracy.",
    )


# ---------------------------------------------------------------------------
# Output types — Gap
# ---------------------------------------------------------------------------
class GapCategory(str, Enum):
    """Categorized reason a gap exists. Drives PI decision-making."""

    # The ``str`` mixin makes each member a real string that compares equal
    # to its value, so members can be looked up from the raw string.
    SOURCE_NOT_FOUND = "source_not_found"
    ACCESS_DENIED = "access_denied"
    BUDGET_EXHAUSTED = "budget_exhausted"
    CONTRADICTORY_SOURCES = "contradictory_sources"
    SCOPE_EXCEEDED = "scope_exceeded"


class Gap(BaseModel):
    """An unresolved aspect of the research question."""

    # All three fields are required (none declares a default).

    topic: str = Field(
        description="What aspect wasn't resolved.",
    )
    # Restricted to the members of GapCategory.
    category: GapCategory = Field(
        description="Structured reason category.",
    )
    detail: str = Field(
        description="Human-readable explanation of why this gap exists.",
    )


# ---------------------------------------------------------------------------
# Output types — DiscoveryEvent
# ---------------------------------------------------------------------------
class DiscoveryEvent(BaseModel):
    """A lateral finding relevant to another researcher's domain."""

    # Required (no default): type, query, reason.
    # Optional (default None): suggested_researcher, source_locator.

    # NOTE(review): typed as a plain ``str`` -- the event types listed in the
    # description are not enforced, so any string validates. The attribute
    # shares its name with the ``type`` builtin; presumably it stays that way
    # because field names are part of the contract -- confirm before renaming.
    type: str = Field(
        description="Event type: 'related_research', 'new_source', 'contradiction'.",
    )
    # Plain string as well; the researcher names in the description are examples.
    suggested_researcher: Optional[str] = Field(
        default=None,
        description="Target researcher type: 'arxiv', 'database', 'legal', etc.",
    )
    query: str = Field(
        description="Suggested query for the target researcher.",
    )
    reason: str = Field(
        description="Why this is relevant to the overall investigation.",
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="Where the discovery was found (URL, DOI, etc.).",
    )


# ---------------------------------------------------------------------------
# Output types — OpenQuestion
# ---------------------------------------------------------------------------
class OpenQuestion(BaseModel):
    """A follow-up question that emerged from the research.

    Distinct from gaps (what failed) and discovery events (what's lateral).
    Open questions look forward: "based on what I found, this needs deeper
    investigation." The PI uses these to decide whether to dispatch
    additional research calls.
    """

    # Required (no default): question, context, priority.
    # Optional (default None): source_locator.

    question: str = Field(
        description="The follow-up question that emerged from the research.",
    )
    context: str = Field(
        description="What evidence or finding prompted this question.",
    )
    # NOTE(review): typed as a plain ``str`` -- the three levels named in the
    # description are not enforced, so any string validates.
    priority: str = Field(
        # Split across literals only to keep lines short; the concatenated
        # text is unchanged.
        description=(
            "'high' (critical to answer quality), "
            "'medium' (would improve answer), "
            "'low' (nice to know)."
        ),
    )
    source_locator: Optional[str] = Field(
        default=None,
        description="URL or source where this question arose from.",
    )


# ---------------------------------------------------------------------------
# Output types — Confidence
# ---------------------------------------------------------------------------
class ConfidenceFactors(BaseModel):
    """Inputs that fed the confidence score. Enables auditability and future calibration."""

    # Every field except ``recency`` is required (no default declared).

    # Non-negative count; no upper bound is declared.
    num_corroborating_sources: int = Field(
        ge=0,
        description="How many sources agree on the core claims.",
    )
    # NOTE(review): typed as a plain ``str`` -- the three levels named in the
    # description are not enforced, so any string validates.
    source_authority: str = Field(
        # Split across literals only to keep lines short; the concatenated
        # text is unchanged.
        description=(
            "'high' (.gov, .edu, peer-reviewed), "
            "'medium' (established orgs), "
            "'low' (blogs, forums)."
        ),
    )
    contradiction_detected: bool = Field(
        description="Were conflicting claims found across sources?",
    )
    # Validated to lie in the closed interval [0.0, 1.0].
    query_specificity_match: float = Field(
        ge=0.0,
        le=1.0,
        description="How well the results address the actual question (0.0-1.0).",
    )
    # A field with the same name and description also exists on CostMetadata;
    # nothing in this model ties the two values together.
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # Plain optional string: the labels in the description are not enforced.
    recency: Optional[str] = Field(
        default=None,
        description=(
            "'current' (< 1yr), 'recent' (1-3yr), 'dated' (> 3yr), "
            "None if unknown."
        ),
    )


# ---------------------------------------------------------------------------
# Output types — CostMetadata
# ---------------------------------------------------------------------------
class CostMetadata(BaseModel):
    """Resource usage for a single research call."""

    # All five fields are required (none declares a default).

    # The three numeric fields are validated as non-negative (ge=0); none
    # declares an upper bound.
    tokens_used: int = Field(
        ge=0,
        description="Total tokens consumed (Claude + search API calls).",
    )
    iterations_run: int = Field(
        ge=0,
        description="Number of inner-loop iterations completed.",
    )
    wall_time_sec: float = Field(
        ge=0.0,
        description="Actual elapsed wall-clock time in seconds.",
    )
    # A field with the same name and description also exists on
    # ConfidenceFactors; nothing in this model ties the two values together.
    budget_exhausted: bool = Field(
        description="True if the researcher hit its iteration or token cap.",
    )
    # Free-form string; the model name in the description is only an example.
    model_id: str = Field(
        description="Model used for the research loop (e.g. 'claude-sonnet-4-6').",
    )


# ---------------------------------------------------------------------------
# Top-level output
# ---------------------------------------------------------------------------
class ResearchResult(BaseModel):
    """Complete result from a single research() call. This is the contract."""

    # Required (no default): answer, confidence, confidence_factors,
    # cost_metadata, trace_id.
    # The four list fields use ``default_factory=list``, so each instance gets
    # its own fresh empty list when the field is omitted.

    answer: str = Field(
        description="The synthesized answer. Every claim must trace to a citation.",
    )
    citations: list[Citation] = Field(
        default_factory=list,
        description="Sources used, with raw evidence.",
    )
    gaps: list[Gap] = Field(
        default_factory=list,
        description="What couldn't be resolved, categorized by cause.",
    )
    discovery_events: list[DiscoveryEvent] = Field(
        default_factory=list,
        description="Lateral findings for other researchers.",
    )
    open_questions: list[OpenQuestion] = Field(
        default_factory=list,
        description="Follow-up questions that emerged from the research.",
    )
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Overall confidence in the answer (0.0-1.0).",
    )
    confidence_factors: ConfidenceFactors = Field(
        description="What fed the confidence score.",
    )
    cost_metadata: CostMetadata = Field(
        description="Resource usage for this research call.",
    )
    # NOTE(review): typed as a plain ``str``; the UUID format named in the
    # description is not validated by this model.
    trace_id: str = Field(
        description="UUID linking to the JSONL trace log.",
    )