Merge pull request 'depth flag now drives constraint defaults' (#33) from feat/depth-presets into main

Reviewed-on: #33
This commit is contained in:
archeious 2026-04-08 22:33:48 +00:00
commit 226c1c8660
5 changed files with 123 additions and 27 deletions

View file

@ -40,27 +40,28 @@ log = get_logger("marchwarden.cli")
async def call_research_tool(
    question: str,
    depth: str,
    max_iterations: Optional[int],
    token_budget: Optional[int],
) -> ResearchResult:
    """Spawn the web researcher MCP server and call its `research` tool.

    ``max_iterations`` and ``token_budget`` are optional; when None, the
    corresponding key is left out of the tool arguments so the MCP server
    uses the depth preset (Issue #30).

    Args:
        question: The question forwarded to the `research` tool.
        depth: Depth name forwarded to the `research` tool.
        max_iterations: Explicit iteration override, or None for the preset.
        token_budget: Explicit token-budget override, or None for the preset.

    Returns:
        The ResearchResult parsed from the tool's JSON text payload.
    """
    params = StdioServerParameters(
        command=sys.executable,
        args=["-m", "researchers.web.server"],
        env=os.environ.copy(),
    )

    # Only send overrides the caller actually set. Passing an explicit
    # None would be forwarded as a value instead of letting the server
    # fall back to its own defaults.
    arguments: dict = {"question": question, "depth": depth}
    if max_iterations is not None:
        arguments["max_iterations"] = max_iterations
    if token_budget is not None:
        arguments["token_budget"] = token_budget

    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool("research", arguments=arguments)

    # FastMCP returns the tool's string return as a TextContent block.
    payload = result.content[0].text
    return ResearchResult.model_validate_json(payload)
@ -178,21 +179,20 @@ def cli() -> None:
"--budget",
"token_budget",
type=int,
default=20_000,
show_default=True,
help="Token budget for the research loop.",
default=None,
help="Token budget for the research loop. Overrides the depth preset.",
)
@click.option(
"--max-iterations",
type=int,
default=5,
show_default=True,
default=None,
help="Max research loop iterations. Overrides the depth preset.",
)
def ask(
question: str,
depth: str,
token_budget: int,
max_iterations: int,
token_budget: Optional[int],
max_iterations: Optional[int],
) -> None:
"""Ask the web researcher a QUESTION."""
console = Console()

View file

@ -25,6 +25,7 @@ from researchers.web.models import (
OpenQuestion,
ResearchConstraints,
ResearchResult,
constraints_for_depth,
)
from researchers.web.tools import SearchResult, fetch_url, tavily_search
from researchers.web.trace import TraceLogger
@ -203,7 +204,10 @@ class WebResearcher:
Returns:
A ResearchResult conforming to the v1 contract.
"""
constraints = constraints or ResearchConstraints()
# If the caller didn't supply explicit constraints, build them
# from the depth preset (Issue #30). Callers that DO pass a
# ResearchConstraints are taken at their word — explicit wins.
constraints = constraints or constraints_for_depth(depth)
trace = TraceLogger(trace_dir=self.trace_dir)
start_time = time.time()
total_tokens = 0

View file

@ -41,6 +41,43 @@ class ResearchConstraints(BaseModel):
)
# Depth presets: each depth name maps to default values for the
# constraint fields. Explicit overrides (--max-iterations, --budget,
# or an explicit ResearchConstraints) always take precedence over
# the preset.
#
# `balanced` mirrors the historical defaults, so existing callers see
# no behavior change. `shallow` targets a "quick lookup" and `deep` a
# "thorough investigation". These values are starting points; Phase 3
# stress testing will inform calibration.
DEPTH_PRESETS: dict[str, dict[str, int]] = {
    "shallow": dict(max_iterations=2, token_budget=5_000, max_sources=5),
    "balanced": dict(max_iterations=5, token_budget=20_000, max_sources=10),
    "deep": dict(max_iterations=8, token_budget=60_000, max_sources=20),
}
def constraints_for_depth(
    depth: str,
    *,
    max_iterations: Optional[int] = None,
    token_budget: Optional[int] = None,
    max_sources: Optional[int] = None,
) -> ResearchConstraints:
    """Return the ResearchConstraints for a depth preset plus overrides.

    The preset for ``depth`` is looked up in ``DEPTH_PRESETS``; a depth
    that is not a known key uses the ``balanced`` preset instead. Every
    keyword argument that is not None replaces the preset's value for
    that field.
    """
    explicit = {
        "max_iterations": max_iterations,
        "token_budget": token_budget,
        "max_sources": max_sources,
    }
    # Work on a fresh dict so the shared preset table is never mutated.
    fields = dict(DEPTH_PRESETS.get(depth, DEPTH_PRESETS["balanced"]))
    fields.update(
        {name: value for name, value in explicit.items() if value is not None}
    )
    return ResearchConstraints(**fields)
# ---------------------------------------------------------------------------
# Output types — Citation
# ---------------------------------------------------------------------------

View file

@ -13,7 +13,7 @@ from mcp.server.fastmcp import FastMCP
from obs import configure_logging, get_logger
from researchers.web.agent import WebResearcher
from researchers.web.models import ResearchConstraints
from researchers.web.models import constraints_for_depth
log = get_logger("marchwarden.mcp")
@ -52,17 +52,18 @@ async def research(
question: str,
context: Optional[str] = None,
depth: str = "balanced",
max_iterations: int = 5,
token_budget: int = 20000,
max_iterations: Optional[int] = None,
token_budget: Optional[int] = None,
) -> str:
"""Research a question using web search and return a structured answer.
Args:
question: The question to investigate.
context: What the caller already knows (optional).
depth: Research depth "shallow", "balanced", or "deep".
max_iterations: Maximum number of search/fetch iterations (1-20).
token_budget: Maximum tokens to spend (1000-100000).
depth: Research depth, one of "shallow", "balanced", or "deep". Each
depth picks default max_iterations / token_budget / max_sources.
max_iterations: Override the depth preset for iterations (1-20).
token_budget: Override the depth preset for token budget.
Returns:
JSON string containing the full ResearchResult with answer,
@ -70,7 +71,8 @@ async def research(
and cost_metadata.
"""
researcher = _get_researcher()
constraints = ResearchConstraints(
constraints = constraints_for_depth(
depth,
max_iterations=max_iterations,
token_budget=token_budget,
)

View file

@ -443,3 +443,56 @@ class TestResearchResult:
"recency",
}
assert set(data["confidence_factors"].keys()) == cf_keys
# ---------------------------------------------------------------------------
# Depth presets (Issue #30)
# ---------------------------------------------------------------------------
from researchers.web.models import DEPTH_PRESETS, constraints_for_depth
class TestDepthPresets:
    """Checks for the depth presets and ``constraints_for_depth`` (Issue #30)."""

    def test_shallow_preset(self):
        built = constraints_for_depth("shallow")
        assert built.max_iterations == 2
        assert built.token_budget == 5_000
        assert built.max_sources == 5

    def test_balanced_preset_matches_historical_defaults(self):
        # Backward compat: the balanced preset must agree with a
        # default-constructed ResearchConstraints on every field.
        built = constraints_for_depth("balanced")
        baseline = ResearchConstraints()
        assert built.max_iterations == baseline.max_iterations == 5
        assert built.token_budget == baseline.token_budget == 20_000
        assert built.max_sources == baseline.max_sources == 10

    def test_deep_preset(self):
        built = constraints_for_depth("deep")
        assert built.max_iterations == 8
        assert built.token_budget == 60_000
        assert built.max_sources == 20

    def test_unknown_depth_falls_back_to_balanced(self):
        balanced = DEPTH_PRESETS["balanced"]
        built = constraints_for_depth("nonsense")
        assert built.max_iterations == balanced["max_iterations"]
        assert built.token_budget == balanced["token_budget"]

    def test_explicit_overrides_win(self):
        overrides = {
            "max_iterations": 10,
            "token_budget": 42_000,
            "max_sources": 15,
        }
        built = constraints_for_depth("shallow", **overrides)
        assert built.max_iterations == 10
        assert built.token_budget == 42_000
        assert built.max_sources == 15

    def test_partial_override(self):
        # A single overridden field must leave the others at the preset.
        built = constraints_for_depth("deep", token_budget=100_000)
        assert built.token_budget == 100_000
        assert built.max_iterations == 8  # deep preset
        assert built.max_sources == 20  # deep preset