Merge pull request 'depth flag now drives constraint defaults' (#33) from feat/depth-presets into main

Reviewed-on: #33
depth flag now drives constraint defaults (#30)
2026-04-08 22:33:48 +00:00 · 2026-04-08 16:27:38 -06:00
5 changed files with 123 additions and 27 deletions
--- a/cli/main.py
+++ b/cli/main.py
@ -40,27 +40,28 @@ log = get_logger("marchwarden.cli")
 async def call_research_tool(
    question: str,
    depth: str,
-    max_iterations: int,
+    max_iterations: Optional[int],
-    token_budget: int,
+    token_budget: Optional[int],
 ) -> ResearchResult:
-    """Spawn the web researcher MCP server and call its `research` tool."""
+    """Spawn the web researcher MCP server and call its `research` tool.
    ``max_iterations`` and ``token_budget`` are optional — when None,
    the MCP server uses the depth preset (Issue #30).
    """
    params = StdioServerParameters(
        command=sys.executable,
        args=["-m", "researchers.web.server"],
        env=os.environ.copy(),
    )
    arguments: dict = {"question": question, "depth": depth}
    if max_iterations is not None:
        arguments["max_iterations"] = max_iterations
    if token_budget is not None:
        arguments["token_budget"] = token_budget
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
-            result = await session.call_tool(
+            result = await session.call_tool("research", arguments=arguments)
                "research",
                arguments={
                    "question": question,
                    "depth": depth,
                    "max_iterations": max_iterations,
                    "token_budget": token_budget,
                },
            )
            # FastMCP returns the tool's string return as a TextContent block.
            payload = result.content[0].text
            return ResearchResult.model_validate_json(payload)
@ -178,21 +179,20 @@ def cli() -> None:
    "--budget",
    "token_budget",
    type=int,
-    default=20_000,
+    default=None,
-    show_default=True,
+    help="Token budget for the research loop. Overrides the depth preset.",
    help="Token budget for the research loop.",
 )
@click.option(
    "--max-iterations",
    type=int,
-    default=5,
+    default=None,
-    show_default=True,
+    help="Max research loop iterations. Overrides the depth preset.",
 )
 def ask(
    question: str,
    depth: str,
-    token_budget: int,
+    token_budget: Optional[int],
-    max_iterations: int,
+    max_iterations: Optional[int],
 ) -> None:
    """Ask the web researcher a QUESTION."""
    console = Console()
--- a/researchers/web/agent.py
+++ b/researchers/web/agent.py
@ -25,6 +25,7 @@ from researchers.web.models import (
    OpenQuestion,
    ResearchConstraints,
    ResearchResult,
    constraints_for_depth,
 )
 from researchers.web.tools import SearchResult, fetch_url, tavily_search
 from researchers.web.trace import TraceLogger
@ -203,7 +204,10 @@ class WebResearcher:
        Returns:
            A ResearchResult conforming to the v1 contract.
        """
-        constraints = constraints or ResearchConstraints()
+        # If the caller didn't supply explicit constraints, build them
        # from the depth preset (Issue #30). Callers that DO pass a
        # ResearchConstraints are taken at their word — explicit wins.
        constraints = constraints or constraints_for_depth(depth)
        trace = TraceLogger(trace_dir=self.trace_dir)
        start_time = time.time()
        total_tokens = 0
--- a/researchers/web/models.py
+++ b/researchers/web/models.py
@ -41,6 +41,43 @@ class ResearchConstraints(BaseModel):
    )
 # Depth presets: each depth name maps to the default values used for
 # the constraint fields. A preset only supplies defaults; anything given
 # explicitly (--max-iterations, --budget, or a caller-built
 # ResearchConstraints) takes precedence over it.
 #
 # `balanced` carries the historical defaults, so callers that never
 # pick a depth keep their previous behavior. `shallow` is aimed at a
 # "quick lookup" and `deep` at a "thorough investigation". The numbers
 # are starting points; Phase 3 stress testing will inform calibration.
 DEPTH_PRESETS: dict[str, dict[str, int]] = {
    "shallow": {
        "max_iterations": 2,
        "token_budget": 5_000,
        "max_sources": 5,
    },
    "balanced": {
        "max_iterations": 5,
        "token_budget": 20_000,
        "max_sources": 10,
    },
    "deep": {
        "max_iterations": 8,
        "token_budget": 60_000,
        "max_sources": 20,
    },
 }
 def constraints_for_depth(
    depth: str,
    *,
    max_iterations: Optional[int] = None,
    token_budget: Optional[int] = None,
    max_sources: Optional[int] = None,
 ) -> ResearchConstraints:
    """Resolve a depth name into a ``ResearchConstraints`` instance.

    The values start from the ``DEPTH_PRESETS`` entry for ``depth``; a
    depth that is not a key of ``DEPTH_PRESETS`` uses the ``balanced``
    entry instead. Each keyword argument that is not ``None`` then
    replaces the corresponding preset value.

    Args:
        depth: Name of the preset to start from.
        max_iterations: Replacement for the preset's ``max_iterations``.
        token_budget: Replacement for the preset's ``token_budget``.
        max_sources: Replacement for the preset's ``max_sources``.

    Returns:
        A ``ResearchConstraints`` built from the merged values.
    """
    fallback = DEPTH_PRESETS["balanced"]
    # Copy the preset so the shared module-level table is never mutated.
    resolved = dict(DEPTH_PRESETS.get(depth, fallback))
    requested = {
        "max_iterations": max_iterations,
        "token_budget": token_budget,
        "max_sources": max_sources,
    }
    # None means "not specified", so only real values override the preset.
    resolved.update(
        {field: value for field, value in requested.items() if value is not None}
    )
    return ResearchConstraints(**resolved)
 # ---------------------------------------------------------------------------
 # Output types — Citation
 # ---------------------------------------------------------------------------
--- a/researchers/web/server.py
+++ b/researchers/web/server.py
@ -13,7 +13,7 @@ from mcp.server.fastmcp import FastMCP
 from obs import configure_logging, get_logger
 from researchers.web.agent import WebResearcher
-from researchers.web.models import ResearchConstraints
+from researchers.web.models import constraints_for_depth
 log = get_logger("marchwarden.mcp")
@ -52,17 +52,18 @@ async def research(
    question: str,
    context: Optional[str] = None,
    depth: str = "balanced",
-    max_iterations: int = 5,
+    max_iterations: Optional[int] = None,
-    token_budget: int = 20000,
+    token_budget: Optional[int] = None,
 ) -> str:
    """Research a question using web search and return a structured answer.
    Args:
        question: The question to investigate.
        context: What the caller already knows (optional).
-        depth: Research depth — "shallow", "balanced", or "deep".
+        depth: Research depth — "shallow", "balanced", or "deep". Each
-        max_iterations: Maximum number of search/fetch iterations (1-20).
+            depth picks default max_iterations / token_budget / max_sources.
-        token_budget: Maximum tokens to spend (1000-100000).
+        max_iterations: Override the depth preset for iterations (1-20).
        token_budget: Override the depth preset for token budget.
    Returns:
        JSON string containing the full ResearchResult with answer,
@ -70,7 +71,8 @@ async def research(
        and cost_metadata.
    """
    researcher = _get_researcher()
-    constraints = ResearchConstraints(
+    constraints = constraints_for_depth(
        depth,
        max_iterations=max_iterations,
        token_budget=token_budget,
    )
--- a/tests/test_models.py
+++ b/tests/test_models.py
@ -443,3 +443,56 @@ class TestResearchResult:
            "recency",
        }
        assert set(data["confidence_factors"].keys()) == cf_keys
 # ---------------------------------------------------------------------------
 # Depth presets (Issue #30)
 # ---------------------------------------------------------------------------
 from researchers.web.models import DEPTH_PRESETS, constraints_for_depth
 class TestDepthPresets:
    """Checks for the depth presets and ``constraints_for_depth`` (Issue #30)."""

    def test_shallow_preset(self):
        shallow = constraints_for_depth("shallow")
        observed = (
            shallow.max_iterations,
            shallow.token_budget,
            shallow.max_sources,
        )
        assert observed == (2, 5_000, 5)

    def test_balanced_preset_matches_historical_defaults(self):
        # Backward compatibility: the balanced preset has to agree with
        # the defaults of a plain ResearchConstraints().
        balanced = constraints_for_depth("balanced")
        historical = ResearchConstraints()
        expected_values = (
            ("max_iterations", 5),
            ("token_budget", 20_000),
            ("max_sources", 10),
        )
        for field, expected in expected_values:
            assert getattr(balanced, field) == getattr(historical, field) == expected

    def test_deep_preset(self):
        deep = constraints_for_depth("deep")
        observed = (deep.max_iterations, deep.token_budget, deep.max_sources)
        assert observed == (8, 60_000, 20)

    def test_unknown_depth_falls_back_to_balanced(self):
        resolved = constraints_for_depth("nonsense")
        balanced = DEPTH_PRESETS["balanced"]
        assert resolved.max_iterations == balanced["max_iterations"]
        assert resolved.token_budget == balanced["token_budget"]

    def test_explicit_overrides_win(self):
        overrides = {
            "max_iterations": 10,
            "token_budget": 42_000,
            "max_sources": 15,
        }
        resolved = constraints_for_depth("shallow", **overrides)
        for field, expected in overrides.items():
            assert getattr(resolved, field) == expected

    def test_partial_override(self):
        # A single overridden field must leave the other two at the
        # values of the deep preset.
        resolved = constraints_for_depth("deep", token_budget=100_000)
        assert resolved.token_budget == 100_000
        untouched = (resolved.max_iterations, resolved.max_sources)
        assert untouched == (8, 20)