depth flag now drives constraint defaults (#30)
Previously the depth parameter (shallow/balanced/deep) was passed only as a text hint inside the agent's user message, with no mechanical effect on iterations, token budget, or source count. The flag was effectively cosmetic — the LLM was expected to "interpret" it.

Add a DEPTH_PRESETS table and a constraints_for_depth() helper in researchers.web.models:
- shallow: 2 iterations, 5,000 tokens, 5 sources
- balanced: 5 iterations, 20,000 tokens, 10 sources (= historical defaults)
- deep: 8 iterations, 60,000 tokens, 20 sources

Wired through the stack:
- WebResearcher.research(): when constraints is None, builds them from the depth preset instead of a bare ResearchConstraints()
- MCP server `research` tool: max_iterations and token_budget now default to None; constraints are built via constraints_for_depth, with explicit values overriding the preset
- CLI `ask` command: --max-iterations and --budget default to None; the CLI only forwards them to the MCP tool when set, so unset flags fall through to the depth preset

balanced is unchanged from the historical defaults, so existing callers see no behavior difference. Explicit --max-iterations / --budget always win over the preset.

Tests cover each preset's values, balanced backward-compat, unknown depth fallback, full override, and partial override. 116/116 tests passing. Live-verified: --depth shallow on a simple question now caps at 2 iterations and stays under budget.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d51f16d33e
commit
ae48acd421
5 changed files with 123 additions and 27 deletions
38
cli/main.py
38
cli/main.py
|
|
@ -40,27 +40,28 @@ log = get_logger("marchwarden.cli")
|
||||||
async def call_research_tool(
|
async def call_research_tool(
|
||||||
question: str,
|
question: str,
|
||||||
depth: str,
|
depth: str,
|
||||||
max_iterations: int,
|
max_iterations: Optional[int],
|
||||||
token_budget: int,
|
token_budget: Optional[int],
|
||||||
) -> ResearchResult:
|
) -> ResearchResult:
|
||||||
"""Spawn the web researcher MCP server and call its `research` tool."""
|
"""Spawn the web researcher MCP server and call its `research` tool.
|
||||||
|
|
||||||
|
``max_iterations`` and ``token_budget`` are optional — when None,
|
||||||
|
the MCP server uses the depth preset (Issue #30).
|
||||||
|
"""
|
||||||
params = StdioServerParameters(
|
params = StdioServerParameters(
|
||||||
command=sys.executable,
|
command=sys.executable,
|
||||||
args=["-m", "researchers.web.server"],
|
args=["-m", "researchers.web.server"],
|
||||||
env=os.environ.copy(),
|
env=os.environ.copy(),
|
||||||
)
|
)
|
||||||
|
arguments: dict = {"question": question, "depth": depth}
|
||||||
|
if max_iterations is not None:
|
||||||
|
arguments["max_iterations"] = max_iterations
|
||||||
|
if token_budget is not None:
|
||||||
|
arguments["token_budget"] = token_budget
|
||||||
async with stdio_client(params) as (read, write):
|
async with stdio_client(params) as (read, write):
|
||||||
async with ClientSession(read, write) as session:
|
async with ClientSession(read, write) as session:
|
||||||
await session.initialize()
|
await session.initialize()
|
||||||
result = await session.call_tool(
|
result = await session.call_tool("research", arguments=arguments)
|
||||||
"research",
|
|
||||||
arguments={
|
|
||||||
"question": question,
|
|
||||||
"depth": depth,
|
|
||||||
"max_iterations": max_iterations,
|
|
||||||
"token_budget": token_budget,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
# FastMCP returns the tool's string return as a TextContent block.
|
# FastMCP returns the tool's string return as a TextContent block.
|
||||||
payload = result.content[0].text
|
payload = result.content[0].text
|
||||||
return ResearchResult.model_validate_json(payload)
|
return ResearchResult.model_validate_json(payload)
|
||||||
|
|
@ -178,21 +179,20 @@ def cli() -> None:
|
||||||
"--budget",
|
"--budget",
|
||||||
"token_budget",
|
"token_budget",
|
||||||
type=int,
|
type=int,
|
||||||
default=20_000,
|
default=None,
|
||||||
show_default=True,
|
help="Token budget for the research loop. Overrides the depth preset.",
|
||||||
help="Token budget for the research loop.",
|
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--max-iterations",
|
"--max-iterations",
|
||||||
type=int,
|
type=int,
|
||||||
default=5,
|
default=None,
|
||||||
show_default=True,
|
help="Max research loop iterations. Overrides the depth preset.",
|
||||||
)
|
)
|
||||||
def ask(
|
def ask(
|
||||||
question: str,
|
question: str,
|
||||||
depth: str,
|
depth: str,
|
||||||
token_budget: int,
|
token_budget: Optional[int],
|
||||||
max_iterations: int,
|
max_iterations: Optional[int],
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Ask the web researcher a QUESTION."""
|
"""Ask the web researcher a QUESTION."""
|
||||||
console = Console()
|
console = Console()
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ from researchers.web.models import (
|
||||||
OpenQuestion,
|
OpenQuestion,
|
||||||
ResearchConstraints,
|
ResearchConstraints,
|
||||||
ResearchResult,
|
ResearchResult,
|
||||||
|
constraints_for_depth,
|
||||||
)
|
)
|
||||||
from researchers.web.tools import SearchResult, fetch_url, tavily_search
|
from researchers.web.tools import SearchResult, fetch_url, tavily_search
|
||||||
from researchers.web.trace import TraceLogger
|
from researchers.web.trace import TraceLogger
|
||||||
|
|
@ -203,7 +204,10 @@ class WebResearcher:
|
||||||
Returns:
|
Returns:
|
||||||
A ResearchResult conforming to the v1 contract.
|
A ResearchResult conforming to the v1 contract.
|
||||||
"""
|
"""
|
||||||
constraints = constraints or ResearchConstraints()
|
# If the caller didn't supply explicit constraints, build them
|
||||||
|
# from the depth preset (Issue #30). Callers that DO pass a
|
||||||
|
# ResearchConstraints are taken at their word — explicit wins.
|
||||||
|
constraints = constraints or constraints_for_depth(depth)
|
||||||
trace = TraceLogger(trace_dir=self.trace_dir)
|
trace = TraceLogger(trace_dir=self.trace_dir)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
total_tokens = 0
|
total_tokens = 0
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,43 @@ class ResearchConstraints(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Depth presets (Issue #30): each depth name maps to default values for
# the constraint fields. Anything set explicitly (--max-iterations,
# --budget, or a hand-built ResearchConstraints) takes precedence over
# the numbers below.
#
# `balanced` reproduces the historical defaults, so existing callers see
# no behavior change. `shallow` is tuned for a "quick lookup" and `deep`
# for a "thorough investigation". Treat the values as starting points;
# Phase 3 stress testing will inform calibration.
DEPTH_PRESETS: dict[str, dict[str, int]] = {
    "shallow": {
        "max_iterations": 2,
        "token_budget": 5_000,
        "max_sources": 5,
    },
    "balanced": {
        "max_iterations": 5,
        "token_budget": 20_000,
        "max_sources": 10,
    },
    "deep": {
        "max_iterations": 8,
        "token_budget": 60_000,
        "max_sources": 20,
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def constraints_for_depth(
    depth: str,
    *,
    max_iterations: Optional[int] = None,
    token_budget: Optional[int] = None,
    max_sources: Optional[int] = None,
) -> ResearchConstraints:
    """Return the ResearchConstraints for ``depth``, applying any overrides.

    The values start from ``DEPTH_PRESETS[depth]``; a depth name that is
    not in the table uses the ``balanced`` entry instead. Each keyword
    argument that is not None replaces the corresponding preset value.

    Args:
        depth: Preset name, e.g. "shallow", "balanced", or "deep".
        max_iterations: Optional override for the preset's iteration cap.
        token_budget: Optional override for the preset's token budget.
        max_sources: Optional override for the preset's source count.

    Returns:
        A ResearchConstraints built from the merged values.
    """
    base = DEPTH_PRESETS.get(depth, DEPTH_PRESETS["balanced"])
    requested = {
        "max_iterations": max_iterations,
        "token_budget": token_budget,
        "max_sources": max_sources,
    }
    # Only explicitly supplied values may shadow the preset; merging into a
    # new dict also leaves the shared DEPTH_PRESETS entry untouched.
    explicit = {field: value for field, value in requested.items() if value is not None}
    return ResearchConstraints(**{**base, **explicit})
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Output types — Citation
|
# Output types — Citation
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from mcp.server.fastmcp import FastMCP
|
||||||
|
|
||||||
from obs import configure_logging, get_logger
|
from obs import configure_logging, get_logger
|
||||||
from researchers.web.agent import WebResearcher
|
from researchers.web.agent import WebResearcher
|
||||||
from researchers.web.models import ResearchConstraints
|
from researchers.web.models import constraints_for_depth
|
||||||
|
|
||||||
log = get_logger("marchwarden.mcp")
|
log = get_logger("marchwarden.mcp")
|
||||||
|
|
||||||
|
|
@ -52,17 +52,18 @@ async def research(
|
||||||
question: str,
|
question: str,
|
||||||
context: Optional[str] = None,
|
context: Optional[str] = None,
|
||||||
depth: str = "balanced",
|
depth: str = "balanced",
|
||||||
max_iterations: int = 5,
|
max_iterations: Optional[int] = None,
|
||||||
token_budget: int = 20000,
|
token_budget: Optional[int] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Research a question using web search and return a structured answer.
|
"""Research a question using web search and return a structured answer.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
question: The question to investigate.
|
question: The question to investigate.
|
||||||
context: What the caller already knows (optional).
|
context: What the caller already knows (optional).
|
||||||
depth: Research depth — "shallow", "balanced", or "deep".
|
depth: Research depth — "shallow", "balanced", or "deep". Each
|
||||||
max_iterations: Maximum number of search/fetch iterations (1-20).
|
depth picks default max_iterations / token_budget / max_sources.
|
||||||
token_budget: Maximum tokens to spend (1000-100000).
|
max_iterations: Override the depth preset for iterations (1-20).
|
||||||
|
token_budget: Override the depth preset for token budget.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
JSON string containing the full ResearchResult with answer,
|
JSON string containing the full ResearchResult with answer,
|
||||||
|
|
@ -70,7 +71,8 @@ async def research(
|
||||||
and cost_metadata.
|
and cost_metadata.
|
||||||
"""
|
"""
|
||||||
researcher = _get_researcher()
|
researcher = _get_researcher()
|
||||||
constraints = ResearchConstraints(
|
constraints = constraints_for_depth(
|
||||||
|
depth,
|
||||||
max_iterations=max_iterations,
|
max_iterations=max_iterations,
|
||||||
token_budget=token_budget,
|
token_budget=token_budget,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -443,3 +443,56 @@ class TestResearchResult:
|
||||||
"recency",
|
"recency",
|
||||||
}
|
}
|
||||||
assert set(data["confidence_factors"].keys()) == cf_keys
|
assert set(data["confidence_factors"].keys()) == cf_keys
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Depth presets (Issue #30)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
from researchers.web.models import DEPTH_PRESETS, constraints_for_depth
|
||||||
|
|
||||||
|
|
||||||
|
class TestDepthPresets:
    """Pin the depth preset values and the override rules of constraints_for_depth."""

    def test_shallow_preset(self):
        """``shallow`` resolves to its preset values."""
        c = constraints_for_depth("shallow")
        assert c.max_iterations == 2
        assert c.token_budget == 5_000
        assert c.max_sources == 5

    def test_balanced_preset_matches_historical_defaults(self):
        """``balanced`` must equal the plain ResearchConstraints defaults."""
        # Backward compat: balanced must equal the original ResearchConstraints defaults
        c = constraints_for_depth("balanced")
        default = ResearchConstraints()
        assert c.max_iterations == default.max_iterations == 5
        assert c.token_budget == default.token_budget == 20_000
        assert c.max_sources == default.max_sources == 10

    def test_deep_preset(self):
        """``deep`` resolves to its preset values."""
        c = constraints_for_depth("deep")
        assert c.max_iterations == 8
        assert c.token_budget == 60_000
        assert c.max_sources == 20

    def test_unknown_depth_falls_back_to_balanced(self):
        """An unrecognised depth name yields every ``balanced`` value."""
        c = constraints_for_depth("nonsense")
        balanced = DEPTH_PRESETS["balanced"]
        assert c.max_iterations == balanced["max_iterations"]
        assert c.token_budget == balanced["token_budget"]
        # Check all three fields so a partial fallback cannot slip through.
        assert c.max_sources == balanced["max_sources"]

    def test_explicit_overrides_win(self):
        """Overriding every field replaces the whole preset."""
        c = constraints_for_depth(
            "shallow",
            max_iterations=10,
            token_budget=42_000,
            max_sources=15,
        )
        assert c.max_iterations == 10
        assert c.token_budget == 42_000
        assert c.max_sources == 15

    def test_partial_override(self):
        """Overriding one field leaves the others at the preset."""
        # Only one field overridden — others stay at the preset
        c = constraints_for_depth("deep", token_budget=100_000)
        assert c.token_budget == 100_000
        assert c.max_iterations == 8  # deep preset
        assert c.max_sources == 20  # deep preset
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue