M1.4: MCP server wrapping web researcher

FastMCP server exposing a single 'research' tool:
- Delegates to WebResearcher with keys from ~/secrets
- Accepts question, context, depth, max_iterations, token_budget
- Returns full ResearchResult as JSON
- Configurable model via MARCHWARDEN_MODEL env var
- Runnable as: python -m researchers.web

4 tests: secret reading, JSON response validation, default parameters.

Refs: archeious/marchwarden#1

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-04-08 14:41:13 -06:00 · 2026-04-08 14:41:13 -06:00 · 5d894d9e10
commit 5d894d9e10
parent f593dd060b
3 changed files with 251 additions and 0 deletions
--- a/researchers/web/__main__.py
+++ b/researchers/web/__main__.py
@ -0,0 +1,8 @@
+"""Allow running the web researcher MCP server as a module.
+
+Usage: python -m researchers.web
+"""
+
+from researchers.web.server import main
+
+# Guard the entry point so that merely importing this module (for example
+# during test collection or by documentation tooling) does not start the
+# stdio server. Running it via `python -m` still sets __name__ to "__main__".
+if __name__ == "__main__":
+    main()
--- a/researchers/web/server.py
+++ b/researchers/web/server.py
@ -0,0 +1,91 @@
+"""MCP server for the web researcher.
+
+Exposes a single tool `research` that delegates to WebResearcher.
+Run with: python -m researchers.web.server
+"""
+
+import asyncio
+import os
+import sys
+from typing import Optional
+
+from mcp.server.fastmcp import FastMCP
+
+from researchers.web.agent import WebResearcher
+from researchers.web.models import ResearchConstraints
+
+# Guidance text handed to FastMCP as the server's `instructions`.
+_SERVER_INSTRUCTIONS = (
+    "A Marchwarden web research specialist. "
+    "Call the research tool with a question to get a grounded, "
+    "evidence-based answer with citations, gaps, open questions, "
+    "and confidence scoring."
+)
+
+# Module-level server instance; the `research` tool in this module is
+# registered on it through the `@mcp.tool()` decorator.
+mcp = FastMCP(
+    name="marchwarden-web-researcher",
+    instructions=_SERVER_INSTRUCTIONS,
+)
+
+
+def _read_secret(key: str) -> str:
+    """Read a secret from the ~/secrets file.
+
+    The file is scanned line by line for a ``KEY=value`` entry. Whitespace
+    around the key and around the value is ignored, only the first ``=``
+    separates key from value, and the first matching line wins. Lines
+    without ``=`` (blank lines, comments) never match.
+
+    Args:
+        key: Name of the secret to look up.
+
+    Returns:
+        The value stored for ``key``, stripped of surrounding whitespace.
+
+    Raises:
+        ValueError: If no line in the file defines ``key``.
+        OSError: If the secrets file cannot be opened.
+    """
+    secrets_path = os.path.expanduser("~/secrets")
+    # Explicit encoding so the result does not depend on the host locale.
+    with open(secrets_path, encoding="utf-8") as f:
+        for line in f:
+            # partition() splits on the first "=" only, so values may
+            # themselves contain "=" characters.
+            name, sep, value = line.partition("=")
+            if sep and name.strip() == key:
+                return value.strip()
+    raise ValueError(f"Key {key} not found in {secrets_path}")
+
+
+def _get_researcher() -> WebResearcher:
+    """Create a WebResearcher with keys from ~/secrets.
+
+    The model is taken from the MARCHWARDEN_MODEL environment variable; when
+    that variable is unset or empty, a built-in default is used.
+
+    Returns:
+        A WebResearcher configured with the Anthropic and Tavily API keys
+        read via `_read_secret` and the selected model id.
+
+    Raises:
+        ValueError: If either API key is missing from the secrets file
+            (raised by `_read_secret`).
+    """
+    # `or` (rather than a .get() default) also covers MARCHWARDEN_MODEL="".
+    # The fallback is the dated Claude Sonnet 4.5 snapshot id; the previous
+    # value combined the 4.5 name with a different model's date suffix.
+    model_id = os.environ.get("MARCHWARDEN_MODEL") or "claude-sonnet-4-5-20250929"
+    return WebResearcher(
+        anthropic_api_key=_read_secret("ANTHROPIC_API_KEY"),
+        tavily_api_key=_read_secret("TAVILY_API_KEY"),
+        model_id=model_id,
+    )
+
+
+@mcp.tool()
+async def research(
+    question: str,
+    context: Optional[str] = None,
+    depth: str = "balanced",
+    max_iterations: int = 5,
+    token_budget: int = 20000,
+) -> str:
+    """Research a question using web search and return a structured answer.
+
+    Args:
+        question: The question to investigate.
+        context: What the caller already knows (optional).
+        depth: Research depth — "shallow", "balanced", or "deep".
+        max_iterations: Maximum number of search/fetch iterations (1-20).
+        token_budget: Maximum tokens to spend (1000-100000).
+
+    Returns:
+        JSON string containing the full ResearchResult with answer,
+        citations, gaps, discovery_events, open_questions, confidence,
+        and cost_metadata.
+    """
+    # NOTE(review): the docstring is kept word-for-word on purpose; FastMCP
+    # presumably surfaces it to clients as the tool description — confirm
+    # before rewording it.
+    agent = _get_researcher()
+
+    # The iteration and token limits travel together as one constraints
+    # object; everything else is forwarded unchanged.
+    outcome = await agent.research(
+        question=question,
+        context=context,
+        depth=depth,
+        constraints=ResearchConstraints(
+            max_iterations=max_iterations,
+            token_budget=token_budget,
+        ),
+    )
+
+    # Serialise the whole result model as indented JSON text.
+    return outcome.model_dump_json(indent=2)
+
+
+def main() -> None:
+    """Start the MCP server, communicating over the stdio transport."""
+    mcp.run(transport="stdio")
+
+
+# Direct execution (`python -m researchers.web.server`) starts the server.
+if __name__ == "__main__":
+    main()
--- a/tests/test_server.py
+++ b/tests/test_server.py
@ -0,0 +1,152 @@
+"""Tests for the MCP server."""
+
+import json
+from unittest.mock import AsyncMock, patch, MagicMock
+
+import pytest
+
+from researchers.web.server import _read_secret, research
+
+
+# ---------------------------------------------------------------------------
+# _read_secret
+# ---------------------------------------------------------------------------
+
+
+class TestReadSecret:
+    """Unit tests for `_read_secret`, run against a temporary secrets file."""
+
+    # `os.path.expanduser` as reached through the server module; patched so
+    # the lookup opens the temporary file instead of the real ~/secrets.
+    EXPANDUSER = "researchers.web.server.os.path.expanduser"
+
+    def test_reads_key(self, tmp_path):
+        """Every KEY=value line in the file can be retrieved by its key."""
+        secrets_file = tmp_path / "secrets"
+        secrets_file.write_text("FOO=bar\nBAZ=qux\n")
+        with patch(self.EXPANDUSER, return_value=str(secrets_file)):
+            assert _read_secret("FOO") == "bar"
+            assert _read_secret("BAZ") == "qux"
+
+    def test_missing_key_raises(self, tmp_path):
+        """Looking up an absent key raises ValueError naming that key."""
+        secrets_file = tmp_path / "secrets"
+        secrets_file.write_text("FOO=bar\n")
+        with patch(self.EXPANDUSER, return_value=str(secrets_file)), pytest.raises(
+            ValueError, match="MISSING"
+        ):
+            _read_secret("MISSING")
+
+
+# ---------------------------------------------------------------------------
+# research tool
+# ---------------------------------------------------------------------------
+
+
+class TestResearchTool:
+    """Tests for the `research` tool with the WebResearcher mocked out."""
+
+    @pytest.mark.asyncio
+    async def test_returns_valid_json(self):
+        """The research tool should return a JSON string with all contract fields."""
+        from researchers.web.models import (
+            ConfidenceFactors,
+            CostMetadata,
+            ResearchResult,
+        )
+
+        # Canned result the mocked researcher hands back.
+        canned = ResearchResult(
+            answer="Test answer.",
+            citations=[],
+            gaps=[],
+            discovery_events=[],
+            open_questions=[],
+            confidence=0.8,
+            confidence_factors=ConfidenceFactors(
+                num_corroborating_sources=1,
+                source_authority="medium",
+                contradiction_detected=False,
+                query_specificity_match=0.7,
+                budget_exhausted=False,
+                recency="current",
+            ),
+            cost_metadata=CostMetadata(
+                tokens_used=500,
+                iterations_run=1,
+                wall_time_sec=5.0,
+                budget_exhausted=False,
+                model_id="claude-test",
+            ),
+            trace_id="test-trace-id",
+        )
+
+        fake_researcher = AsyncMock()
+        fake_researcher.research.return_value = canned
+        with patch(
+            "researchers.web.server._get_researcher",
+            return_value=fake_researcher,
+        ):
+            payload = await research(
+                question="test question",
+                context="some context",
+                depth="shallow",
+                max_iterations=2,
+                token_budget=5000,
+            )
+
+        data = json.loads(payload)
+        assert data["answer"] == "Test answer."
+        assert data["confidence"] == 0.8
+        assert data["trace_id"] == "test-trace-id"
+        for field in (
+            "citations",
+            "gaps",
+            "discovery_events",
+            "open_questions",
+            "confidence_factors",
+            "cost_metadata",
+        ):
+            assert field in data
+
+        # Verify researcher was called with correct args
+        fake_researcher.research.assert_called_once()
+        sent = fake_researcher.research.call_args.kwargs
+        assert sent["question"] == "test question"
+        assert sent["context"] == "some context"
+        assert sent["depth"] == "shallow"
+        assert sent["constraints"].max_iterations == 2
+        assert sent["constraints"].token_budget == 5000
+
+    @pytest.mark.asyncio
+    async def test_defaults(self):
+        """Test that defaults work when optional args are omitted."""
+        from researchers.web.models import (
+            ConfidenceFactors,
+            CostMetadata,
+            ResearchResult,
+        )
+
+        # Canned result; `recency` is deliberately left unset here.
+        canned = ResearchResult(
+            answer="Default test.",
+            citations=[],
+            gaps=[],
+            discovery_events=[],
+            open_questions=[],
+            confidence=0.5,
+            confidence_factors=ConfidenceFactors(
+                num_corroborating_sources=0,
+                source_authority="low",
+                contradiction_detected=False,
+                query_specificity_match=0.5,
+                budget_exhausted=False,
+            ),
+            cost_metadata=CostMetadata(
+                tokens_used=100,
+                iterations_run=1,
+                wall_time_sec=1.0,
+                budget_exhausted=False,
+                model_id="claude-test",
+            ),
+            trace_id="test-id",
+        )
+
+        fake_researcher = AsyncMock()
+        fake_researcher.research.return_value = canned
+        with patch(
+            "researchers.web.server._get_researcher",
+            return_value=fake_researcher,
+        ):
+            payload = await research(question="just a question")
+
+        data = json.loads(payload)
+        assert data["answer"] == "Default test."
+
+        # Only `question` was supplied, so the rest must be the tool defaults.
+        sent = fake_researcher.research.call_args.kwargs
+        assert sent["context"] is None
+        assert sent["depth"] == "balanced"
+        assert sent["constraints"].max_iterations == 5
+        assert sent["constraints"].token_budget == 20000