marchwarden/tests/test_cli.py

189 lines
6.2 KiB
Python
Raw Normal View History

"""Tests for the marchwarden CLI."""
from unittest.mock import patch
from click.testing import CliRunner
from cli.main import cli, render_result, render_trace
from researchers.web.models import (
Citation,
ConfidenceFactors,
CostMetadata,
DiscoveryEvent,
Gap,
GapCategory,
OpenQuestion,
ResearchResult,
)
from rich.console import Console
def _fixture_result() -> ResearchResult:
    """Build the fully populated ``ResearchResult`` shared by the CLI tests.

    The result carries one citation, one gap, one discovery event and one
    open question, plus confidence factors, cost metadata and a trace id,
    so that rendering tests have a recognisable value to look for in each
    section of the output.
    """
    usu_citation = Citation(
        source="web",
        locator="https://extension.usu.edu/yard-and-garden",
        title="USU Extension — Yard and Garden",
        snippet="USU recommends warm-season crops for Utah's climate.",
        raw_excerpt="Tomatoes, peppers, and squash thrive in Utah summers.",
        confidence=0.9,
    )
    microclimate_gap = Gap(
        topic="Microclimate variation",
        category=GapCategory.SCOPE_EXCEEDED,
        detail="Did not investigate elevation-specific recommendations.",
    )
    hardiness_event = DiscoveryEvent(
        type="related_research",
        suggested_researcher="docs",
        query="Utah USDA hardiness zones",
        reason="Zone-specific guidance would improve answer.",
    )
    cool_season_question = OpenQuestion(
        question="What are the best cool-season crops?",
        context="Answer focused on warm-season crops.",
        priority="medium",
    )
    factors = ConfidenceFactors(
        num_corroborating_sources=3,
        source_authority="high",
        contradiction_detected=False,
        query_specificity_match=0.85,
        budget_exhausted=False,
        recency="current",
    )
    cost = CostMetadata(
        tokens_used=4321,
        iterations_run=3,
        wall_time_sec=12.5,
        budget_exhausted=False,
        model_id="claude-sonnet-4-6",
    )

    return ResearchResult(
        answer="Tomatoes, peppers, squash, and beans grow well in Utah.",
        citations=[usu_citation],
        gaps=[microclimate_gap],
        discovery_events=[hardiness_event],
        open_questions=[cool_season_question],
        confidence=0.82,
        confidence_factors=factors,
        cost_metadata=cost,
        trace_id="trace-abc-123",
    )
class TestRenderResult:
    """Tests for ``render_result`` against a recording Rich console."""

    def test_renders_all_sections(self):
        """The rendered text contains a marker from each part of the fixture."""
        recorder = Console(record=True, width=120)
        render_result(_fixture_result(), recorder)
        out = recorder.export_text()

        # One recognisable fragment per section the renderer is expected to
        # emit for the shared fixture result.
        expected_fragments = (
            "Tomatoes",
            "USU Extension",
            "scope_exceeded",
            "related_research",
            "cool-season",
            "Confidence",
            "claude-sonnet-4-6",
            "trace-abc-123",
        )
        for fragment in expected_fragments:
            assert fragment in out
class TestAskCommand:
    """Tests for the ``ask`` command with ``call_research_tool`` patched out."""

    def test_ask_invokes_mcp_and_renders(self):
        """``ask`` forwards its options to the research tool and renders the result.

        The patched tool asserts the values it receives, so a failure inside
        the stub shows up as a non-zero exit code with the CLI output attached.
        """
        runner = CliRunner()
        fixture = _fixture_result()

        async def fake_call(question, depth, max_iterations, token_budget):
            # Check that the command-line options arrive under these names.
            assert question == "What grows in Utah?"
            assert depth == "shallow"
            assert max_iterations == 2
            assert token_budget == 5000
            return fixture

        with patch("cli.main.call_research_tool", side_effect=fake_call):
            result = runner.invoke(
                cli,
                [
                    "ask",
                    "What grows in Utah?",
                    "--depth",
                    "shallow",
                    "--max-iterations",
                    "2",
                    "--budget",
                    "5000",
                ],
            )

        assert result.exit_code == 0, result.output
        assert "Tomatoes" in result.output
        assert "trace-abc-123" in result.output

    def test_ask_handles_error(self):
        """A failure in the research tool exits with code 1 and shows the message."""
        runner = CliRunner()

        async def boom(*args, **kwargs):
            # Accept any call shape so the stub always raises the intended
            # RuntimeError rather than a TypeError from a signature mismatch.
            raise RuntimeError("mcp went sideways")

        with patch("cli.main.call_research_tool", side_effect=boom):
            result = runner.invoke(cli, ["ask", "anything"])

        assert result.exit_code == 1
        assert "mcp went sideways" in result.output
class TestReplayCommand:
    """Tests for the ``replay`` command and for ``render_trace``."""

    def _write_trace(self, tmp_path, trace_id="trace-xyz"):
        """Write a three-step JSONL trace to ``tmp_path`` and return its path.

        The file is named ``<trace_id>.jsonl`` and holds one JSON object per
        line: a ``search`` step, a ``fetch_url`` step and a ``synthesize`` step.
        """
        records = (
            (
                '{"step": 1, "action": "search", "decision": "initial query", '
                '"timestamp": "2026-04-08T00:00:00Z", "query": "utah crops"}\n'
            ),
            (
                '{"step": 2, "action": "fetch_url", "decision": "promising source", '
                '"timestamp": "2026-04-08T00:00:01Z", "url": "https://example.com", '
                '"content_hash": "sha256:deadbeef"}\n'
            ),
            (
                '{"step": 3, "action": "synthesize", "decision": "have enough", '
                '"timestamp": "2026-04-08T00:00:02Z"}\n'
            ),
        )
        trace_file = tmp_path / f"{trace_id}.jsonl"
        trace_file.write_text("".join(records))
        return trace_file

    def test_replay_renders_trace(self, tmp_path):
        """Replaying a written trace prints its id, actions and step details."""
        self._write_trace(tmp_path)
        argv = ["replay", "trace-xyz", "--trace-dir", str(tmp_path)]

        result = CliRunner().invoke(cli, argv)

        assert result.exit_code == 0, result.output
        expected_fragments = (
            "trace-xyz",
            "search",
            "fetch_url",
            "synthesize",
            "sha256:deadbeef",
            "utah crops",
        )
        for fragment in expected_fragments:
            assert fragment in result.output

    def test_replay_unknown_trace_id(self, tmp_path):
        """Replaying an id with no file in the trace dir exits 1 with a message."""
        argv = ["replay", "missing-id", "--trace-dir", str(tmp_path)]

        result = CliRunner().invoke(cli, argv)

        assert result.exit_code == 1
        assert "no trace file found" in result.output

    def test_replay_invalid_json(self, tmp_path):
        """A trace file containing malformed JSON exits 1 with a message."""
        broken_file = tmp_path / "broken.jsonl"
        broken_file.write_text("{not json}\n")
        argv = ["replay", "broken", "--trace-dir", str(tmp_path)]

        result = CliRunner().invoke(cli, argv)

        assert result.exit_code == 1
        assert "invalid JSON" in result.output

    def test_render_trace_empty(self):
        """Rendering an empty trace still shows the id and mentions "empty"."""
        recorder = Console(record=True, width=120)
        render_trace([], "empty-trace", recorder)
        rendered = recorder.export_text()

        assert "empty-trace" in rendered
        assert "empty" in rendered.lower()