"""Tests for the marchwarden CLI.""" from unittest.mock import patch from click.testing import CliRunner from cli.main import cli, render_costs, render_result, render_trace from researchers.web.models import ( Citation, ConfidenceFactors, CostMetadata, DiscoveryEvent, Gap, GapCategory, OpenQuestion, ResearchResult, ) from rich.console import Console def _fixture_result() -> ResearchResult: return ResearchResult( answer="Tomatoes, peppers, squash, and beans grow well in Utah.", citations=[ Citation( source="web", locator="https://extension.usu.edu/yard-and-garden", title="USU Extension — Yard and Garden", snippet="USU recommends warm-season crops for Utah's climate.", raw_excerpt="Tomatoes, peppers, and squash thrive in Utah summers.", confidence=0.9, ), ], gaps=[ Gap( topic="Microclimate variation", category=GapCategory.SCOPE_EXCEEDED, detail="Did not investigate elevation-specific recommendations.", ), ], discovery_events=[ DiscoveryEvent( type="related_research", suggested_researcher="docs", query="Utah USDA hardiness zones", reason="Zone-specific guidance would improve answer.", ), ], open_questions=[ OpenQuestion( question="What are the best cool-season crops?", context="Answer focused on warm-season crops.", priority="medium", ), ], confidence=0.82, confidence_factors=ConfidenceFactors( num_corroborating_sources=3, source_authority="high", contradiction_detected=False, query_specificity_match=0.85, budget_exhausted=False, recency="current", ), cost_metadata=CostMetadata( tokens_used=4321, iterations_run=3, wall_time_sec=12.5, budget_exhausted=False, model_id="claude-sonnet-4-6", ), trace_id="trace-abc-123", ) class TestRenderResult: def test_renders_all_sections(self): console = Console(record=True, width=120) render_result(_fixture_result(), console) out = console.export_text() assert "Tomatoes" in out assert "USU Extension" in out assert "scope_exceeded" in out assert "related_research" in out assert "cool-season" in out assert "Confidence" in out assert "claude-sonnet-4-6" in out assert "trace-abc-123" in out class TestAskCommand: def test_ask_invokes_mcp_and_renders(self): runner = CliRunner() fixture = _fixture_result() async def fake_call(question, depth, max_iterations, token_budget): assert question == "What grows in Utah?" assert depth == "shallow" assert max_iterations == 2 assert token_budget == 5000 return fixture with patch("cli.main.call_research_tool", side_effect=fake_call): result = runner.invoke( cli, [ "ask", "What grows in Utah?", "--depth", "shallow", "--max-iterations", "2", "--budget", "5000", ], ) assert result.exit_code == 0, result.output assert "Tomatoes" in result.output assert "trace-abc-123" in result.output def test_ask_handles_error(self): runner = CliRunner() async def boom(**kwargs): raise RuntimeError("mcp went sideways") with patch("cli.main.call_research_tool", side_effect=boom): result = runner.invoke(cli, ["ask", "anything"]) assert result.exit_code == 1 assert "mcp went sideways" in result.output class TestReplayCommand: def _write_trace(self, tmp_path, trace_id="trace-xyz"): path = tmp_path / f"{trace_id}.jsonl" path.write_text( '{"step": 1, "action": "search", "decision": "initial query", ' '"timestamp": "2026-04-08T00:00:00Z", "query": "utah crops"}\n' '{"step": 2, "action": "fetch_url", "decision": "promising source", ' '"timestamp": "2026-04-08T00:00:01Z", "url": "https://example.com", ' '"content_hash": "sha256:deadbeef"}\n' '{"step": 3, "action": "synthesize", "decision": "have enough", ' '"timestamp": "2026-04-08T00:00:02Z"}\n' ) return path def test_replay_renders_trace(self, tmp_path): runner = CliRunner() self._write_trace(tmp_path) result = runner.invoke( cli, ["replay", "trace-xyz", "--trace-dir", str(tmp_path)], ) assert result.exit_code == 0, result.output assert "trace-xyz" in result.output assert "search" in result.output assert "fetch_url" in result.output assert "synthesize" in result.output assert "sha256:deadbeef" in result.output assert "utah crops" in result.output def test_replay_unknown_trace_id(self, tmp_path): runner = CliRunner() result = runner.invoke( cli, ["replay", "missing-id", "--trace-dir", str(tmp_path)], ) assert result.exit_code == 1 assert "no trace file found" in result.output def test_replay_invalid_json(self, tmp_path): runner = CliRunner() (tmp_path / "broken.jsonl").write_text("{not json}\n") result = runner.invoke( cli, ["replay", "broken", "--trace-dir", str(tmp_path)], ) assert result.exit_code == 1 assert "invalid JSON" in result.output def test_replay_renders_persisted_result(self, tmp_path): """Issue #54: replay loads .result.json sibling and renders it.""" runner = CliRunner() self._write_trace(tmp_path) result_payload = { "answer": "Test answer about Utah crops.", "citations": [ { "source": "web", "locator": "https://example.com/utah", "title": "Utah Guide", "snippet": None, "raw_excerpt": "raw excerpt content", "confidence": 0.9, } ], "gaps": [ { "topic": "irrigation", "category": "scope_exceeded", "detail": "out of scope", } ], "discovery_events": [], "open_questions": [], "confidence": 0.8, "confidence_factors": { "num_corroborating_sources": 2, "source_authority": "high", "contradiction_detected": False, "query_specificity_match": 0.8, "budget_exhausted": False, "recency": "current", }, "cost_metadata": { "tokens_used": 1000, "iterations_run": 2, "wall_time_sec": 12.5, "budget_exhausted": False, "model_id": "claude-test", }, "trace_id": "trace-xyz", } import json as _j (tmp_path / "trace-xyz.result.json").write_text(_j.dumps(result_payload)) result = runner.invoke( cli, ["replay", "trace-xyz", "--trace-dir", str(tmp_path)], ) assert result.exit_code == 0, result.output # Step log still rendered assert "search" in result.output # Persisted result also rendered assert "Test answer about Utah crops" in result.output assert "scope_exceeded" in result.output assert "irrigation" in result.output def test_replay_without_result_file_notes_absence(self, tmp_path): runner = CliRunner() self._write_trace(tmp_path) result = runner.invoke( cli, ["replay", "trace-xyz", "--trace-dir", str(tmp_path)], ) assert result.exit_code == 0 assert "No persisted result file" in result.output def test_render_trace_empty(self): console = Console(record=True, width=120) render_trace([], "empty-trace", console) out = console.export_text() assert "empty-trace" in out assert "empty" in out.lower() # --------------------------------------------------------------------------- # costs command # --------------------------------------------------------------------------- import json as _json def _write_ledger(path, entries): path.write_text("\n".join(_json.dumps(e) for e in entries) + "\n") def _ledger_fixture(tmp_path): path = tmp_path / "costs.jsonl" entries = [ { "timestamp": "2026-04-06T10:00:00Z", "trace_id": "t1", "question": "What is X?", "model_id": "claude-sonnet-4-6", "tokens_used": 1000, "tokens_input": 800, "tokens_output": 200, "iterations_run": 1, "wall_time_sec": 5.0, "tavily_searches": 1, "estimated_cost_usd": 0.005, "budget_exhausted": False, "confidence": 0.9, }, { "timestamp": "2026-04-07T11:00:00Z", "trace_id": "t2", "question": "Bigger query", "model_id": "claude-opus-4-6", "tokens_used": 50000, "tokens_input": 40000, "tokens_output": 10000, "iterations_run": 5, "wall_time_sec": 120.0, "tavily_searches": 8, "estimated_cost_usd": 1.25, "budget_exhausted": True, "confidence": 0.7, }, { "timestamp": "2026-04-08T12:00:00Z", "trace_id": "t3", "question": "Unknown model run", "model_id": "future-model-7", "tokens_used": 500, "tokens_input": 400, "tokens_output": 100, "iterations_run": 1, "wall_time_sec": 2.0, "tavily_searches": 0, "estimated_cost_usd": None, "budget_exhausted": False, "confidence": 0.5, }, ] _write_ledger(path, entries) return path class TestCostsCommand: def test_renders_summary(self, tmp_path): path = _ledger_fixture(tmp_path) runner = CliRunner() result = runner.invoke(cli, ["costs", "--ledger", str(path)]) assert result.exit_code == 0, result.output # Summary assert "Calls: 3" in result.output assert "$1.2550" in result.output # Per-day rows assert "2026-04-06" in result.output assert "2026-04-07" in result.output assert "2026-04-08" in result.output # Per-model rows assert "claude-sonnet-4-6" in result.output assert "claude-opus-4-6" in result.output # Highest-cost panel assert "t2" in result.output # Unknown model warning assert "unknown model price" in result.output def test_filter_by_model(self, tmp_path): path = _ledger_fixture(tmp_path) runner = CliRunner() result = runner.invoke( cli, ["costs", "--ledger", str(path), "--model", "claude-opus-4-6"], ) assert result.exit_code == 0 assert "Calls: 1" in result.output assert "claude-sonnet-4-6" not in result.output def test_filter_by_since_iso(self, tmp_path): path = _ledger_fixture(tmp_path) runner = CliRunner() result = runner.invoke( cli, ["costs", "--ledger", str(path), "--since", "2026-04-08"], ) assert result.exit_code == 0 assert "Calls: 1" in result.output assert "future-model-7" in result.output assert "claude-sonnet-4-6" not in result.output def test_json_output(self, tmp_path): path = _ledger_fixture(tmp_path) runner = CliRunner() result = runner.invoke( cli, ["costs", "--ledger", str(path), "--json"], ) assert result.exit_code == 0 lines = [l for l in result.output.strip().splitlines() if l] assert len(lines) == 3 first = _json.loads(lines[0]) assert first["trace_id"] == "t1" def test_empty_ledger(self, tmp_path): path = tmp_path / "missing.jsonl" runner = CliRunner() result = runner.invoke(cli, ["costs", "--ledger", str(path)]) assert result.exit_code == 0 assert "No cost data yet" in result.output def test_render_costs_handles_empty(self): console = Console(record=True, width=120) render_costs([], console) out = console.export_text() assert "No cost data yet" in out