"""Marchwarden CLI shim. Talks to the web researcher MCP server over stdio and pretty-prints ResearchResult contracts to the terminal. """ import asyncio import json import os import re import sys from collections import defaultdict from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Optional import click from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client from rich.console import Console from rich.panel import Panel from rich.table import Table from rich.text import Text from obs import configure_logging, get_logger from obs.costs import DEFAULT_LEDGER_PATH from researchers.web.models import ResearchResult DEFAULT_TRACE_DIR = "~/.marchwarden/traces" log = get_logger("marchwarden.cli") # --------------------------------------------------------------------------- # MCP client # --------------------------------------------------------------------------- async def call_research_tool( question: str, depth: str, max_iterations: Optional[int], token_budget: Optional[int], ) -> ResearchResult: """Spawn the web researcher MCP server and call its `research` tool. ``max_iterations`` and ``token_budget`` are optional — when None, the MCP server uses the depth preset (Issue #30). """ params = StdioServerParameters( command=sys.executable, args=["-m", "researchers.web.server"], env=os.environ.copy(), ) arguments: dict = {"question": question, "depth": depth} if max_iterations is not None: arguments["max_iterations"] = max_iterations if token_budget is not None: arguments["token_budget"] = token_budget async with stdio_client(params) as (read, write): async with ClientSession(read, write) as session: await session.initialize() result = await session.call_tool("research", arguments=arguments) # FastMCP returns the tool's string return as a TextContent block. payload = result.content[0].text return ResearchResult.model_validate_json(payload) # --------------------------------------------------------------------------- # Pretty printing # --------------------------------------------------------------------------- def render_result(result: ResearchResult, console: Console) -> None: """Render a ResearchResult to the console using rich.""" # Answer console.print( Panel( result.answer, title="[bold cyan]Answer[/bold cyan]", border_style="cyan", ) ) # Citations if result.citations: table = Table(title="Citations", show_lines=True, expand=True) table.add_column("#", style="dim", width=3) table.add_column("Title / Locator", overflow="fold") table.add_column("Excerpt", overflow="fold") table.add_column("Conf", justify="right", width=5) for i, c in enumerate(result.citations, 1): header = f"[bold]{c.title or c.locator}[/bold]\n[dim]{c.locator}[/dim]" table.add_row(str(i), header, c.raw_excerpt, f"{c.confidence:.2f}") console.print(table) else: console.print("[dim]No citations.[/dim]") # Gaps grouped by category if result.gaps: gap_table = Table(title="Gaps", show_lines=True, expand=True) gap_table.add_column("Category", style="yellow") gap_table.add_column("Topic") gap_table.add_column("Detail", overflow="fold") for g in result.gaps: gap_table.add_row(g.category.value, g.topic, g.detail) console.print(gap_table) # Discovery events if result.discovery_events: de_table = Table(title="Discovery Events", show_lines=True, expand=True) de_table.add_column("Type", style="magenta") de_table.add_column("Suggested Researcher") de_table.add_column("Query", overflow="fold") de_table.add_column("Reason", overflow="fold") for d in result.discovery_events: de_table.add_row( d.type, d.suggested_researcher or "-", d.query, d.reason ) console.print(de_table) # Open questions if result.open_questions: oq_table = Table(title="Open Questions", show_lines=True, expand=True) oq_table.add_column("Priority", style="green") oq_table.add_column("Question", overflow="fold") oq_table.add_column("Context", overflow="fold") for q in result.open_questions: oq_table.add_row(q.priority, q.question, q.context) console.print(oq_table) # Confidence + factors cf = result.confidence_factors conf_text = Text() conf_text.append(f"Overall: {result.confidence:.2f}\n", style="bold") conf_text.append(f"Corroborating sources: {cf.num_corroborating_sources}\n") conf_text.append(f"Source authority: {cf.source_authority}\n") conf_text.append(f"Contradiction detected: {cf.contradiction_detected}\n") conf_text.append(f"Query specificity match: {cf.query_specificity_match:.2f}\n") budget_status = "spent" if cf.budget_exhausted else "under cap" conf_text.append(f"Budget status: {budget_status}\n") conf_text.append(f"Recency: {cf.recency or 'unknown'}") console.print(Panel(conf_text, title="Confidence", border_style="green")) # Cost cm = result.cost_metadata cost_text = Text() cost_text.append(f"Tokens: {cm.tokens_used}\n") cost_text.append(f"Iterations: {cm.iterations_run}\n") cost_text.append(f"Wall time: {cm.wall_time_sec:.2f}s\n") cost_text.append(f"Model: {cm.model_id}") console.print(Panel(cost_text, title="Cost", border_style="blue")) # Trace footer console.print(f"\n[dim]trace_id: {result.trace_id}[/dim]") # --------------------------------------------------------------------------- # Click app # --------------------------------------------------------------------------- @click.group() def cli() -> None: """Marchwarden — agentic research CLI.""" configure_logging() @cli.command() @click.argument("question") @click.option( "--depth", type=click.Choice(["shallow", "balanced", "deep"]), default="balanced", show_default=True, ) @click.option( "--budget", "token_budget", type=int, default=None, help="Token budget for the research loop. Overrides the depth preset.", ) @click.option( "--max-iterations", type=int, default=None, help="Max research loop iterations. Overrides the depth preset.", ) def ask( question: str, depth: str, token_budget: Optional[int], max_iterations: Optional[int], ) -> None: """Ask the web researcher a QUESTION.""" console = Console() console.print(f"[dim]Researching:[/dim] {question}\n") log.info( "ask_started", question=question, depth=depth, max_iterations=max_iterations, token_budget=token_budget, ) try: result = asyncio.run( call_research_tool( question=question, depth=depth, max_iterations=max_iterations, token_budget=token_budget, ) ) except Exception as e: log.error("ask_failed", question=question, error=str(e), exc_info=True) console.print(f"[bold red]Error:[/bold red] {e}") sys.exit(1) log.info( "ask_completed", trace_id=result.trace_id, confidence=result.confidence, citations=len(result.citations), tokens_used=result.cost_metadata.tokens_used, wall_time_sec=result.cost_metadata.wall_time_sec, ) render_result(result, console) def _resolve_trace_path(trace_id: str, trace_dir: Optional[str]) -> Path: """Resolve the JSONL path for a trace_id.""" base = Path(os.path.expanduser(trace_dir or DEFAULT_TRACE_DIR)) return base / f"{trace_id}.jsonl" def render_trace(entries: list[dict], trace_id: str, console: Console) -> None: """Pretty-print a list of trace entries.""" console.print( Panel( f"[bold]trace_id:[/bold] {trace_id}\n[bold]steps:[/bold] {len(entries)}", title="[cyan]Replay[/cyan]", border_style="cyan", ) ) if not entries: console.print("[dim]Trace file is empty.[/dim]") return table = Table(show_lines=True, expand=True) table.add_column("#", style="dim", width=4) table.add_column("Action", style="magenta") table.add_column("Decision", overflow="fold") table.add_column("Details", overflow="fold") table.add_column("Hash", style="dim", overflow="fold") reserved = {"step", "action", "decision", "timestamp", "content_hash"} for e in entries: step = str(e.get("step", "?")) action = str(e.get("action", "")) decision = str(e.get("decision", "")) content_hash = str(e.get("content_hash", "") or "") extras = {k: v for k, v in e.items() if k not in reserved} details = "\n".join(f"{k}: {v}" for k, v in extras.items()) table.add_row(step, action, decision, details, content_hash) console.print(table) @cli.command() @click.argument("trace_id") @click.option( "--trace-dir", default=None, help=f"Trace directory (default: {DEFAULT_TRACE_DIR}).", ) def replay(trace_id: str, trace_dir: Optional[str]) -> None: """Replay a prior research run by TRACE_ID.""" console = Console() path = _resolve_trace_path(trace_id, trace_dir) if not path.exists(): console.print( f"[bold red]Error:[/bold red] no trace file found for " f"trace_id [bold]{trace_id}[/bold] at {path}" ) sys.exit(1) entries: list[dict] = [] with open(path, "r", encoding="utf-8") as f: for lineno, line in enumerate(f, 1): line = line.strip() if not line: continue try: entries.append(json.loads(line)) except json.JSONDecodeError as e: console.print( f"[bold red]Error:[/bold red] invalid JSON on line {lineno}: {e}" ) sys.exit(1) render_trace(entries, trace_id, console) # Issue #54: if the agent persisted a sibling .result.json, render # the full structured ResearchResult underneath the step log so # replay can show which gaps fired, which sources were cited, etc. result_path = path.parent / f"{trace_id}.result.json" if result_path.exists(): try: result = ResearchResult.model_validate_json( result_path.read_text(encoding="utf-8") ) except Exception as exc: console.print( f"[yellow]warning:[/yellow] could not parse {result_path.name}: {exc}" ) else: console.print() render_result(result, console) else: console.print( "[dim]No persisted result file alongside this trace.[/dim]" ) # --------------------------------------------------------------------------- # costs command # --------------------------------------------------------------------------- _RELATIVE_RE = re.compile(r"^(\d+)([dwhm])$") def _parse_when(value: str) -> datetime: """Parse an ISO date or a relative shorthand like '7d', '24h'.""" m = _RELATIVE_RE.match(value) if m: n = int(m.group(1)) unit = m.group(2) delta = { "h": timedelta(hours=n), "d": timedelta(days=n), "w": timedelta(weeks=n), "m": timedelta(days=30 * n), }[unit] return datetime.now(timezone.utc) - delta # Otherwise treat as ISO date / datetime dt = datetime.fromisoformat(value) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt def _load_ledger(path: Path) -> list[dict]: if not path.exists(): return [] entries: list[dict] = [] with open(path, "r", encoding="utf-8") as f: for lineno, line in enumerate(f, 1): line = line.strip() if not line: continue try: entries.append(json.loads(line)) except json.JSONDecodeError: # Skip a corrupt line rather than blow up the whole report continue return entries def _filter_entries( entries: list[dict], since: Optional[datetime], until: Optional[datetime], model: Optional[str], ) -> list[dict]: out = [] for e in entries: ts_str = e.get("timestamp", "") try: ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00")) except ValueError: continue if since and ts < since: continue if until and ts > until: continue if model and e.get("model_id") != model: continue out.append(e) return out def render_costs(entries: list[dict], console: Console) -> None: """Render a cost summary from filtered ledger entries.""" if not entries: console.print("[dim]No cost data yet.[/dim]") return total_calls = len(entries) total_tokens = sum(e.get("tokens_used", 0) for e in entries) total_input = sum(e.get("tokens_input") or 0 for e in entries) total_output = sum(e.get("tokens_output") or 0 for e in entries) total_tavily = sum(e.get("tavily_searches", 0) for e in entries) total_spend = sum( e.get("estimated_cost_usd") or 0.0 for e in entries ) unknown_cost_calls = sum( 1 for e in entries if e.get("estimated_cost_usd") is None ) # Summary panel summary = Text() summary.append(f"Calls: {total_calls}\n", style="bold") summary.append(f"Total spend: ${total_spend:.4f}\n", style="bold green") summary.append(f"Total tokens: {total_tokens:,} ") summary.append(f"(in {total_input:,} / out {total_output:,})\n", style="dim") summary.append(f"Tavily searches: {total_tavily}\n") if unknown_cost_calls: summary.append( f"Calls with unknown model price: {unknown_cost_calls}\n", style="yellow", ) console.print(Panel(summary, title="Cost Summary", border_style="green")) # Per-day breakdown per_day: dict[str, dict] = defaultdict(lambda: {"calls": 0, "tokens": 0, "spend": 0.0}) for e in entries: day = e.get("timestamp", "")[:10] per_day[day]["calls"] += 1 per_day[day]["tokens"] += e.get("tokens_used", 0) per_day[day]["spend"] += e.get("estimated_cost_usd") or 0.0 day_table = Table(title="Per Day", show_lines=False, expand=True) day_table.add_column("Date", style="dim") day_table.add_column("Calls", justify="right") day_table.add_column("Tokens", justify="right") day_table.add_column("Spend (USD)", justify="right", style="green") for day in sorted(per_day.keys()): d = per_day[day] day_table.add_row( day, str(d["calls"]), f"{d['tokens']:,}", f"${d['spend']:.4f}" ) console.print(day_table) # Per-model breakdown per_model: dict[str, dict] = defaultdict( lambda: {"calls": 0, "tokens": 0, "spend": 0.0} ) for e in entries: m = e.get("model_id", "(unknown)") per_model[m]["calls"] += 1 per_model[m]["tokens"] += e.get("tokens_used", 0) per_model[m]["spend"] += e.get("estimated_cost_usd") or 0.0 model_table = Table(title="Per Model", show_lines=False, expand=True) model_table.add_column("Model") model_table.add_column("Calls", justify="right") model_table.add_column("Tokens", justify="right") model_table.add_column("Spend (USD)", justify="right", style="green") for m in sorted(per_model.keys()): d = per_model[m] model_table.add_row( m, str(d["calls"]), f"{d['tokens']:,}", f"${d['spend']:.4f}" ) console.print(model_table) # Highest-cost call costed = [e for e in entries if e.get("estimated_cost_usd") is not None] if costed: top = max(costed, key=lambda e: e["estimated_cost_usd"]) top_text = Text() top_text.append(f"trace_id: {top.get('trace_id', '?')}\n") top_text.append(f"question: {top.get('question', '')[:120]}\n") top_text.append(f"model: {top.get('model_id', '?')}\n") top_text.append(f"tokens: {top.get('tokens_used', 0):,}\n") top_text.append( f"spend: ${top.get('estimated_cost_usd', 0):.4f}\n", style="bold green", ) console.print( Panel(top_text, title="Highest-Cost Call", border_style="yellow") ) @cli.command() @click.option( "--since", default=None, help="Filter by start time. ISO date or relative (e.g. 7d, 24h, 2w).", ) @click.option( "--until", default=None, help="Filter by end time. ISO date or relative.", ) @click.option( "--model", default=None, help="Filter to a specific model_id.", ) @click.option( "--json", "as_json", is_flag=True, default=False, help="Emit raw filtered ledger entries as JSON instead of the table.", ) @click.option( "--ledger", default=None, help=f"Override ledger path (default: {DEFAULT_LEDGER_PATH}).", ) def costs( since: Optional[str], until: Optional[str], model: Optional[str], as_json: bool, ledger: Optional[str], ) -> None: """Show cost summary from the research ledger.""" console = Console() path = Path(os.path.expanduser(ledger or DEFAULT_LEDGER_PATH)) entries = _load_ledger(path) since_dt = _parse_when(since) if since else None until_dt = _parse_when(until) if until else None filtered = _filter_entries(entries, since_dt, until_dt, model) if as_json: for e in filtered: click.echo(json.dumps(e)) return render_costs(filtered, console) # --------------------------------------------------------------------------- # arxiv subgroup (M5.1.1) # --------------------------------------------------------------------------- @cli.group() def arxiv() -> None: """Manage the local arxiv-rag corpus. Sub-commands let you ingest arxiv papers, list what's indexed, and inspect individual entries. Retrieval and search ship in #39+. """ @arxiv.command("add") @click.argument("arxiv_ids", nargs=-1, required=True) @click.option( "--embedding-model", default=None, help=( "Override embedding model. Defaults to " "$MARCHWARDEN_ARXIV_EMBED_MODEL or nomic-ai/nomic-embed-text-v1.5." ), ) def arxiv_add(arxiv_ids: tuple[str, ...], embedding_model: Optional[str]) -> None: """Download, extract, embed, and index one or more arxiv papers by ID.""" # Imported lazily so the CLI doesn't pay the chromadb / torch import # cost on every invocation — only when the user actually runs an # arxiv command. from researchers.arxiv.ingest import DEFAULT_EMBEDDING_MODEL, ingest from researchers.arxiv.store import ArxivStore console = Console() store = ArxivStore() model = embedding_model or DEFAULT_EMBEDDING_MODEL for arxiv_id in arxiv_ids: console.print(f"[dim]Ingesting:[/dim] {arxiv_id} (model={model})") try: record = ingest(arxiv_id, store=store, model_name=model) except Exception as exc: console.print(f"[bold red]Failed:[/bold red] {arxiv_id}: {exc}") continue console.print( f" -> [green]ok[/green] {record.title or '(no title)'} " f"({record.chunks_indexed} chunks)" ) @arxiv.command("list") def arxiv_list() -> None: """Show all indexed arxiv papers.""" from researchers.arxiv.store import ArxivStore console = Console() store = ArxivStore() papers = store.list_papers() if not papers: console.print( "[dim]No papers indexed yet. Use[/dim] " "[bold]marchwarden arxiv add [/bold]" ) return table = Table(title=f"Indexed papers ({len(papers)})", show_lines=False, expand=True) table.add_column("arxiv_id", style="cyan") table.add_column("Title", overflow="fold") table.add_column("Year", justify="right", width=6) table.add_column("Chunks", justify="right", width=6) table.add_column("Model", overflow="fold") for p in papers: table.add_row( p.arxiv_id, p.title or "(no title)", str(p.year) if p.year else "—", str(p.chunks_indexed), p.embedding_model, ) console.print(table) @arxiv.command("info") @click.argument("arxiv_id") def arxiv_info(arxiv_id: str) -> None: """Show metadata + chunk count for one indexed paper.""" from researchers.arxiv.store import ArxivStore console = Console() store = ArxivStore() record = store.get_paper(arxiv_id) if record is None: console.print( f"[bold red]Not indexed:[/bold red] {arxiv_id}. " f"Use [bold]marchwarden arxiv add {arxiv_id}[/bold]." ) sys.exit(1) text = Text() text.append(f"arxiv_id: {record.arxiv_id}\n", style="bold") text.append(f"title: {record.title or '(none)'}\n") text.append(f"authors: {', '.join(record.authors) or '(none)'}\n") text.append(f"year: {record.year or '(unknown)'}\n") text.append(f"category: {record.category or '(unknown)'}\n") text.append(f"chunks: {record.chunks_indexed}\n") text.append(f"embedding_model: {record.embedding_model}\n") text.append(f"added_at: {record.added_at}\n") console.print(Panel(text, title=arxiv_id, border_style="cyan")) @arxiv.command("remove") @click.argument("arxiv_id") def arxiv_remove(arxiv_id: str) -> None: """Drop one paper from the manifest and chromadb collection.""" from researchers.arxiv.store import ArxivStore console = Console() store = ArxivStore() chunks_removed = store.delete_paper(arxiv_id) in_manifest = store.remove_paper(arxiv_id) if not in_manifest and chunks_removed == 0: console.print(f"[yellow]Not found:[/yellow] {arxiv_id}") sys.exit(1) console.print( f"[green]Removed[/green] {arxiv_id} " f"({chunks_removed} chunks dropped)" ) if __name__ == "__main__": cli()