marchwarden/cli/main.py

303 lines
9.7 KiB
Python
Raw Permalink Normal View History

"""Marchwarden CLI shim.
Talks to the web researcher MCP server over stdio and pretty-prints
ResearchResult contracts to the terminal.
"""
import asyncio
import json
import os
import sys
from pathlib import Path
from typing import Optional

import click
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.table import Table
from rich.text import Text

from obs import configure_logging, get_logger
from researchers.web.models import ResearchResult
# Directory searched for "<trace_id>.jsonl" trace files when no --trace-dir is
# given; the leading "~" is expanded when the path is resolved.
DEFAULT_TRACE_DIR = "~/.marchwarden/traces"

# Module-level logger for the CLI component.
log = get_logger("marchwarden.cli")
# ---------------------------------------------------------------------------
# MCP client
# ---------------------------------------------------------------------------
async def call_research_tool(
    question: str,
    depth: str,
    max_iterations: int,
    token_budget: int,
) -> ResearchResult:
    """Spawn the web researcher MCP server and call its `research` tool.

    The server is launched as a child process of the current interpreter
    (``-m researchers.web.server``) with a copy of this process's
    environment, and is spoken to over stdio.

    Args:
        question: Research question forwarded to the tool.
        depth: Depth setting forwarded to the tool unchanged.
        max_iterations: Iteration cap forwarded to the tool.
        token_budget: Token budget forwarded to the tool.

    Returns:
        The tool's JSON payload validated into a ``ResearchResult``.

    Raises:
        RuntimeError: If the tool call is flagged as an error, or the
            response carries no text content to parse.
    """
    params = StdioServerParameters(
        command=sys.executable,
        args=["-m", "researchers.web.server"],
        env=os.environ.copy(),
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "research",
                arguments={
                    "question": question,
                    "depth": depth,
                    "max_iterations": max_iterations,
                    "token_budget": token_budget,
                },
            )

    # FastMCP returns the tool's string return as a TextContent block.
    # Non-text blocks have no ``text`` attribute, hence the getattr.
    blocks = result.content or []
    payload = getattr(blocks[0], "text", None) if blocks else None

    # A failed tool call also arrives as content; report it as a failure
    # instead of feeding the error message to the JSON validator.
    if result.isError:
        detail = payload or "no details provided"
        raise RuntimeError(f"research tool returned an error: {detail}")
    if payload is None:
        raise RuntimeError("research tool returned no text content")

    return ResearchResult.model_validate_json(payload)
# ---------------------------------------------------------------------------
# Pretty printing
# ---------------------------------------------------------------------------
def _plain(value):
    """Return *value* wrapped in ``Text`` if it is a string, else unchanged.

    rich parses bare ``str`` renderables as console markup, which would drop
    or choke on bracketed text in researched content. A ``Text`` instance is
    printed literally. Non-string values are passed through untouched.
    """
    return Text(value) if isinstance(value, str) else value


def render_result(result: ResearchResult, console: Console) -> None:
    """Render a ResearchResult to the console using rich.

    Prints, in order: the answer panel, the citations table (or a
    "No citations." line), then the gaps, discovery-event and open-question
    tables when those lists are non-empty, followed by the confidence panel,
    the cost panel and a trace_id footer.

    Content taken from the result (answer, titles, excerpts, topics, queries,
    ...) is rendered literally rather than as rich markup.

    Args:
        result: The research result to display.
        console: The rich console to print to.
    """
    # Answer
    console.print(
        Panel(
            _plain(result.answer),
            title="[bold cyan]Answer[/bold cyan]",
            border_style="cyan",
        )
    )

    # Citations
    if result.citations:
        table = Table(title="Citations", show_lines=True, expand=True)
        table.add_column("#", style="dim", width=3)
        table.add_column("Title / Locator", overflow="fold")
        table.add_column("Excerpt", overflow="fold")
        table.add_column("Conf", justify="right", width=5)
        for i, c in enumerate(result.citations, 1):
            # Bold title (falling back to the locator) over a dim locator,
            # built from styled segments so the text itself is never parsed.
            header = Text.assemble(
                (str(c.title or c.locator), "bold"),
                "\n",
                (str(c.locator), "dim"),
            )
            table.add_row(
                str(i), header, _plain(c.raw_excerpt), f"{c.confidence:.2f}"
            )
        console.print(table)
    else:
        console.print("[dim]No citations.[/dim]")

    # Gaps grouped by category
    if result.gaps:
        gap_table = Table(title="Gaps", show_lines=True, expand=True)
        gap_table.add_column("Category", style="yellow")
        gap_table.add_column("Topic")
        gap_table.add_column("Detail", overflow="fold")
        for g in result.gaps:
            gap_table.add_row(
                _plain(g.category.value), _plain(g.topic), _plain(g.detail)
            )
        console.print(gap_table)

    # Discovery events
    if result.discovery_events:
        de_table = Table(title="Discovery Events", show_lines=True, expand=True)
        de_table.add_column("Type", style="magenta")
        de_table.add_column("Suggested Researcher")
        de_table.add_column("Query", overflow="fold")
        de_table.add_column("Reason", overflow="fold")
        for d in result.discovery_events:
            de_table.add_row(
                _plain(d.type),
                _plain(d.suggested_researcher or "-"),
                _plain(d.query),
                _plain(d.reason),
            )
        console.print(de_table)

    # Open questions
    if result.open_questions:
        oq_table = Table(title="Open Questions", show_lines=True, expand=True)
        oq_table.add_column("Priority", style="green")
        oq_table.add_column("Question", overflow="fold")
        oq_table.add_column("Context", overflow="fold")
        for q in result.open_questions:
            oq_table.add_row(
                _plain(q.priority), _plain(q.question), _plain(q.context)
            )
        console.print(oq_table)

    # Confidence + factors
    cf = result.confidence_factors
    conf_text = Text()
    conf_text.append(f"Overall: {result.confidence:.2f}\n", style="bold")
    conf_text.append(f"Corroborating sources: {cf.num_corroborating_sources}\n")
    conf_text.append(f"Source authority: {cf.source_authority}\n")
    conf_text.append(f"Contradiction detected: {cf.contradiction_detected}\n")
    conf_text.append(f"Query specificity match: {cf.query_specificity_match:.2f}\n")
    conf_text.append(f"Budget exhausted: {cf.budget_exhausted}\n")
    conf_text.append(f"Recency: {cf.recency or 'unknown'}")
    console.print(Panel(conf_text, title="Confidence", border_style="green"))

    # Cost
    cm = result.cost_metadata
    cost_text = Text()
    cost_text.append(f"Tokens: {cm.tokens_used}\n")
    cost_text.append(f"Iterations: {cm.iterations_run}\n")
    cost_text.append(f"Wall time: {cm.wall_time_sec:.2f}s\n")
    cost_text.append(f"Model: {cm.model_id}")
    console.print(Panel(cost_text, title="Cost", border_style="blue"))

    # Trace footer
    console.print(f"\n[dim]trace_id: {result.trace_id}[/dim]")
# ---------------------------------------------------------------------------
# Click app
# ---------------------------------------------------------------------------
@click.group()
def cli() -> None:
    """Marchwarden — agentic research CLI."""
    # The docstring above is the group's --help text; keep it short.
    # Logging is configured here, in the group callback, so it is set up
    # before any subcommand body runs.
    configure_logging()
@cli.command()
@click.argument("question")
@click.option(
    "--depth",
    type=click.Choice(["shallow", "balanced", "deep"]),
    default="balanced",
    show_default=True,
)
@click.option(
    "--budget",
    "token_budget",
    type=int,
    default=20_000,
    show_default=True,
    help="Token budget for the research loop.",
)
@click.option(
    "--max-iterations",
    type=int,
    default=5,
    show_default=True,
)
def ask(
    question: str,
    depth: str,
    token_budget: int,
    max_iterations: int,
) -> None:
    """Ask the web researcher a QUESTION."""
    # NOTE: the docstring above doubles as the command's --help text.
    console = Console()
    # The question is free-form user text: escape it so square brackets are
    # printed literally instead of being parsed as rich markup.
    console.print(f"[dim]Researching:[/dim] {escape(question)}\n")
    log.info(
        "ask_started",
        question=question,
        depth=depth,
        max_iterations=max_iterations,
        token_budget=token_budget,
    )
    try:
        result = asyncio.run(
            call_research_tool(
                question=question,
                depth=depth,
                max_iterations=max_iterations,
                token_budget=token_budget,
            )
        )
    except Exception as e:
        # Top-level boundary: log with traceback, show a short message, and
        # exit non-zero. Exception text is escaped because validation errors
        # routinely contain "[...]" segments that markup parsing would eat.
        log.error("ask_failed", question=question, error=str(e), exc_info=True)
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        sys.exit(1)
    log.info(
        "ask_completed",
        trace_id=result.trace_id,
        confidence=result.confidence,
        citations=len(result.citations),
        tokens_used=result.cost_metadata.tokens_used,
        wall_time_sec=result.cost_metadata.wall_time_sec,
    )
    render_result(result, console)
def _resolve_trace_path(trace_id: str, trace_dir: Optional[str]) -> Path:
    """Build the path of the JSONL trace file belonging to *trace_id*.

    Uses *trace_dir* when it is non-empty, otherwise ``DEFAULT_TRACE_DIR``.
    A leading ``~`` in the directory is expanded to the user's home.

    Returns:
        ``<directory>/<trace_id>.jsonl``; the file is not checked for
        existence here.
    """
    directory = trace_dir if trace_dir else DEFAULT_TRACE_DIR
    filename = "{}.jsonl".format(trace_id)
    return Path(os.path.expanduser(directory)).joinpath(filename)
def render_trace(entries: list[dict], trace_id: str, console: Console) -> None:
    """Pretty-print a list of trace entries.

    Prints a summary panel (trace id and step count) followed by one table
    row per entry. The ``step``, ``action``, ``decision`` and
    ``content_hash`` keys get their own columns; ``timestamp`` is not shown;
    every other key is listed as ``key: value`` lines in the Details column.

    All entry-derived text is rendered literally rather than as rich markup.

    Args:
        entries: Parsed trace records, one per JSONL line.
        trace_id: Identifier shown in the summary panel.
        console: The rich console to print to.
    """
    # Styled segments instead of a markup f-string so trace_id is never
    # parsed for markup.
    summary = Text.assemble(
        ("trace_id:", "bold"),
        f" {trace_id}\n",
        ("steps:", "bold"),
        f" {len(entries)}",
    )
    console.print(Panel(summary, title="[cyan]Replay[/cyan]", border_style="cyan"))
    if not entries:
        console.print("[dim]Trace file is empty.[/dim]")
        return

    table = Table(show_lines=True, expand=True)
    table.add_column("#", style="dim", width=4)
    table.add_column("Action", style="magenta")
    table.add_column("Decision", overflow="fold")
    table.add_column("Details", overflow="fold")
    table.add_column("Hash", style="dim", overflow="fold")

    # Keys with a dedicated column (or deliberately hidden), excluded from
    # the Details column.
    reserved = {"step", "action", "decision", "timestamp", "content_hash"}
    for e in entries:
        if not isinstance(e, dict):
            # A valid JSON line that is not an object (e.g. a bare number or
            # list): show it raw instead of failing on ``.get``.
            table.add_row("?", "", "", Text(str(e)), "")
            continue
        step = str(e.get("step", "?"))
        action = str(e.get("action", ""))
        decision = str(e.get("decision", ""))
        content_hash = str(e.get("content_hash", "") or "")
        extras = {k: v for k, v in e.items() if k not in reserved}
        details = "\n".join(f"{k}: {v}" for k, v in extras.items())
        table.add_row(
            Text(step),
            Text(action),
            Text(decision),
            Text(details),
            Text(content_hash),
        )
    console.print(table)
@cli.command()
@click.argument("trace_id")
@click.option(
    "--trace-dir",
    default=None,
    help=f"Trace directory (default: {DEFAULT_TRACE_DIR}).",
)
def replay(trace_id: str, trace_dir: Optional[str]) -> None:
    """Replay a prior research run by TRACE_ID."""
    # NOTE: the docstring above doubles as the command's --help text.
    console = Console()
    path = _resolve_trace_path(trace_id, trace_dir)

    # Open first and handle failure, rather than checking exists() and then
    # opening: no race window, and other I/O errors get a message too.
    # User-supplied text is escaped so brackets are not parsed as markup.
    try:
        f = open(path, "r", encoding="utf-8")
    except FileNotFoundError:
        console.print(
            f"[bold red]Error:[/bold red] no trace file found for "
            f"trace_id [bold]{escape(trace_id)}[/bold] at {escape(str(path))}"
        )
        sys.exit(1)
    except OSError as e:
        console.print(
            f"[bold red]Error:[/bold red] could not read trace file "
            f"{escape(str(path))}: {escape(str(e))}"
        )
        sys.exit(1)

    entries: list[dict] = []
    with f:
        for lineno, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                # Blank lines are skipped but still counted in lineno.
                continue
            try:
                entries.append(json.loads(line))
            except json.JSONDecodeError as e:
                console.print(
                    f"[bold red]Error:[/bold red] invalid JSON on line "
                    f"{lineno}: {escape(str(e))}"
                )
                sys.exit(1)
    render_trace(entries, trace_id, console)
if __name__ == "__main__":
    # Run the click group when this module is executed as a script.
    cli()