From 8a62f6b014fac089fd7e4082b7e3b80ad16a2a45 Mon Sep 17 00:00:00 2001
From: Jeff Smith <jeff@unbiasedgeek.com>
Date: Wed, 8 Apr 2026 15:46:51 -0600
Subject: [PATCH] M2.5.1: Structured application logger via structlog (#24)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds an operational logging layer separate from the JSONL trace
audit logs. Operational logs cover system events (startup, errors,
MCP transport, research lifecycle); JSONL traces remain the
researcher provenance audit trail.

Backend: structlog with two renderers selectable via
MARCHWARDEN_LOG_FORMAT (json|console). Defaults to console when
stderr is a TTY, json otherwise — so dev runs are human-readable
and shipped runs (containers, automation) emit OpenSearch-ready
JSON without configuration.

Key features:
- Named loggers per component: marchwarden.cli,
  marchwarden.mcp, marchwarden.researcher.web
- MARCHWARDEN_LOG_LEVEL controls global level (default INFO)
- MARCHWARDEN_LOG_FILE=1 enables a 10MB-rotating file at
  ~/.marchwarden/logs/marchwarden.log
- structlog contextvars bind trace_id + researcher at the start
  of each research() call so every downstream log line carries
  them automatically; cleared on completion
- stdlib logging is funneled through the same pipeline so noisy
  third-party loggers (httpx, anthropic) get the same formatting
  and quieted to WARN unless DEBUG is requested
- Logs to stderr to keep MCP stdio stdout clean

Wired into:
- cli.main.cli — configures logging on startup, logs ask_started/
  ask_completed/ask_failed
- researchers.web.server.main — configures logging on startup,
  logs mcp_server_starting
- researchers.web.agent.research — binds trace context, logs
  research_started/research_completed

Tests verify JSON and console formats, contextvar propagation,
level filtering, idempotency, and auto-configure-on-first-use.
94/94 tests passing.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 cli/main.py               |  20 ++++++
 obs/__init__.py           | 130 ++++++++++++++++++++++++++++++++++++++
 pyproject.toml            |   3 +-
 researchers/web/agent.py  |  32 ++++++++++
 researchers/web/server.py |   5 ++
 tests/test_obs.py         | 128 +++++++++++++++++++++++++++++++++++++
 6 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 obs/__init__.py
 create mode 100644 tests/test_obs.py

diff --git a/cli/main.py b/cli/main.py
index 69808e8..aa135c6 100644
--- a/cli/main.py
+++ b/cli/main.py
@@ -19,11 +19,14 @@ from rich.panel import Panel
 from rich.table import Table
 from rich.text import Text
 
+from obs import configure_logging, get_logger
 from researchers.web.models import ResearchResult
 
 
 DEFAULT_TRACE_DIR = "~/.marchwarden/traces"
 
+log = get_logger("marchwarden.cli")
+
 
 # ---------------------------------------------------------------------------
 # MCP client
@@ -155,6 +158,7 @@ def render_result(result: ResearchResult, console: Console) -> None:
 @click.group()
 def cli() -> None:
     """Marchwarden — agentic research CLI."""
+    configure_logging()
 
 
 @cli.command()
@@ -188,6 +192,13 @@ def ask(
     """Ask the web researcher a QUESTION."""
     console = Console()
     console.print(f"[dim]Researching:[/dim] {question}\n")
+    log.info(
+        "ask_started",
+        question=question,
+        depth=depth,
+        max_iterations=max_iterations,
+        token_budget=token_budget,
+    )
     try:
         result = asyncio.run(
             call_research_tool(
@@ -198,8 +209,17 @@ def ask(
             )
         )
     except Exception as e:
+        log.error("ask_failed", question=question, error=str(e), exc_info=True)
         console.print(f"[bold red]Error:[/bold red] {e}")
         sys.exit(1)
+    log.info(
+        "ask_completed",
+        trace_id=result.trace_id,
+        confidence=result.confidence,
+        citations=len(result.citations),
+        tokens_used=result.cost_metadata.tokens_used,
+        wall_time_sec=result.cost_metadata.wall_time_sec,
+    )
     render_result(result, console)
 
 
diff --git a/obs/__init__.py b/obs/__init__.py
new file mode 100644
index 0000000..a247e8d
--- /dev/null
+++ b/obs/__init__.py
@@ -0,0 +1,130 @@
+"""Marchwarden observability — structured application logging.
+
+Operational logs for administrators. NOT the same as the JSONL traces
+in `~/.marchwarden/traces/` — those are per-research-call audit logs
+for verifying provenance. These logs cover system events: startup,
+shutdown, errors, MCP transport activity, cost ledger writes.
+
+Format is structured-by-default (structlog) so logs can be ingested
+into OpenSearch or similar without per-call formatter ceremony.
+
+Usage:
+    from obs import configure_logging, get_logger
+
+    configure_logging()  # call once at process startup
+    log = get_logger("marchwarden.cli")
+    log.info("ask_started", question=question, depth=depth)
+
+    # Bind context that flows to every downstream log call:
+    log = log.bind(trace_id="abc-123", researcher="web")
+    log.info("research_started")  # automatically includes trace_id + researcher
+"""
+
+import logging
+import os
+import sys
+from logging.handlers import RotatingFileHandler
+from pathlib import Path
+from typing import Optional
+
+import structlog
+
+
+_CONFIGURED = False
+
+
+def _resolve_format() -> str:
+    """Pick the renderer: explicit env override, else auto-detect TTY."""
+    explicit = os.environ.get("MARCHWARDEN_LOG_FORMAT")
+    if explicit in {"json", "console"}:
+        return explicit
+    return "console" if sys.stderr.isatty() else "json"
+
+
+def _resolve_level() -> int:
+    name = os.environ.get("MARCHWARDEN_LOG_LEVEL", "INFO").upper()
+    return getattr(logging, name, logging.INFO)
+
+
+def configure_logging(force: bool = False) -> None:
+    """Configure structlog + stdlib logging once per process.
+
+    Idempotent — subsequent calls are no-ops unless ``force=True``.
+    Honors:
+      - MARCHWARDEN_LOG_LEVEL (default INFO)
+      - MARCHWARDEN_LOG_FORMAT (json|console; auto-detected from TTY if unset)
+      - MARCHWARDEN_LOG_FILE (truthy → also log to ~/.marchwarden/logs/marchwarden.log)
+    """
+    global _CONFIGURED
+    if _CONFIGURED and not force:
+        return
+
+    level = _resolve_level()
+    fmt = _resolve_format()
+
+    timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)
+
+    shared_processors = [
+        structlog.contextvars.merge_contextvars,
+        structlog.stdlib.add_logger_name,
+        structlog.stdlib.add_log_level,
+        timestamper,
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.format_exc_info,
+    ]
+
+    if fmt == "json":
+        renderer: structlog.types.Processor = structlog.processors.JSONRenderer()
+    else:
+        renderer = structlog.dev.ConsoleRenderer(colors=sys.stderr.isatty())
+
+    structlog.configure(
+        processors=shared_processors
+        + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        wrapper_class=structlog.make_filtering_bound_logger(level),
+        cache_logger_on_first_use=True,
+    )
+
+    formatter = structlog.stdlib.ProcessorFormatter(
+        foreign_pre_chain=shared_processors,
+        processors=[
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            renderer,
+        ],
+    )
+
+    # Always log to stderr so MCP stdio stdout stays clean.
+    handler: logging.Handler = logging.StreamHandler(sys.stderr)
+    handler.setFormatter(formatter)
+
+    root = logging.getLogger()
+    # Replace handlers so re-configuration in tests works cleanly.
+    root.handlers = [handler]
+    root.setLevel(level)
+
+    if os.environ.get("MARCHWARDEN_LOG_FILE"):
+        log_dir = Path(os.path.expanduser("~/.marchwarden/logs"))
+        log_dir.mkdir(parents=True, exist_ok=True)
+        file_handler = RotatingFileHandler(
+            log_dir / "marchwarden.log",
+            maxBytes=10 * 1024 * 1024,
+            backupCount=5,
+            encoding="utf-8",
+        )
+        file_handler.setFormatter(formatter)
+        root.addHandler(file_handler)
+
+    # Quiet a few noisy third-party loggers unless DEBUG is requested.
+    if level > logging.DEBUG:
+        for noisy in ("httpx", "httpcore", "anthropic"):
+            logging.getLogger(noisy).setLevel(logging.WARNING)
+
+    _CONFIGURED = True
+
+
+def get_logger(name: str) -> structlog.stdlib.BoundLogger:
+    """Return a bound structlog logger. Configures logging on first call."""
+    if not _CONFIGURED:
+        configure_logging()
+    return structlog.get_logger(name)
diff --git a/pyproject.toml b/pyproject.toml
index 68f97b6..c2140c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "httpx>=0.24.0",
     "click>=8.0",
     "rich>=13.0",
+    "structlog>=24.0",
 ]
 
 [project.optional-dependencies]
@@ -36,7 +37,7 @@ dev = [
 marchwarden = "cli.main:cli"
 
 [tool.setuptools.packages.find]
-include = ["researchers*", "orchestrator*", "cli*"]
+include = ["researchers*", "orchestrator*", "cli*", "obs*"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/researchers/web/agent.py b/researchers/web/agent.py
index 284b175..795580e 100644
--- a/researchers/web/agent.py
+++ b/researchers/web/agent.py
@@ -10,8 +10,10 @@ import json
 import time
 from typing import Optional
 
+import structlog
 from anthropic import Anthropic
 
+from obs import get_logger
 from researchers.web.models import (
     Citation,
     ConfidenceFactors,
@@ -26,6 +28,8 @@ from researchers.web.models import (
 from researchers.web.tools import SearchResult, fetch_url, tavily_search
 from researchers.web.trace import TraceLogger
 
+log = get_logger("marchwarden.researcher.web")
+
 SYSTEM_PROMPT = """\
 You are a Marchwarden — a research specialist stationed at the frontier of knowledge. \
 Your job is to investigate a question thoroughly using web search and URL fetching, \
@@ -201,6 +205,21 @@ class WebResearcher:
         evidence: list[dict] = []
         budget_exhausted = False
 
+        # Bind trace context so every downstream log call automatically
+        # carries trace_id and researcher. Cleared in the finally block.
+        structlog.contextvars.bind_contextvars(
+            trace_id=trace.trace_id,
+            researcher="web",
+        )
+        log.info(
+            "research_started",
+            question=question,
+            depth=depth,
+            max_iterations=constraints.max_iterations,
+            token_budget=constraints.token_budget,
+            model_id=self.model_id,
+        )
+
         trace.log_step(
             "start",
             decision=f"Beginning research: depth={depth}",
@@ -322,6 +341,19 @@ class WebResearcher:
         )
         trace.close()
 
+        log.info(
+            "research_completed",
+            confidence=result.confidence,
+            citations=len(result.citations),
+            gaps=len(result.gaps),
+            discovery_events=len(result.discovery_events),
+            tokens_used=result.cost_metadata.tokens_used,
+            iterations_run=result.cost_metadata.iterations_run,
+            wall_time_sec=result.cost_metadata.wall_time_sec,
+            budget_exhausted=result.cost_metadata.budget_exhausted,
+        )
+        structlog.contextvars.clear_contextvars()
+
         return result
 
     async def _execute_tool(
diff --git a/researchers/web/server.py b/researchers/web/server.py
index bce016c..9207d6c 100644
--- a/researchers/web/server.py
+++ b/researchers/web/server.py
@@ -11,9 +11,12 @@ from typing import Optional
 
 from mcp.server.fastmcp import FastMCP
 
+from obs import configure_logging, get_logger
 from researchers.web.agent import WebResearcher
 from researchers.web.models import ResearchConstraints
 
+log = get_logger("marchwarden.mcp")
+
 mcp = FastMCP(
     name="marchwarden-web-researcher",
     instructions=(
@@ -84,6 +87,8 @@ async def research(
 
 def main():
     """Run the MCP server on stdio."""
+    configure_logging()
+    log.info("mcp_server_starting", transport="stdio", server="marchwarden-web-researcher")
     mcp.run(transport="stdio")
 
 
diff --git a/tests/test_obs.py b/tests/test_obs.py
new file mode 100644
index 0000000..f204f6b
--- /dev/null
+++ b/tests/test_obs.py
@@ -0,0 +1,128 @@
+"""Tests for the obs (structured logging) module."""
+
+import io
+import json
+import logging
+import os
+from unittest.mock import patch
+
+import pytest
+import structlog
+
+from obs import configure_logging, get_logger
+
+
+@pytest.fixture(autouse=True)
+def reset_logging():
+    """Reset structlog + stdlib state between tests so configure_logging
+    is forced to re-run."""
+    import obs
+
+    obs._CONFIGURED = False
+    structlog.reset_defaults()
+    structlog.contextvars.clear_contextvars()
+    root = logging.getLogger()
+    root.handlers = []
+    root.setLevel(logging.WARNING)
+    yield
+    obs._CONFIGURED = False
+    structlog.reset_defaults()
+    structlog.contextvars.clear_contextvars()
+    root.handlers = []
+
+
+def _capture_stderr(monkeypatch):
+    buf = io.StringIO()
+    monkeypatch.setattr("sys.stderr", buf)
+    return buf
+
+
+class TestConfigureLogging:
+    def test_json_format_emits_json(self, monkeypatch):
+        monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
+        monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "INFO")
+        buf = _capture_stderr(monkeypatch)
+
+        configure_logging(force=True)
+        log = get_logger("marchwarden.test")
+        log.info("hello", key="value", count=3)
+
+        line = buf.getvalue().strip()
+        assert line, "expected at least one log line"
+        parsed = json.loads(line)
+        assert parsed["event"] == "hello"
+        assert parsed["key"] == "value"
+        assert parsed["count"] == 3
+        assert parsed["level"] == "info"
+        assert parsed["logger"] == "marchwarden.test"
+        assert "timestamp" in parsed
+
+    def test_console_format_is_human_readable(self, monkeypatch):
+        monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "console")
+        monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "INFO")
+        buf = _capture_stderr(monkeypatch)
+
+        configure_logging(force=True)
+        log = get_logger("marchwarden.test")
+        log.info("greeting", who="world")
+
+        text = buf.getvalue()
+        assert "greeting" in text
+        assert "world" in text
+        # Console renderer is not JSON
+        with pytest.raises(json.JSONDecodeError):
+            json.loads(text.strip().splitlines()[-1])
+
+    def test_context_binding_propagates(self, monkeypatch):
+        monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
+        buf = _capture_stderr(monkeypatch)
+
+        configure_logging(force=True)
+        log = get_logger("marchwarden.test")
+        structlog.contextvars.bind_contextvars(trace_id="abc-123", researcher="web")
+        try:
+            log.info("step")
+        finally:
+            structlog.contextvars.clear_contextvars()
+
+        parsed = json.loads(buf.getvalue().strip())
+        assert parsed["trace_id"] == "abc-123"
+        assert parsed["researcher"] == "web"
+
+    def test_log_level_filters(self, monkeypatch):
+        monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
+        monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "WARNING")
+        buf = _capture_stderr(monkeypatch)
+
+        configure_logging(force=True)
+        log = get_logger("marchwarden.test")
+        log.info("ignored")
+        log.warning("kept")
+
+        lines = [l for l in buf.getvalue().strip().splitlines() if l]
+        assert len(lines) == 1
+        assert json.loads(lines[0])["event"] == "kept"
+
+    def test_idempotent(self, monkeypatch):
+        monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
+        configure_logging(force=True)
+
+        import obs
+
+        obs._CONFIGURED = True
+        # Second call should be a no-op
+        before = logging.getLogger().handlers
+        configure_logging()
+        after = logging.getLogger().handlers
+        assert before is after
+
+    def test_get_logger_auto_configures(self, monkeypatch):
+        monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
+        buf = _capture_stderr(monkeypatch)
+
+        # No explicit configure_logging() call
+        log = get_logger("marchwarden.test")
+        log.info("auto")
+
+        parsed = json.loads(buf.getvalue().strip())
+        assert parsed["event"] == "auto"