M2.5.1: Structured application logger via structlog (#24)
Adds an operational logging layer separate from the JSONL trace audit logs. Operational logs cover system events (startup, errors, MCP transport, research lifecycle); JSONL traces remain the researcher provenance audit trail. Backend: structlog with two renderers selectable via MARCHWARDEN_LOG_FORMAT (json|console). Defaults to console when stderr is a TTY, json otherwise — so dev runs are human-readable and shipped runs (containers, automation) emit OpenSearch-ready JSON without configuration. Key features: - Named loggers per component: marchwarden.cli, marchwarden.mcp, marchwarden.researcher.web - MARCHWARDEN_LOG_LEVEL controls global level (default INFO) - MARCHWARDEN_LOG_FILE=1 enables a 10MB-rotating file at ~/.marchwarden/logs/marchwarden.log - structlog contextvars bind trace_id + researcher at the start of each research() call so every downstream log line carries them automatically; cleared on completion - stdlib logging is funneled through the same pipeline so noisy third-party loggers (httpx, anthropic) get the same formatting and quieted to WARN unless DEBUG is requested - Logs to stderr to keep MCP stdio stdout clean Wired into: - cli.main.cli — configures logging on startup, logs ask_started/ ask_completed/ask_failed - researchers.web.server.main — configures logging on startup, logs mcp_server_starting - researchers.web.agent.research — binds trace context, logs research_started/research_completed Tests verify JSON and console formats, contextvar propagation, level filtering, idempotency, and auto-configure-on-first-use. 94/94 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8293cbfb68
commit
8a62f6b014
6 changed files with 317 additions and 1 deletions
20
cli/main.py
20
cli/main.py
|
|
@ -19,11 +19,14 @@ from rich.panel import Panel
|
||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
from rich.text import Text
|
from rich.text import Text
|
||||||
|
|
||||||
|
from obs import configure_logging, get_logger
|
||||||
from researchers.web.models import ResearchResult
|
from researchers.web.models import ResearchResult
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_TRACE_DIR = "~/.marchwarden/traces"
|
DEFAULT_TRACE_DIR = "~/.marchwarden/traces"
|
||||||
|
|
||||||
|
log = get_logger("marchwarden.cli")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# MCP client
|
# MCP client
|
||||||
|
|
@ -155,6 +158,7 @@ def render_result(result: ResearchResult, console: Console) -> None:
|
||||||
@click.group()
|
@click.group()
|
||||||
def cli() -> None:
|
def cli() -> None:
|
||||||
"""Marchwarden — agentic research CLI."""
|
"""Marchwarden — agentic research CLI."""
|
||||||
|
configure_logging()
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
|
@ -188,6 +192,13 @@ def ask(
|
||||||
"""Ask the web researcher a QUESTION."""
|
"""Ask the web researcher a QUESTION."""
|
||||||
console = Console()
|
console = Console()
|
||||||
console.print(f"[dim]Researching:[/dim] {question}\n")
|
console.print(f"[dim]Researching:[/dim] {question}\n")
|
||||||
|
log.info(
|
||||||
|
"ask_started",
|
||||||
|
question=question,
|
||||||
|
depth=depth,
|
||||||
|
max_iterations=max_iterations,
|
||||||
|
token_budget=token_budget,
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
result = asyncio.run(
|
result = asyncio.run(
|
||||||
call_research_tool(
|
call_research_tool(
|
||||||
|
|
@ -198,8 +209,17 @@ def ask(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
log.error("ask_failed", question=question, error=str(e), exc_info=True)
|
||||||
console.print(f"[bold red]Error:[/bold red] {e}")
|
console.print(f"[bold red]Error:[/bold red] {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
log.info(
|
||||||
|
"ask_completed",
|
||||||
|
trace_id=result.trace_id,
|
||||||
|
confidence=result.confidence,
|
||||||
|
citations=len(result.citations),
|
||||||
|
tokens_used=result.cost_metadata.tokens_used,
|
||||||
|
wall_time_sec=result.cost_metadata.wall_time_sec,
|
||||||
|
)
|
||||||
render_result(result, console)
|
render_result(result, console)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
130
obs/__init__.py
Normal file
130
obs/__init__.py
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
"""Marchwarden observability — structured application logging.
|
||||||
|
|
||||||
|
Operational logs for administrators. NOT the same as the JSONL traces
|
||||||
|
in `~/.marchwarden/traces/` — those are per-research-call audit logs
|
||||||
|
for verifying provenance. These logs cover system events: startup,
|
||||||
|
shutdown, errors, MCP transport activity, cost ledger writes.
|
||||||
|
|
||||||
|
Format is structured-by-default (structlog) so logs can be ingested
|
||||||
|
into OpenSearch or similar without per-call formatter ceremony.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from obs import configure_logging, get_logger
|
||||||
|
|
||||||
|
configure_logging() # call once at process startup
|
||||||
|
log = get_logger("marchwarden.cli")
|
||||||
|
log.info("ask_started", question=question, depth=depth)
|
||||||
|
|
||||||
|
# Bind context that flows to every downstream log call:
|
||||||
|
log = log.bind(trace_id="abc-123", researcher="web")
|
||||||
|
log.info("research_started") # automatically includes trace_id + researcher
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
|
||||||
|
_CONFIGURED = False
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_format() -> str:
|
||||||
|
"""Pick the renderer: explicit env override, else auto-detect TTY."""
|
||||||
|
explicit = os.environ.get("MARCHWARDEN_LOG_FORMAT")
|
||||||
|
if explicit in {"json", "console"}:
|
||||||
|
return explicit
|
||||||
|
return "console" if sys.stderr.isatty() else "json"
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_level() -> int:
|
||||||
|
name = os.environ.get("MARCHWARDEN_LOG_LEVEL", "INFO").upper()
|
||||||
|
return getattr(logging, name, logging.INFO)
|
||||||
|
|
||||||
|
|
||||||
|
def configure_logging(force: bool = False) -> None:
|
||||||
|
"""Configure structlog + stdlib logging once per process.
|
||||||
|
|
||||||
|
Idempotent — subsequent calls are no-ops unless ``force=True``.
|
||||||
|
Honors:
|
||||||
|
- MARCHWARDEN_LOG_LEVEL (default INFO)
|
||||||
|
- MARCHWARDEN_LOG_FORMAT (json|console; auto-detected from TTY if unset)
|
||||||
|
- MARCHWARDEN_LOG_FILE (truthy → also log to ~/.marchwarden/logs/marchwarden.log)
|
||||||
|
"""
|
||||||
|
global _CONFIGURED
|
||||||
|
if _CONFIGURED and not force:
|
||||||
|
return
|
||||||
|
|
||||||
|
level = _resolve_level()
|
||||||
|
fmt = _resolve_format()
|
||||||
|
|
||||||
|
timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)
|
||||||
|
|
||||||
|
shared_processors = [
|
||||||
|
structlog.contextvars.merge_contextvars,
|
||||||
|
structlog.stdlib.add_logger_name,
|
||||||
|
structlog.stdlib.add_log_level,
|
||||||
|
timestamper,
|
||||||
|
structlog.processors.StackInfoRenderer(),
|
||||||
|
structlog.processors.format_exc_info,
|
||||||
|
]
|
||||||
|
|
||||||
|
if fmt == "json":
|
||||||
|
renderer: structlog.types.Processor = structlog.processors.JSONRenderer()
|
||||||
|
else:
|
||||||
|
renderer = structlog.dev.ConsoleRenderer(colors=sys.stderr.isatty())
|
||||||
|
|
||||||
|
structlog.configure(
|
||||||
|
processors=shared_processors
|
||||||
|
+ [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
|
||||||
|
logger_factory=structlog.stdlib.LoggerFactory(),
|
||||||
|
wrapper_class=structlog.make_filtering_bound_logger(level),
|
||||||
|
cache_logger_on_first_use=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
formatter = structlog.stdlib.ProcessorFormatter(
|
||||||
|
foreign_pre_chain=shared_processors,
|
||||||
|
processors=[
|
||||||
|
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
|
||||||
|
renderer,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Always log to stderr so MCP stdio stdout stays clean.
|
||||||
|
handler: logging.Handler = logging.StreamHandler(sys.stderr)
|
||||||
|
handler.setFormatter(formatter)
|
||||||
|
|
||||||
|
root = logging.getLogger()
|
||||||
|
# Replace handlers so re-configuration in tests works cleanly.
|
||||||
|
root.handlers = [handler]
|
||||||
|
root.setLevel(level)
|
||||||
|
|
||||||
|
if os.environ.get("MARCHWARDEN_LOG_FILE"):
|
||||||
|
log_dir = Path(os.path.expanduser("~/.marchwarden/logs"))
|
||||||
|
log_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
file_handler = RotatingFileHandler(
|
||||||
|
log_dir / "marchwarden.log",
|
||||||
|
maxBytes=10 * 1024 * 1024,
|
||||||
|
backupCount=5,
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
file_handler.setFormatter(formatter)
|
||||||
|
root.addHandler(file_handler)
|
||||||
|
|
||||||
|
# Quiet a few noisy third-party loggers unless DEBUG is requested.
|
||||||
|
if level > logging.DEBUG:
|
||||||
|
for noisy in ("httpx", "httpcore", "anthropic"):
|
||||||
|
logging.getLogger(noisy).setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
_CONFIGURED = True
|
||||||
|
|
||||||
|
|
||||||
|
def get_logger(name: str) -> structlog.stdlib.BoundLogger:
|
||||||
|
"""Return a bound structlog logger. Configures logging on first call."""
|
||||||
|
if not _CONFIGURED:
|
||||||
|
configure_logging()
|
||||||
|
return structlog.get_logger(name)
|
||||||
|
|
@ -20,6 +20,7 @@ dependencies = [
|
||||||
"httpx>=0.24.0",
|
"httpx>=0.24.0",
|
||||||
"click>=8.0",
|
"click>=8.0",
|
||||||
"rich>=13.0",
|
"rich>=13.0",
|
||||||
|
"structlog>=24.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
@ -36,7 +37,7 @@ dev = [
|
||||||
marchwarden = "cli.main:cli"
|
marchwarden = "cli.main:cli"
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
include = ["researchers*", "orchestrator*", "cli*"]
|
include = ["researchers*", "orchestrator*", "cli*", "obs*"]
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
[tool.pytest.ini_options]
|
||||||
testpaths = ["tests"]
|
testpaths = ["tests"]
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,10 @@ import json
|
||||||
import time
|
import time
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
import structlog
|
||||||
from anthropic import Anthropic
|
from anthropic import Anthropic
|
||||||
|
|
||||||
|
from obs import get_logger
|
||||||
from researchers.web.models import (
|
from researchers.web.models import (
|
||||||
Citation,
|
Citation,
|
||||||
ConfidenceFactors,
|
ConfidenceFactors,
|
||||||
|
|
@ -26,6 +28,8 @@ from researchers.web.models import (
|
||||||
from researchers.web.tools import SearchResult, fetch_url, tavily_search
|
from researchers.web.tools import SearchResult, fetch_url, tavily_search
|
||||||
from researchers.web.trace import TraceLogger
|
from researchers.web.trace import TraceLogger
|
||||||
|
|
||||||
|
log = get_logger("marchwarden.researcher.web")
|
||||||
|
|
||||||
SYSTEM_PROMPT = """\
|
SYSTEM_PROMPT = """\
|
||||||
You are a Marchwarden — a research specialist stationed at the frontier of knowledge. \
|
You are a Marchwarden — a research specialist stationed at the frontier of knowledge. \
|
||||||
Your job is to investigate a question thoroughly using web search and URL fetching, \
|
Your job is to investigate a question thoroughly using web search and URL fetching, \
|
||||||
|
|
@ -201,6 +205,21 @@ class WebResearcher:
|
||||||
evidence: list[dict] = []
|
evidence: list[dict] = []
|
||||||
budget_exhausted = False
|
budget_exhausted = False
|
||||||
|
|
||||||
|
# Bind trace context so every downstream log call automatically
|
||||||
|
# carries trace_id and researcher. Cleared in the finally block.
|
||||||
|
structlog.contextvars.bind_contextvars(
|
||||||
|
trace_id=trace.trace_id,
|
||||||
|
researcher="web",
|
||||||
|
)
|
||||||
|
log.info(
|
||||||
|
"research_started",
|
||||||
|
question=question,
|
||||||
|
depth=depth,
|
||||||
|
max_iterations=constraints.max_iterations,
|
||||||
|
token_budget=constraints.token_budget,
|
||||||
|
model_id=self.model_id,
|
||||||
|
)
|
||||||
|
|
||||||
trace.log_step(
|
trace.log_step(
|
||||||
"start",
|
"start",
|
||||||
decision=f"Beginning research: depth={depth}",
|
decision=f"Beginning research: depth={depth}",
|
||||||
|
|
@ -322,6 +341,19 @@ class WebResearcher:
|
||||||
)
|
)
|
||||||
trace.close()
|
trace.close()
|
||||||
|
|
||||||
|
log.info(
|
||||||
|
"research_completed",
|
||||||
|
confidence=result.confidence,
|
||||||
|
citations=len(result.citations),
|
||||||
|
gaps=len(result.gaps),
|
||||||
|
discovery_events=len(result.discovery_events),
|
||||||
|
tokens_used=result.cost_metadata.tokens_used,
|
||||||
|
iterations_run=result.cost_metadata.iterations_run,
|
||||||
|
wall_time_sec=result.cost_metadata.wall_time_sec,
|
||||||
|
budget_exhausted=result.cost_metadata.budget_exhausted,
|
||||||
|
)
|
||||||
|
structlog.contextvars.clear_contextvars()
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
async def _execute_tool(
|
async def _execute_tool(
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,12 @@ from typing import Optional
|
||||||
|
|
||||||
from mcp.server.fastmcp import FastMCP
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
|
||||||
|
from obs import configure_logging, get_logger
|
||||||
from researchers.web.agent import WebResearcher
|
from researchers.web.agent import WebResearcher
|
||||||
from researchers.web.models import ResearchConstraints
|
from researchers.web.models import ResearchConstraints
|
||||||
|
|
||||||
|
log = get_logger("marchwarden.mcp")
|
||||||
|
|
||||||
mcp = FastMCP(
|
mcp = FastMCP(
|
||||||
name="marchwarden-web-researcher",
|
name="marchwarden-web-researcher",
|
||||||
instructions=(
|
instructions=(
|
||||||
|
|
@ -84,6 +87,8 @@ async def research(
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Run the MCP server on stdio."""
|
"""Run the MCP server on stdio."""
|
||||||
|
configure_logging()
|
||||||
|
log.info("mcp_server_starting", transport="stdio", server="marchwarden-web-researcher")
|
||||||
mcp.run(transport="stdio")
|
mcp.run(transport="stdio")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
128
tests/test_obs.py
Normal file
128
tests/test_obs.py
Normal file
|
|
@ -0,0 +1,128 @@
|
||||||
|
"""Tests for the obs (structured logging) module."""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
from obs import configure_logging, get_logger
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def reset_logging():
|
||||||
|
"""Reset structlog + stdlib state between tests so configure_logging
|
||||||
|
is forced to re-run."""
|
||||||
|
import obs
|
||||||
|
|
||||||
|
obs._CONFIGURED = False
|
||||||
|
structlog.reset_defaults()
|
||||||
|
structlog.contextvars.clear_contextvars()
|
||||||
|
root = logging.getLogger()
|
||||||
|
root.handlers = []
|
||||||
|
root.setLevel(logging.WARNING)
|
||||||
|
yield
|
||||||
|
obs._CONFIGURED = False
|
||||||
|
structlog.reset_defaults()
|
||||||
|
structlog.contextvars.clear_contextvars()
|
||||||
|
root.handlers = []
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_stderr(monkeypatch):
|
||||||
|
buf = io.StringIO()
|
||||||
|
monkeypatch.setattr("sys.stderr", buf)
|
||||||
|
return buf
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfigureLogging:
|
||||||
|
def test_json_format_emits_json(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "INFO")
|
||||||
|
buf = _capture_stderr(monkeypatch)
|
||||||
|
|
||||||
|
configure_logging(force=True)
|
||||||
|
log = get_logger("marchwarden.test")
|
||||||
|
log.info("hello", key="value", count=3)
|
||||||
|
|
||||||
|
line = buf.getvalue().strip()
|
||||||
|
assert line, "expected at least one log line"
|
||||||
|
parsed = json.loads(line)
|
||||||
|
assert parsed["event"] == "hello"
|
||||||
|
assert parsed["key"] == "value"
|
||||||
|
assert parsed["count"] == 3
|
||||||
|
assert parsed["level"] == "info"
|
||||||
|
assert parsed["logger"] == "marchwarden.test"
|
||||||
|
assert "timestamp" in parsed
|
||||||
|
|
||||||
|
def test_console_format_is_human_readable(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "console")
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "INFO")
|
||||||
|
buf = _capture_stderr(monkeypatch)
|
||||||
|
|
||||||
|
configure_logging(force=True)
|
||||||
|
log = get_logger("marchwarden.test")
|
||||||
|
log.info("greeting", who="world")
|
||||||
|
|
||||||
|
text = buf.getvalue()
|
||||||
|
assert "greeting" in text
|
||||||
|
assert "world" in text
|
||||||
|
# Console renderer is not JSON
|
||||||
|
with pytest.raises(json.JSONDecodeError):
|
||||||
|
json.loads(text.strip().splitlines()[-1])
|
||||||
|
|
||||||
|
def test_context_binding_propagates(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||||
|
buf = _capture_stderr(monkeypatch)
|
||||||
|
|
||||||
|
configure_logging(force=True)
|
||||||
|
log = get_logger("marchwarden.test")
|
||||||
|
structlog.contextvars.bind_contextvars(trace_id="abc-123", researcher="web")
|
||||||
|
try:
|
||||||
|
log.info("step")
|
||||||
|
finally:
|
||||||
|
structlog.contextvars.clear_contextvars()
|
||||||
|
|
||||||
|
parsed = json.loads(buf.getvalue().strip())
|
||||||
|
assert parsed["trace_id"] == "abc-123"
|
||||||
|
assert parsed["researcher"] == "web"
|
||||||
|
|
||||||
|
def test_log_level_filters(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "WARNING")
|
||||||
|
buf = _capture_stderr(monkeypatch)
|
||||||
|
|
||||||
|
configure_logging(force=True)
|
||||||
|
log = get_logger("marchwarden.test")
|
||||||
|
log.info("ignored")
|
||||||
|
log.warning("kept")
|
||||||
|
|
||||||
|
lines = [l for l in buf.getvalue().strip().splitlines() if l]
|
||||||
|
assert len(lines) == 1
|
||||||
|
assert json.loads(lines[0])["event"] == "kept"
|
||||||
|
|
||||||
|
def test_idempotent(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||||
|
configure_logging(force=True)
|
||||||
|
|
||||||
|
import obs
|
||||||
|
|
||||||
|
obs._CONFIGURED = True
|
||||||
|
# Second call should be a no-op
|
||||||
|
before = logging.getLogger().handlers
|
||||||
|
configure_logging()
|
||||||
|
after = logging.getLogger().handlers
|
||||||
|
assert before is after
|
||||||
|
|
||||||
|
def test_get_logger_auto_configures(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||||
|
buf = _capture_stderr(monkeypatch)
|
||||||
|
|
||||||
|
# No explicit configure_logging() call
|
||||||
|
log = get_logger("marchwarden.test")
|
||||||
|
log.info("auto")
|
||||||
|
|
||||||
|
parsed = json.loads(buf.getvalue().strip())
|
||||||
|
assert parsed["event"] == "auto"
|
||||||
Loading…
Reference in a new issue