M2.5.1: Structured application logger #27
6 changed files with 317 additions and 1 deletions
20
cli/main.py
20
cli/main.py
|
|
@ -19,11 +19,14 @@ from rich.panel import Panel
|
|||
from rich.table import Table
|
||||
from rich.text import Text
|
||||
|
||||
from obs import configure_logging, get_logger
|
||||
from researchers.web.models import ResearchResult
|
||||
|
||||
|
||||
DEFAULT_TRACE_DIR = "~/.marchwarden/traces"
|
||||
|
||||
log = get_logger("marchwarden.cli")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MCP client
|
||||
|
|
@ -155,6 +158,7 @@ def render_result(result: ResearchResult, console: Console) -> None:
|
|||
@click.group()
|
||||
def cli() -> None:
|
||||
"""Marchwarden — agentic research CLI."""
|
||||
configure_logging()
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
|
@ -188,6 +192,13 @@ def ask(
|
|||
"""Ask the web researcher a QUESTION."""
|
||||
console = Console()
|
||||
console.print(f"[dim]Researching:[/dim] {question}\n")
|
||||
log.info(
|
||||
"ask_started",
|
||||
question=question,
|
||||
depth=depth,
|
||||
max_iterations=max_iterations,
|
||||
token_budget=token_budget,
|
||||
)
|
||||
try:
|
||||
result = asyncio.run(
|
||||
call_research_tool(
|
||||
|
|
@ -198,8 +209,17 @@ def ask(
|
|||
)
|
||||
)
|
||||
except Exception as e:
|
||||
log.error("ask_failed", question=question, error=str(e), exc_info=True)
|
||||
console.print(f"[bold red]Error:[/bold red] {e}")
|
||||
sys.exit(1)
|
||||
log.info(
|
||||
"ask_completed",
|
||||
trace_id=result.trace_id,
|
||||
confidence=result.confidence,
|
||||
citations=len(result.citations),
|
||||
tokens_used=result.cost_metadata.tokens_used,
|
||||
wall_time_sec=result.cost_metadata.wall_time_sec,
|
||||
)
|
||||
render_result(result, console)
|
||||
|
||||
|
||||
|
|
|
|||
130
obs/__init__.py
Normal file
130
obs/__init__.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
"""Marchwarden observability — structured application logging.
|
||||
|
||||
Operational logs for administrators. NOT the same as the JSONL traces
|
||||
in `~/.marchwarden/traces/` — those are per-research-call audit logs
|
||||
for verifying provenance. These logs cover system events: startup,
|
||||
shutdown, errors, MCP transport activity, cost ledger writes.
|
||||
|
||||
Format is structured-by-default (structlog) so logs can be ingested
|
||||
into OpenSearch or similar without per-call formatter ceremony.
|
||||
|
||||
Usage:
|
||||
from obs import configure_logging, get_logger
|
||||
|
||||
configure_logging() # call once at process startup
|
||||
log = get_logger("marchwarden.cli")
|
||||
log.info("ask_started", question=question, depth=depth)
|
||||
|
||||
# Bind context that flows to every downstream log call:
|
||||
log = log.bind(trace_id="abc-123", researcher="web")
|
||||
log.info("research_started") # automatically includes trace_id + researcher
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import structlog
|
||||
|
||||
|
||||
_CONFIGURED = False
|
||||
|
||||
|
||||
def _resolve_format() -> str:
|
||||
"""Pick the renderer: explicit env override, else auto-detect TTY."""
|
||||
explicit = os.environ.get("MARCHWARDEN_LOG_FORMAT")
|
||||
if explicit in {"json", "console"}:
|
||||
return explicit
|
||||
return "console" if sys.stderr.isatty() else "json"
|
||||
|
||||
|
||||
def _resolve_level() -> int:
|
||||
name = os.environ.get("MARCHWARDEN_LOG_LEVEL", "INFO").upper()
|
||||
return getattr(logging, name, logging.INFO)
|
||||
|
||||
|
||||
def configure_logging(force: bool = False) -> None:
|
||||
"""Configure structlog + stdlib logging once per process.
|
||||
|
||||
Idempotent — subsequent calls are no-ops unless ``force=True``.
|
||||
Honors:
|
||||
- MARCHWARDEN_LOG_LEVEL (default INFO)
|
||||
- MARCHWARDEN_LOG_FORMAT (json|console; auto-detected from TTY if unset)
|
||||
- MARCHWARDEN_LOG_FILE (truthy → also log to ~/.marchwarden/logs/marchwarden.log)
|
||||
"""
|
||||
global _CONFIGURED
|
||||
if _CONFIGURED and not force:
|
||||
return
|
||||
|
||||
level = _resolve_level()
|
||||
fmt = _resolve_format()
|
||||
|
||||
timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)
|
||||
|
||||
shared_processors = [
|
||||
structlog.contextvars.merge_contextvars,
|
||||
structlog.stdlib.add_logger_name,
|
||||
structlog.stdlib.add_log_level,
|
||||
timestamper,
|
||||
structlog.processors.StackInfoRenderer(),
|
||||
structlog.processors.format_exc_info,
|
||||
]
|
||||
|
||||
if fmt == "json":
|
||||
renderer: structlog.types.Processor = structlog.processors.JSONRenderer()
|
||||
else:
|
||||
renderer = structlog.dev.ConsoleRenderer(colors=sys.stderr.isatty())
|
||||
|
||||
structlog.configure(
|
||||
processors=shared_processors
|
||||
+ [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
|
||||
logger_factory=structlog.stdlib.LoggerFactory(),
|
||||
wrapper_class=structlog.make_filtering_bound_logger(level),
|
||||
cache_logger_on_first_use=True,
|
||||
)
|
||||
|
||||
formatter = structlog.stdlib.ProcessorFormatter(
|
||||
foreign_pre_chain=shared_processors,
|
||||
processors=[
|
||||
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
|
||||
renderer,
|
||||
],
|
||||
)
|
||||
|
||||
# Always log to stderr so MCP stdio stdout stays clean.
|
||||
handler: logging.Handler = logging.StreamHandler(sys.stderr)
|
||||
handler.setFormatter(formatter)
|
||||
|
||||
root = logging.getLogger()
|
||||
# Replace handlers so re-configuration in tests works cleanly.
|
||||
root.handlers = [handler]
|
||||
root.setLevel(level)
|
||||
|
||||
if os.environ.get("MARCHWARDEN_LOG_FILE"):
|
||||
log_dir = Path(os.path.expanduser("~/.marchwarden/logs"))
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
file_handler = RotatingFileHandler(
|
||||
log_dir / "marchwarden.log",
|
||||
maxBytes=10 * 1024 * 1024,
|
||||
backupCount=5,
|
||||
encoding="utf-8",
|
||||
)
|
||||
file_handler.setFormatter(formatter)
|
||||
root.addHandler(file_handler)
|
||||
|
||||
# Quiet a few noisy third-party loggers unless DEBUG is requested.
|
||||
if level > logging.DEBUG:
|
||||
for noisy in ("httpx", "httpcore", "anthropic"):
|
||||
logging.getLogger(noisy).setLevel(logging.WARNING)
|
||||
|
||||
_CONFIGURED = True
|
||||
|
||||
|
||||
def get_logger(name: str) -> structlog.stdlib.BoundLogger:
|
||||
"""Return a bound structlog logger. Configures logging on first call."""
|
||||
if not _CONFIGURED:
|
||||
configure_logging()
|
||||
return structlog.get_logger(name)
|
||||
|
|
@ -20,6 +20,7 @@ dependencies = [
|
|||
"httpx>=0.24.0",
|
||||
"click>=8.0",
|
||||
"rich>=13.0",
|
||||
"structlog>=24.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
@ -36,7 +37,7 @@ dev = [
|
|||
marchwarden = "cli.main:cli"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["researchers*", "orchestrator*", "cli*"]
|
||||
include = ["researchers*", "orchestrator*", "cli*", "obs*"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
|
|
|||
|
|
@ -10,8 +10,10 @@ import json
|
|||
import time
|
||||
from typing import Optional
|
||||
|
||||
import structlog
|
||||
from anthropic import Anthropic
|
||||
|
||||
from obs import get_logger
|
||||
from researchers.web.models import (
|
||||
Citation,
|
||||
ConfidenceFactors,
|
||||
|
|
@ -26,6 +28,8 @@ from researchers.web.models import (
|
|||
from researchers.web.tools import SearchResult, fetch_url, tavily_search
|
||||
from researchers.web.trace import TraceLogger
|
||||
|
||||
log = get_logger("marchwarden.researcher.web")
|
||||
|
||||
SYSTEM_PROMPT = """\
|
||||
You are a Marchwarden — a research specialist stationed at the frontier of knowledge. \
|
||||
Your job is to investigate a question thoroughly using web search and URL fetching, \
|
||||
|
|
@ -201,6 +205,21 @@ class WebResearcher:
|
|||
evidence: list[dict] = []
|
||||
budget_exhausted = False
|
||||
|
||||
# Bind trace context so every downstream log call automatically
|
||||
# carries trace_id and researcher. Cleared in the finally block.
|
||||
structlog.contextvars.bind_contextvars(
|
||||
trace_id=trace.trace_id,
|
||||
researcher="web",
|
||||
)
|
||||
log.info(
|
||||
"research_started",
|
||||
question=question,
|
||||
depth=depth,
|
||||
max_iterations=constraints.max_iterations,
|
||||
token_budget=constraints.token_budget,
|
||||
model_id=self.model_id,
|
||||
)
|
||||
|
||||
trace.log_step(
|
||||
"start",
|
||||
decision=f"Beginning research: depth={depth}",
|
||||
|
|
@ -322,6 +341,19 @@ class WebResearcher:
|
|||
)
|
||||
trace.close()
|
||||
|
||||
log.info(
|
||||
"research_completed",
|
||||
confidence=result.confidence,
|
||||
citations=len(result.citations),
|
||||
gaps=len(result.gaps),
|
||||
discovery_events=len(result.discovery_events),
|
||||
tokens_used=result.cost_metadata.tokens_used,
|
||||
iterations_run=result.cost_metadata.iterations_run,
|
||||
wall_time_sec=result.cost_metadata.wall_time_sec,
|
||||
budget_exhausted=result.cost_metadata.budget_exhausted,
|
||||
)
|
||||
structlog.contextvars.clear_contextvars()
|
||||
|
||||
return result
|
||||
|
||||
async def _execute_tool(
|
||||
|
|
|
|||
|
|
@ -11,9 +11,12 @@ from typing import Optional
|
|||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from obs import configure_logging, get_logger
|
||||
from researchers.web.agent import WebResearcher
|
||||
from researchers.web.models import ResearchConstraints
|
||||
|
||||
log = get_logger("marchwarden.mcp")
|
||||
|
||||
mcp = FastMCP(
|
||||
name="marchwarden-web-researcher",
|
||||
instructions=(
|
||||
|
|
@ -84,6 +87,8 @@ async def research(
|
|||
|
||||
def main():
|
||||
"""Run the MCP server on stdio."""
|
||||
configure_logging()
|
||||
log.info("mcp_server_starting", transport="stdio", server="marchwarden-web-researcher")
|
||||
mcp.run(transport="stdio")
|
||||
|
||||
|
||||
|
|
|
|||
128
tests/test_obs.py
Normal file
128
tests/test_obs.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
"""Tests for the obs (structured logging) module."""
|
||||
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import structlog
|
||||
|
||||
from obs import configure_logging, get_logger
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_logging():
|
||||
"""Reset structlog + stdlib state between tests so configure_logging
|
||||
is forced to re-run."""
|
||||
import obs
|
||||
|
||||
obs._CONFIGURED = False
|
||||
structlog.reset_defaults()
|
||||
structlog.contextvars.clear_contextvars()
|
||||
root = logging.getLogger()
|
||||
root.handlers = []
|
||||
root.setLevel(logging.WARNING)
|
||||
yield
|
||||
obs._CONFIGURED = False
|
||||
structlog.reset_defaults()
|
||||
structlog.contextvars.clear_contextvars()
|
||||
root.handlers = []
|
||||
|
||||
|
||||
def _capture_stderr(monkeypatch):
|
||||
buf = io.StringIO()
|
||||
monkeypatch.setattr("sys.stderr", buf)
|
||||
return buf
|
||||
|
||||
|
||||
class TestConfigureLogging:
|
||||
def test_json_format_emits_json(self, monkeypatch):
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "INFO")
|
||||
buf = _capture_stderr(monkeypatch)
|
||||
|
||||
configure_logging(force=True)
|
||||
log = get_logger("marchwarden.test")
|
||||
log.info("hello", key="value", count=3)
|
||||
|
||||
line = buf.getvalue().strip()
|
||||
assert line, "expected at least one log line"
|
||||
parsed = json.loads(line)
|
||||
assert parsed["event"] == "hello"
|
||||
assert parsed["key"] == "value"
|
||||
assert parsed["count"] == 3
|
||||
assert parsed["level"] == "info"
|
||||
assert parsed["logger"] == "marchwarden.test"
|
||||
assert "timestamp" in parsed
|
||||
|
||||
def test_console_format_is_human_readable(self, monkeypatch):
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "console")
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "INFO")
|
||||
buf = _capture_stderr(monkeypatch)
|
||||
|
||||
configure_logging(force=True)
|
||||
log = get_logger("marchwarden.test")
|
||||
log.info("greeting", who="world")
|
||||
|
||||
text = buf.getvalue()
|
||||
assert "greeting" in text
|
||||
assert "world" in text
|
||||
# Console renderer is not JSON
|
||||
with pytest.raises(json.JSONDecodeError):
|
||||
json.loads(text.strip().splitlines()[-1])
|
||||
|
||||
def test_context_binding_propagates(self, monkeypatch):
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||
buf = _capture_stderr(monkeypatch)
|
||||
|
||||
configure_logging(force=True)
|
||||
log = get_logger("marchwarden.test")
|
||||
structlog.contextvars.bind_contextvars(trace_id="abc-123", researcher="web")
|
||||
try:
|
||||
log.info("step")
|
||||
finally:
|
||||
structlog.contextvars.clear_contextvars()
|
||||
|
||||
parsed = json.loads(buf.getvalue().strip())
|
||||
assert parsed["trace_id"] == "abc-123"
|
||||
assert parsed["researcher"] == "web"
|
||||
|
||||
def test_log_level_filters(self, monkeypatch):
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_LEVEL", "WARNING")
|
||||
buf = _capture_stderr(monkeypatch)
|
||||
|
||||
configure_logging(force=True)
|
||||
log = get_logger("marchwarden.test")
|
||||
log.info("ignored")
|
||||
log.warning("kept")
|
||||
|
||||
lines = [l for l in buf.getvalue().strip().splitlines() if l]
|
||||
assert len(lines) == 1
|
||||
assert json.loads(lines[0])["event"] == "kept"
|
||||
|
||||
def test_idempotent(self, monkeypatch):
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||
configure_logging(force=True)
|
||||
|
||||
import obs
|
||||
|
||||
obs._CONFIGURED = True
|
||||
# Second call should be a no-op
|
||||
before = logging.getLogger().handlers
|
||||
configure_logging()
|
||||
after = logging.getLogger().handlers
|
||||
assert before is after
|
||||
|
||||
def test_get_logger_auto_configures(self, monkeypatch):
|
||||
monkeypatch.setenv("MARCHWARDEN_LOG_FORMAT", "json")
|
||||
buf = _capture_stderr(monkeypatch)
|
||||
|
||||
# No explicit configure_logging() call
|
||||
log = get_logger("marchwarden.test")
|
||||
log.info("auto")
|
||||
|
||||
parsed = json.loads(buf.getvalue().strip())
|
||||
assert parsed["event"] == "auto"
|
||||
Loading…
Reference in a new issue