marchwarden/obs/__init__.py

131 lines
4.3 KiB
Python
Raw Permalink Normal View History

M2.5.1: Structured application logger via structlog (#24) Adds an operational logging layer separate from the JSONL trace audit logs. Operational logs cover system events (startup, errors, MCP transport, research lifecycle); JSONL traces remain the researcher provenance audit trail. Backend: structlog with two renderers selectable via MARCHWARDEN_LOG_FORMAT (json|console). Defaults to console when stderr is a TTY, json otherwise — so dev runs are human-readable and shipped runs (containers, automation) emit OpenSearch-ready JSON without configuration. Key features: - Named loggers per component: marchwarden.cli, marchwarden.mcp, marchwarden.researcher.web - MARCHWARDEN_LOG_LEVEL controls global level (default INFO) - MARCHWARDEN_LOG_FILE=1 enables a 10MB-rotating file at ~/.marchwarden/logs/marchwarden.log - structlog contextvars bind trace_id + researcher at the start of each research() call so every downstream log line carries them automatically; cleared on completion - stdlib logging is funneled through the same pipeline so noisy third-party loggers (httpx, anthropic) get the same formatting and quieted to WARN unless DEBUG is requested - Logs to stderr to keep MCP stdio stdout clean Wired into: - cli.main.cli — configures logging on startup, logs ask_started/ ask_completed/ask_failed - researchers.web.server.main — configures logging on startup, logs mcp_server_starting - researchers.web.agent.research — binds trace context, logs research_started/research_completed Tests verify JSON and console formats, contextvar propagation, level filtering, idempotency, and auto-configure-on-first-use. 94/94 tests passing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 21:46:51 +00:00
"""Marchwarden observability — structured application logging.
Operational logs for administrators. NOT the same as the JSONL traces
in `~/.marchwarden/traces/` those are per-research-call audit logs
for verifying provenance. These logs cover system events: startup,
shutdown, errors, MCP transport activity, cost ledger writes.
Format is structured-by-default (structlog) so logs can be ingested
into OpenSearch or similar without per-call formatter ceremony.
Usage:
from obs import configure_logging, get_logger
configure_logging() # call once at process startup
log = get_logger("marchwarden.cli")
log.info("ask_started", question=question, depth=depth)
# Bind context that flows to every downstream log call:
log = log.bind(trace_id="abc-123", researcher="web")
log.info("research_started") # automatically includes trace_id + researcher
"""
import logging
import os
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Optional
import structlog
_CONFIGURED = False
def _resolve_format() -> str:
"""Pick the renderer: explicit env override, else auto-detect TTY."""
explicit = os.environ.get("MARCHWARDEN_LOG_FORMAT")
if explicit in {"json", "console"}:
return explicit
return "console" if sys.stderr.isatty() else "json"
def _resolve_level() -> int:
name = os.environ.get("MARCHWARDEN_LOG_LEVEL", "INFO").upper()
return getattr(logging, name, logging.INFO)
def configure_logging(force: bool = False) -> None:
"""Configure structlog + stdlib logging once per process.
Idempotent subsequent calls are no-ops unless ``force=True``.
Honors:
- MARCHWARDEN_LOG_LEVEL (default INFO)
- MARCHWARDEN_LOG_FORMAT (json|console; auto-detected from TTY if unset)
- MARCHWARDEN_LOG_FILE (truthy also log to ~/.marchwarden/logs/marchwarden.log)
"""
global _CONFIGURED
if _CONFIGURED and not force:
return
level = _resolve_level()
fmt = _resolve_format()
timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)
shared_processors = [
structlog.contextvars.merge_contextvars,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
timestamper,
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
]
if fmt == "json":
renderer: structlog.types.Processor = structlog.processors.JSONRenderer()
else:
renderer = structlog.dev.ConsoleRenderer(colors=sys.stderr.isatty())
structlog.configure(
processors=shared_processors
+ [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.make_filtering_bound_logger(level),
cache_logger_on_first_use=True,
)
formatter = structlog.stdlib.ProcessorFormatter(
foreign_pre_chain=shared_processors,
processors=[
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
renderer,
],
)
# Always log to stderr so MCP stdio stdout stays clean.
handler: logging.Handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
root = logging.getLogger()
# Replace handlers so re-configuration in tests works cleanly.
root.handlers = [handler]
root.setLevel(level)
if os.environ.get("MARCHWARDEN_LOG_FILE"):
log_dir = Path(os.path.expanduser("~/.marchwarden/logs"))
log_dir.mkdir(parents=True, exist_ok=True)
file_handler = RotatingFileHandler(
log_dir / "marchwarden.log",
maxBytes=10 * 1024 * 1024,
backupCount=5,
encoding="utf-8",
)
file_handler.setFormatter(formatter)
root.addHandler(file_handler)
# Quiet a few noisy third-party loggers unless DEBUG is requested.
if level > logging.DEBUG:
for noisy in ("httpx", "httpcore", "anthropic"):
logging.getLogger(noisy).setLevel(logging.WARNING)
_CONFIGURED = True
def get_logger(name: str) -> structlog.stdlib.BoundLogger:
"""Return a bound structlog logger. Configures logging on first call."""
if not _CONFIGURED:
configure_logging()
return structlog.get_logger(name)