From e861c392e47026704fa3571f57659662159d85c8 Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Wed, 8 Apr 2026 12:49:03 -0600 Subject: [PATCH] Add model_id to CostMetadata Tracks which LLM powered each research call. Enables: - Cost analysis across model tiers - Quality calibration (confidence vs model capability) - Reproducibility (know exactly what produced a result) Co-Authored-By: Claude Haiku 4.5 --- ResearchContract.md | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/ResearchContract.md b/ResearchContract.md index ac632d6..3751fed 100644 --- a/ResearchContract.md +++ b/ResearchContract.md @@ -311,22 +311,30 @@ class CostMetadata: iterations_run: int # Number of inner-loop iterations wall_time_sec: float # Actual elapsed time budget_exhausted: bool # True if researcher hit iteration or token cap + model_id: str # Model used for the research loop (e.g. "claude-sonnet-4-6") ``` +The `model_id` field records which LLM powered the researcher's inner loop. This is critical for: +- **Cost analysis** — comparing token spend across model tiers +- **Quality calibration** — correlating confidence scores with model capability +- **Reproducibility** — knowing exactly what produced a given result + Example: ```python CostMetadata( tokens_used=8452, iterations_run=3, wall_time_sec=42.5, - budget_exhausted=False + budget_exhausted=False, + model_id="claude-sonnet-4-6" ) ``` The PI uses this to: -- Track costs (token budgets, actual spend) +- Track costs (token budgets, actual spend, model tier) - Detect runaway loops (budget_exhausted = True) - Plan timeouts (wall_time_sec tells you if this is acceptable latency) +- Compare fidelity-to-cost ratio across models --- @@ -430,7 +438,8 @@ Response: "tokens_used": 450, "iterations_run": 1, "wall_time_sec": 3.2, - "budget_exhausted": false + "budget_exhausted": false, + "model_id": "claude-sonnet-4-6" }, "trace_id": "550e8400-e29b-41d4-a716-446655440001" } @@ -494,7 +503,8 @@ Response: "tokens_used": 19240, "iterations_run": 4, "wall_time_sec": 67.8, - "budget_exhausted": false + "budget_exhausted": false, + "model_id": "claude-sonnet-4-6" }, "trace_id": "550e8400-e29b-41d4-a716-446655440002" } @@ -562,7 +572,8 @@ Response: "tokens_used": 4998, "iterations_run": 3, "wall_time_sec": 31.2, - "budget_exhausted": true + "budget_exhausted": true, + "model_id": "claude-haiku-4-5" }, "trace_id": "550e8400-e29b-41d4-a716-446655440003" }