From ae9c11a79b13f8366b9d0dd0986168ce9c6fd580 Mon Sep 17 00:00:00 2001
From: Jeff Smith <jeff@unbiasedgeek.com>
Date: Wed, 8 Apr 2026 14:37:30 -0600
Subject: [PATCH] Add OpenQuestion to research contract
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New field on ResearchResult: open_questions — follow-up questions that
emerged from the research itself. Distinct from gaps (backward: what
failed) and discovery_events (sideways: what's lateral). Open questions
look forward: 'based on what I found, this needs deeper investigation.'

- OpenQuestion model: question, context, priority (high/medium/low; stored
  as a plain str, not validated), source_locator (optional, defaults to None)
- Updated agent synthesis prompt to produce open_questions
- Updated agent result builder to parse open_questions from JSON
- 3 new tests for OpenQuestion model
- Updated existing tests for new field

77 tests passing.

Refs: archeious/marchwarden#1

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
---
 researchers/web/agent.py  | 20 ++++++++++++++++
 researchers/web/models.py | 33 ++++++++++++++++++++++++++
 tests/test_agent.py       | 10 ++++++++
 tests/test_models.py      | 49 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/researchers/web/agent.py b/researchers/web/agent.py
index dbb2e59..e2944fc 100644
--- a/researchers/web/agent.py
+++ b/researchers/web/agent.py
@@ -19,6 +19,7 @@ from researchers.web.models import (
     DiscoveryEvent,
     Gap,
     GapCategory,
+    OpenQuestion,
     ResearchConstraints,
     ResearchResult,
 )
@@ -93,6 +94,14 @@ Produce a JSON object with these exact fields:
       "source_locator": "URL where you found this, or null"
     }}
   ],
+  "open_questions": [
+    {{
+      "question": "A follow-up question that emerged from the research",
+      "context": "What evidence prompted this question",
+      "priority": "high|medium|low",
+      "source_locator": "URL where this question arose, or null"
+    }}
+  ],
   "confidence": 0.0-1.0,
   "confidence_factors": {{
     "num_corroborating_sources": 0,
@@ -506,6 +515,16 @@ class WebResearcher:
                 for d in data.get("discovery_events", [])
             ]
 
+            open_questions = [
+                OpenQuestion(
+                    question=q.get("question", ""),
+                    context=q.get("context", ""),
+                    priority=q.get("priority", "medium"),
+                    source_locator=q.get("source_locator"),
+                )
+                for q in data.get("open_questions", [])
+            ]
+
             cf = data.get("confidence_factors", {})
             confidence_factors = ConfidenceFactors(
                 num_corroborating_sources=cf.get("num_corroborating_sources", 0),
@@ -521,6 +540,7 @@ class WebResearcher:
                 citations=citations,
                 gaps=gaps,
                 discovery_events=discovery_events,
+                open_questions=open_questions,
                 confidence=data.get("confidence", 0.5),
                 confidence_factors=confidence_factors,
                 cost_metadata=CostMetadata(
diff --git a/researchers/web/models.py b/researchers/web/models.py
index 56f95f8..07fb059 100644
--- a/researchers/web/models.py
+++ b/researchers/web/models.py
@@ -132,6 +132,35 @@ class DiscoveryEvent(BaseModel):
     )
 
 
+# ---------------------------------------------------------------------------
+# Output types — OpenQuestion
+# ---------------------------------------------------------------------------
+
+
+class OpenQuestion(BaseModel):
+    """A follow-up question that emerged from the research.
+
+    Distinct from gaps (what failed) and discovery events (what's lateral).
+    Open questions look forward: "based on what I found, this needs deeper
+    investigation." The PI uses these to decide whether to dispatch
+    additional research calls.
+    """
+
+    question: str = Field(
+        description="The follow-up question that emerged from the research.",
+    )
+    context: str = Field(
+        description="What evidence or finding prompted this question.",
+    )
+    priority: str = Field(  # NOTE(review): plain str; values below are not enforced
+        description="'high' (critical to answer quality), 'medium' (would improve answer), 'low' (nice to know).",
+    )
+    source_locator: Optional[str] = Field(
+        default=None,
+        description="URL or source where this question arose from.",
+    )
+
+
 # ---------------------------------------------------------------------------
 # Output types — Confidence
 # ---------------------------------------------------------------------------
@@ -215,6 +244,10 @@ class ResearchResult(BaseModel):
         default_factory=list,
         description="Lateral findings for other researchers.",
     )
+    open_questions: list[OpenQuestion] = Field(
+        default_factory=list,
+        description="Follow-up questions that emerged from the research.",
+    )
     confidence: float = Field(
         ge=0.0,
         le=1.0,
diff --git a/tests/test_agent.py b/tests/test_agent.py
index c3df74d..604a5e5 100644
--- a/tests/test_agent.py
+++ b/tests/test_agent.py
@@ -70,6 +70,14 @@ VALID_SYNTHESIS_JSON = json.dumps(
                 "source_locator": "https://example.com/ref",
             }
         ],
+        "open_questions": [
+            {
+                "question": "What is the optimal irrigation schedule for high-elevation potatoes?",
+                "context": "Multiple sources mention irrigation is critical but none specify schedules.",
+                "priority": "medium",
+                "source_locator": "https://example.com/utah-crops",
+            }
+        ],
         "confidence": 0.82,
         "confidence_factors": {
             "num_corroborating_sources": 3,
@@ -197,6 +205,8 @@ class TestWebResearcher:
             assert len(result.gaps) == 1
             assert result.gaps[0].category == "source_not_found"
             assert len(result.discovery_events) == 1
+            assert len(result.open_questions) == 1
+            assert "irrigation" in result.open_questions[0].question
             assert result.confidence == 0.82
             assert result.confidence_factors.num_corroborating_sources == 3
             assert result.cost_metadata.model_id == "claude-test"
diff --git a/tests/test_models.py b/tests/test_models.py
index 2580140..5ac2fea 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -13,6 +13,7 @@ from researchers.web.models import (
     DiscoveryEvent,
     Gap,
     GapCategory,
+    OpenQuestion,
     ResearchConstraints,
     ResearchResult,
 )
@@ -58,6 +59,17 @@ def make_discovery_event(**overrides) -> DiscoveryEvent:
     return DiscoveryEvent(**defaults)
 
 
+def make_open_question(**overrides) -> OpenQuestion:
+    defaults = {
+        "question": "What is the optimal irrigation schedule for high-elevation potatoes?",
+        "context": "Multiple sources mention irrigation is critical but none specify schedules.",
+        "priority": "medium",
+        "source_locator": "https://example.com/utah-crops",
+    }
+    defaults.update(overrides)
+    return OpenQuestion(**defaults)
+
+
 def make_confidence_factors(**overrides) -> ConfidenceFactors:
     defaults = {
         "num_corroborating_sources": 3,
@@ -89,6 +101,7 @@ def make_research_result(**overrides) -> ResearchResult:
         "citations": [make_citation()],
         "gaps": [make_gap()],
         "discovery_events": [make_discovery_event()],
+        "open_questions": [make_open_question()],
         "confidence": 0.82,
         "confidence_factors": make_confidence_factors(),
         "cost_metadata": make_cost_metadata(),
@@ -250,6 +263,33 @@ class TestDiscoveryEvent:
         assert e == e2
 
 
+# ---------------------------------------------------------------------------
+# OpenQuestion
+# ---------------------------------------------------------------------------
+
+
+class TestOpenQuestion:
+    def test_full_question(self):
+        q = make_open_question()
+        assert "irrigation" in q.question
+        assert q.priority == "medium"
+        assert q.source_locator is not None
+
+    def test_minimal_question(self):
+        q = OpenQuestion(
+            question="Is this viable?",
+            context="Found conflicting data.",
+            priority="low",
+        )
+        assert q.source_locator is None
+
+    def test_serialization_roundtrip(self):
+        q = make_open_question()
+        data = q.model_dump()
+        q2 = OpenQuestion(**data)
+        assert q == q2
+
+
 # ---------------------------------------------------------------------------
 # ConfidenceFactors
 # ---------------------------------------------------------------------------
@@ -325,16 +365,18 @@ class TestResearchResult:
         assert len(r.citations) == 1
         assert len(r.gaps) == 1
         assert len(r.discovery_events) == 1
+        assert len(r.open_questions) == 1
         assert 0.0 <= r.confidence <= 1.0
         assert r.cost_metadata.model_id == "claude-sonnet-4-6"
 
     def test_empty_lists_allowed(self):
         r = make_research_result(
-            citations=[], gaps=[], discovery_events=[]
+            citations=[], gaps=[], discovery_events=[], open_questions=[]
         )
         assert r.citations == []
         assert r.gaps == []
         assert r.discovery_events == []
+        assert r.open_questions == []
 
     def test_confidence_bounds(self):
         with pytest.raises(ValidationError):
@@ -358,6 +400,7 @@ class TestResearchResult:
             "citations",
             "gaps",
             "discovery_events",
+            "open_questions",
             "confidence",
             "confidence_factors",
             "cost_metadata",
@@ -365,6 +408,10 @@ class TestResearchResult:
         }
         assert set(data.keys()) == expected_keys
 
+        # OpenQuestion keys
+        oq_keys = {"question", "context", "priority", "source_locator"}
+        assert set(data["open_questions"][0].keys()) == oq_keys
+
         # Citation keys
         citation_keys = {
             "source",