From 1f81471ce8ae2e675e81da860fc710109bfa7ecd Mon Sep 17 00:00:00 2001 From: Jeff Smith Date: Wed, 8 Apr 2026 14:37:37 -0600 Subject: [PATCH] Add OpenQuestion to contract spec and examples - New section documenting OpenQuestion (question, context, priority, source_locator) - Direction table: gaps (backward), discovery_events (sideways), open_questions (forward) - Updated all 3 examples to include open_questions - Updated researcher rules (rule 5) and caller rules (rule 6) Co-Authored-By: Claude Haiku 4.5 --- ResearchContract.md | 81 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/ResearchContract.md b/ResearchContract.md index 3751fed..c943bad 100644 --- a/ResearchContract.md +++ b/ResearchContract.md @@ -70,6 +70,7 @@ class ResearchResult: citations: List[Citation] # Sources used, with raw evidence gaps: List[Gap] # What couldn't be resolved (categorized) discovery_events: List[DiscoveryEvent] # Lateral findings for other researchers + open_questions: List[OpenQuestion] # Follow-up questions that emerged confidence: float # 0.0–1.0 overall confidence confidence_factors: ConfidenceFactors # What fed the confidence score cost_metadata: CostMetadata # Resource usage @@ -255,6 +256,57 @@ Example: --- +### `open_questions` (list of OpenQuestion objects) + +```python +@dataclass +class OpenQuestion: + question: str # The follow-up question that emerged + context: str # What evidence prompted this question + priority: str # "high", "medium", "low" + source_locator: Optional[str] # Where this question arose from +``` + +Open questions capture **forward-looking follow-ups** that emerged from the research itself. 
They are distinct from gaps (what failed) and discovery events (what's lateral): + +| Field | Direction | Meaning | +|:---|:---|:---| +| `gaps` | Backward | "I tried to find X but couldn't" | +| `discovery_events` | Sideways | "Another researcher should look at this" | +| `open_questions` | **Forward** | "Based on what I found, this needs deeper investigation" | + +Example: +```python +[ + OpenQuestion( + question="What is the optimal irrigation schedule for high-elevation potatoes?", + context="Multiple sources mention irrigation is critical but none specify schedules.", + priority="medium", + source_locator="https://extension.usu.edu/gardening/utah-crops" + ), + OpenQuestion( + question="How does Utah's soil salinity vary by county?", + context="Two sources referenced salinity as a limiting factor but with conflicting data.", + priority="high", + source_locator=None + ), +] +``` + +Priority levels: +- `"high"` — critical to answer quality; the PI should strongly consider dispatching follow-up research +- `"medium"` — would meaningfully improve the answer +- `"low"` — nice to know; not essential + +The PI uses open questions to feed a **dynamic priority queue** — deciding whether to go deeper on the current topic or move on. + +**Rules:** +- Each open question must be grounded in evidence encountered during research (not speculative) +- Questions should be specific and actionable (not vague like "learn more about Utah") +- The researcher should not attempt to answer its own open questions — deciding whether to dispatch follow-up research on them is the PI's job + +--- + ### `confidence` (float, 0.0–1.0) Overall confidence in the answer. Accompanied by `confidence_factors` to prevent "vibe check" scoring. @@ -381,11 +433,12 @@ The `content_hash` enables: 2. **Provide raw evidence.** Every citation must include a `raw_excerpt` copied verbatim from the source. 3. **Admit and categorize gaps.** If you can't find something, say so with the appropriate `GapCategory`. 4. 
**Report lateral discoveries.** If you encounter something relevant to another researcher's domain, emit a `DiscoveryEvent`. -5. **Respect budgets.** Stop iterating if `max_iterations` or `token_budget` is reached. Reflect in `budget_exhausted`. -6. **Ground claims.** Every factual claim in `answer` must link to at least one citation. -7. **Explain confidence.** Populate `confidence_factors` honestly; do not inflate scores. -8. **Hash fetched content.** Every URL/source fetch in the trace must include a `content_hash`. -9. **Handle failures gracefully.** If Tavily is down or a URL is broken, note it in `gaps` with the appropriate category and continue with what you have. +5. **Surface open questions.** If the research raises follow-up questions that need deeper investigation, emit `OpenQuestion` objects with priority levels. +6. **Respect budgets.** Stop iterating if `max_iterations` or `token_budget` is reached. Reflect in `budget_exhausted`. +7. **Ground claims.** Every factual claim in `answer` must link to at least one citation. +8. **Explain confidence.** Populate `confidence_factors` honestly; do not inflate scores. +9. **Hash fetched content.** Every URL/source fetch in the trace must include a `content_hash`. +10. **Handle failures gracefully.** If Tavily is down or a URL is broken, note it in `gaps` with the appropriate category and continue with what you have. ### The Caller (PI/CLI) Must @@ -394,6 +447,7 @@ The `content_hash` enables: 3. **Check raw_excerpts.** For important decisions, verify claims against `raw_excerpt` before acting. 4. **Process discovery_events.** Log them (V1) or dispatch additional researchers (V2+). 5. **Respect gap categories.** Use the category to decide the appropriate response (retry, re-dispatch, escalate, accept). +6. **Review open_questions.** Use priority levels to decide whether to dispatch deeper research or accept the current answer. 
--- @@ -425,6 +479,7 @@ Response: ], "gaps": [], "discovery_events": [], + "open_questions": [], "confidence": 0.99, "confidence_factors": { "num_corroborating_sources": 1, @@ -490,6 +545,14 @@ Response: "source_locator": "https://www.crunchbase.com/hub/crispr-startups" } ], + "open_questions": [ + { + "question": "What are the current Phase III CRISPR trial success rates?", + "context": "Multiple sources reference ongoing trials but none include outcome data.", + "priority": "high", + "source_locator": "https://www.crunchbase.com/hub/crispr-startups" + } + ], "confidence": 0.72, "confidence_factors": { "num_corroborating_sources": 3, @@ -559,6 +622,14 @@ Response: "source_locator": null } ], + "open_questions": [ + { + "question": "What caused the second AI winter to end?", + "context": "Found conflicting narratives about 1990s revival; needed more iterations to resolve.", + "priority": "medium", + "source_locator": null + } + ], "confidence": 0.55, "confidence_factors": { "num_corroborating_sources": 2,