diff --git a/cmd/engine/main.go b/cmd/engine/main.go
index edfc821..793238e 100644
--- a/cmd/engine/main.go
+++ b/cmd/engine/main.go
@@ -412,6 +412,9 @@ func buildPageIndexStrategy(c config.RetrievalConfig, client llmgate.Client, sto
 	if c.PageIndex.PageContentLimit > 0 {
 		p.PageContentLimit = c.PageIndex.PageContentLimit
 	}
+	if c.PageIndex.MaxCitations > 0 {
+		p.MaxCitations = c.PageIndex.MaxCitations
+	}
 	p.ModelOverride = c.PageIndex.Model
 	return p
 }
diff --git a/cmd/server/main.go b/cmd/server/main.go
index 2430862..63191d2 100644
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -488,6 +488,9 @@ func buildPageIndexStrategy(c enginecfg.RetrievalConfig, client llmgate.Client,
 	if c.PageIndex.PageContentLimit > 0 {
 		p.PageContentLimit = c.PageIndex.PageContentLimit
 	}
+	if c.PageIndex.MaxCitations > 0 {
+		p.MaxCitations = c.PageIndex.MaxCitations
+	}
 	p.ModelOverride = c.PageIndex.Model
 	return p
 }
diff --git a/config.example.yaml b/config.example.yaml
index 6bec472..4a91048 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -286,6 +286,16 @@ retrieval:
     # flagship model's context window. Higher values risk burning
     # context budget on stray full-document fetches.
     page_content_limit: 16000
+    # Cap on how many distinct page ranges the FINAL answer may
+    # cite. A confidence backstop, not a navigation limit: the
+    # model may read as many pages as max_hops allows, but the
+    # citation set it commits to is bounded. FinanceBench data
+    # shows the failure mode is "spray ~5 low-confidence ranges ->
+    # miss all" while a single confident pick scores perfectly;
+    # capping the final set tames the spray. 3 keeps a genuinely
+    # multi-location answer (e.g. a figure + its footnote) while
+    # cutting the low-confidence tail. Env: VLE_/VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS.
+    max_citations: 3
     # Override the navigation-loop model; empty inherits the
     # request's model (which itself falls back to the engine
     # default). Most deployments leave this blank — navigation
diff --git a/internal/api/pageindex.go b/internal/api/pageindex.go
index 92fd540..541799a 100644
--- a/internal/api/pageindex.go
+++ b/internal/api/pageindex.go
@@ -175,6 +175,7 @@ func (d Deps) handleAnswerPageIndex(w http.ResponseWriter, r *http.Request) {
 		"citations":   citations,
 		"strategy":    perReq.Name(),
 		"model":       budget.ModelName,
+		"confidence":  res.Confidence,
 		"hops_taken":  res.HopsTaken,
 		"usage": map[string]any{
 			"input_tokens":  res.Usage.InputTokens,
@@ -268,6 +269,7 @@ func (d Deps) serveAnswerPageIndexStream(w http.ResponseWriter, r *http.Request,
 		"citations":   citations,
 		"strategy":    strat.Name(),
 		"model":       budget.ModelName,
+		"confidence":  res.Confidence,
 		"hops_taken":  res.HopsTaken,
 		"usage": map[string]any{
 			"input_tokens":  res.Usage.InputTokens,
@@ -298,27 +300,27 @@ func (d Deps) buildPageIndexCitations(ctx context.Context, t *tree.Tree, res *re
 	if res == nil {
 		return nil
 	}
-	// Build a citation per UNIQUE page range present in PagesRead.
-	// The set of pages the model "read" is a superset of what it
-	// cited — some get_pages calls don't end up in the final
-	// cited_pages list — but the union is the right cone of trust
-	// to surface as evidence. The trace token is computed over
-	// only the strictly-cited ranges, which the strategy already
-	// has, so citation drift doesn't break replay.
-	seen := make(map[[2]int]struct{}, len(res.PagesRead))
-	citations := make([]map[string]any, 0, len(res.PagesRead))
-
-	for _, pr := range res.PagesRead {
-		key := [2]int{pr.StartPage, pr.EndPage}
-		if _, dup := seen[key]; dup {
-			continue
+	// Source of truth for citations is the FINAL cited-range set
+	// (res.CitedPages) — already deduped and capped to MaxCitations by
+	// the strategy. This is what makes a confident single-pick answer
+	// surface ONE citation even when it skimmed several pages to find
+	// it: PagesRead is the navigation footprint (a superset), not the
+	// commitment. Fall back to PagesRead only when nothing was cited
+	// (refusal) or the run was hop-capped without a done — the trace
+	// token is keyed on the cited ranges either way, so this never
+	// breaks replay.
+	sources := retrieval.CitationSources(res)
+	citations := make([]map[string]any, 0, len(sources))
+
+	for _, src := range sources {
+		sectionIDs := src.SectionIDs
+		if sectionIDs == nil {
+			sectionIDs = retrieval.SectionIDsOverlapping(t, src.Start, src.End)
 		}
-		seen[key] = struct{}{}
-
 		c := map[string]any{
-			"start_page":  pr.StartPage,
-			"end_page":    pr.EndPage,
-			"section_ids": pr.SectionIDs,
+			"start_page":  src.Start,
+			"end_page":    src.End,
+			"section_ids": sectionIDs,
 		}
 
 		// Quote extraction is best-effort: an LLM blip or empty
@@ -326,7 +328,7 @@ func (d Deps) buildPageIndexCitations(ctx context.Context, t *tree.Tree, res *re
 		// path. We materialise the cited content from storage and
 		// run one SpanExtractor call per citation.
 		if d.LLM != nil {
-			content := d.materialiseCitedContent(ctx, t, pr.SectionIDs)
+			content := d.materialiseCitedContent(ctx, t, sectionIDs)
 			if strings.TrimSpace(content) != "" {
 				ext := d.pageIndexSpanExtractor(requestModel)
 				span, _, err := ext.Extract(ctx, content, query)
diff --git a/internal/api/pageindex_test.go b/internal/api/pageindex_test.go
index d76bfa4..ca31cc7 100644
--- a/internal/api/pageindex_test.go
+++ b/internal/api/pageindex_test.go
@@ -261,6 +261,107 @@ func TestHandleAnswerPageIndexReasoningTrace(t *testing.T) {
 	}
 }
 
+// TestHandleAnswerPageIndexDedupAndCapCitations is the end-to-end
+// proof of the bench-facing fix: a done that sprays the SAME range
+// five times plus extras must produce a citations[] array that is
+// deduped and capped at MaxCitations — no duplicate page ranges, no
+// repeated section ids across citations, and confidence surfaced.
+// This is the API-layer mirror of the strategy's dedup test and the
+// reason precision@5 stops deflating.
+func TestHandleAnswerPageIndexDedupAndCapCitations(t *testing.T) {
+	t.Parallel()
+
+	// Read one range, then a done that cites [1,2] five times plus
+	// two more distinct ranges and a confidence. MaxCitations=3.
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_pages","start_page":1,"end_page":2,"reasoning":"skim"}`,
+		`{"tool":"done","answer":"sprayed","confidence":0.8,"cited_pages":[[1,2],[1,2],[1,2],[1,2],[1,2],[3,4],[8,9]]}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"q"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
+	}
+	var resp map[string]any
+	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+
+	cits, ok := resp["citations"].([]any)
+	if !ok {
+		t.Fatalf("citations missing: %v", resp["citations"])
+	}
+	// Capped at MaxCitations=3, and every page range distinct.
+	if len(cits) > 3 {
+		t.Errorf("citations must be capped at 3, got %d", len(cits))
+	}
+	seenRange := map[[2]int]int{}
+	for _, raw := range cits {
+		c := raw.(map[string]any)
+		key := [2]int{int(c["start_page"].(float64)), int(c["end_page"].(float64))}
+		seenRange[key]++
+	}
+	for key, n := range seenRange {
+		if n > 1 {
+			t.Errorf("citation page range %v appears %d times; must be deduped", key, n)
+		}
+	}
+	// The three distinct ranges all survive (under the cap).
+	for _, want := range [][2]int{{1, 2}, {3, 4}, {8, 9}} {
+		if seenRange[want] == 0 {
+			t.Errorf("expected a citation for range %v, citations: %v", want, cits)
+		}
+	}
+	// confidence surfaces on the response.
+	if conf, ok := resp["confidence"].(float64); !ok || conf != 0.8 {
+		t.Errorf("response confidence = %v, want 0.8", resp["confidence"])
+	}
+}
+
+// TestHandleAnswerPageIndexConfidentSingleCitation is the happy half
+// at the API layer: a confident single-range done — even after the
+// model skimmed several pages — surfaces exactly ONE citation. This
+// is the f1=1.0 commit case, and the fix that stops a multi-page
+// navigation footprint from leaking into citations[].
+func TestHandleAnswerPageIndexConfidentSingleCitation(t *testing.T) {
+	t.Parallel()
+
+	// The model reads pages 1-2 AND 8-9 while searching, but commits
+	// to a single cited range [8,9]. citations[] must be just [8,9].
+	deps, _, _ := newTestDeps(t,
+		`{"tool":"get_pages","start_page":1,"end_page":2,"reasoning":"check setup"}`,
+		`{"tool":"get_pages","start_page":8,"end_page":9,"reasoning":"check debt"}`,
+		`{"tool":"done","answer":"Debt is on 8-9.","confidence":0.95,"cited_pages":[[8,9]],"reasoning":"clear"}`,
+	)
+
+	body := strings.NewReader(`{"document_id":"doc_x","query":"debt?"}`)
+	req := httptest.NewRequest(http.MethodPost, "/v1/answer/pageindex", body)
+	rec := httptest.NewRecorder()
+	pageIndexHandlerRouter(deps).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String())
+	}
+	var resp map[string]any
+	_ = json.Unmarshal(rec.Body.Bytes(), &resp)
+
+	cits, ok := resp["citations"].([]any)
+	if !ok || len(cits) != 1 {
+		t.Fatalf("confident single pick must yield exactly ONE citation, got %v", resp["citations"])
+	}
+	first := cits[0].(map[string]any)
+	if first["start_page"].(float64) != 8 || first["end_page"].(float64) != 9 {
+		t.Errorf("citation = %v-%v, want 8-9", first["start_page"], first["end_page"])
+	}
+	// pages_read still records the full navigation footprint (both
+	// reads) — only citations[] is tightened to the commitment.
+	if pages, ok := resp["pages_read"].([]any); !ok || len(pages) != 2 {
+		t.Errorf("pages_read must keep the full footprint (2 reads), got %v", resp["pages_read"])
+	}
+}
+
 // TestHandleAnswerPageIndexReasoningTraceQueryParam: the
 // ?reasoning=true query param is an alternative to the body field.
 // Some clients prefer it for GET-friendliness when prototyping.
diff --git a/internal/config/config.go b/internal/config/config.go
index 8a2ed78..a44d5e8 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -326,6 +326,16 @@ func applyEnvOverrides(c *Config) {
 	if v := firstEnv("VLS_INGEST_MODE", "VLE_INGEST_MODE"); v != "" {
 		c.Engine.Ingest.Mode = v
 	}
+	// PageIndex final-citation cap. Forwarded so the spray backstop can
+	// be tuned on the live server without a config edit (e.g. tighten
+	// to 1 to force single-citation answers, or relax for a doc set
+	// with many genuinely multi-location questions). VLS_ wins over
+	// VLE_; a garbled or negative value leaves the engine default (3).
+	if v := firstEnv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n >= 0 {
+			c.Engine.Retrieval.PageIndex.MaxCitations = n
+		}
+	}
 	// Total-parse timeout (seconds). Forwarded so the deployed server
 	// honours a tuned parse deadline without a secret/config edit — the
 	// outermost robustness valve against a parse that hangs (pre-LLM,
diff --git a/internal/config/config_pageindex_test.go b/internal/config/config_pageindex_test.go
new file mode 100644
index 0000000..326d8b9
--- /dev/null
+++ b/internal/config/config_pageindex_test.go
@@ -0,0 +1,54 @@
+package config
+
+import (
+	"os"
+	"testing"
+)
+
+// TestForwardPageIndexMaxCitations covers the server-config
+// pass-through for the PageIndex final-citation cap: the operator can
+// set it via VLS_ or VLE_ and have it reach the embedded engine
+// config, with VLS_ winning when both are present and a garbled value
+// preserving the engine default.
+func TestForwardPageIndexMaxCitations(t *testing.T) {
+	prevVLS := os.Getenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	prevVLE := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	defer func() {
+		os.Setenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", prevVLS)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", prevVLE)
+	}()
+
+	// Engine default is 3 with no env set.
+	os.Unsetenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	os.Unsetenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	cfg := Default()
+	applyEnvOverrides(&cfg)
+	if cfg.Engine.Retrieval.PageIndex.MaxCitations != 3 {
+		t.Errorf("default max_citations = %d, want 3", cfg.Engine.Retrieval.PageIndex.MaxCitations)
+	}
+
+	// VLE_ alone forwards through.
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "2")
+	cfg2 := Default()
+	applyEnvOverrides(&cfg2)
+	if cfg2.Engine.Retrieval.PageIndex.MaxCitations != 2 {
+		t.Errorf("VLE_ forward: max_citations = %d, want 2", cfg2.Engine.Retrieval.PageIndex.MaxCitations)
+	}
+
+	// VLS_ wins over VLE_.
+	os.Setenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "1")
+	cfg3 := Default()
+	applyEnvOverrides(&cfg3)
+	if cfg3.Engine.Retrieval.PageIndex.MaxCitations != 1 {
+		t.Errorf("VLS_ should win: max_citations = %d, want 1", cfg3.Engine.Retrieval.PageIndex.MaxCitations)
+	}
+
+	// Garbled value preserves the engine default (3), does not zero it.
+	os.Unsetenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "heaps")
+	cfg4 := Default()
+	applyEnvOverrides(&cfg4)
+	if cfg4.Engine.Retrieval.PageIndex.MaxCitations != 3 {
+		t.Errorf("garbage value should preserve default 3, got %d", cfg4.Engine.Retrieval.PageIndex.MaxCitations)
+	}
+}
diff --git a/internal/handler/answer_pageindex.go b/internal/handler/answer_pageindex.go
index fb207f0..3d890d5 100644
--- a/internal/handler/answer_pageindex.go
+++ b/internal/handler/answer_pageindex.go
@@ -200,6 +200,7 @@ func (h *AnswerPageIndexHandler) HandleAnswerPageIndex(w http.ResponseWriter, r
 		"citations":   citations,
 		"strategy":    perReq.Name(),
 		"model":       budget.ModelName,
+		"confidence":  res.Confidence,
 		"hops_taken":  res.HopsTaken,
 		"usage": map[string]any{
 			"input_tokens":  res.Usage.InputTokens,
@@ -281,6 +282,7 @@ func (h *AnswerPageIndexHandler) serveStream(w http.ResponseWriter, r *http.Requ
 		"citations":   citations,
 		"strategy":    strat.Name(),
 		"model":       budget.ModelName,
+		"confidence":  res.Confidence,
 		"hops_taken":  res.HopsTaken,
 		"usage": map[string]any{
 			"input_tokens":  res.Usage.InputTokens,
@@ -296,32 +298,33 @@ func (h *AnswerPageIndexHandler) serveStream(w http.ResponseWriter, r *http.Requ
 	emitSSE("answer", final)
 }
 
-// buildCitations transforms the strategy's PagesRead + the section
-// tree into the response's citations array: one citation per unique
-// cited page range, each carrying the overlapping section IDs and a
-// best-effort grounding quote extracted from the cited content.
+// buildCitations transforms the strategy's FINAL cited ranges + the
+// section tree into the response's citations array: one citation per
+// cited page range (deduped + capped by the strategy), each carrying
+// the overlapping section IDs and a best-effort grounding quote
+// extracted from the cited content. Falls back to the PagesRead
+// footprint when nothing was cited (refusal / hop-capped run) so the
+// caller still sees where the model looked.
 func (h *AnswerPageIndexHandler) buildCitations(ctx context.Context, t *tree.Tree, res *retrieval.Result, query, requestModel string) []map[string]any {
 	if res == nil {
 		return nil
 	}
-	seen := make(map[[2]int]struct{}, len(res.PagesRead))
-	citations := make([]map[string]any, 0, len(res.PagesRead))
+	sources := retrieval.CitationSources(res)
+	citations := make([]map[string]any, 0, len(sources))
 
-	for _, pr := range res.PagesRead {
-		key := [2]int{pr.StartPage, pr.EndPage}
-		if _, dup := seen[key]; dup {
-			continue
+	for _, src := range sources {
+		sectionIDs := src.SectionIDs
+		if sectionIDs == nil {
+			sectionIDs = retrieval.SectionIDsOverlapping(t, src.Start, src.End)
 		}
-		seen[key] = struct{}{}
-
 		c := map[string]any{
-			"start_page":  pr.StartPage,
-			"end_page":    pr.EndPage,
-			"section_ids": pr.SectionIDs,
+			"start_page":  src.Start,
+			"end_page":    src.End,
+			"section_ids": sectionIDs,
 		}
 
 		if h.llm != nil {
-			content := h.materialiseCitedContent(ctx, t, pr.SectionIDs)
+			content := h.materialiseCitedContent(ctx, t, sectionIDs)
 			if strings.TrimSpace(content) != "" {
 				ext := h.spanExtractor(requestModel)
 				span, _, err := ext.Extract(ctx, content, query)
diff --git a/openapi.yaml b/openapi.yaml
index ddd8022..511ab1b 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -1118,6 +1118,20 @@ components:
           enum: [pageindex]
         model:
           type: string
+        confidence:
+          type: number
+          format: float
+          minimum: 0
+          maximum: 1
+          description: |
+            The agent's self-reported confidence in the answer +
+            citation set, in [0,1], taken from the terminal `done`
+            tool call. 0 means the model did not report one (it is
+            NOT a "definitely wrong" signal). Surfaced so callers
+            can treat a low-confidence answer more cautiously; it
+            does not suppress the answer — even an unsure agent
+            still returns its single best citation rather than a
+            spray of hedged ones.
         hops_taken:
           type: integer
           description: |
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 8a6b30e..bfca023 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -458,6 +458,16 @@ type PageIndexBlock struct {
 	// fetch from torching the model's context window.
 	PageContentLimit int `yaml:"page_content_limit"`
 
+	// MaxCitations caps how many distinct page ranges the final
+	// answer may cite. Default: 3. Set to 0 to use the strategy's
+	// built-in default. This is a confidence backstop, not a
+	// navigation limit: the model may read as many pages as MaxHops
+	// allows, but the citation set it commits to is bounded. The cap
+	// mechanically tames the "spray many low-confidence ranges →
+	// miss all" failure mode FinanceBench surfaced, while still
+	// allowing a genuinely multi-location answer to cite two or three.
+	MaxCitations int `yaml:"max_citations"`
+
 	// Model overrides the LLM model used for the navigation loop.
 	// Empty means use the request's model (which itself falls back
 	// to the engine default). Useful when navigation should run on
@@ -743,6 +753,7 @@ func Default() Config {
 				Enabled:          true,
 				MaxHops:          8,
 				PageContentLimit: 16000,
+				MaxCitations:     3,
 			},
 		},
 		Ingest: IngestConfig{
@@ -1132,6 +1143,14 @@ func applyEnvOverrides(c *Config) {
 			c.Retrieval.PageIndex.PageContentLimit = n
 		}
 	}
+	// MaxCitations accepts both the VLE_ and VLS_ prefixes so the
+	// deploy layer (which forwards VLS_*) and local VLE_* envs both
+	// reach it. A garbled or negative value preserves the default.
+	if v := firstEnv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS"); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n >= 0 {
+			c.Retrieval.PageIndex.MaxCitations = n
+		}
+	}
 	if v := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MODEL"); v != "" {
 		c.Retrieval.PageIndex.Model = v
 	}
@@ -1296,6 +1315,9 @@ func (c Config) Validate() error {
 	if c.Retrieval.PageIndex.PageContentLimit < 0 {
 		return fmt.Errorf("retrieval.pageindex.page_content_limit must be >= 0, got %d", c.Retrieval.PageIndex.PageContentLimit)
 	}
+	if c.Retrieval.PageIndex.MaxCitations < 0 {
+		return fmt.Errorf("retrieval.pageindex.max_citations must be >= 0, got %d", c.Retrieval.PageIndex.MaxCitations)
+	}
 
 	return nil
 }
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index 8830dde..5d4e920 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -684,6 +684,9 @@ func TestPageIndexDefaults(t *testing.T) {
 	if cfg.Retrieval.PageIndex.PageContentLimit != 16000 {
 		t.Errorf("page_content_limit = %d, want 16000", cfg.Retrieval.PageIndex.PageContentLimit)
 	}
+	if cfg.Retrieval.PageIndex.MaxCitations != 3 {
+		t.Errorf("max_citations = %d, want 3", cfg.Retrieval.PageIndex.MaxCitations)
+	}
 	if cfg.Retrieval.PageIndex.Model != "" {
 		t.Errorf("model default should be empty (inherit), got %q", cfg.Retrieval.PageIndex.Model)
 	}
@@ -695,17 +698,20 @@ func TestPageIndexEnvOverride(t *testing.T) {
 	prevEnabled := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED")
 	prevHops := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS")
 	prevLimit := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT")
+	prevCits := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
 	prevModel := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MODEL")
 	defer func() {
 		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED", prevEnabled)
 		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", prevHops)
 		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", prevLimit)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", prevCits)
 		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MODEL", prevModel)
 	}()
 
 	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_ENABLED", "false")
 	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", "12")
 	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", "32000")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "5")
 	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MODEL", "gemini-2.0-flash")
 
 	cfg := Default()
@@ -720,11 +726,43 @@ func TestPageIndexEnvOverride(t *testing.T) {
 	if cfg.Retrieval.PageIndex.PageContentLimit != 32000 {
 		t.Errorf("page_content_limit = %d, want 32000", cfg.Retrieval.PageIndex.PageContentLimit)
 	}
+	if cfg.Retrieval.PageIndex.MaxCitations != 5 {
+		t.Errorf("max_citations = %d, want 5", cfg.Retrieval.PageIndex.MaxCitations)
+	}
 	if cfg.Retrieval.PageIndex.Model != "gemini-2.0-flash" {
 		t.Errorf("model = %q, want gemini-2.0-flash", cfg.Retrieval.PageIndex.Model)
 	}
 }
 
+// TestPageIndexMaxCitationsVLSAlias: the VLS_ prefix reaches
+// MaxCitations too (the deploy layer forwards VLS_*), and VLE_ wins
+// when both are set.
+func TestPageIndexMaxCitationsVLSAlias(t *testing.T) {
+	prevVLE := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	prevVLS := os.Getenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	defer func() {
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", prevVLE)
+		os.Setenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", prevVLS)
+	}()
+
+	// VLS_ alone reaches the field.
+	os.Unsetenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
+	os.Setenv("VLS_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "2")
+	cfg := Default()
+	applyEnvOverrides(&cfg)
+	if cfg.Retrieval.PageIndex.MaxCitations != 2 {
+		t.Errorf("VLS_ alias: max_citations = %d, want 2", cfg.Retrieval.PageIndex.MaxCitations)
+	}
+
+	// VLE_ wins when both are set.
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "4")
+	cfg2 := Default()
+	applyEnvOverrides(&cfg2)
+	if cfg2.Retrieval.PageIndex.MaxCitations != 4 {
+		t.Errorf("VLE_ should win over VLS_: max_citations = %d, want 4", cfg2.Retrieval.PageIndex.MaxCitations)
+	}
+}
+
 // TestPageIndexEnvOverrideEnable: toggle on from an explicitly
 // disabled state.
 func TestPageIndexEnvOverrideEnable(t *testing.T) {
@@ -745,13 +783,16 @@ func TestPageIndexEnvOverrideEnable(t *testing.T) {
 func TestPageIndexEnvOverrideRejectsBad(t *testing.T) {
 	prevHops := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS")
 	prevLimit := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT")
+	prevCits := os.Getenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS")
 	defer func() {
 		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", prevHops)
 		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", prevLimit)
+		os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", prevCits)
 	}()
 
 	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_HOPS", "abc")
 	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_PAGE_CONTENT_LIMIT", "not-a-number")
+	os.Setenv("VLE_RETRIEVAL_PAGEINDEX_MAX_CITATIONS", "lots")
 
 	cfg := Default()
 	applyEnvOverrides(&cfg)
@@ -761,6 +802,9 @@ func TestPageIndexEnvOverrideRejectsBad(t *testing.T) {
 	if cfg.Retrieval.PageIndex.PageContentLimit != 16000 {
 		t.Errorf("garbage page_content_limit env should preserve default, got %d", cfg.Retrieval.PageIndex.PageContentLimit)
 	}
+	if cfg.Retrieval.PageIndex.MaxCitations != 3 {
+		t.Errorf("garbage max_citations env should preserve default 3, got %d", cfg.Retrieval.PageIndex.MaxCitations)
+	}
 }
 
 // TestValidatePageIndexNegatives: negatives rejected by Validate.
@@ -780,10 +824,18 @@ func TestValidatePageIndexNegatives(t *testing.T) {
 		t.Error("negative page_content_limit should fail validation")
 	}
 
+	cfgCits := Default()
+	cfgCits.Database.URL = "postgres://localhost/test"
+	cfgCits.Retrieval.PageIndex.MaxCitations = -1
+	if err := cfgCits.Validate(); err == nil {
+		t.Error("negative max_citations should fail validation")
+	}
+
 	cfg3 := Default()
 	cfg3.Database.URL = "postgres://localhost/test"
 	cfg3.Retrieval.PageIndex.MaxHops = 0
 	cfg3.Retrieval.PageIndex.PageContentLimit = 0
+	cfg3.Retrieval.PageIndex.MaxCitations = 0
 	if err := cfg3.Validate(); err != nil {
 		t.Errorf("zero values should pass (defaults applied at runtime): %v", err)
 	}
diff --git a/pkg/retrieval/pageindex_strategy.go b/pkg/retrieval/pageindex_strategy.go
index 6df2ed2..17411d2 100644
--- a/pkg/retrieval/pageindex_strategy.go
+++ b/pkg/retrieval/pageindex_strategy.go
@@ -73,6 +73,19 @@ type PageIndexStrategy struct {
 	// easily blow past 200K chars otherwise.
 	PageContentLimit int
 
+	// MaxCitations caps how many distinct page ranges the FINAL done
+	// action may cite. Zero means use defaultPageIndexMaxCitations.
+	//
+	// This is a confidence backstop, not a navigation limit: the
+	// model is free to read as many pages as MaxHops allows, but the
+	// citation set it commits to is bounded. FinanceBench data shows
+	// the loss mode is "spray ~5 low-confidence ranges → all miss"
+	// while a single confident pick scores f1=1.0; capping the final
+	// set mechanically tames the spray even when the prompt doesn't
+	// fully. When done returns more ranges than the cap, the top-N
+	// by confidence (ties broken by first-seen order) are kept.
+	MaxCitations int
+
 	// ModelOverride, if non-empty, replaces the budget's ModelName
 	// for every turn. Useful for routing the navigation loop to a
 	// cheaper or faster model than the rest of the engine.
@@ -106,7 +119,13 @@ type PageIndexEvent struct {
 	SectionIDs []tree.SectionID `json:"section_ids,omitempty"`
 	Answer     string           `json:"answer,omitempty"`
 	CitedPages [][2]int         `json:"cited_pages,omitempty"`
-	Note       string           `json:"note,omitempty"`
+	// Confidence is the model's self-reported confidence in the FINAL
+	// answer + citation set, in [0,1]. Populated only on the "done"
+	// event. Surfaced so SSE/trace consumers can show how sure the
+	// agent was — and, downstream, so a low-confidence answer can be
+	// treated more cautiously than a confident one.
+	Confidence float64 `json:"confidence,omitempty"`
+	Note       string  `json:"note,omitempty"`
 }
 
 // defaultPageIndexMaxHops bounds the loop. Eight turns is enough for
@@ -121,6 +140,16 @@ const defaultPageIndexMaxHops = 8
 // excerpt. Matches PageIndex's reference behaviour.
 const defaultPageContentLimit = 16000
 
+// defaultPageIndexMaxCitations bounds the FINAL cited-range set. Three
+// is generous for the answer-spans-one-place common case (where ONE is
+// ideal) while still allowing a genuinely multi-location answer (e.g. a
+// 10-K figure cross-referenced between the income statement and a
+// footnote) to cite two or three distinct ranges. The FinanceBench
+// signal that motivated the cap: confident single-pick = f1 1.0,
+// 5-range spray = f1 0. Capping at 3 keeps the legitimate multi-range
+// case while removing the long tail of low-confidence noise.
+const defaultPageIndexMaxCitations = 3
+
 // strategyNamePageIndex is the stable identifier for config
 // (retrieval.strategy: pageindex) and telemetry.
 const strategyNamePageIndex = "pageindex"
@@ -170,6 +199,7 @@ func NewPageIndexStrategy(client llmgate.Client) *PageIndexStrategy {
 		LLM:              client,
 		MaxHops:          defaultPageIndexMaxHops,
 		PageContentLimit: defaultPageContentLimit,
+		MaxCitations:     defaultPageIndexMaxCitations,
 	}
 }
 
@@ -214,6 +244,10 @@ func (s *PageIndexStrategy) SelectWithCost(ctx context.Context, t *tree.Tree, qu
 	if pageLimit <= 0 {
 		pageLimit = defaultPageContentLimit
 	}
+	maxCitations := s.MaxCitations
+	if maxCitations <= 0 {
+		maxCitations = defaultPageIndexMaxCitations
+	}
 
 	// Pre-flatten the tree into an ordinal section list ordered by
 	// page. The get_pages observation iterates this twice per call;
@@ -280,17 +314,28 @@ func (s *PageIndexStrategy) SelectWithCost(ctx context.Context, t *tree.Tree, qu
 		case pageActionDone:
 			finalAnswer = strings.TrimSpace(action.Answer)
 			finalReasoning = strings.TrimSpace(action.Reasoning)
-			citedRanges = normaliseRanges(action.CitedPages, maxPage)
+			// Confidence-gate the final citation set: clamp + dedup the
+			// model's cited ranges, then cap to maxCitations keeping the
+			// highest-confidence (ties → first-seen) ranges. This is the
+			// fix for the "spray 5 low-confidence ranges, hit none"
+			// failure mode — a confident single pick survives untouched,
+			// a spray is collapsed to its best few.
+			citedRanges = selectCitedRanges(action.CitedPages, action.CitedConfidences, maxPage, maxCitations)
+			confidence := clampConfidence(action.Confidence)
 			selectedIDs := sectionsOverlapping(sections, citedRanges)
 			s.emit(PageIndexEvent{
 				Hop:        hopsTaken,
 				Type:       pageActionDone,
 				Reasoning:  finalReasoning,
 				Answer:     finalAnswer,
-				CitedPages: action.CitedPages,
+				CitedPages: rangesToPairs(citedRanges),
+				Confidence: confidence,
 			})
 			return &Result{
 				SelectedIDs: selectedIDs,
+				Confidences: confidenceMap(selectedIDs, confidence),
+				Confidence:  confidence,
+				CitedPages:  rangesToPairs(citedRanges),
 				Reasoning:   finalAnswer, // /v1/answer/pageindex reads this
 				ModelUsed:   model,
 				Usage:       totalUsage,
@@ -373,12 +418,16 @@ func (s *PageIndexStrategy) SelectWithCost(ctx context.Context, t *tree.Tree, qu
 	// ignores the rule, we return whatever pages have been read so
 	// the caller at least sees the navigation footprint and an empty
 	// answer rather than a 500.
-	finalAnswer, finalReasoning, citedRanges = s.forceDone(ctx, &msgs, &totalUsage, &hopsTaken, model, maxPage)
+	var forcedConfidence float64
+	finalAnswer, finalReasoning, citedRanges, forcedConfidence = s.forceDone(ctx, &msgs, &totalUsage, &hopsTaken, model, maxPage, maxCitations)
 	selectedIDs := sectionsOverlapping(sections, citedRanges)
 	log.Printf("retrieval: pageindex strategy hit max_hops=%d; forced done", maxHops)
 	_ = finalReasoning
 	return &Result{
 		SelectedIDs: selectedIDs,
+		Confidences: confidenceMap(selectedIDs, forcedConfidence),
+		Confidence:  forcedConfidence,
+		CitedPages:  rangesToPairs(citedRanges),
 		Reasoning:   finalAnswer,
 		ModelUsed:   model,
 		Usage:       totalUsage,
@@ -544,13 +593,14 @@ func (s *PageIndexStrategy) loadSectionBody(ctx context.Context, sec sectionPage
 
 // forceDone runs one final hop with a hard "emit done NOW" prompt so
 // the loop can exit gracefully on a stubborn model. Returns
-// (answer, reasoning, cited_ranges). When the model still doesn't
-// emit a valid done action, the empty values flow back and the
-// caller sees a hop-capped Result.
-func (s *PageIndexStrategy) forceDone(ctx context.Context, msgs *[]llmgate.Message, totalUsage *Usage, hopsTaken *int, model string, maxPage int) (string, string, []pageRange) {
+// (answer, reasoning, cited_ranges, confidence). When the model still
+// doesn't emit a valid done action, the empty values flow back and the
+// caller sees a hop-capped Result. The forced citation set is gated
+// through the same dedup + cap as the normal done path.
+func (s *PageIndexStrategy) forceDone(ctx context.Context, msgs *[]llmgate.Message, totalUsage *Usage, hopsTaken *int, model string, maxPage, maxCitations int) (string, string, []pageRange, float64) {
 	*msgs = append(*msgs, llmgate.Message{
 		Role:    llmgate.RoleUser,
-		Content: "You have used your tool-call budget. Reply NOW with one JSON object: {\"tool\":\"done\",\"answer\":\"<your best answer\",\"cited_pages\":[[start,end],...],\"reasoning\":\"why\"}. Do not call any more tools. Do not emit prose.",
+		Content: "You have used your tool-call budget. Reply NOW with one JSON object: {\"tool\":\"done\",\"answer\":\"<your best answer\",\"cited_pages\":[[start,end]],\"confidence\":0.0-1.0,\"reasoning\":\"why\"}. Cite the SINGLE best page range unless the answer truly spans more than one. Do not call any more tools. Do not emit prose.",
 	})
 	req := llmgate.Request{
 		Model:       model,
@@ -560,7 +610,7 @@ func (s *PageIndexStrategy) forceDone(ctx context.Context, msgs *[]llmgate.Messa
 	}
 	resp, err := s.LLM.Complete(ctx, req)
 	if err != nil {
-		return "", "", nil
+		return "", "", nil, 0
 	}
 	*hopsTaken++
 	totalUsage.Add(Usage{
@@ -572,9 +622,12 @@ func (s *PageIndexStrategy) forceDone(ctx context.Context, msgs *[]llmgate.Messa
 	})
 	action, err := ParsePageIndexAction(resp.Content)
 	if err != nil || action.Action != pageActionDone {
-		return "", "", nil
+		return "", "", nil, 0
 	}
-	return strings.TrimSpace(action.Answer), strings.TrimSpace(action.Reasoning), normaliseRanges(action.CitedPages, maxPage)
+	return strings.TrimSpace(action.Answer),
+		strings.TrimSpace(action.Reasoning),
+		selectCitedRanges(action.CitedPages, action.CitedConfidences, maxPage, maxCitations),
+		clampConfidence(action.Confidence)
 }
 
 // --- TOC synthesis ---
@@ -757,29 +810,85 @@ func clampRange(start, end, maxPage int) (int, int, bool) {
 	return start, end, true
 }
 
-// normaliseRanges collapses raw model-emitted ranges (which may be
-// flipped, zero-pages, or duplicated) into a sorted, deduplicated
-// list of valid inclusive ranges clamped to [1,maxPage]. Bad ranges
-// are silently dropped — the trace token must compute over a stable
-// canonical form regardless of how the model orders its citations.
-func normaliseRanges(raw [][2]int, maxPage int) []pageRange {
+// selectCitedRanges is the confidence-gated finaliser for a done
+// action's cited pages. It is the single chokepoint every terminal
+// citation set flows through (normal done + forced done), and it does
+// three things in order:
+//
+//  1. Clamp + DEDUP. Each raw [start,end] is clamped to [1,maxPage];
+//     unusable ranges are dropped; exact duplicates collapse to one.
+//     This alone fixes the precision-deflation bug where a model
+//     emitted the same range (and thus the same section id) five
+//     times — the dedup makes "[[3,5],[3,5],[3,5]]" a single citation.
+//
+//  2. CAP to maxCitations. When more distinct ranges survive than the
+//     cap allows, keep the top-N. Ranking key: per-range confidence
+//     when supplied (descending), else the model's own emission order
+//     (it is instructed to list its single best range first). This is
+//     the mechanical backstop against "spraying": even if the prompt
+//     fails to make the model commit, the engine commits for it.
+//
+//  3. Canonicalise. The kept ranges are returned sorted by page so the
+//     trace token (which hashes the range strings) is stable across
+//     runs that cite the same pages in any order.
+//
+// maxCitations <= 0 disables the cap (dedup still applies); callers
+// pass a resolved positive value, so the <=0 escape hatch only matters
+// to direct unit tests of this helper.
+func selectCitedRanges(raw [][2]int, confidences []float64, maxPage, maxCitations int) []pageRange {
 	if len(raw) == 0 {
 		return nil
 	}
-	seen := make(map[pageRange]struct{}, len(raw))
-	out := make([]pageRange, 0, len(raw))
-	for _, r := range raw {
-		s, e, ok := clampRange(r[0], r[1], maxPage)
+	type scored struct {
+		r     pageRange
+		conf  float64
+		order int
+	}
+	seen := make(map[pageRange]int, len(raw)) // range → index in kept
+	kept := make([]scored, 0, len(raw))
+	for i, rr := range raw {
+		s, e, ok := clampRange(rr[0], rr[1], maxPage)
 		if !ok {
 			continue
 		}
 		pr := pageRange{Start: s, End: e}
-		if _, dup := seen[pr]; dup {
+		conf := 0.0
+		if i < len(confidences) {
+			conf = clampConfidence(confidences[i])
+		}
+		if idx, dup := seen[pr]; dup {
+			// Keep the higher confidence for a repeated range so a
+			// dedup never discards the model's strongest signal.
+			if conf > kept[idx].conf {
+				kept[idx].conf = conf
+			}
 			continue
 		}
-		seen[pr] = struct{}{}
-		out = append(out, pr)
+		seen[pr] = len(kept)
+		kept = append(kept, scored{r: pr, conf: conf, order: i})
 	}
+	if len(kept) == 0 {
+		return nil
+	}
+
+	// Cap: when the distinct set overflows, keep the top-N by
+	// confidence, breaking ties by first-seen order so the result is
+	// deterministic and respects the model's own prioritisation.
+	if maxCitations > 0 && len(kept) > maxCitations {
+		sort.SliceStable(kept, func(i, j int) bool {
+			if kept[i].conf != kept[j].conf {
+				return kept[i].conf > kept[j].conf
+			}
+			return kept[i].order < kept[j].order
+		})
+		kept = kept[:maxCitations]
+	}
+
+	out := make([]pageRange, len(kept))
+	for i, k := range kept {
+		out[i] = k.r
+	}
+	// Canonical page order for a stable trace token.
 	sort.Slice(out, func(i, j int) bool {
 		if out[i].Start != out[j].Start {
 			return out[i].Start < out[j].Start
@@ -789,6 +898,121 @@ func normaliseRanges(raw [][2]int, maxPage int) []pageRange {
 	return out
 }
 
+// clampConfidence coerces a model-emitted confidence into [0,1]. Out
+// of range or NaN-ish values clamp to the nearest bound; a missing
+// confidence arrives as 0 and stays 0 (treated as "unstated", not
+// "certainly wrong" — the answer is still returned).
+func clampConfidence(c float64) float64 {
+	if c < 0 {
+		return 0
+	}
+	if c > 1 {
+		return 1
+	}
+	return c
+}
+
+// confidenceMap projects a single overall confidence across every
+// selected section ID so the API layer's existing per-pick confidence
+// surface (Result.Confidences) and abstention machinery can read it
+// uniformly with the section-based strategies. Returns nil when there
+// is no signal to surface (no IDs, or confidence unstated) so callers
+// that gate on a non-empty map behave exactly as before for runs that
+// don't carry confidence.
+func confidenceMap(ids []tree.SectionID, confidence float64) map[tree.SectionID]float64 {
+	if len(ids) == 0 || confidence <= 0 {
+		return nil
+	}
+	m := make(map[tree.SectionID]float64, len(ids))
+	for _, id := range ids {
+		m[id] = confidence
+	}
+	return m
+}
+
+// rangesToPairs flattens []pageRange back to the [][2]int wire shape
+// used by PageIndexEvent.CitedPages, so the done event reports the
+// FINAL (deduped, capped) citation set rather than the raw model
+// output. Consumers building citations from the event therefore never
+// see the pre-dedup spray.
+func rangesToPairs(ranges []pageRange) [][2]int {
+	if len(ranges) == 0 {
+		return nil
+	}
+	out := make([][2]int, len(ranges))
+	for i, r := range ranges {
+		out[i] = [2]int{r.Start, r.End}
+	}
+	return out
+}
+
+// CitationSource is one resolved citation the API layer renders into
+// the response's citations[] array. Start/End are the inclusive page
+// range; SectionIDs are the sections whose page range overlaps it
+// (nil when the caller must compute them from the tree). It is the
+// shared shape both /v1/answer/pageindex implementations build from so
+// citation construction stays identical across the two API layers.
+type CitationSource struct {
+	Start      int
+	End        int
+	SectionIDs []tree.SectionID
+}
+
+// CitationSources resolves which page ranges a page-based Result
+// should surface as citations, in render order. It prefers the FINAL
+// cited-range set (res.CitedPages — deduped + capped by the strategy)
+// so a confident single-pick answer yields ONE citation. When nothing
+// was cited (a refusal, or a hop-capped run with no valid done) it
+// falls back to the unique ranges in PagesRead so the caller still
+// sees the navigation footprint rather than an empty citations array.
+//
+// In the cited-ranges path SectionIDs is left nil (the caller resolves
+// it from the tree via SectionIDsOverlapping); in the PagesRead
+// fallback the per-read SectionIDs the strategy already recorded are
+// reused directly.
+func CitationSources(res *Result) []CitationSource {
+	if res == nil {
+		return nil
+	}
+	if len(res.CitedPages) > 0 {
+		out := make([]CitationSource, 0, len(res.CitedPages))
+		seen := make(map[[2]int]struct{}, len(res.CitedPages))
+		for _, p := range res.CitedPages {
+			key := [2]int{p[0], p[1]}
+			if _, dup := seen[key]; dup {
+				continue
+			}
+			seen[key] = struct{}{}
+			out = append(out, CitationSource{Start: p[0], End: p[1]})
+		}
+		return out
+	}
+	// Fallback: unique PagesRead ranges, first-seen order.
+	out := make([]CitationSource, 0, len(res.PagesRead))
+	seen := make(map[[2]int]struct{}, len(res.PagesRead))
+	for _, pr := range res.PagesRead {
+		key := [2]int{pr.StartPage, pr.EndPage}
+		if _, dup := seen[key]; dup {
+			continue
+		}
+		seen[key] = struct{}{}
+		out = append(out, CitationSource{Start: pr.StartPage, End: pr.EndPage, SectionIDs: pr.SectionIDs})
+	}
+	return out
+}
+
+// SectionIDsOverlapping returns, in document order, the IDs of every
+// section in the tree whose [PageStart,PageEnd] overlaps the inclusive
+// range [start,end]. It is the tree-facing companion to the internal
+// sectionsOverlapping helper, exported so the API layers can resolve a
+// cited page range to its sections without re-flattening logic.
+func SectionIDsOverlapping(t *tree.Tree, start, end int) []tree.SectionID {
+	if t == nil || t.Root == nil {
+		return nil
+	}
+	return sectionsOverlapping(flattenSectionsByPage(t), []pageRange{{Start: start, End: end}})
+}
+
 // sectionsOverlapping returns the IDs of every section whose
 // [PageStart, PageEnd] overlaps any of the cited ranges. Preserves
 // document order (because sections is page-sorted) and deduplicates.
@@ -879,6 +1103,24 @@ type PageIndexAction struct {
 	// single page can be expressed as [5,5].
 	CitedPages [][2]int `json:"cited_pages,omitempty"`
 
+	// Confidence is the model's self-reported confidence in the
+	// answer + citation set as a whole, in [0,1]. Optional on a done
+	// action. The loop surfaces it on the Result so callers can see
+	// how sure the agent was. A low overall confidence does NOT
+	// suppress the answer — the agent still returns its single best
+	// pick rather than nothing — it only annotates it.
+	Confidence float64 `json:"confidence,omitempty"`
+
+	// CitedConfidences is an OPTIONAL per-range confidence aligned by
+	// index with CitedPages: CitedConfidences[i] is the model's
+	// confidence in CitedPages[i]. When present and the cited set
+	// exceeds the cap, it drives which ranges are kept (highest
+	// confidence first). When absent, the cap falls back to the
+	// model's own ordering (it is told to list its best range first).
+	// Parsed from the rich cited_pages shape
+	// [{"pages":[5,7],"confidence":0.9}] when the model emits it.
+	CitedConfidences []float64 `json:"-"`
+
 	// Reasoning is the per-call explanation the system prompt
 	// asks the model to emit. Surfaced into the reasoning_trace
 	// when the endpoint is called with ?reasoning=true.
@@ -972,21 +1214,52 @@ func ParsePageIndexAction(raw string) (PageIndexAction, error) {
 	if v, ok := fields["reasoning"]; ok {
 		_ = json.Unmarshal(v, &a.Reasoning)
 	}
+	// Overall answer confidence (0-1). Tolerant of a bare number or a
+	// numeric string; anything unparseable leaves it at 0 ("unstated").
+	if v, ok := fields["confidence"]; ok && len(v) > 0 {
+		if err := json.Unmarshal(v, &a.Confidence); err != nil {
+			var cs string
+			if json.Unmarshal(v, &cs) == nil {
+				if f, perr := strconv.ParseFloat(strings.TrimSpace(cs), 64); perr == nil {
+					a.Confidence = f
+				}
+			}
+		}
+	}
 
-	// cited_pages: try the typed shape first ([[1,2],[5,7]]); fall
-	// back to the string-shape (["1-2","5-7"]) when the typed
-	// decode fails or is empty.
+	// cited_pages accepts three shapes, tried most-structured first:
+	//   1. [[1,2],[5,7]]                                  (preferred)
+	//   2. [{"pages":[1,2],"confidence":0.9}, ...]        (rich, per-range conf)
+	//   3. ["1-2","5-7"]                                  (tolerated string form)
+	// The rich shape lets the model attach per-range confidence the
+	// cap can rank on; the other two leave CitedConfidences empty and
+	// the cap falls back to emission order.
 	if v, ok := fields["cited_pages"]; ok && len(v) > 0 {
 		if err := json.Unmarshal(v, &a.CitedPages); err != nil || len(a.CitedPages) == 0 {
 			a.CitedPages = nil
-			var asStrings []string
-			if err := json.Unmarshal(v, &asStrings); err == nil {
-				for _, p := range asStrings {
-					s, e, ok := parsePageRangeString(p)
+			// Rich object form before string form: an array of objects
+			// fails [][2]int decode but carries the most signal.
+			var asObjs []citedRangeObj
+			if err := json.Unmarshal(v, &asObjs); err == nil && len(asObjs) > 0 {
+				for _, o := range asObjs {
+					s, e, ok := o.toRange()
 					if !ok {
 						continue
 					}
 					a.CitedPages = append(a.CitedPages, [2]int{s, e})
+					a.CitedConfidences = append(a.CitedConfidences, o.Confidence)
+				}
+			}
+			if len(a.CitedPages) == 0 {
+				var asStrings []string
+				if err := json.Unmarshal(v, &asStrings); err == nil {
+					for _, p := range asStrings {
+						s, e, ok := parsePageRangeString(p)
+						if !ok {
+							continue
+						}
+						a.CitedPages = append(a.CitedPages, [2]int{s, e})
+					}
 				}
 			}
 		}
@@ -1005,6 +1278,69 @@ func ParsePageIndexAction(raw string) (PageIndexAction, error) {
 	return a, nil
 }
 
+// citedRangeObj is the rich cited_pages element shape:
+// {"pages":[5,7],"confidence":0.9}. It also tolerates explicit
+// start/end keys ({"start":5,"end":7,"confidence":0.9}) and a
+// "pages":"5-7" string, so a model that reaches for any of those
+// keyings still produces a usable range + confidence.
+type citedRangeObj struct {
+	Pages      []int   `json:"pages"`
+	PagesStr   string  `json:"-"`
+	Start      int     `json:"start"`
+	End        int     `json:"end"`
+	Confidence float64 `json:"confidence"`
+}
+
+// UnmarshalJSON lets "pages" be either [5,7] or the "5-7" string form
+// without tripping the array decode. Everything else uses the struct
+// tags via an alias to avoid recursion.
+func (o *citedRangeObj) UnmarshalJSON(b []byte) error {
+	type alias struct {
+		Pages      json.RawMessage `json:"pages"`
+		Start      int             `json:"start"`
+		End        int             `json:"end"`
+		Confidence float64         `json:"confidence"`
+	}
+	var a alias
+	if err := json.Unmarshal(b, &a); err != nil {
+		return err
+	}
+	o.Start, o.End, o.Confidence = a.Start, a.End, a.Confidence
+	if len(a.Pages) > 0 {
+		if err := json.Unmarshal(a.Pages, &o.Pages); err != nil {
+			// Not an int array — try the "5-7" string form.
+			o.Pages = nil
+			_ = json.Unmarshal(a.Pages, &o.PagesStr)
+		}
+	}
+	return nil
+}
+
+// toRange resolves the object's range, preferring the pages array,
+// then the pages string, then explicit start/end. Returns ok=false
+// when no usable range is present.
+func (o citedRangeObj) toRange() (int, int, bool) {
+	if len(o.Pages) == 1 && o.Pages[0] > 0 {
+		return o.Pages[0], o.Pages[0], true
+	}
+	if len(o.Pages) >= 2 && o.Pages[0] > 0 && o.Pages[1] > 0 {
+		return o.Pages[0], o.Pages[1], true
+	}
+	if o.PagesStr != "" {
+		if s, e, ok := parsePageRangeString(o.PagesStr); ok {
+			return s, e, true
+		}
+	}
+	if o.Start > 0 {
+		end := o.End
+		if end <= 0 {
+			end = o.Start
+		}
+		return o.Start, end, true
+	}
+	return 0, 0, false
+}
+
 // parsePageRangeString parses "5", "5-7", or "5,7" (the loosest
 // shape the model is allowed to emit). Returns (start, end, true)
 // on success; (0, 0, false) otherwise. "5,7" is treated as
@@ -1057,7 +1393,16 @@ func wrapPageObservation(tool, body string) string {
 //   - Use tight page ranges; never fetch the whole document.
 //   - Emit a one-sentence reason before each tool call.
 //   - Answer only from tool output (no priors).
-//   - End with a done action carrying answer + cited_pages.
+//   - End with a done action carrying answer + cited_pages + confidence.
+//
+// CITATION DISCIPLINE is the load-bearing addition over the reference
+// PageIndex prompt. FinanceBench measurements show the failure mode is
+// not retrieval but commitment: when the model is sure it names ONE
+// page range and scores f1=1.0; when unsure it lists ~5 hedged ranges
+// and misses on all of them. The prompt therefore makes "commit to the
+// single best range" the default and frames listing many uncertain
+// ranges as the WORSE choice, and asks for an explicit confidence so a
+// low-confidence answer is still a single committed pick, not a spray.
 const pageIndexSystemPrompt = `You are a document QA assistant navigating a paginated document.
 
 TOOL USE PROTOCOL:
@@ -1065,19 +1410,28 @@ TOOL USE PROTOCOL:
 - Always call get_document_structure first to see titles + page ranges.
 - Call get_pages with TIGHT page ranges (e.g. {"tool":"get_pages","start_page":5,"end_page":7}). Never fetch the whole document.
 - Before each tool call, populate the "reasoning" field with ONE short sentence explaining why you're calling it.
-- When you have enough evidence, emit done with the natural-language answer, the page ranges you relied on, and a one-line reasoning trace.
+- When you have enough evidence, emit done with the natural-language answer, the page ranges you relied on, a confidence score, and a one-line reasoning trace.
+
+CITATION DISCIPLINE (read carefully — this determines whether your answer scores):
+- Cite the FEWEST page ranges that actually contain the answer. Ideally cite exactly ONE range. Commit to your single best range rather than hedging.
+- Listing many ranges because you are unsure is WORSE than committing to the one best range: every extra low-relevance citation hurts more than it helps. Do NOT pad cited_pages with maybes.
+- Add a second or third range ONLY when the answer genuinely depends on evidence from physically separate parts of the document (e.g. a figure on one page and the footnote that defines it on another). If a single range already supports the answer, cite only that one.
+- Even when your overall confidence is LOW, still return your SINGLE best range — never replace one uncertain pick with five. Report the low confidence in the "confidence" field instead.
+- "confidence" is your honesty signal in [0,1] for how sure you are the answer is correct and grounded on the cited pages. Set it low when unsure; this never penalizes you — it only annotates the answer.
 
 RULES:
 - Answer based ONLY on tool output. Do not invent facts.
 - Cite by page range, not by section title.
 - Be concise. Single-paragraph answers when possible.
-- If nothing in the document answers the query, emit done with answer="The document does not address this query." and an empty cited_pages array.`
+- If nothing in the document answers the query, emit done with answer="The document does not address this query.", an empty cited_pages array, and confidence 0.`
 
 // pageIndexActionHelp is the one-shot reminder appended to the
 // initial user prompt so the model gets concrete examples without us
-// needing to maintain a separate few-shot block.
+// needing to maintain a separate few-shot block. The done example
+// shows the SINGLE-range default (the multi-range form is allowed but
+// deliberately not modelled here, so the example nudges toward one).
 const pageIndexActionHelp = `- {"tool":"get_document_structure","reasoning":"orient by titles"} — fetch the TOC tree (titles + page ranges, no body text)
 - {"tool":"get_pages","start_page":5,"end_page":7,"reasoning":"section on debt"} — fetch text covering pages 5-7
-- {"tool":"done","answer":"...","cited_pages":[[5,7],[12,12]],"reasoning":"the answer is grounded on these pages"} — final answer
+- {"tool":"done","answer":"...","cited_pages":[[5,7]],"confidence":0.9,"reasoning":"the answer is grounded on these pages"} — final answer (cite ONE range when one suffices)
 
 Reply with ONLY the JSON object.`
diff --git a/pkg/retrieval/pageindex_strategy_test.go b/pkg/retrieval/pageindex_strategy_test.go
index 4d50b37..cbc569d 100644
--- a/pkg/retrieval/pageindex_strategy_test.go
+++ b/pkg/retrieval/pageindex_strategy_test.go
@@ -829,3 +829,321 @@ func indexOfSection(haystack []tree.SectionID, needle tree.SectionID) (int, bool
 	}
 	return -1, false
 }
+
+// distinctRangeCount counts the unique [start,end] pairs in a
+// CitedPages slice. The whole point of the dedup work is that this
+// equals len(CitedPages) — no pair appears twice.
+func distinctRangeCount(pairs [][2]int) int {
+	seen := map[[2]int]struct{}{}
+	for _, p := range pairs {
+		seen[p] = struct{}{}
+	}
+	return len(seen)
+}
+
+// TestPageIndexDedupCollapsesRepeatedRange is the core regression for
+// the FinanceBench "same id ×5" miss (id_00499 returned sec_363... five
+// times). A done that cites the SAME range five times, plus two more
+// distinct ranges, must collapse to distinct ranges only AND respect
+// the MaxCitations cap. With MaxCitations=3 the three distinct ranges
+// survive; the four duplicate repeats are gone.
+func TestPageIndexDedupCollapsesRepeatedRange(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree() // pages 1-9
+	llm := &pageScriptedLLM{
+		replies: []string{
+			// sec_a1 (pages 1-2) cited five times, then two distinct
+			// ranges. This is the spray-with-dupes pattern.
+			`{"tool":"done","answer":"sprayed","cited_pages":[[1,2],[1,2],[1,2],[1,2],[1,2],[3,4],[8,9]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+	// Default MaxCitations is 3.
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+
+	// CitedPages must be deduped: the [1,2] repeat collapses to one,
+	// and the total distinct set is capped at 3.
+	if n := distinctRangeCount(res.CitedPages); n != len(res.CitedPages) {
+		t.Errorf("CitedPages contains duplicates: %v (distinct=%d, len=%d)", res.CitedPages, n, len(res.CitedPages))
+	}
+	if len(res.CitedPages) > 3 {
+		t.Errorf("CitedPages must be capped at MaxCitations=3, got %d: %v", len(res.CitedPages), res.CitedPages)
+	}
+	// The three distinct ranges [1,2],[3,4],[8,9] all survive (each is
+	// under the cap), in canonical page order.
+	want := [][2]int{{1, 2}, {3, 4}, {8, 9}}
+	if len(res.CitedPages) != len(want) {
+		t.Fatalf("CitedPages = %v, want %v", res.CitedPages, want)
+	}
+	for i := range want {
+		if res.CitedPages[i] != want[i] {
+			t.Errorf("CitedPages[%d] = %v, want %v", i, res.CitedPages[i], want[i])
+		}
+	}
+
+	// SelectedIDs must also be deduped — sec_a1 must appear exactly
+	// once even though it was cited five times. This is the precision
+	// fix: no section id repeats.
+	count := map[tree.SectionID]int{}
+	for _, id := range res.SelectedIDs {
+		count[id]++
+	}
+	for id, c := range count {
+		if c > 1 {
+			t.Errorf("section id %q appears %d times in SelectedIDs; must be deduped", id, c)
+		}
+	}
+}
+
+// TestPageIndexCapKeepsHighestConfidence proves the cap is
+// confidence-aware: when more than MaxCitations distinct ranges are
+// cited WITH per-range confidence, the highest-confidence ranges win
+// (not the first-listed). Here five ranges are cited; the cap is 2;
+// the two with the top confidence must be the ones kept.
+func TestPageIndexCapKeepsHighestConfidence(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree() // pages 1-9
+	// Rich cited_pages with per-range confidence. The two strongest
+	// are [8,9] (0.95) and [5,7] (0.9); the cap of 2 should keep those.
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"done","answer":"x","confidence":0.9,"cited_pages":[` +
+				`{"pages":[1,2],"confidence":0.1},` +
+				`{"pages":[3,4],"confidence":0.2},` +
+				`{"pages":[5,7],"confidence":0.9},` +
+				`{"pages":[8,9],"confidence":0.95},` +
+				`{"pages":[1,1],"confidence":0.05}]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+	s.MaxCitations = 2
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.CitedPages) != 2 {
+		t.Fatalf("CitedPages must be capped at 2, got %d: %v", len(res.CitedPages), res.CitedPages)
+	}
+	// Output is page-sorted, so [5,7] then [8,9].
+	want := [][2]int{{5, 7}, {8, 9}}
+	for i := range want {
+		if res.CitedPages[i] != want[i] {
+			t.Errorf("CitedPages[%d] = %v, want %v (the two highest-confidence ranges)", i, res.CitedPages[i], want[i])
+		}
+	}
+	// Overall confidence (0.9) must surface on the Result.
+	if res.Confidence != 0.9 {
+		t.Errorf("Result.Confidence = %v, want 0.9", res.Confidence)
+	}
+}
+
+// TestPageIndexConfidentSinglePreserved is the happy half of the
+// signal: a confident single citation must pass through untouched and
+// the confidence must surface on both Result.Confidence and the
+// per-section Confidences map (so the abstain machinery can read it).
+func TestPageIndexConfidentSinglePreserved(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"done","answer":"Install on pages 1-2.","cited_pages":[[1,2]],"confidence":0.92,"reasoning":"clear"}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "how do I install?", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.CitedPages) != 1 || res.CitedPages[0] != [2]int{1, 2} {
+		t.Errorf("single confident citation must be preserved, got %v", res.CitedPages)
+	}
+	if res.Confidence != 0.92 {
+		t.Errorf("Result.Confidence = %v, want 0.92", res.Confidence)
+	}
+	if len(res.Confidences) == 0 {
+		t.Fatal("Confidences map must surface the answer confidence per selected section")
+	}
+	// sec_a1 overlaps pages 1-2 and must carry the confidence.
+	if got := res.Confidences["sec_a1"]; got != 0.92 {
+		t.Errorf("Confidences[sec_a1] = %v, want 0.92", got)
+	}
+}
+
+// TestPageIndexLowConfidenceStillCommitsSingle guards the over-
+// suppression risk: even when the model reports LOW confidence, it must
+// still return its single best pick — never an empty citation set. A
+// low confidence annotates the answer; it does not delete it.
+func TestPageIndexLowConfidenceStillCommitsSingle(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"done","answer":"Probably debt on 8-9.","cited_pages":[[8,9]],"confidence":0.15,"reasoning":"unsure"}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.CitedPages) != 1 || res.CitedPages[0] != [2]int{8, 9} {
+		t.Errorf("low-confidence answer must still commit to its single pick, got %v", res.CitedPages)
+	}
+	if res.Confidence != 0.15 {
+		t.Errorf("Result.Confidence = %v, want 0.15 (low but surfaced)", res.Confidence)
+	}
+	if len(res.SelectedIDs) == 0 {
+		t.Error("low confidence must not empty the selection")
+	}
+}
+
+// TestPageIndexConfidenceClamped: out-of-range confidence values clamp
+// into [0,1] rather than propagating absurd numbers.
+func TestPageIndexConfidenceClamped(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"done","answer":"x","cited_pages":[[1,2]],"confidence":7.5}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if res.Confidence != 1.0 {
+		t.Errorf("confidence 7.5 must clamp to 1.0, got %v", res.Confidence)
+	}
+}
+
+// TestPageIndexCapConfigurable: a custom MaxCitations is honoured. Six
+// distinct ranges cited, cap=1 → exactly one survives.
+func TestPageIndexCapConfigurable(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"done","answer":"x","cited_pages":[[1,1],[2,2],[3,3],[4,4],[5,5],[6,6]]}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+	s.MaxCitations = 1
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.CitedPages) != 1 {
+		t.Errorf("MaxCitations=1 must keep exactly one range, got %d: %v", len(res.CitedPages), res.CitedPages)
+	}
+	// With no per-range confidence the cap falls back to emission
+	// order: the first-listed range [1,1] wins.
+	if res.CitedPages[0] != [2]int{1, 1} {
+		t.Errorf("cap with no confidence should keep first-listed range [1,1], got %v", res.CitedPages[0])
+	}
+}
+
+// TestPageIndexEmptyCitationsNoConfidence: a refusal (empty cited_pages)
+// leaves CitedPages nil, Confidences nil, and Confidence 0 — so the
+// API layer's abstain check (which fires only on a non-empty
+// Confidences map) behaves exactly as before for refusals.
+func TestPageIndexEmptyCitationsNoConfidence(t *testing.T) {
+	t.Parallel()
+
+	tr := buildPagedTree()
+	llm := &pageScriptedLLM{
+		replies: []string{
+			`{"tool":"done","answer":"The document does not address this query.","cited_pages":[],"confidence":0}`,
+		},
+	}
+	s := retrieval.NewPageIndexStrategy(llm)
+	s.PageLoader = pageMapLoader{data: map[string]string{}}
+
+	res, err := s.SelectWithCost(context.Background(), tr, "q", retrieval.ContextBudget{MaxTokens: 100000})
+	if err != nil {
+		t.Fatalf("SelectWithCost: %v", err)
+	}
+	if len(res.CitedPages) != 0 {
+		t.Errorf("refusal must cite nothing, got %v", res.CitedPages)
+	}
+	if len(res.Confidences) != 0 {
+		t.Errorf("no confidence map on a zero-confidence refusal, got %v", res.Confidences)
+	}
+	if len(res.SelectedIDs) != 0 {
+		t.Errorf("refusal must select nothing, got %v", res.SelectedIDs)
+	}
+}
+
+// TestParsePageIndexConfidenceAndRichCitations covers the new parser
+// surfaces: a top-level confidence number, and the rich cited_pages
+// object form carrying per-range confidence.
+func TestParsePageIndexConfidenceAndRichCitations(t *testing.T) {
+	t.Parallel()
+
+	t.Run("top_level_confidence", func(t *testing.T) {
+		a, err := retrieval.ParsePageIndexAction(`{"tool":"done","answer":"x","cited_pages":[[1,2]],"confidence":0.8}`)
+		if err != nil {
+			t.Fatalf("parse: %v", err)
+		}
+		if a.Confidence != 0.8 {
+			t.Errorf("Confidence = %v, want 0.8", a.Confidence)
+		}
+	})
+
+	t.Run("confidence_as_string", func(t *testing.T) {
+		a, err := retrieval.ParsePageIndexAction(`{"tool":"done","answer":"x","cited_pages":[[1,2]],"confidence":"0.6"}`)
+		if err != nil {
+			t.Fatalf("parse: %v", err)
+		}
+		if a.Confidence != 0.6 {
+			t.Errorf("Confidence = %v, want 0.6 (string form)", a.Confidence)
+		}
+	})
+
+	t.Run("rich_cited_pages_objects", func(t *testing.T) {
+		a, err := retrieval.ParsePageIndexAction(`{"tool":"done","answer":"x","cited_pages":[{"pages":[5,7],"confidence":0.9},{"pages":[12,12],"confidence":0.4}]}`)
+		if err != nil {
+			t.Fatalf("parse: %v", err)
+		}
+		want := [][2]int{{5, 7}, {12, 12}}
+		if len(a.CitedPages) != len(want) {
+			t.Fatalf("CitedPages = %v, want %v", a.CitedPages, want)
+		}
+		for i := range want {
+			if a.CitedPages[i] != want[i] {
+				t.Errorf("CitedPages[%d] = %v, want %v", i, a.CitedPages[i], want[i])
+			}
+		}
+	})
+
+	t.Run("rich_cited_pages_start_end", func(t *testing.T) {
+		a, err := retrieval.ParsePageIndexAction(`{"tool":"done","answer":"x","cited_pages":[{"start":3,"end":4,"confidence":0.7}]}`)
+		if err != nil {
+			t.Fatalf("parse: %v", err)
+		}
+		if len(a.CitedPages) != 1 || a.CitedPages[0] != [2]int{3, 4} {
+			t.Errorf("CitedPages = %v, want [[3 4]]", a.CitedPages)
+		}
+	})
+}
diff --git a/pkg/retrieval/strategy.go b/pkg/retrieval/strategy.go
index a3db98a..0aa428d 100644
--- a/pkg/retrieval/strategy.go
+++ b/pkg/retrieval/strategy.go
@@ -97,6 +97,24 @@ type Result struct {
 	// cost/coverage debugging: a 10-K answer that read pages 50-55 +
 	// 102-104 leaves a concrete page footprint behind.
 	PagesRead []PageReadEntry `json:"pages_read,omitempty"`
+
+	// CitedPages is the FINAL set of page ranges the answer commits
+	// to — the model's cited_pages after dedup and the confidence cap,
+	// NOT every page it read (that is PagesRead). Page-based strategies
+	// populate this; it is the authoritative input for building the
+	// response's citations[] so a confident single-pick answer surfaces
+	// ONE citation even when it skimmed several pages to find it. Nil
+	// when the answer cited nothing (refusal) or the run was hop-capped
+	// without a valid done — callers then fall back to PagesRead.
+	CitedPages [][2]int `json:"cited_pages,omitempty"`
+
+	// Confidence is the model's self-reported confidence in the answer
+	// as a whole, in [0,1], for page-based strategies that ask for it
+	// on the done action. Zero means unstated (the model didn't report
+	// one) — it is NOT a "definitely wrong" signal, and the API layer
+	// must not abstain on a zero here. Per-pick confidence for section
+	// strategies still lives in Confidences.
+	Confidence float64 `json:"confidence,omitempty"`
 }
 
 // PageReadEntry is one get_pages tool call that materialised during a