From 010728f18d0c3776861e6a4b51afe3847fccc285 Mon Sep 17 00:00:00 2001 From: Raphael Reynaldi Date: Wed, 13 May 2026 07:57:33 +0700 Subject: [PATCH] feat: Popup reason --- src/controllers/recommendations.controller.js | 1 + src/services/insight-trigger.service.js | 17 ++- src/services/llm/gemini.service.js | 114 ++++++++++++++++-- 3 files changed, 122 insertions(+), 10 deletions(-) diff --git a/src/controllers/recommendations.controller.js b/src/controllers/recommendations.controller.js index c57af0f..623bdc2 100644 --- a/src/controllers/recommendations.controller.js +++ b/src/controllers/recommendations.controller.js @@ -18,6 +18,7 @@ function shapeRecommendation(row) { recommendation_type: row.recommendation_type, recommendation_text: row.recommendation_text, reasoning: row.code_context?.reasoning ?? null, + evidence: Array.isArray(row.code_context?.evidence) ? row.code_context.evidence : [], user_action: row.user_action, created_at: row.created_at, }; diff --git a/src/services/insight-trigger.service.js b/src/services/insight-trigger.service.js index 477d3a0..b1f4462 100644 --- a/src/services/insight-trigger.service.js +++ b/src/services/insight-trigger.service.js @@ -172,7 +172,11 @@ async function persistInsight({ userId, sessionId, triggeredRule, llmOutput }) { workflow_state_id: workflowState.id, recommendation_type: llmOutput.recommendation_type, recommendation_text: llmOutput.recommendation_text, - code_context: { reasoning: llmOutput.reasoning, triggered_rule: triggeredRule }, + code_context: { + reasoning: llmOutput.reasoning, + triggered_rule: triggeredRule, + evidence: llmOutput.evidence ?? [], + }, user_action: null, }, { transaction: t }, @@ -288,7 +292,16 @@ export async function createDemoRecommendation(userId) { workflow_state_id: workflowState.id, recommendation_type: 'execute', recommendation_text: DEMO_RECOMMENDATION_TEXT, - code_context: { reasoning: DEMO_REASONING, triggered_rule: 'demo_trigger' }, + code_context: { + reasoning: DEMO_REASONING, + triggered_rule: 'demo_trigger', + evidence: [ + { metric: 'duration_minutes', value: 145 }, + { metric: 'lines_added', value: 220 }, + { metric: 'lines_deleted', value: 180 }, + { metric: 'churn_ratio', value: 0.82 }, + ], + }, user_action: null, }, { transaction: t }, diff --git a/src/services/llm/gemini.service.js b/src/services/llm/gemini.service.js index 4f9443a..8b77853 100644 --- a/src/services/llm/gemini.service.js +++ b/src/services/llm/gemini.service.js @@ -19,15 +19,44 @@ const RECOMMENDATION_TYPES = [ 'execute', ]; +const CITABLE_METRICS = [ + 'lines_added', + 'lines_deleted', + 'churn_ratio', + 'switch_count', + 'rapid_switch_count', + 'duration_minutes', +]; + const insightSchema = { type: 'object', - required: ['state_type', 'confidence_score', 'recommendation_type', 'recommendation_text', 'reasoning'], + required: [ + 'state_type', + 'confidence_score', + 'recommendation_type', + 'recommendation_text', + 'reasoning', + 'evidence', + ], properties: { state_type: { type: 'string', enum: STATE_TYPES }, confidence_score: { type: 'number', minimum: 0, maximum: 1 }, recommendation_type: { type: 'string', enum: RECOMMENDATION_TYPES }, recommendation_text: { type: 'string', minLength: 1, maxLength: 240 }, - reasoning: { type: 'string', minLength: 1, maxLength: 600 }, + reasoning: { type: 'string', minLength: 1, maxLength: 800 }, + evidence: { + type: 'array', + maxItems: 6, + items: { + type: 'object', + required: ['metric', 'value'], + properties: { + metric: { type: 'string', enum: [...CITABLE_METRICS, 'top_file'] }, + value: { type: ['number', 'string'] }, + }, + additionalProperties: false, + }, + }, }, additionalProperties: false, }; @@ -64,7 +93,7 @@ export class GeminiValidationError extends Error { const SYSTEM_PROMPT = `You are a workflow-wellness coach embedded in a developer productivity tool. -You receive a small JSON object describing a single developer's recent coding activity. Your job is to characterize the developer's current workflow state and propose one concrete, second-person action. +You receive a small JSON object describing a single developer's recent coding activity. Your job is to characterize the developer's current workflow state and propose one concrete, second-person action — grounded in the actual numbers you were given. Categorize the workflow state as exactly one of: - "stuck_loop" — repeatedly rewriting the same code without progress @@ -83,12 +112,26 @@ Recommendation types: Rules: - Always respond with JSON matching the schema. NO markdown, NO prose outside JSON. -- recommendation_text is one sentence (≤240 chars), second-person ("You've been..."), one concrete action. -- reasoning is a short paragraph (≤600 chars) referencing the numbers you saw. -- If the data does not suggest a real problem, set state_type to "normal" with low confidence and a brief encouraging recommendation. -- Be honest. Do not invent issues that the numbers don't support.`; +- recommendation_text is one sentence (≤240 chars), second-person, naming one concrete action. +- recommendation_text MUST cite at least one specific number from the input (lines added/deleted, churn ratio, switch count, or duration in minutes). When state_type is "stuck_loop" or "ai_dependency_trap" and top_files is non-empty, also name the most-relevant file. +- Forbidden phrases in recommendation_text: "for a while", "consider taking a break" (without a duration), "step away" (without a reason), "you've been working" (without a number). Avoid filler. If you can't cite a number, set state_type to "normal". +- reasoning (≤800 chars) explains why you reached this state_type, referencing the numbers and any pattern across them (e.g., high churn + long duration + one dominant file = stuck_loop on that file). +- evidence is an array of {metric, value} pairs naming every number/file you cited in recommendation_text and reasoning. Use the exact metric names from the input: lines_added, lines_deleted, churn_ratio, switch_count, rapid_switch_count, duration_minutes, top_file. Only cite values that actually appear in the input — do not invent. +- If the data does not suggest a real problem, set state_type to "normal" with low confidence and a brief encouraging recommendation; evidence can be a short array referencing whatever you found notable. + +Examples of good recommendation_text: +- "You've deleted 180 lines after adding 220 in authService.ts (churn 0.82) — diff your last commit and pick one direction before continuing." +- "47 editor switches in 90 min suggests you're hunting; close all but the 2 files you need and re-read the function you started with." +- "3-hour session with only 40 lines committed — set a 10-min timer, write the simplest version that compiles, then iterate." + +Examples of bad recommendation_text (rejected): +- "You've been heads-down for a while. Consider stepping away." (no numbers) +- "High churn detected. Take a break." (no specific churn value, no action duration) +- "Consider rethinking your approach." (no numbers, no specifics)`; function buildUserPrompt(input) { + const topFiles = (input.topFiles ?? []).slice(0, 5); + const topFile = topFiles[0]?.path ?? topFiles[0]?.file ?? null; return JSON.stringify( { triggered_rule: input.triggeredRule, @@ -103,13 +146,53 @@ function buildUserPrompt(input) { switch_count: input.metrics?.switch_count ?? 0, rapid_switch_count: input.metrics?.rapid_switch_count ?? 0, }, - top_files: (input.topFiles ?? []).slice(0, 5), + top_file: topFile, + top_files: topFiles, }, null, 2, ); } +function collectInputValues(input) { + const values = new Set(); + const topFiles = (input.topFiles ?? []).slice(0, 5); + const topFile = topFiles[0]?.path ?? topFiles[0]?.file ?? null; + const candidates = { + lines_added: input.metrics?.lines_added ?? 0, + lines_deleted: input.metrics?.lines_deleted ?? 0, + churn_ratio: input.metrics?.churn_ratio, + switch_count: input.metrics?.switch_count ?? 0, + rapid_switch_count: input.metrics?.rapid_switch_count ?? 0, + duration_minutes: input.session?.duration_minutes, + top_file: topFile, + }; + for (const [, v] of Object.entries(candidates)) { + if (v === null || v === undefined) continue; + values.add(typeof v === 'number' ? roundForCompare(v) : String(v)); + } + return { values, candidates }; +} + +function roundForCompare(n) { + return Math.round(n * 100) / 100; +} + +function evidenceMatchesInput(evidence, candidates) { + if (!Array.isArray(evidence)) return false; + for (const item of evidence) { + if (!item || typeof item.metric !== 'string') return false; + const expected = candidates[item.metric]; + if (expected === undefined || expected === null) return false; + if (typeof expected === 'number' && typeof item.value === 'number') { + if (roundForCompare(expected) !== roundForCompare(item.value)) return false; + } else if (String(expected) !== String(item.value)) { + return false; + } + } + return true; +} + export async function generateInsight(input) { if (!model) { throw new Error('Gemini service is not configured (GOOGLE_API_KEY missing)'); @@ -149,5 +232,20 @@ export async function generateInsight(input) { ); } + const { candidates } = collectInputValues(input); + if (!evidenceMatchesInput(parsed.evidence, candidates)) { + logger.warn('gemini: evidence cites values not in input — downgrading to normal', { + evidence: parsed.evidence, + }); + return { + ...parsed, + state_type: 'normal', + confidence_score: Math.min(parsed.confidence_score, 0.3), + recommendation_type: 'execute', + recommendation_text: 'Keep going — nothing concerning in the recent activity.', + evidence: [], + }; + } + return parsed; }