From 63871e3b68a03ab56d7bcb461faf201892876d5b Mon Sep 17 00:00:00 2001 From: bradygaster Date: Thu, 12 Mar 2026 02:47:16 -0700 Subject: [PATCH 1/2] feat: add Phase 1 discovery adapters (Reddit, Dev.to, Stack Overflow, GitHub Discussions) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New adapters implementing the SourceAdapter pattern from #1: - RedditSourceAdapter (#6): Fetches from r/dotnet, r/csharp, r/programming, r/aspnetcore via public JSON API with rate limiting - DevToSourceAdapter (#2): Tag-based article search with pagination, cross-tag deduplication, and engagement metrics - StackOverflowSourceAdapter (#7): Stack Exchange API v2.3 with gzip decompression, optional API key, unanswered question detection - GitHubDiscussionsSourceAdapter (#4): GraphQL-based dotnet/aspire discussions with pagination and signal inference All adapters registered in createDefaultRegistry(). Zero new dependencies — uses Node.js built-in fetch and existing @octokit/rest. Closes #2, closes #4, closes #6, closes #7 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/discovery/adapters/devto.ts | 174 +++++++++++++++++ src/discovery/adapters/discussions.ts | 243 ++++++++++++++++++++++++ src/discovery/adapters/index.ts | 12 ++ src/discovery/adapters/reddit.ts | 183 ++++++++++++++++++ src/discovery/adapters/stackoverflow.ts | 219 +++++++++++++++++++++ 5 files changed, 831 insertions(+) create mode 100644 src/discovery/adapters/devto.ts create mode 100644 src/discovery/adapters/discussions.ts create mode 100644 src/discovery/adapters/reddit.ts create mode 100644 src/discovery/adapters/stackoverflow.ts diff --git a/src/discovery/adapters/devto.ts b/src/discovery/adapters/devto.ts new file mode 100644 index 0000000..e82e0a5 --- /dev/null +++ b/src/discovery/adapters/devto.ts @@ -0,0 +1,174 @@ +/** + * DevToSourceAdapter — Dev.to REST API discovery implementation. + * Uses public Dev.to API to search articles by tags, with filtering and deduplication. + */ + +import type { Channel, ContentItem, ContentType, DiscoveryResult, RunState } from '../../types.js'; +import type { AdapterValidation, SourceAdapter } from './types.js'; +import { generateCanonicalId, isAspireRelated, isExcluded, truncate } from './helpers.js'; + +const DEVTO_API_BASE = 'https://dev.to/api/articles'; +const TAGS = ['dotnetaspire', 'aspire', 'dotnet'] as const; +const PER_PAGE = 30; +const MAX_PAGES = 3; +const REQUEST_DELAY_MS = 500; + +interface DevToArticle { + id: number; + title: string; + description: string; + url: string; + published_at: string; + tag_list: string[]; + user: { + username: string; + name: string; + }; + public_reactions_count: number; + comments_count: number; + reading_time_minutes: number; +} + +export class DevToSourceAdapter implements SourceAdapter { + readonly name = 'devto'; + readonly displayName = 'Dev.to'; + readonly channel: Channel = 'devto'; + + async validate(): Promise { + return { valid: true }; + } + + async discover(state: RunState): Promise { + const results: DiscoveryResult[] = []; + const sinceDate = new Date(state.last_run); + const seenUrls = new Set(); + + for (const tag of TAGS) { + try { + console.log(` 📡 Fetching Dev.to tag: ${tag}`); + const items: ContentItem[] = []; + + for (let page = 1; page <= MAX_PAGES; page++) { + const url = `${DEVTO_API_BASE}?tag=${tag}&per_page=${PER_PAGE}&page=${page}`; + + const response = await fetch(url); + if (!response.ok) { + console.warn(` ⚠️ Dev.to API error for tag ${tag} page ${page}: ${response.status}`); + break; + } + + const articles: DevToArticle[] = await response.json(); + if (articles.length === 0) break; + + for (const article of articles) { + const pubDate = new Date(article.published_at); + if (pubDate < sinceDate) continue; + + // Dedupe by URL across tags + if (seenUrls.has(article.url)) continue; + + const text = `${article.title} ${article.description} ${article.tag_list.join(' ')}`; + + // Filter by Aspire relevance for broad tags + if (tag === 'dotnet' || tag === 'aspire') { + if (!isAspireRelated(text)) continue; + } + + if (isExcluded(text)) continue; + + seenUrls.add(article.url); + + const canonicalId = generateCanonicalId( + article.title, + article.url, + article.user.username, + article.published_at, + ); + + items.push({ + canonical_id: canonicalId, + title: article.title, + url: article.url, + type: 'blog' as ContentType, + channel: 'devto', + published_at: article.published_at, + author: article.user.username, + summary: truncate(article.description || '', 300), + tags: { + topic: extractTopics(text), + audience: ['intermediate'], + signal: ['adoption'], + confidence: 'medium', + actionability: 'investigate', + }, + provenance: { + discovered_from: `devto:tag:${tag}`, + discovered_query: null, + source_first_seen: new Date().toISOString(), + raw_evidence_path: null, + }, + dedupe: { + is_duplicate: false, + duplicate_of: null, + duplicate_reason: null, + }, + }); + } + + // Rate limiting between pages + if (page < MAX_PAGES) { + await sleep(REQUEST_DELAY_MS); + } + } + + if (items.length > 0) { + results.push({ items, source: `devto:tag:${tag}` }); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.warn(` ⚠️ Dev.to tag ${tag} failed: ${message}`); + } + + // Rate limiting between tags + await sleep(REQUEST_DELAY_MS); + } + + return results; + } +} + +function extractTopics(text: string): string[] { + const topicKeywords: Record = { + apphost: ['apphost', 'app host'], + dashboard: ['dashboard'], + integrations: ['integration'], + k8s: ['kubernetes', 'k8s'], + aca: ['azure container app', 'aca'], + otel: ['opentelemetry', 'otel'], + postgres: ['postgres', 'postgresql'], + redis: ['redis'], + dapr: ['dapr'], + auth: ['auth', 'authentication', 'identity'], + caching: ['cache', 'caching'], + dotnet: ['.net', 'dotnet', 'c#', 'csharp'], + typescript: ['typescript'], + python: ['python'], + docker: ['docker', 'container'], + deploy: ['deploy', 'deployment'], + }; + + const found: string[] = []; + const lower = text.toLowerCase(); + + for (const [topic, keywords] of Object.entries(topicKeywords)) { + if (keywords.some((k) => lower.includes(k))) { + found.push(topic); + } + } + + return found.length > 0 ? found : ['aspire']; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} diff --git a/src/discovery/adapters/discussions.ts b/src/discovery/adapters/discussions.ts new file mode 100644 index 0000000..6b1ed1a --- /dev/null +++ b/src/discovery/adapters/discussions.ts @@ -0,0 +1,243 @@ +/** + * GitHubDiscussionsSourceAdapter — GitHub Discussions discovery implementation. + * Targets dotnet/aspire repository discussions via GraphQL API. + */ + +import { Octokit } from '@octokit/rest'; +import type { Channel, ContentItem, ContentType, DiscoveryResult, RunState, Signal } from '../../types.js'; +import type { AdapterValidation, SourceAdapter } from './types.js'; +import { generateCanonicalId, truncate } from './helpers.js'; + +const TARGET_OWNER = 'dotnet'; +const TARGET_REPO = 'aspire'; + +interface DiscussionNode { + title: string; + url: string; + createdAt: string; + author: { login: string } | null; + category: { name: string } | null; + upvoteCount: number; + comments: { totalCount: number }; + body: string; + isAnswered: boolean; +} + +interface GraphQLResponse { + repository: { + discussions: { + pageInfo: { + hasNextPage: boolean; + endCursor: string | null; + }; + nodes: DiscussionNode[]; + }; + }; +} + +export class GitHubDiscussionsSourceAdapter implements SourceAdapter { + readonly name = 'github-discussions'; + readonly displayName = 'GitHub Discussions'; + readonly channel: Channel = 'github'; + + async validate(): Promise { + const token = process.env['GITHUB_TOKEN']; + if (!token) { + return { + valid: false, + reason: 'GITHUB_TOKEN required for GraphQL Discussions API', + }; + } + return { valid: true }; + } + + async discover(state: RunState): Promise { + const token = process.env['GITHUB_TOKEN']; + if (!token) { + throw new Error('GITHUB_TOKEN required for GitHub Discussions API'); + } + + const octokit = new Octokit({ auth: token }); + const discussions = await this.fetchDiscussions(octokit, state.last_run); + + if (discussions.length === 0) { + return []; + } + + const items: ContentItem[] = discussions.map((discussion) => ({ + canonical_id: generateCanonicalId( + discussion.title, + discussion.url, + discussion.author?.login ?? null, + discussion.createdAt, + ), + title: discussion.title, + url: discussion.url, + type: 'discussion' as ContentType, + channel: 'github', + published_at: discussion.createdAt, + author: discussion.author?.login ?? null, + summary: truncate(discussion.body ?? '', 300), + tags: { + topic: extractTopics(discussion.title, discussion.body, discussion.category?.name ?? null), + audience: ['intermediate'], + signal: inferDiscussionSignal(discussion), + confidence: 'medium', + actionability: discussion.isAnswered ? 'investigate' : 'respond', + }, + provenance: { + discovered_from: 'github:discussions', + discovered_query: `${TARGET_OWNER}/${TARGET_REPO}`, + source_first_seen: new Date().toISOString(), + raw_evidence_path: null, + }, + dedupe: { + is_duplicate: false, + duplicate_of: null, + duplicate_reason: null, + }, + })); + + return [{ + items, + source: 'github:discussions', + query: `${TARGET_OWNER}/${TARGET_REPO}`, + }]; + } + + private async fetchDiscussions(octokit: Octokit, since: string): Promise { + const sinceDate = new Date(since); + const allDiscussions: DiscussionNode[] = []; + let hasNextPage = true; + let cursor: string | null = null; + const maxPages = 3; // Limit to ~300 discussions max + let pageCount = 0; + + console.log(` 🔍 GitHub Discussions: ${TARGET_OWNER}/${TARGET_REPO}`); + + while (hasNextPage && pageCount < maxPages) { + const query = ` + query($owner: String!, $repo: String!, $first: Int!, $after: String) { + repository(owner: $owner, name: $repo) { + discussions(first: $first, after: $after, orderBy: {field: CREATED_AT, direction: DESC}) { + pageInfo { + hasNextPage + endCursor + } + nodes { + title + url + createdAt + author { + login + } + category { + name + } + upvoteCount + comments { + totalCount + } + body + isAnswered + } + } + } + } + `; + + const variables = { + owner: TARGET_OWNER, + repo: TARGET_REPO, + first: 100, + after: cursor, + }; + + try { + const response = await octokit.request('POST /graphql', { + query, + ...variables, + }); + + const data = response.data as GraphQLResponse; + const discussions = data.repository.discussions; + + // Filter by date + const filtered = discussions.nodes.filter((d) => { + const createdAt = new Date(d.createdAt); + return createdAt >= sinceDate; + }); + + allDiscussions.push(...filtered); + + // If we got fewer than we asked for after filtering, we've gone past the date threshold + if (filtered.length < discussions.nodes.length) { + break; + } + + hasNextPage = discussions.pageInfo.hasNextPage; + cursor = discussions.pageInfo.endCursor; + pageCount++; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.warn(` ⚠️ GitHub Discussions fetch failed: ${message}`); + break; + } + } + + return allDiscussions; + } +} + +function extractTopics(title: string, body: string, category: string | null): string[] { + const text = `${title} ${body ?? ''}`.toLowerCase(); + const topics: string[] = []; + + const topicMap: Record = { + apphost: ['apphost', 'app-host', 'app host'], + dashboard: ['dashboard'], + deployment: ['deploy', 'deployment', 'kubernetes', 'k8s', 'aca', 'azure'], + integrations: ['integration', 'component', 'redis', 'postgres', 'sql'], + otel: ['opentelemetry', 'otel', 'telemetry'], + dotnet: ['.net', 'dotnet', 'csharp', 'c#'], + docker: ['docker', 'container'], + }; + + for (const [topic, keywords] of Object.entries(topicMap)) { + if (keywords.some((k) => text.includes(k))) { + topics.push(topic); + } + } + + // Add category if it exists + if (category) { + topics.push(category.toLowerCase().replace(/\s+/g, '-')); + } + + return topics.length > 0 ? topics : ['aspire']; +} + +function inferDiscussionSignal(discussion: DiscussionNode): Signal[] { + const text = `${discussion.title} ${discussion.body ?? ''}`.toLowerCase(); + + if (text.includes('bug') || text.includes('error') || text.includes('crash') || text.includes('issue')) { + return ['complaint']; + } + if (text.includes('feature') || text.includes('request') || text.includes('enhancement')) { + return ['request']; + } + if (text.includes('how') || text.includes('help') || text.includes('question') || text.includes('?')) { + return ['confusion']; + } + if (discussion.upvoteCount > 10) { + return ['adoption']; + } + if (text.includes('release') || text.includes('announcement')) { + return ['release']; + } + if (text.includes('thanks') || text.includes('great') || text.includes('awesome') || text.includes('love')) { + return ['praise']; + } + + return ['other']; +} diff --git a/src/discovery/adapters/index.ts b/src/discovery/adapters/index.ts index c72d3bc..7b99560 100644 --- a/src/discovery/adapters/index.ts +++ b/src/discovery/adapters/index.ts @@ -8,14 +8,26 @@ export * from './helpers.js'; export { SourceRegistry } from './registry.js'; export { RSSSourceAdapter } from './rss.js'; export { GitHubSourceAdapter } from './github.js'; +export { RedditSourceAdapter } from './reddit.js'; +export { DevToSourceAdapter } from './devto.js'; +export { StackOverflowSourceAdapter } from './stackoverflow.js'; +export { GitHubDiscussionsSourceAdapter } from './discussions.js'; import { SourceRegistry } from './registry.js'; import { RSSSourceAdapter } from './rss.js'; import { GitHubSourceAdapter } from './github.js'; +import { RedditSourceAdapter } from './reddit.js'; +import { DevToSourceAdapter } from './devto.js'; +import { StackOverflowSourceAdapter } from './stackoverflow.js'; +import { GitHubDiscussionsSourceAdapter } from './discussions.js'; export function createDefaultRegistry(): SourceRegistry { const registry = new SourceRegistry(); registry.register(new RSSSourceAdapter()); registry.register(new GitHubSourceAdapter()); + registry.register(new RedditSourceAdapter()); + registry.register(new DevToSourceAdapter()); + registry.register(new StackOverflowSourceAdapter()); + registry.register(new GitHubDiscussionsSourceAdapter()); return registry; } diff --git a/src/discovery/adapters/reddit.ts b/src/discovery/adapters/reddit.ts new file mode 100644 index 0000000..c812713 --- /dev/null +++ b/src/discovery/adapters/reddit.ts @@ -0,0 +1,183 @@ +/** + * RedditSourceAdapter — Reddit discovery implementation. + * Fetches posts from specified subreddits using Reddit's public JSON API. + */ + +import type { Channel, ContentItem, ContentType, DiscoveryResult, RunState, Signal } from '../../types.js'; +import type { AdapterValidation, SourceAdapter } from './types.js'; +import { generateCanonicalId, isAspireRelated, isExcluded, truncate } from './helpers.js'; + +const SUBREDDITS: readonly string[] = ['dotnet', 'csharp', 'programming', 'aspnetcore'] as const; + +const USER_AGENT = 'ACCES-ContentEngine/1.0 (Community Source Scout for .NET Aspire)'; + +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +interface RedditPost { + data: { + title: string; + author: string; + subreddit: string; + permalink: string; + score: number; + num_comments: number; + created_utc: number; + selftext: string; + url: string; + is_self: boolean; + }; +} + +interface RedditResponse { + data: { + children: RedditPost[]; + }; +} + +export class RedditSourceAdapter implements SourceAdapter { + readonly name = 'reddit'; + readonly displayName = 'Reddit'; + readonly channel: Channel = 'reddit'; + + async validate(): Promise { + return { valid: true }; + } + + async discover(state: RunState): Promise { + const results: DiscoveryResult[] = []; + const sinceDate = new Date(state.last_run); + + for (const subreddit of SUBREDDITS) { + try { + console.log(` 📡 Fetching Reddit: r/${subreddit}`); + + const url = `https://www.reddit.com/r/${subreddit}/new.json?limit=100`; + const response = await fetch(url, { + headers: { + 'User-Agent': USER_AGENT, + }, + }); + + if (!response.ok) { + console.warn(` ⚠️ Reddit r/${subreddit} returned ${response.status}`); + continue; + } + + const data = (await response.json()) as RedditResponse; + const items: ContentItem[] = []; + + for (const post of data.data.children ?? []) { + const postData = post.data; + + if (!postData.title || postData.author === '[deleted]') continue; + + const publishedDate = new Date(postData.created_utc * 1000); + if (publishedDate < sinceDate) continue; + + const title = postData.title; + const selftext = postData.selftext || ''; + const text = `${title} ${selftext}`.toLowerCase(); + + if (!isAspireRelated(text)) continue; + if (isExcluded(text)) continue; + + const canonicalId = generateCanonicalId( + title, + postData.permalink, + postData.author, + postData.created_utc.toString(), + ); + + const redditUrl = `https://www.reddit.com${postData.permalink}`; + + items.push({ + canonical_id: canonicalId, + title, + url: redditUrl, + type: 'reddit' as ContentType, + channel: 'reddit', + published_at: publishedDate.toISOString(), + author: postData.author, + summary: truncate(selftext || title, 300), + tags: { + topic: extractTopics(text), + audience: ['intermediate'], + signal: inferRedditSignal(title, selftext, postData.score), + confidence: 'medium', + actionability: 'investigate', + }, + provenance: { + discovered_from: `reddit:r/${subreddit}`, + discovered_query: null, + source_first_seen: new Date().toISOString(), + raw_evidence_path: null, + }, + dedupe: { + is_duplicate: false, + duplicate_of: null, + duplicate_reason: null, + }, + }); + } + + if (items.length > 0) { + results.push({ items, source: `reddit:r/${subreddit}` }); + } + + await sleep(1000); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.warn(` ⚠️ Reddit r/${subreddit} failed: ${message}`); + } + } + + return results; + } +} + +function inferRedditSignal(title: string, body: string, score: number): Signal[] { + const text = `${title} ${body}`.toLowerCase(); + + if (text.includes('release') || text.includes('announcement')) return ['release']; + if (text.includes('vulnerability') || text.includes('security')) return ['vulnerability']; + if (text.includes('tutorial') || text.includes('how to') || text.includes('guide')) return ['tutorial']; + if (text.includes('question') || text.includes('help') || text.includes('confused')) return ['confusion']; + if (text.includes('issue') || text.includes('problem') || text.includes('bug')) return ['complaint']; + if (text.includes('feature request') || text.includes('would be nice')) return ['request']; + if (score > 50 || text.includes('love') || text.includes('amazing') || text.includes('great')) return ['praise']; + if (text.includes('using') || text.includes('deployed') || text.includes('migrated')) return ['adoption']; + + return ['other']; +} + +function extractTopics(text: string): string[] { + const topicKeywords: Record = { + apphost: ['apphost', 'app host'], + dashboard: ['dashboard'], + integrations: ['integration'], + k8s: ['kubernetes', 'k8s'], + aca: ['azure container app', 'aca'], + otel: ['opentelemetry', 'otel'], + postgres: ['postgres', 'postgresql'], + redis: ['redis'], + dapr: ['dapr'], + auth: ['auth', 'authentication', 'identity'], + caching: ['cache', 'caching'], + dotnet: ['.net', 'dotnet', 'c#', 'csharp'], + typescript: ['typescript'], + python: ['python'], + docker: ['docker', 'container'], + deploy: ['deploy', 'deployment'], + }; + + const found: string[] = []; + const lower = text.toLowerCase(); + + for (const [topic, keywords] of Object.entries(topicKeywords)) { + if (keywords.some((k) => lower.includes(k))) { + found.push(topic); + } + } + + return found.length > 0 ? found : ['aspire']; +} diff --git a/src/discovery/adapters/stackoverflow.ts b/src/discovery/adapters/stackoverflow.ts new file mode 100644 index 0000000..81a3d67 --- /dev/null +++ b/src/discovery/adapters/stackoverflow.ts @@ -0,0 +1,219 @@ +/** + * StackOverflowSourceAdapter — Stack Overflow questions discovery. + * Uses Stack Exchange API v2.3 to discover Aspire-related questions. + */ + +import { gunzipSync } from 'node:zlib'; +import type { Channel, ContentItem, ContentType, DiscoveryResult, RunState, Signal } from '../../types.js'; +import type { AdapterValidation, SourceAdapter } from './types.js'; +import { generateCanonicalId, truncate } from './helpers.js'; + +const STACKOVERFLOW_API_BASE = 'https://api.stackexchange.com/2.3/questions'; +const ASPIRE_TAGS = ['dotnet-aspire', '.net-aspire']; + +interface StackOverflowQuestion { + title: string; + link: string; + owner?: { display_name?: string }; + score: number; + answer_count: number; + view_count: number; + creation_date: number; + tags: string[]; + is_answered: boolean; +} + +interface StackOverflowResponse { + items: StackOverflowQuestion[]; + has_more: boolean; + quota_remaining: number; + backoff?: number; +} + +export class StackOverflowSourceAdapter implements SourceAdapter { + readonly name = 'stackoverflow'; + readonly displayName = 'Stack Overflow'; + readonly channel: Channel = 'stackoverflow'; + + async validate(): Promise { + const apiKey = process.env['STACKOVERFLOW_API_KEY']; + if (!apiKey) { + return { + valid: true, + warnings: ['STACKOVERFLOW_API_KEY not set — using unauthenticated API (300 req/day limit)'], + }; + } + return { valid: true }; + } + + async discover(state: RunState): Promise { + const results: DiscoveryResult[] = []; + const sinceDate = new Date(state.last_run); + const fromDate = Math.floor(sinceDate.getTime() / 1000); + + for (const tag of ASPIRE_TAGS) { + try { + console.log(` 🔍 Fetching Stack Overflow tag: ${tag}`); + const items = await this.fetchQuestions(tag, fromDate); + + if (items.length > 0) { + results.push({ + items, + source: `stackoverflow:${tag}`, + query: tag, + }); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.warn(` ⚠️ Stack Overflow tag ${tag} failed: ${message}`); + } + } + + return results; + } + + private async fetchQuestions(tag: string, fromDate: number): Promise { + const apiKey = process.env['STACKOVERFLOW_API_KEY']; + const params = new URLSearchParams({ + tagged: tag, + site: 'stackoverflow', + sort: 'creation', + order: 'desc', + pagesize: '100', + fromdate: fromDate.toString(), + }); + + if (apiKey) { + params.set('key', apiKey); + } + + const url = `${STACKOVERFLOW_API_BASE}?${params.toString()}`; + const response = await fetch(url); + + if (!response.ok) { + throw new Error(`Stack Overflow API returned ${response.status}: ${response.statusText}`); + } + + const data = await this.decompressResponse(response); + + if (data.backoff) { + console.warn(` ⏳ Stack Overflow API requested backoff: ${data.backoff}s`); + await this.sleep(data.backoff * 1000); + } + + if (data.quota_remaining !== undefined) { + console.log(` 📊 Quota remaining: ${data.quota_remaining}`); + } + + return data.items.map((item) => this.mapToContentItem(item, tag)); + } + + private async decompressResponse(response: Response): Promise { + const buffer = await response.arrayBuffer(); + const decompressed = gunzipSync(Buffer.from(buffer)); + return JSON.parse(decompressed.toString('utf-8')); + } + + private mapToContentItem(item: StackOverflowQuestion, tag: string): ContentItem { + const title = this.decodeHtmlEntities(item.title); + const author = item.owner?.display_name ?? null; + const publishedAt = new Date(item.creation_date * 1000).toISOString(); + + const canonicalId = generateCanonicalId(title, item.link, author, publishedAt); + + return { + canonical_id: canonicalId, + title, + url: item.link, + type: 'discussion' as ContentType, + channel: 'stackoverflow', + published_at: publishedAt, + author, + summary: truncate(title, 300), + tags: { + topic: this.mapSOTags(item.tags), + audience: ['intermediate'], + signal: this.inferSOSignal(item), + confidence: 'medium', + actionability: item.is_answered ? 'investigate' : 'respond', + }, + provenance: { + discovered_from: 'stackoverflow:questions', + discovered_query: tag, + source_first_seen: new Date().toISOString(), + raw_evidence_path: null, + }, + dedupe: { + is_duplicate: false, + duplicate_of: null, + duplicate_reason: null, + }, + }; + } + + private mapSOTags(tags: string[]): string[] { + const topicMap: Record = { + 'dotnet-aspire': 'aspire', + '.net-aspire': 'aspire', + 'c#': 'dotnet', + '.net': 'dotnet', + 'dotnet': 'dotnet', + 'azure': 'azure', + 'kubernetes': 'k8s', + 'docker': 'docker', + 'redis': 'redis', + 'postgresql': 'postgres', + 'postgres': 'postgres', + 'opentelemetry': 'otel', + 'authentication': 'auth', + 'caching': 'caching', + 'deployment': 'deploy', + }; + + const mapped = tags + .map((tag) => topicMap[tag.toLowerCase()] ?? null) + .filter((t): t is string => t !== null); + + return mapped.length > 0 ? [...new Set(mapped)] : ['aspire']; + } + + private inferSOSignal(item: StackOverflowQuestion): Signal[] { + const signals: Signal[] = []; + + if (!item.is_answered && item.answer_count === 0) { + signals.push('confusion'); + } + + if (item.score >= 5) { + signals.push('adoption'); + } + + if (item.score < 0) { + signals.push('complaint'); + } + + if (item.view_count > 100) { + signals.push('adoption'); + } + + return signals.length > 0 ? signals : ['other']; + } + + private decodeHtmlEntities(text: string): string { + const entities: Record = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + ''': "'", + ''': "'", + ''': "'", + }; + + return text.replace(/&[a-z0-9#]+;/gi, (match) => entities[match] ?? match); + } + + private sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } +} From 31763005ae3af5b42c1649c89db34bd0a772d5f0 Mon Sep 17 00:00:00 2001 From: bradygaster Date: Thu, 12 Mar 2026 02:49:40 -0700 Subject: [PATCH 2/2] fix: address code review issues in Phase 1 adapters - Stack Overflow: handle both gzip and plain JSON responses by checking Content-Encoding header with fallback try/catch - GitHub Discussions: check for GraphQL errors before accessing data, handle response.data.data nesting correctly - Dev.to: handle 429 rate limit responses with Retry-After header support Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/discovery/adapters/devto.ts | 7 +++++++ src/discovery/adapters/discussions.ts | 8 +++++++- src/discovery/adapters/stackoverflow.ts | 18 +++++++++++++++--- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/discovery/adapters/devto.ts b/src/discovery/adapters/devto.ts index e82e0a5..20e3abe 100644 --- a/src/discovery/adapters/devto.ts +++ b/src/discovery/adapters/devto.ts @@ -53,6 +53,13 @@ export class DevToSourceAdapter implements SourceAdapter { const response = await fetch(url); if (!response.ok) { + if (response.status === 429) { + const retryAfter = response.headers.get('retry-after'); + const waitMs = retryAfter ? parseInt(retryAfter, 10) * 1000 : 60_000; + console.warn(` ⏳ Dev.to rate limited, waiting ${waitMs / 1000}s`); + await sleep(waitMs); + continue; + } console.warn(` ⚠️ Dev.to API error for tag ${tag} page ${page}: ${response.status}`); break; } diff --git a/src/discovery/adapters/discussions.ts b/src/discovery/adapters/discussions.ts index 6b1ed1a..1c2bbf5 100644 --- a/src/discovery/adapters/discussions.ts +++ b/src/discovery/adapters/discussions.ts @@ -159,7 +159,13 @@ export class GitHubDiscussionsSourceAdapter implements SourceAdapter { ...variables, }); - const data = response.data as GraphQLResponse; + const responseData = response.data as { data?: GraphQLResponse; errors?: Array<{ message: string }> }; + + if (responseData.errors) { + throw new Error(`GraphQL errors: ${responseData.errors.map(e => e.message).join(', ')}`); + } + + const data = (responseData.data ?? responseData) as GraphQLResponse; const discussions = data.repository.discussions; // Filter by date diff --git a/src/discovery/adapters/stackoverflow.ts b/src/discovery/adapters/stackoverflow.ts index 81a3d67..1dd85ef 100644 --- a/src/discovery/adapters/stackoverflow.ts +++ b/src/discovery/adapters/stackoverflow.ts @@ -109,9 +109,21 @@ export class StackOverflowSourceAdapter implements SourceAdapter { } private async decompressResponse(response: Response): Promise { - const buffer = await response.arrayBuffer(); - const decompressed = gunzipSync(Buffer.from(buffer)); - return JSON.parse(decompressed.toString('utf-8')); + const buffer = Buffer.from(await response.arrayBuffer()); + const encoding = response.headers.get('content-encoding'); + + if (encoding === 'gzip') { + const decompressed = gunzipSync(buffer); + return JSON.parse(decompressed.toString('utf-8')); + } + + // Try gzip first (SE API sometimes sends gzip without header), fall back to plain JSON + try { + const decompressed = gunzipSync(buffer); + return JSON.parse(decompressed.toString('utf-8')); + } catch { + return JSON.parse(buffer.toString('utf-8')); + } } private mapToContentItem(item: StackOverflowQuestion, tag: string): ContentItem {