From 71e557bcea2a7d5179dc0cf97bb27a79f3a0abf8 Mon Sep 17 00:00:00 2001
From: bradygaster
Date: Thu, 12 Mar 2026 02:52:48 -0700
Subject: [PATCH] feat: add Phase 2 discovery adapters (YouTube, Podcasts)

- YouTubeSourceAdapter (#5): YouTube Data API v3 with two-step
  search+details, quota tracking (10K units/day), YOUTUBE_API_KEY validation
- PodcastSourceAdapter (#3): RSS-based podcast discovery from 6 curated
  .NET podcasts (.NET Rocks, Hanselminutes, etc.) with per-feed error handling

All 8 discovery adapters now registered in createDefaultRegistry().
Projected signal coverage: ~82% of Aspire community conversation.

Closes #3, closes #5

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
Notes (ignored by git am):
  Revised in review: restored generic type arguments, hardened YouTube URL
  building and last_run handling, word-bounded the short topic acronyms.
  Blob ids on the `index` lines of the two new files predate this revision.

 src/discovery/adapters/index.ts   |   6 +
 src/discovery/adapters/podcast.ts | 157 +++++++++++++
 src/discovery/adapters/youtube.ts | 317 ++++++++++++++++++++++++++
 3 files changed, 480 insertions(+)
 create mode 100644 src/discovery/adapters/podcast.ts
 create mode 100644 src/discovery/adapters/youtube.ts

diff --git a/src/discovery/adapters/index.ts b/src/discovery/adapters/index.ts
index 7b99560..f4e1443 100644
--- a/src/discovery/adapters/index.ts
+++ b/src/discovery/adapters/index.ts
@@ -12,6 +12,8 @@ export { RedditSourceAdapter } from './reddit.js';
 export { DevToSourceAdapter } from './devto.js';
 export { StackOverflowSourceAdapter } from './stackoverflow.js';
 export { GitHubDiscussionsSourceAdapter } from './discussions.js';
+export { YouTubeSourceAdapter } from './youtube.js';
+export { PodcastSourceAdapter } from './podcast.js';
 
 import { SourceRegistry } from './registry.js';
 import { RSSSourceAdapter } from './rss.js';
@@ -20,6 +22,8 @@ import { RedditSourceAdapter } from './reddit.js';
 import { DevToSourceAdapter } from './devto.js';
 import { StackOverflowSourceAdapter } from './stackoverflow.js';
 import { GitHubDiscussionsSourceAdapter } from './discussions.js';
+import { YouTubeSourceAdapter } from './youtube.js';
+import { PodcastSourceAdapter } from './podcast.js';
 
 export function createDefaultRegistry(): SourceRegistry {
   const registry = new SourceRegistry();
@@ -29,5 +33,7 @@ export function createDefaultRegistry(): SourceRegistry {
   registry.register(new DevToSourceAdapter());
   registry.register(new StackOverflowSourceAdapter());
   registry.register(new GitHubDiscussionsSourceAdapter());
+  registry.register(new YouTubeSourceAdapter());
+  registry.register(new PodcastSourceAdapter());
   return registry;
 }
diff --git a/src/discovery/adapters/podcast.ts b/src/discovery/adapters/podcast.ts
new file mode 100644
index 0000000..7b7c6d0
--- /dev/null
+++ b/src/discovery/adapters/podcast.ts
@@ -0,0 +1,157 @@
+/**
+ * PodcastSourceAdapter — Podcast RSS feed discovery implementation.
+ * Discovers .NET Aspire mentions in podcast episodes from curated feeds.
+ */
+
+import RssParser from 'rss-parser';
+import type { Channel, ContentItem, DiscoveryResult, RunState } from '../../types.js';
+import type { AdapterValidation, SourceAdapter } from './types.js';
+import { generateCanonicalId, isAspireRelated, isExcluded, truncate } from './helpers.js';
+
+/** Curated .NET podcast feeds scanned on every discovery run. */
+const PODCAST_FEEDS: readonly { url: string; name: string }[] = [
+  { url: 'https://www.dotnetrocks.com/feed', name: '.NET Rocks' },
+  { url: 'https://feeds.simplecast.com/gvtxUiIf', name: 'Hanselminutes' },
+  { url: 'https://thedotnetcorepodcast.libsyn.com/rss', name: 'The .NET Core Podcast' },
+  { url: 'https://www.codingblocks.net/feed/podcast', name: 'Coding Blocks' },
+  { url: 'https://6figuredev.com/feed/podcast', name: 'The 6 Figure Developer' },
+  { url: 'https://feeds.simplecast.com/GDyuEEo6', name: 'Adventures in .NET' },
+] as const;
+
+/**
+ * Topic tag -> keywords that imply it. Strings match as substrings of the
+ * lower-cased text. Short acronyms that also occur inside ordinary words
+ * ("aca" in "academy", "otel" in "hotel", "auth" in "author", "aws" in
+ * "laws") are regexes anchored on word boundaries instead.
+ * Keep in sync with the copy in youtube.ts.
+ */
+const TOPIC_KEYWORDS: Readonly<Record<string, readonly (string | RegExp)[]>> = {
+  apphost: ['apphost', 'app host'],
+  dashboard: ['dashboard'],
+  integrations: ['integration'],
+  k8s: ['kubernetes', 'k8s'],
+  aca: ['azure container app', /\baca\b/],
+  otel: ['opentelemetry', /\botel\b/],
+  postgres: ['postgres', 'postgresql'],
+  redis: ['redis'],
+  dapr: ['dapr'],
+  auth: [/\bauth\b/, 'oauth', 'authenticat', 'authoriz', 'identity'],
+  caching: ['cache', 'caching'],
+  dotnet: ['.net', 'dotnet', 'c#', 'csharp'],
+  typescript: ['typescript'],
+  python: ['python'],
+  docker: ['docker', 'container'],
+  deploy: ['deploy', 'deployment'],
+  azure: ['azure'],
+  aws: [/\baws\b/],
+};
+
+export class PodcastSourceAdapter implements SourceAdapter {
+  readonly name = 'podcasts';
+  readonly displayName = 'Podcasts';
+  readonly channel: Channel = 'podcast';
+
+  /** Reports the adapter as usable whenever at least one feed is configured. */
+  async validate(): Promise<AdapterValidation> {
+    if (PODCAST_FEEDS.length === 0) {
+      return { valid: false, reason: 'No podcast feeds configured' };
+    }
+    return { valid: true };
+  }
+
+  /**
+   * Fetches each curated feed in turn and collects Aspire-related episodes.
+   *
+   * A feed that fails to download or parse is logged and skipped so the
+   * remaining feeds are still processed.
+   *
+   * @param state Run state; episodes dated before `state.last_run` are skipped.
+   * @returns One result per feed that produced at least one matching episode.
+   */
+  async discover(state: RunState): Promise<DiscoveryResult[]> {
+    const parser = new RssParser();
+    const results: DiscoveryResult[] = [];
+    const sinceDate = new Date(state.last_run);
+
+    for (const feed of PODCAST_FEEDS) {
+      try {
+        console.log(` 🎙️ Fetching Podcast: ${feed.name}`);
+        const parsed = await parser.parseURL(feed.url);
+
+        const items: ContentItem[] = [];
+        for (const entry of parsed.items ?? []) {
+          const pubDate = entry.pubDate ? new Date(entry.pubDate) : null;
+
+          // Undated episodes (and unparsable dates, which compare false) are kept.
+          if (pubDate && pubDate < sinceDate) continue;
+
+          const title = entry.title ?? 'Untitled Episode';
+          // Fall back to the audio enclosure when the entry has no page link.
+          const url = entry.link ?? entry.enclosure?.url ?? '';
+          if (!url) continue;
+
+          const text = `${title} ${entry.contentSnippet ?? ''} ${entry.content ?? ''}`.toLowerCase();
+          if (!isAspireRelated(text)) continue;
+          if (isExcluded(text)) continue;
+
+          const canonicalId = generateCanonicalId(title, url, feed.name, entry.pubDate ?? null);
+
+          items.push({
+            canonical_id: canonicalId,
+            title,
+            url,
+            type: 'blog',
+            channel: 'podcast',
+            published_at: entry.pubDate ?? null,
+            author: feed.name,
+            summary: truncate(entry.contentSnippet ?? '', 300),
+            tags: {
+              topic: extractTopics(text),
+              audience: ['intermediate'],
+              signal: ['adoption'],
+              confidence: 'medium',
+              actionability: 'investigate',
+            },
+            provenance: {
+              discovered_from: `podcast:${feed.name}`,
+              discovered_query: null,
+              source_first_seen: new Date().toISOString(),
+              raw_evidence_path: null,
+            },
+            dedupe: {
+              is_duplicate: false,
+              duplicate_of: null,
+              duplicate_reason: null,
+            },
+          });
+        }
+
+        if (items.length > 0) {
+          results.push({ items, source: `podcast:${feed.name}` });
+        }
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        console.warn(` ⚠️ Podcast feed ${feed.name} failed: ${message}`);
+      }
+    }
+
+    return results;
+  }
+}
+
+/**
+ * Derives topic tags from episode text.
+ *
+ * @param text Episode text; it is lower-cased here before matching.
+ * @returns Every topic with a matching keyword, or `['aspire']` if none match.
+ */
+function extractTopics(text: string): string[] {
+  const lower = text.toLowerCase();
+  const found = Object.entries(TOPIC_KEYWORDS)
+    .filter(([, keywords]) =>
+      keywords.some((k) => (typeof k === 'string' ? lower.includes(k) : k.test(lower))),
+    )
+    .map(([topic]) => topic);
+
+  return found.length > 0 ? found : ['aspire'];
+}
diff --git a/src/discovery/adapters/youtube.ts b/src/discovery/adapters/youtube.ts
new file mode 100644
index 0000000..4293f1f
--- /dev/null
+++ b/src/discovery/adapters/youtube.ts
@@ -0,0 +1,317 @@
+/**
+ * YouTubeSourceAdapter — YouTube discovery implementation.
+ * Fetches videos from YouTube Data API v3 with two-step discovery:
+ * 1. Search for videos matching Aspire-related queries
+ * 2. Fetch detailed statistics for matched videos
+ */
+
+import type { Channel, ContentItem, ContentType, DiscoveryResult, RunState, Signal } from '../../types.js';
+import type { AdapterValidation, SourceAdapter } from './types.js';
+import { generateCanonicalId, isExcluded, truncate } from './helpers.js';
+
+const SEARCH_QUERIES: readonly string[] = ['dotnet aspire', '.net aspire', 'aspire dotnet'] as const;
+
+const SEARCH_API = 'https://www.googleapis.com/youtube/v3/search';
+const VIDEOS_API = 'https://www.googleapis.com/youtube/v3/videos';
+
+const QUOTA_LIMIT = 10000;
+const QUOTA_WARNING_THRESHOLD = 0.8;
+/** Quota units added to the running total for each search request. */
+const SEARCH_QUOTA_COST = 100;
+/** Quota units added to the running total for each videos request. */
+const VIDEOS_QUOTA_COST = 1;
+/** Search page size and number of ids sent per videos request. */
+const PAGE_SIZE = 50;
+
+/**
+ * Topic tag -> keywords that imply it. Strings match as substrings of the
+ * lower-cased text. Short acronyms that also occur inside ordinary words
+ * ("aca" in "academy", "otel" in "hotel", "auth" in "author", "aws" in
+ * "laws") are regexes anchored on word boundaries instead.
+ * Keep in sync with the copy in podcast.ts.
+ */
+const TOPIC_KEYWORDS: Readonly<Record<string, readonly (string | RegExp)[]>> = {
+  apphost: ['apphost', 'app host'],
+  dashboard: ['dashboard'],
+  integrations: ['integration'],
+  k8s: ['kubernetes', 'k8s'],
+  aca: ['azure container app', /\baca\b/],
+  otel: ['opentelemetry', /\botel\b/],
+  postgres: ['postgres', 'postgresql'],
+  redis: ['redis'],
+  dapr: ['dapr'],
+  auth: [/\bauth\b/, 'oauth', 'authenticat', 'authoriz', 'identity'],
+  caching: ['cache', 'caching'],
+  dotnet: ['.net', 'dotnet', 'c#', 'csharp'],
+  typescript: ['typescript'],
+  python: ['python'],
+  docker: ['docker', 'container'],
+  deploy: ['deploy', 'deployment'],
+  azure: ['azure'],
+  aws: [/\baws\b/],
+};
+
+interface YouTubeSearchItem {
+  id: {
+    videoId: string;
+  };
+  snippet: {
+    title: string;
+    description: string;
+    channelTitle: string;
+    publishedAt: string;
+  };
+}
+
+interface YouTubeSearchResponse {
+  items: YouTubeSearchItem[];
+}
+
+interface YouTubeVideoItem {
+  id: string;
+  snippet: {
+    title: string;
+    description: string;
+    channelTitle: string;
+    publishedAt: string;
+  };
+  statistics: {
+    viewCount: string;
+    likeCount?: string;
+    commentCount?: string;
+  };
+  contentDetails: {
+    duration: string;
+  };
+}
+
+interface YouTubeVideosResponse {
+  items: YouTubeVideoItem[];
+}
+
+export class YouTubeSourceAdapter implements SourceAdapter {
+  readonly name = 'youtube';
+  readonly displayName = 'YouTube';
+  readonly channel: Channel = 'youtube';
+
+  /** Running estimate of quota units spent by this adapter instance. */
+  private quotaUsed = 0;
+
+  /** Requires the YOUTUBE_API_KEY environment variable to be set. */
+  async validate(): Promise<AdapterValidation> {
+    if (!process.env['YOUTUBE_API_KEY']) {
+      return { valid: false, reason: 'YOUTUBE_API_KEY environment variable required' };
+    }
+    return { valid: true };
+  }
+
+  /**
+   * Runs every search query, then looks up the de-duplicated video ids in
+   * batches and converts the non-excluded videos into content items.
+   *
+   * Request failures are logged and skipped; a 403 stops the current phase.
+   *
+   * @param state Run state; `state.last_run` becomes the `publishedAfter` bound.
+   * @returns A single `youtube:search` result, or an empty array if nothing was found.
+   */
+  async discover(state: RunState): Promise<DiscoveryResult[]> {
+    const results: DiscoveryResult[] = [];
+
+    // Guard instead of asserting non-null: discover() may run without validate().
+    const apiKey = process.env['YOUTUBE_API_KEY'];
+    if (!apiKey) {
+      console.warn(' ⚠️ YouTube discovery skipped: YOUTUBE_API_KEY is not set');
+      return results;
+    }
+
+    // toISOString() throws on an invalid date, which would fail every query;
+    // search without a lower bound when last_run cannot be parsed.
+    const sinceDate = new Date(state.last_run);
+    const publishedAfter = Number.isNaN(sinceDate.getTime()) ? null : sinceDate.toISOString();
+
+    const videoIds = new Set<string>();
+
+    for (const query of SEARCH_QUERIES) {
+      try {
+        console.log(` 📡 Searching YouTube: "${query}"`);
+
+        // URLSearchParams percent-encodes every value, including the key and timestamp.
+        const searchParams = new URLSearchParams({
+          part: 'snippet',
+          q: query,
+          type: 'video',
+          key: apiKey,
+          maxResults: String(PAGE_SIZE),
+          order: 'date',
+        });
+        if (publishedAfter !== null) {
+          searchParams.set('publishedAfter', publishedAfter);
+        }
+
+        const searchResponse = await fetch(`${SEARCH_API}?${searchParams.toString()}`);
+        this.quotaUsed += SEARCH_QUOTA_COST;
+
+        if (!searchResponse.ok) {
+          if (searchResponse.status === 403) {
+            console.warn(` ⚠️ YouTube quota exceeded (${this.quotaUsed}/${QUOTA_LIMIT} units used)`);
+            break;
+          }
+          console.warn(` ⚠️ YouTube search failed: ${searchResponse.status}`);
+          continue;
+        }
+
+        const searchData = (await searchResponse.json()) as YouTubeSearchResponse;
+
+        for (const item of searchData.items ?? []) {
+          if (item.id?.videoId) {
+            videoIds.add(item.id.videoId);
+          }
+        }
+
+        this.checkQuota();
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        console.warn(` ⚠️ YouTube search "${query}" failed: ${message}`);
+      }
+    }
+
+    if (videoIds.size === 0) {
+      return results;
+    }
+
+    const videoIdArray = Array.from(videoIds);
+    const items: ContentItem[] = [];
+
+    for (let i = 0; i < videoIdArray.length; i += PAGE_SIZE) {
+      // Named idList so it no longer shadows the videoIds set above.
+      const idList = videoIdArray.slice(i, i + PAGE_SIZE).join(',');
+
+      try {
+        const videoParams = new URLSearchParams({
+          part: 'snippet,statistics,contentDetails',
+          id: idList,
+          key: apiKey,
+        });
+
+        const videosResponse = await fetch(`${VIDEOS_API}?${videoParams.toString()}`);
+        this.quotaUsed += VIDEOS_QUOTA_COST;
+
+        if (!videosResponse.ok) {
+          if (videosResponse.status === 403) {
+            console.warn(` ⚠️ YouTube quota exceeded (${this.quotaUsed}/${QUOTA_LIMIT} units used)`);
+            break;
+          }
+          console.warn(` ⚠️ YouTube videos API failed: ${videosResponse.status}`);
+          continue;
+        }
+
+        const videosData = (await videosResponse.json()) as YouTubeVideosResponse;
+
+        for (const video of videosData.items ?? []) {
+          // The response is only cast, not validated: skip malformed entries
+          // instead of letting a TypeError discard the rest of the batch.
+          if (!video?.id || typeof video.snippet?.title !== 'string') continue;
+
+          const title = video.snippet.title;
+          const description = video.snippet.description ?? '';
+          const text = `${title} ${description}`.toLowerCase();
+
+          if (isExcluded(text)) continue;
+
+          const videoUrl = `https://www.youtube.com/watch?v=${video.id}`;
+          const canonicalId = generateCanonicalId(
+            title,
+            videoUrl,
+            video.snippet.channelTitle,
+            video.snippet.publishedAt,
+          );
+
+          items.push({
+            canonical_id: canonicalId,
+            title,
+            url: videoUrl,
+            type: 'video' as ContentType,
+            channel: 'youtube',
+            published_at: video.snippet.publishedAt,
+            author: video.snippet.channelTitle,
+            summary: truncate(description, 300),
+            tags: {
+              topic: extractTopics(text),
+              audience: ['intermediate'],
+              signal: inferYouTubeSignal(title, description),
+              confidence: 'medium',
+              actionability: 'investigate',
+            },
+            provenance: {
+              discovered_from: 'youtube:search',
+              discovered_query: null,
+              source_first_seen: new Date().toISOString(),
+              raw_evidence_path: null,
+            },
+            dedupe: {
+              is_duplicate: false,
+              duplicate_of: null,
+              duplicate_reason: null,
+            },
+          });
+        }
+
+        this.checkQuota();
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        console.warn(` ⚠️ YouTube videos fetch failed: ${message}`);
+      }
+    }
+
+    if (items.length > 0) {
+      results.push({ items, source: 'youtube:search' });
+    }
+
+    console.log(` ✓ YouTube discovered ${items.length} videos (quota: ${this.quotaUsed}/${QUOTA_LIMIT} units)`);
+
+    return results;
+  }
+
+  /** Logs a warning once the running total reaches the warning threshold. */
+  private checkQuota(): void {
+    if (this.quotaUsed >= QUOTA_LIMIT * QUOTA_WARNING_THRESHOLD) {
+      console.warn(` ⚠️ YouTube quota warning: ${this.quotaUsed}/${QUOTA_LIMIT} units used (${Math.round((this.quotaUsed / QUOTA_LIMIT) * 100)}%)`);
+    }
+  }
+}
+
+/**
+ * Picks a single signal from the first keyword group found in the text.
+ * Groups are checked in order, so earlier groups win when several match.
+ *
+ * @returns A one-element array; `['other']` when no keyword matches.
+ */
+function inferYouTubeSignal(title: string, description: string): Signal[] {
+  const text = `${title} ${description}`.toLowerCase();
+
+  if (text.includes('release') || text.includes('announcement') || text.includes('new version')) return ['release'];
+  if (text.includes('tutorial') || text.includes('how to') || text.includes('guide') || text.includes('demo')) return ['tutorial'];
+  if (text.includes('getting started') || text.includes('introduction') || text.includes('intro to')) return ['tutorial'];
+  if (text.includes('issue') || text.includes('problem') || text.includes('error') || text.includes('bug')) return ['complaint'];
+  if (text.includes('feature') || text.includes('new in')) return ['release'];
+  if (text.includes('deploy') || text.includes('production') || text.includes('using aspire')) return ['adoption'];
+
+  return ['other'];
+}
+
+/**
+ * Derives topic tags from video text.
+ *
+ * @param text Video text; it is lower-cased here before matching.
+ * @returns Every topic with a matching keyword, or `['aspire']` if none match.
+ */
+function extractTopics(text: string): string[] {
+  const lower = text.toLowerCase();
+  const found = Object.entries(TOPIC_KEYWORDS)
+    .filter(([, keywords]) =>
+      keywords.some((k) => (typeof k === 'string' ? lower.includes(k) : k.test(lower))),
+    )
+    .map(([topic]) => topic);
+
+  return found.length > 0 ? found : ['aspire'];
+}