From 71e557bcea2a7d5179dc0cf97bb27a79f3a0abf8 Mon Sep 17 00:00:00 2001
From: bradygaster <bradyg@microsoft.com>
Date: Thu, 12 Mar 2026 02:52:48 -0700
Subject: [PATCH] feat: add Phase 2 discovery adapters (YouTube, Podcasts)

- YouTubeSourceAdapter (#5): YouTube Data API v3 with two-step search+details,
  quota tracking (10K units/day), YOUTUBE_API_KEY validation
- PodcastSourceAdapter (#3): RSS-based podcast discovery from 6 curated .NET
  podcasts (.NET Rocks, Hanselminutes, etc.) with per-feed error handling

All 8 discovery adapters now registered in createDefaultRegistry().
Projected signal coverage: ~82% of Aspire community conversation.

Closes #3, closes #5

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/discovery/adapters/index.ts   |   6 +
 src/discovery/adapters/podcast.ts | 131 ++++++++++++++++
 src/discovery/adapters/youtube.ts | 253 ++++++++++++++++++++++++++++++
 3 files changed, 390 insertions(+)
 create mode 100644 src/discovery/adapters/podcast.ts
 create mode 100644 src/discovery/adapters/youtube.ts

diff --git a/src/discovery/adapters/index.ts b/src/discovery/adapters/index.ts
index 7b99560..f4e1443 100644
--- a/src/discovery/adapters/index.ts
+++ b/src/discovery/adapters/index.ts
@@ -12,6 +12,8 @@ export { RedditSourceAdapter } from './reddit.js';
 export { DevToSourceAdapter } from './devto.js';
 export { StackOverflowSourceAdapter } from './stackoverflow.js';
 export { GitHubDiscussionsSourceAdapter } from './discussions.js';
+export { YouTubeSourceAdapter } from './youtube.js';
+export { PodcastSourceAdapter } from './podcast.js';
 
 import { SourceRegistry } from './registry.js';
 import { RSSSourceAdapter } from './rss.js';
@@ -20,6 +22,8 @@ import { RedditSourceAdapter } from './reddit.js';
 import { DevToSourceAdapter } from './devto.js';
 import { StackOverflowSourceAdapter } from './stackoverflow.js';
 import { GitHubDiscussionsSourceAdapter } from './discussions.js';
+import { YouTubeSourceAdapter } from './youtube.js';
+import { PodcastSourceAdapter } from './podcast.js';
 
 export function createDefaultRegistry(): SourceRegistry {
   const registry = new SourceRegistry();
@@ -29,5 +33,7 @@ export function createDefaultRegistry(): SourceRegistry {
   registry.register(new DevToSourceAdapter());
   registry.register(new StackOverflowSourceAdapter());
   registry.register(new GitHubDiscussionsSourceAdapter());
+  registry.register(new YouTubeSourceAdapter());
+  registry.register(new PodcastSourceAdapter());
   return registry;
 }
diff --git a/src/discovery/adapters/podcast.ts b/src/discovery/adapters/podcast.ts
new file mode 100644
index 0000000..7b7c6d0
--- /dev/null
+++ b/src/discovery/adapters/podcast.ts
@@ -0,0 +1,131 @@
+/**
+ * PodcastSourceAdapter — Podcast RSS feed discovery implementation.
+ * Discovers .NET Aspire mentions in podcast episodes from curated feeds.
+ */
+
+import RssParser from 'rss-parser';
+import type { Channel, ContentItem, DiscoveryResult, RunState } from '../../types.js';
+import type { AdapterValidation, SourceAdapter } from './types.js';
+import { generateCanonicalId, isAspireRelated, isExcluded, truncate } from './helpers.js';
+
+const PODCAST_FEEDS: readonly { url: string; name: string }[] = [ // curated feeds; discover() fetches them in this order
+  { url: 'https://www.dotnetrocks.com/feed', name: '.NET Rocks' },
+  { url: 'https://feeds.simplecast.com/gvtxUiIf', name: 'Hanselminutes' },
+  { url: 'https://thedotnetcorepodcast.libsyn.com/rss', name: 'The .NET Core Podcast' },
+  { url: 'https://www.codingblocks.net/feed/podcast', name: 'Coding Blocks' },
+  { url: 'https://6figuredev.com/feed/podcast', name: 'The 6 Figure Developer' },
+  { url: 'https://feeds.simplecast.com/GDyuEEo6', name: 'Adventures in .NET' },
+] as const; // `name` feeds the log lines, the item author and the `podcast:<name>` source label
+
+export class PodcastSourceAdapter implements SourceAdapter {
+  readonly name = 'podcasts';
+  readonly displayName = 'Podcasts';
+  readonly channel: Channel = 'podcast';
+
+  /** Valid as long as the curated feed list is non-empty; makes no network request. */
+  async validate(): Promise<AdapterValidation> {
+    return PODCAST_FEEDS.length > 0
+      ? { valid: true }
+      : { valid: false, reason: 'No podcast feeds configured' };
+  }
+
+  /**
+   * Fetches each curated feed in turn and collects the Aspire-related, non-excluded
+   * episodes that are not older than `state.last_run`.
+   * @param state - run state; only `last_run` is read, as the cutoff date.
+   * @returns one result per feed that produced at least one matching episode.
+   *   A feed that fails to load or parse is logged with a warning and skipped.
+   */
+  async discover(state: RunState): Promise<DiscoveryResult[]> {
+    const rss = new RssParser();
+    const cutoff = new Date(state.last_run);
+    const discovered: DiscoveryResult[] = [];
+
+    for (const feed of PODCAST_FEEDS) {
+      const source = `podcast:${feed.name}`;
+      try {
+        console.log(`  🎙️  Fetching Podcast: ${feed.name}`);
+        const feedDoc = await rss.parseURL(feed.url);
+        const matches: ContentItem[] = [];
+
+        for (const episode of feedDoc.items ?? []) {
+          // Undated episodes are kept; dated ones must not predate the cutoff.
+          const published = episode.pubDate ? new Date(episode.pubDate) : null;
+          if (published && published < cutoff) continue;
+
+          const link = episode.link ?? episode.enclosure?.url ?? '';
+          if (!link) continue;
+
+          const title = episode.title ?? 'Untitled Episode';
+          const snippet = episode.contentSnippet ?? '';
+          const haystack = `${title} ${snippet} ${episode.content ?? ''}`.toLowerCase();
+          if (!isAspireRelated(haystack) || isExcluded(haystack)) continue;
+
+          matches.push({
+            canonical_id: generateCanonicalId(title, link, feed.name, episode.pubDate ?? null),
+            title,
+            url: link,
+            type: 'blog',
+            channel: 'podcast',
+            published_at: episode.pubDate ?? null,
+            author: feed.name,
+            summary: truncate(snippet, 300),
+            tags: {
+              topic: extractTopics(haystack),
+              audience: ['intermediate'],
+              signal: ['adoption'],
+              confidence: 'medium',
+              actionability: 'investigate',
+            },
+            provenance: {
+              discovered_from: source,
+              discovered_query: null,
+              source_first_seen: new Date().toISOString(),
+              raw_evidence_path: null,
+            },
+            dedupe: { is_duplicate: false, duplicate_of: null, duplicate_reason: null },
+          });
+        }
+
+        if (matches.length > 0) discovered.push({ items: matches, source });
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        console.warn(`  ⚠️  Podcast feed ${feed.name} failed: ${reason}`);
+      }
+    }
+
+    return discovered;
+  }
+}
+
+/**
+ * Topic tag -> pattern tested against lower-cased text. Ambiguous short tokens are
+ * anchored so that "academy", "hotel", "rediscover" and "author" are not tagged.
+ */
+const TOPIC_PATTERNS: Readonly<Record<string, RegExp>> = {
+  apphost: /apphost|app host/,
+  dashboard: /dashboard/,
+  integrations: /integration/,
+  k8s: /kubernetes|k8s/,
+  aca: /azure container app|\baca\b/,
+  otel: /opentelemetry|\botel\b/,
+  postgres: /postgres/,
+  redis: /\bredis\b/,
+  dapr: /dapr/,
+  auth: /auth(?!or(?!i[sz]))|identity/, // "authorize"/"authorise" match, "author"/"authority" do not
+  caching: /cache|caching/,
+  dotnet: /\.net|dotnet|c#|csharp/,
+  typescript: /typescript/,
+  python: /python/,
+  docker: /docker|container/,
+  deploy: /deploy/,
+};
+
+/** Returns the tags whose pattern occurs in `text` (table order), or `['aspire']` when none do. */
+function extractTopics(text: string): string[] {
+  const lower = text.toLowerCase();
+  const found = Object.entries(TOPIC_PATTERNS)
+    .filter(([, pattern]) => pattern.test(lower))
+    .map(([topic]) => topic);
+  return found.length > 0 ? found : ['aspire'];
+}
diff --git a/src/discovery/adapters/youtube.ts b/src/discovery/adapters/youtube.ts
new file mode 100644
index 0000000..4293f1f
--- /dev/null
+++ b/src/discovery/adapters/youtube.ts
@@ -0,0 +1,253 @@
+/**
+ * YouTubeSourceAdapter — YouTube discovery implementation.
+ * Fetches videos from YouTube Data API v3 with two-step discovery:
+ * 1. Search for videos matching Aspire-related queries
+ * 2. Fetch detailed statistics for matched videos
+ */
+
+import type { Channel, ContentItem, ContentType, DiscoveryResult, RunState, Signal } from '../../types.js';
+import type { AdapterValidation, SourceAdapter } from './types.js';
+import { generateCanonicalId, isExcluded, truncate } from './helpers.js';
+
+const SEARCH_QUERIES: readonly string[] = ['dotnet aspire', '.net aspire', 'aspire dotnet'] as const; // one search request is issued per entry
+
+const SEARCH_API = 'https://www.googleapis.com/youtube/v3/search'; // step 1: find candidate video ids
+const VIDEOS_API = 'https://www.googleapis.com/youtube/v3/videos'; // step 2: fetch details for those ids
+
+const QUOTA_LIMIT = 10000; // unit budget that usage is reported against in log messages
+const QUOTA_WARNING_THRESHOLD = 0.8; // checkQuota() warns once usage reaches this fraction of QUOTA_LIMIT
+
+/** Snippet fields shared by search hits and video resources. */
+interface YouTubeSnippet {
+  title: string;
+  description: string;
+  channelTitle: string;
+  publishedAt: string;
+}
+
+/** One hit from the search endpoint; the adapter reads only `id.videoId` from it. */
+interface YouTubeSearchItem {
+  id: { videoId: string };
+  snippet: YouTubeSnippet;
+}
+
+/** Body of a search response. */
+interface YouTubeSearchResponse {
+  items: YouTubeSearchItem[];
+}
+
+/** One video resource as requested with part=snippet,statistics,contentDetails. */
+interface YouTubeVideoItem {
+  id: string;
+  snippet: YouTubeSnippet;
+  statistics: {
+    viewCount: string;
+    likeCount?: string;
+    commentCount?: string;
+  };
+  contentDetails: {
+    duration: string;
+  };
+}
+
+/** Body of a videos response. */
+interface YouTubeVideosResponse {
+  items: YouTubeVideoItem[];
+}
+
+export class YouTubeSourceAdapter implements SourceAdapter {
+  readonly name = 'youtube';
+  readonly displayName = 'YouTube';
+  readonly channel: Channel = 'youtube';
+
+  /** Estimated API units spent by this instance (100 per search call, 1 per videos call); never reset. */
+  private quotaUsed = 0;
+
+  /** Valid only when the YOUTUBE_API_KEY environment variable is set and non-empty. */
+  async validate(): Promise<AdapterValidation> {
+    if (!process.env['YOUTUBE_API_KEY']) {
+      return { valid: false, reason: 'YOUTUBE_API_KEY environment variable required' };
+    }
+    return { valid: true };
+  }
+
+  /**
+   * Runs every search query, de-duplicates the returned video ids, then fetches
+   * details in batches of 50 and maps each non-excluded video to a ContentItem.
+   *
+   * @param state - run state; `last_run` becomes the `publishedAfter` filter when it parses as a date.
+   * @returns a single `youtube:search` result, or an empty array when nothing was found.
+   *   Failed requests are logged and skipped; an HTTP 403 stops the current step.
+   */
+  async discover(state: RunState): Promise<DiscoveryResult[]> {
+    const apiKey = process.env['YOUTUBE_API_KEY'];
+    if (!apiKey) {
+      console.warn('  ⚠️  YouTube discovery skipped: YOUTUBE_API_KEY is not set');
+      return [];
+    }
+
+    // toISOString() throws a RangeError on an invalid date, so the date filter is
+    // only sent when last_run parsed; otherwise the search runs without it.
+    const sinceDate = new Date(state.last_run);
+    const publishedAfter = Number.isNaN(sinceDate.getTime()) ? null : sinceDate.toISOString();
+    const videoIds = new Set<string>();
+
+    for (const query of SEARCH_QUERIES) {
+      try {
+        console.log(`  📡 Searching YouTube: "${query}"`);
+
+        // URLSearchParams percent-encodes every value, including the key and the timestamp.
+        const searchParams = new URLSearchParams({
+          part: 'snippet', q: query, type: 'video', key: apiKey, maxResults: '50', order: 'date',
+        });
+        if (publishedAfter) searchParams.set('publishedAfter', publishedAfter);
+
+        const searchResponse = await fetch(`${SEARCH_API}?${searchParams.toString()}`);
+        this.quotaUsed += 100;
+
+        if (!searchResponse.ok) {
+          if (searchResponse.status === 403) {
+            console.warn(`  ⚠️  YouTube quota exceeded (${this.quotaUsed}/${QUOTA_LIMIT} units used)`);
+            break;
+          }
+          console.warn(`  ⚠️  YouTube search failed: ${searchResponse.status}`);
+          continue;
+        }
+
+        const searchData = (await searchResponse.json()) as YouTubeSearchResponse;
+        for (const item of searchData.items ?? []) {
+          if (item.id?.videoId) videoIds.add(item.id.videoId);
+        }
+
+        this.checkQuota();
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        console.warn(`  ⚠️  YouTube search "${query}" failed: ${message}`);
+      }
+    }
+
+    if (videoIds.size === 0) return [];
+
+    const allIds = Array.from(videoIds);
+    const items: ContentItem[] = [];
+
+    for (let i = 0; i < allIds.length; i += 50) {
+      const idList = allIds.slice(i, i + 50).join(',');
+
+      try {
+        const videoParams = new URLSearchParams({ part: 'snippet,statistics,contentDetails', id: idList, key: apiKey });
+        const videosResponse = await fetch(`${VIDEOS_API}?${videoParams.toString()}`);
+        this.quotaUsed += 1;
+
+        if (!videosResponse.ok) {
+          if (videosResponse.status === 403) {
+            console.warn(`  ⚠️  YouTube quota exceeded (${this.quotaUsed}/${QUOTA_LIMIT} units used)`);
+            break;
+          }
+          console.warn(`  ⚠️  YouTube videos API failed: ${videosResponse.status}`);
+          continue;
+        }
+
+        const videosData = (await videosResponse.json()) as YouTubeVideosResponse;
+        for (const video of videosData.items ?? []) {
+          const { title, channelTitle, publishedAt } = video.snippet;
+          const description = video.snippet.description ?? '';
+          const text = `${title} ${description}`.toLowerCase();
+
+          if (isExcluded(text)) continue;
+
+          const videoUrl = `https://www.youtube.com/watch?v=${video.id}`;
+          const canonicalId = generateCanonicalId(title, videoUrl, channelTitle, publishedAt);
+
+          items.push({
+            canonical_id: canonicalId,
+            title,
+            url: videoUrl,
+            type: 'video' as ContentType,
+            channel: 'youtube',
+            published_at: publishedAt,
+            author: channelTitle,
+            summary: truncate(description, 300),
+            tags: {
+              topic: extractTopics(text),
+              audience: ['intermediate'],
+              signal: inferYouTubeSignal(title, description),
+              confidence: 'medium',
+              actionability: 'investigate',
+            },
+            provenance: {
+              discovered_from: 'youtube:search',
+              discovered_query: null,
+              source_first_seen: new Date().toISOString(),
+              raw_evidence_path: null,
+            },
+            dedupe: { is_duplicate: false, duplicate_of: null, duplicate_reason: null },
+          });
+        }
+
+        this.checkQuota();
+      } catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        console.warn(`  ⚠️  YouTube videos fetch failed: ${message}`);
+      }
+    }
+
+    const results: DiscoveryResult[] = items.length > 0 ? [{ items, source: 'youtube:search' }] : [];
+    console.log(`  ✓ YouTube discovered ${items.length} videos (quota: ${this.quotaUsed}/${QUOTA_LIMIT} units)`);
+    return results;
+  }
+
+  /** Logs a warning once estimated usage reaches QUOTA_WARNING_THRESHOLD of QUOTA_LIMIT. */
+  private checkQuota(): void {
+    if (this.quotaUsed >= QUOTA_LIMIT * QUOTA_WARNING_THRESHOLD) {
+      console.warn(`  ⚠️  YouTube quota warning: ${this.quotaUsed}/${QUOTA_LIMIT} units used (${Math.round((this.quotaUsed / QUOTA_LIMIT) * 100)}%)`);
+    }
+  }
+}
+
+/**
+ * Classifies a video by the first keyword rule found in its lower-cased title and description; `['other']` if none.
+ */
+function inferYouTubeSignal(title: string, description: string): Signal[] {
+  const text = `${title} ${description}`.toLowerCase();
+  if (/release|announcement|new version/.test(text)) return ['release'];
+  if (/tutorial|how to|guide|demo|getting started|introduction|intro to/.test(text)) return ['tutorial'];
+  if (/issue|problem|error|\bbug/.test(text)) return ['complaint']; // word-start "bug" so "debug(ging)" is not a complaint
+  if (/feature|new in/.test(text)) return ['release'];
+  if (/deploy|production|using aspire/.test(text)) return ['adoption'];
+  return ['other'];
+}
+
+/**
+ * Topic tag -> pattern tested against lower-cased text. Ambiguous short tokens are
+ * anchored so that "academy", "hotel", "rediscover", "author" and "laws" are not tagged.
+ */
+const TOPIC_PATTERNS: Readonly<Record<string, RegExp>> = {
+  apphost: /apphost|app host/,
+  dashboard: /dashboard/,
+  integrations: /integration/,
+  k8s: /kubernetes|k8s/,
+  aca: /azure container app|\baca\b/,
+  otel: /opentelemetry|\botel\b/,
+  postgres: /postgres/,
+  redis: /\bredis\b/,
+  dapr: /dapr/,
+  auth: /auth(?!or(?!i[sz]))|identity/, // "authorize"/"authorise" match, "author"/"authority" do not
+  caching: /cache|caching/,
+  dotnet: /\.net|dotnet|c#|csharp/,
+  typescript: /typescript/,
+  python: /python/,
+  docker: /docker|container/,
+  deploy: /deploy/,
+  azure: /azure/,
+  aws: /\baws\b/,
+};
+
+/** Returns the tags whose pattern occurs in `text` (table order), or `['aspire']` when none do. */
+function extractTopics(text: string): string[] {
+  const lower = text.toLowerCase();
+  const found = Object.entries(TOPIC_PATTERNS)
+    .filter(([, pattern]) => pattern.test(lower))
+    .map(([topic]) => topic);
+  return found.length > 0 ? found : ['aspire'];
+}