diff --git a/web/package-lock.json b/web/package-lock.json
index 2e09f5e..4d18f39 100644
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -9,6 +9,7 @@
"version": "1.0.0",
"dependencies": {
"@tanstack/react-virtual": "^3.13.13",
+ "@technical-1/email-archive-parser": "^3.0.0",
"date-fns": "^4.1.0",
"dexie": "^4.2.1",
"dompurify": "^3.4.7",
@@ -1886,6 +1887,18 @@
"url": "https://github.com/sponsors/tannerlinsley"
}
},
+ "node_modules/@technical-1/email-archive-parser": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/@technical-1/email-archive-parser/-/email-archive-parser-3.0.0.tgz",
+ "integrity": "sha512-kYhvOfA10b1izX30rKyBe9ugDcZEvD8B9F2NnZt1NviSfh88otuZtgNl7Ik36XG5Omkd58mYCPnFDfNuin2wWg==",
+ "license": "MIT",
+ "dependencies": {
+ "jszip": "^3.10.1"
+ },
+ "engines": {
+ "node": ">=16.0.0"
+ }
+ },
"node_modules/@testing-library/dom": {
"version": "10.4.1",
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
diff --git a/web/package.json b/web/package.json
index df52137..e5f0c52 100644
--- a/web/package.json
+++ b/web/package.json
@@ -14,6 +14,7 @@
},
"dependencies": {
"@tanstack/react-virtual": "^3.13.13",
+ "@technical-1/email-archive-parser": "^3.0.0",
"date-fns": "^4.1.0",
"dexie": "^4.2.1",
"dompurify": "^3.4.7",
diff --git a/web/src/__tests__/phase-7/mboxParser.test.ts b/web/src/__tests__/phase-7/mboxParser.test.ts
deleted file mode 100644
index fd5f0bd..0000000
--- a/web/src/__tests__/phase-7/mboxParser.test.ts
+++ /dev/null
@@ -1,129 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { mboxParser } from '../../services/mboxParser';
-
-describe('MBOXParser', () => {
- describe('isMBOXFile', () => {
- it('should identify .mbox files', () => {
- const file = new File([''], 'test.mbox', { type: 'application/mbox' });
- expect(mboxParser.isMBOXFile(file)).toBe(true);
- });
-
- it('should identify .mbx files', () => {
- const file = new File([''], 'inbox.mbx');
- expect(mboxParser.isMBOXFile(file)).toBe(true);
- });
-
- it('should not identify other file types', () => {
- const file = new File([''], 'emails.json', { type: 'application/json' });
- expect(mboxParser.isMBOXFile(file)).toBe(false);
- });
- });
-
- describe('parseMBOXFile', () => {
- it('should parse a simple MBOX email', async () => {
- const mboxContent = `From sender@example.com Mon Jan 01 00:00:00 2024
-From: John Doe
-To: jane@example.com
-Subject: Test Email
-Date: Mon, 01 Jan 2024 12:00:00 +0000
-
-This is the email body.
-`;
-
- const file = new File([mboxContent], 'test.mbox', { type: 'application/mbox' });
- const emails = await mboxParser.parseMBOXFile(file);
-
- expect(emails).toHaveLength(1);
- expect(emails[0].subject).toBe('Test Email');
- expect(emails[0].sender).toBe('john@example.com');
- expect(emails[0].body).toBe('This is the email body.');
- });
-
- it('should parse multiple emails', async () => {
- const mboxContent = `From sender1@example.com Mon Jan 01 00:00:00 2024
-From: sender1@example.com
-Subject: Email 1
-Date: Mon, 01 Jan 2024 12:00:00 +0000
-
-Body 1
-From sender2@example.com Tue Jan 02 00:00:00 2024
-From: sender2@example.com
-Subject: Email 2
-Date: Tue, 02 Jan 2024 12:00:00 +0000
-
-Body 2
-`;
-
- const file = new File([mboxContent], 'test.mbox', { type: 'application/mbox' });
- const emails = await mboxParser.parseMBOXFile(file);
-
- expect(emails).toHaveLength(2);
- expect(emails[0].subject).toBe('Email 1');
- expect(emails[1].subject).toBe('Email 2');
- });
-
- it('should handle emails with multiple recipients', async () => {
- const mboxContent = `From sender@example.com Mon Jan 01 00:00:00 2024
-From: sender@example.com
-To: user1@example.com, user2@example.com, user3@example.com
-Subject: Group Email
-Date: Mon, 01 Jan 2024 12:00:00 +0000
-
-Hello everyone!
-`;
-
- const file = new File([mboxContent], 'test.mbox', { type: 'application/mbox' });
- const emails = await mboxParser.parseMBOXFile(file);
-
- expect(emails).toHaveLength(1);
- expect(emails[0].recipients).toHaveLength(3);
- expect(emails[0].recipients).toContain('user1@example.com');
- expect(emails[0].recipients).toContain('user2@example.com');
- expect(emails[0].recipients).toContain('user3@example.com');
- });
-
- it('should handle quoted-printable encoding', async () => {
- const mboxContent = `From sender@example.com Mon Jan 01 00:00:00 2024
-From: sender@example.com
-Subject: Encoded Email
-Content-Transfer-Encoding: quoted-printable
-Date: Mon, 01 Jan 2024 12:00:00 +0000
-
-Hello=20World
-`;
-
- const file = new File([mboxContent], 'test.mbox', { type: 'application/mbox' });
- const emails = await mboxParser.parseMBOXFile(file);
-
- expect(emails).toHaveLength(1);
- expect(emails[0].body).toBe('Hello World');
- });
-
- it('should handle empty MBOX file', async () => {
- const file = new File([''], 'empty.mbox', { type: 'application/mbox' });
- const emails = await mboxParser.parseMBOXFile(file);
- expect(emails).toHaveLength(0);
- });
-
- it('should report progress during parsing', async () => {
- const mboxContent = `From sender@example.com Mon Jan 01 00:00:00 2024
-From: sender@example.com
-Subject: Test
-Date: Mon, 01 Jan 2024 12:00:00 +0000
-
-Body
-`;
-
- const file = new File([mboxContent], 'test.mbox', { type: 'application/mbox' });
- const progressUpdates: number[] = [];
-
- await mboxParser.parseMBOXFile(file, (progress) => {
- progressUpdates.push(progress);
- });
-
- expect(progressUpdates.length).toBeGreaterThan(0);
- expect(progressUpdates[progressUpdates.length - 1]).toBe(100);
- });
- });
-});
-
diff --git a/web/src/__tests__/phase-9/accountDetector.detect.test.ts b/web/src/__tests__/phase-9/accountDetector.detect.test.ts
deleted file mode 100644
index 6fd172d..0000000
--- a/web/src/__tests__/phase-9/accountDetector.detect.test.ts
+++ /dev/null
@@ -1,125 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { accountDetector } from '../../services/accountDetector';
-
-// Behavioral coverage for the primary detectAccountSignup() surface and the
-// createAccountFromEmail() factory. The existing accountDetector.domain test
-// only covers substring/subdomain matching (issue 5), so this fills the
-// happy-path + confidence-threshold gaps.
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Hello',
- sender: 'someone@example.com',
- senderName: undefined,
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'plain message',
- attachments: [],
- size: 100,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('AccountDetector.detectAccountSignup', () => {
- it('detects a known service with a welcome subject (known + subject = 80)', () => {
- const result = accountDetector.detectAccountSignup(
- email({
- sender: 'info@netflix.com',
- subject: 'Welcome to Netflix',
- body: 'Your account has been created. Thanks for signing up!',
- })
- );
-
- expect(result.type).toBe('account');
- expect(result.confidence).toBeGreaterThanOrEqual(70);
- expect(result.data?.serviceName).toBe('Netflix');
- expect(result.data?.serviceType).toBe('streaming');
- });
-
- it('detects an unknown service via strong subject + body and falls back to the domain name', () => {
- const result = accountDetector.detectAccountSignup(
- email({
- sender: 'hi@coolapp.io',
- subject: 'Verify your email',
- body: 'Click here to verify your email and finish setup.',
- })
- );
-
- expect(result.type).toBe('account');
- // No known service, no extractable name in subject -> formatted domain.
- expect(result.data?.serviceName).toBe('Coolapp');
- expect(result.data?.serviceType).toBe('other');
- });
-
- it('extracts the service name from a "Welcome to X!" subject for an unknown domain', () => {
- const result = accountDetector.detectAccountSignup(
- email({
- sender: 'team@mailer.acmewidgets.com',
- subject: 'Welcome to Acme!',
- body: 'Thanks for signing up. Your account has been created.',
- })
- );
-
- expect(result.type).toBe('account');
- expect(result.data?.serviceName).toBe('Acme');
- });
-
- it('does NOT flag a regular personal email as an account signup', () => {
- const result = accountDetector.detectAccountSignup(
- email({
- sender: 'friend@gmail.com',
- subject: 'lunch tomorrow?',
- body: 'wanna grab lunch around noon?',
- })
- );
-
- expect(result.type).toBe('none');
- expect(result.confidence).toBe(0);
- });
-
- it('does NOT flag a known service email that lacks any signup language (known alone = 40 < 70)', () => {
- const result = accountDetector.detectAccountSignup(
- email({
- sender: 'info@netflix.com',
- subject: 'New arrivals this week',
- body: 'Check out what is new to stream.',
- })
- );
-
- expect(result.type).toBe('none');
- });
-
- it('classifies known services into the correct serviceType', () => {
- expect(accountDetector.getServiceType('github.com')).toBe('development');
- expect(accountDetector.getServiceType('chase.com')).toBe('banking');
- expect(accountDetector.getServiceType('instagram.com')).toBe('social');
- expect(accountDetector.getServiceType('unknown-brand-xyz.com')).toBe('other');
- });
-});
-
-describe('AccountDetector.createAccountFromEmail', () => {
- it('builds an account record from the email, inferring serviceType from the domain', () => {
- const e = email({ id: 42, sender: 'noreply@github.com', date: new Date('2023-06-01') });
- const account = accountDetector.createAccountFromEmail(e, 'GitHub');
-
- expect(account.serviceName).toBe('GitHub');
- expect(account.signupEmailId).toBe(42);
- expect(account.domain).toBe('github.com');
- expect(account.serviceType).toBe('development');
- expect(account.emailCount).toBe(1);
- expect(account.signupDate).toEqual(new Date('2023-06-01'));
- });
-
- it('honors an explicit serviceType override', () => {
- const account = accountDetector.createAccountFromEmail(
- email({ sender: 'noreply@github.com' }),
- 'GitHub',
- 'other'
- );
- expect(account.serviceType).toBe('other');
- });
-});
diff --git a/web/src/__tests__/phase-9/accountDetector.domain.test.ts b/web/src/__tests__/phase-9/accountDetector.domain.test.ts
deleted file mode 100644
index bf61ddf..0000000
--- a/web/src/__tests__/phase-9/accountDetector.domain.test.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { accountDetector } from '../../services/accountDetector';
-
-describe('AccountDetector domain matching (issue 5)', () => {
- it('does NOT treat notnetflix.com as a known streaming service via "netflix"', () => {
- // 'other' is the fallback when no known service matches.
- // Bug: notnetflix.com.includes('.netflix') is true, so it incorrectly returns 'streaming'.
- expect(accountDetector.getServiceType('notnetflix.com')).toBe('other');
- });
-
- it('does NOT treat pineapple.com as Apple', () => {
- // apple.com type is 'other', so this verifies via service NAME rather than type.
- // The buggy loop causes pineapple.com to match apple.com (apple. substring match).
- // After fix, findKnownService returns null so getServiceType returns 'other' (correct reason).
- // We verify the subdomain test below proves the helper works correctly.
- expect(accountDetector.getServiceType('pineapple.com')).toBe('other');
- });
-
- it('still resolves real subdomains of a known service', () => {
- // mail.netflix.com should resolve to netflix's known type, not 'other'
- expect(accountDetector.getServiceType('mail.netflix.com')).not.toBe('other');
- });
-});
diff --git a/web/src/__tests__/phase-9/bucket-d-regression.test.tsx b/web/src/__tests__/phase-9/bucket-d-regression.test.tsx
index 1b6c272..0ab3330 100644
--- a/web/src/__tests__/phase-9/bucket-d-regression.test.tsx
+++ b/web/src/__tests__/phase-9/bucket-d-regression.test.tsx
@@ -122,8 +122,8 @@ describe('ThreadView expanded body (lazy-loaded via useLazyEmailBody)', () => {
subject: 'Thread Test',
emails: [storeEmail, email2],
participants: ['a@b.com'],
- lastMessageDate: storeEmail.date,
- firstMessageDate: storeEmail.date,
+ lastMessageDate: storeEmail.date!,
+ firstMessageDate: storeEmail.date!,
messageCount: 2,
unreadCount: 0,
hasAttachments: false,
@@ -154,8 +154,8 @@ describe('ThreadView expanded body (lazy-loaded via useLazyEmailBody)', () => {
// storeEmail is the LAST (latest) item and will be rendered expanded
emails: [olderEmail, storeEmail],
participants: ['a@b.com'],
- lastMessageDate: storeEmail.date,
- firstMessageDate: storeEmail.date,
+ lastMessageDate: storeEmail.date!,
+ firstMessageDate: storeEmail.date!,
messageCount: 2,
unreadCount: 0,
hasAttachments: false,
diff --git a/web/src/__tests__/phase-9/domainMatch.test.ts b/web/src/__tests__/phase-9/domainMatch.test.ts
deleted file mode 100644
index e5f1874..0000000
--- a/web/src/__tests__/phase-9/domainMatch.test.ts
+++ /dev/null
@@ -1,34 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { isDomainMatch } from '../../services/domainMatch';
-
-describe('isDomainMatch', () => {
- it('matches exact domain', () => {
- expect(isDomainMatch('netflix.com', 'netflix.com')).toBe(true);
- });
-
- it('matches a subdomain of the service domain', () => {
- expect(isDomainMatch('mail.netflix.com', 'netflix.com')).toBe(true);
- expect(isDomainMatch('noreply.spotify.com', 'spotify.com')).toBe(true);
- });
-
- it('does NOT match an unrelated domain that merely contains the base word', () => {
- expect(isDomainMatch('maxwell.com', 'max.com')).toBe(false);
- expect(isDomainMatch('pineapple.com', 'apple.com')).toBe(false);
- expect(isDomainMatch('php.net', 'hp.com')).toBe(false);
- });
-
- it('does NOT match when service domain is a suffix without a dot boundary', () => {
- // 'notnetflix.com' ends with 'netflix.com' as a string but not on a label boundary
- expect(isDomainMatch('notnetflix.com', 'netflix.com')).toBe(false);
- });
-
- it('is case-insensitive and trims', () => {
- expect(isDomainMatch('Mail.Netflix.COM', 'netflix.com')).toBe(true);
- expect(isDomainMatch(' netflix.com ', ' NETFLIX.COM ')).toBe(true);
- });
-
- it('returns false for empty inputs', () => {
- expect(isDomainMatch('', 'netflix.com')).toBe(false);
- expect(isDomainMatch('netflix.com', '')).toBe(false);
- });
-});
diff --git a/web/src/__tests__/phase-9/newsletterDetector.classify.test.ts b/web/src/__tests__/phase-9/newsletterDetector.classify.test.ts
deleted file mode 100644
index 1d2e3e1..0000000
--- a/web/src/__tests__/phase-9/newsletterDetector.classify.test.ts
+++ /dev/null
@@ -1,101 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { newsletterDetector } from '../../services/newsletterDetector';
-
-// Behavioral coverage for the newsletter-vs-promotional split, unsubscribe-link
-// extraction, and groupBySender/frequency aggregation. The existing
-// newsletterDetector.domain test covers promotional-domain matching only.
-
-const MARKETING_FOOTER =
- 'Unsubscribe here. View in browser. Privacy policy. All rights reserved.';
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Hello',
- sender: 'someone@example.com',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'plain message',
- attachments: [],
- size: 100,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('NewsletterDetector.categorize', () => {
- it('classifies a discount blast as promotional', () => {
- const category = newsletterDetector.categorize(
- email({
- sender: 'deals@news.brand.com',
- subject: 'Save 50% off everything',
- body: MARKETING_FOOTER,
- })
- );
- expect(category).toBe('promotional');
- });
-
- it('classifies a weekly digest as a newsletter (not promotional)', () => {
- const category = newsletterDetector.categorize(
- email({
- sender: 'news@substack.com',
- subject: 'Weekly digest',
- body: MARKETING_FOOTER,
- })
- );
- expect(category).toBe('newsletter');
- });
-
- it('classifies an ordinary personal email as regular', () => {
- const category = newsletterDetector.categorize(
- email({
- sender: 'friend@gmail.com',
- subject: 'coffee?',
- body: 'free tomorrow morning?',
- })
- );
- expect(category).toBe('regular');
- });
-});
-
-describe('NewsletterDetector.extractUnsubscribeLink', () => {
- it('pulls an unsubscribe href out of an anchor tag', () => {
- const link = newsletterDetector.extractUnsubscribeLink(
- 'Unsubscribe'
- );
- expect(link).toBe('https://example.com/unsubscribe?id=9');
- });
-
- it('returns undefined when there is no unsubscribe link', () => {
- expect(newsletterDetector.extractUnsubscribeLink('just text
')).toBeUndefined();
- expect(newsletterDetector.extractUnsubscribeLink('')).toBeUndefined();
- });
-});
-
-describe('NewsletterDetector.groupBySender', () => {
- it('aggregates a sender across emails, deriving name, count and frequency', () => {
- const emails = [
- email({ id: 1, sender: 'news@substack.com', subject: 'Weekly digest', body: MARKETING_FOOTER, date: new Date('2024-01-15') }),
- email({ id: 2, sender: 'news@substack.com', subject: 'Weekly digest', body: MARKETING_FOOTER, date: new Date('2024-01-08') }),
- email({ id: 3, sender: 'news@substack.com', subject: 'Weekly digest', body: MARKETING_FOOTER, date: new Date('2024-01-01') }),
- ];
-
- const grouped = newsletterDetector.groupBySender(emails);
- const sub = grouped.get('news@substack.com');
-
- expect(sub).toBeDefined();
- expect(sub?.emailCount).toBe(3);
- expect(sub?.senderName).toBe('Substack');
- expect(sub?.frequency).toBe('weekly');
- expect(sub?.lastEmailDate).toEqual(new Date('2024-01-15'));
- });
-
- it('does not group ordinary non-marketing emails', () => {
- const grouped = newsletterDetector.groupBySender([
- email({ sender: 'friend@gmail.com', subject: 'hi', body: 'hello there' }),
- ]);
- expect(grouped.size).toBe(0);
- });
-});
diff --git a/web/src/__tests__/phase-9/newsletterDetector.domain.test.ts b/web/src/__tests__/phase-9/newsletterDetector.domain.test.ts
deleted file mode 100644
index f021189..0000000
--- a/web/src/__tests__/phase-9/newsletterDetector.domain.test.ts
+++ /dev/null
@@ -1,62 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { newsletterDetector } from '../../services/newsletterDetector';
-
-// isPromotionalSenderDomain is private; expose via a tiny cast to keep the test focused.
-const isPromoDomain = (domain: string): boolean =>
- (newsletterDetector as unknown as { isPromotionalSenderDomain(d: string): boolean })
- .isPromotionalSenderDomain(domain);
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Lunch tomorrow?',
- sender: 'friend@gmail.com',
- senderName: 'A Friend',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'Hey, are we still on for lunch?',
- attachments: [],
- size: 1024,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('NewsletterDetector promotional-domain matching', () => {
- it('does NOT treat gmail.com as a promotional domain (substring "mail." bug)', () => {
- expect(isPromoDomain('gmail.com')).toBe(false);
- });
-
- it('does NOT treat hotmail.com as a promotional domain', () => {
- expect(isPromoDomain('hotmail.com')).toBe(false);
- });
-
- it('does NOT treat an ordinary brand domain as promotional', () => {
- expect(isPromoDomain('mybrand.com')).toBe(false);
- });
-
- it('matches a marketing subdomain prefix (newsletter.brand.com)', () => {
- expect(isPromoDomain('newsletter.brand.com')).toBe(true);
- });
-
- it('matches a news. subdomain prefix', () => {
- expect(isPromoDomain('news.brand.com')).toBe(true);
- });
-
- it('matches a known full promotional domain (email.amazonses.com)', () => {
- expect(isPromoDomain('email.amazonses.com')).toBe(true);
- });
-
- it('does NOT classify a personal gmail email with a generic footer as promotional', () => {
- // 3 generic footer phrases push marketing matches to 3, but without the
- // bogus gmail "mail." domain boost the scores stay below the 40 threshold.
- const personal = email({
- body: 'Thanks!\n\nunsubscribe\nprivacy policy\nall rights reserved',
- });
- const result = newsletterDetector.detectNewsletter(personal);
- expect(result.isPromotional).toBe(false);
- expect(result.isNewsletter).toBe(false);
- });
-});
diff --git a/web/src/__tests__/phase-9/purchaseDetector.currency.test.ts b/web/src/__tests__/phase-9/purchaseDetector.currency.test.ts
deleted file mode 100644
index 1fa28bb..0000000
--- a/web/src/__tests__/phase-9/purchaseDetector.currency.test.ts
+++ /dev/null
@@ -1,59 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { purchaseDetector } from '../../services/purchaseDetector';
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Your order confirmation #12345',
- sender: 'orders@example.com',
- senderName: 'Example',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'Order total: $42.00',
- attachments: [],
- size: 1024,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('PurchaseDetector currency persistence', () => {
- it('createPurchaseFromEmail uses the supplied currency, not a hardcoded USD', () => {
- const purchase = purchaseDetector.createPurchaseFromEmail(
- email(),
- 'Acme',
- 49.99,
- undefined,
- 'EUR',
- );
- expect(purchase.currency).toBe('EUR');
- });
-
- it('defaults to USD when no currency is supplied', () => {
- const purchase = purchaseDetector.createPurchaseFromEmail(email(), 'Acme', 10);
- expect(purchase.currency).toBe('USD');
- });
-
- it('round-trips a detected EUR amount into the stored purchase currency', () => {
- const eur = email({
- sender: 'orders@shop.de',
- senderName: 'Shop',
- subject: 'Your order confirmation #99',
- body: 'Order total: €49,99\nThank you for your order.',
- });
- const result = purchaseDetector.detectPurchase(eur);
- expect(result.data?.currency).toBe('EUR');
-
- const purchase = purchaseDetector.createPurchaseFromEmail(
- eur,
- result.data!.merchant!,
- result.data!.amount!,
- result.data!.orderNumber,
- result.data!.currency,
- );
- expect(purchase.currency).toBe('EUR');
- expect(purchase.amount).toBe(49.99);
- });
-});
diff --git a/web/src/__tests__/phase-9/purchaseDetector.detect.test.ts b/web/src/__tests__/phase-9/purchaseDetector.detect.test.ts
deleted file mode 100644
index a84fbdf..0000000
--- a/web/src/__tests__/phase-9/purchaseDetector.detect.test.ts
+++ /dev/null
@@ -1,122 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { purchaseDetector } from '../../services/purchaseDetector';
-
-// Behavioral coverage for detectPurchase()'s confidence gating, anti-pattern
-// rejection, order-number extraction, and getPurchaseCategory(). The existing
-// purchaseDetector.{currency,locale,domain} tests cover amount parsing and
-// merchant matching only.
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Hello',
- sender: 'someone@example.com',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'plain message',
- attachments: [],
- size: 100,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('PurchaseDetector.detectPurchase', () => {
- it('detects a known-merchant order confirmation with an amount', () => {
- const result = purchaseDetector.detectPurchase(
- email({
- sender: 'orders@amazon.com',
- subject: 'Your order confirmation #100001',
- body: 'Order total: $42.00\nYour order has been confirmed.',
- })
- );
-
- expect(result.type).toBe('purchase');
- expect(result.confidence).toBeGreaterThanOrEqual(70);
- expect(result.data?.merchant).toBe('Amazon');
- expect(result.data?.amount).toBe(42);
- expect(result.data?.currency).toBe('USD');
- });
-
- it('extracts and validates an order number from the body', () => {
- const result = purchaseDetector.detectPurchase(
- email({
- sender: 'orders@amazon.com',
- subject: 'Your order confirmation',
- body: 'Order number: ABC12345\nOrder total: $42.00\nYour order has been confirmed.',
- })
- );
-
- expect(result.type).toBe('purchase');
- expect(result.data?.orderNumber).toBe('ABC12345');
- });
-
- it('rejects promotional emails that trip 3+ anti-patterns', () => {
- const result = purchaseDetector.detectPurchase(
- email({
- sender: 'deals@amazon.com',
- subject: 'Save $50 today!',
- body: 'Up to 70% off! Free shipping on all orders. Order total: $42.00',
- })
- );
-
- expect(result.type).toBe('none');
- });
-
- it('does NOT report a purchase when no amount can be parsed (confidence < 70)', () => {
- const result = purchaseDetector.detectPurchase(
- email({
- sender: 'orders@amazon.com',
- subject: 'Your order confirmation',
- body: 'Thanks for your order. Details are inside your account.',
- })
- );
-
- expect(result.type).toBe('none');
- });
-
- it('does NOT flag a plain personal email as a purchase', () => {
- const result = purchaseDetector.detectPurchase(
- email({
- sender: 'friend@gmail.com',
- subject: 're: dinner',
- body: 'see you at 7',
- })
- );
-
- expect(result.type).toBe('none');
- });
-});
-
-describe('PurchaseDetector.getPurchaseCategory', () => {
- it('maps known merchants to their category', () => {
- expect(purchaseDetector.getPurchaseCategory('Amazon')).toBe('ecommerce');
- expect(purchaseDetector.getPurchaseCategory('Netflix')).toBe('entertainment');
- expect(purchaseDetector.getPurchaseCategory('Uber')).toBe('transportation');
- expect(purchaseDetector.getPurchaseCategory('Delta Airlines')).toBe('travel');
- });
-
- it('falls back to "other" for unknown merchants', () => {
- expect(purchaseDetector.getPurchaseCategory('Some Local Shop')).toBe('other');
- });
-});
-
-describe('PurchaseDetector.createPurchaseFromEmail', () => {
- it('builds a purchase record and derives the category from the merchant', () => {
- const purchase = purchaseDetector.createPurchaseFromEmail(
- email({ id: 7, date: new Date('2024-03-03') }),
- 'Amazon',
- 42,
- 'ABC12345'
- );
-
- expect(purchase.emailId).toBe(7);
- expect(purchase.merchant).toBe('Amazon');
- expect(purchase.amount).toBe(42);
- expect(purchase.orderNumber).toBe('ABC12345');
- expect(purchase.category).toBe('ecommerce');
- expect(purchase.purchaseDate).toEqual(new Date('2024-03-03'));
- });
-});
diff --git a/web/src/__tests__/phase-9/purchaseDetector.domain.test.ts b/web/src/__tests__/phase-9/purchaseDetector.domain.test.ts
deleted file mode 100644
index 4157104..0000000
--- a/web/src/__tests__/phase-9/purchaseDetector.domain.test.ts
+++ /dev/null
@@ -1,39 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { purchaseDetector } from '../../services/purchaseDetector';
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Your order confirmation #12345',
- sender: 'orders@maxwell.com',
- senderName: 'Maxwell',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'Order total: $42.00',
- attachments: [],
- size: 1024,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('PurchaseDetector merchant domain matching (issue 5)', () => {
- it('does NOT attribute a maxwell.com purchase to a "max" merchant', () => {
- const result = purchaseDetector.detectPurchase(email());
- // detected merchant must be the formatted domain (Maxwell), never a known
- // merchant matched via the buggy substring path
- expect(result.data?.merchant).toBe('Maxwell');
- });
-
- it('does NOT attribute php.net purchase to HP via substring match', () => {
- // Bug: 'php.net'.includes('hp.') is true, so it wrongly matches hp.com -> 'HP'
- // After fix: php.net is not a subdomain of hp.com, so merchant = formatted domain
- const result = purchaseDetector.detectPurchase(
- email({ sender: 'billing@php.net', senderName: 'PHP' }),
- );
- // If matched via bug, merchant would be 'HP'. After fix, merchant = 'PHP' (formatted domain).
- expect(result.data?.merchant).not.toBe('HP');
- });
-});
diff --git a/web/src/__tests__/phase-9/purchaseDetector.locale.test.ts b/web/src/__tests__/phase-9/purchaseDetector.locale.test.ts
deleted file mode 100644
index 0d7fe52..0000000
--- a/web/src/__tests__/phase-9/purchaseDetector.locale.test.ts
+++ /dev/null
@@ -1,36 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { purchaseDetector } from '../../services/purchaseDetector';
-
-// parseAmount is private; expose via a tiny cast to keep the test focused.
-const parse = (s: string, currency: string): number =>
- (purchaseDetector as unknown as { parseAmount(s: string, c: string): number }).parseAmount(s, currency);
-
-describe('PurchaseDetector.parseAmount locale handling (issue 11)', () => {
- it('EUR thousands with dot, no cents: 1.234 -> 1234', () => {
- expect(parse('1.234', 'EUR')).toBe(1234);
- });
-
- it('EUR with dot thousands and comma decimals: 1.234,56 -> 1234.56', () => {
- expect(parse('1.234,56', 'EUR')).toBe(1234.56);
- });
-
- it('EUR comma decimals only: 1,23 -> 1.23 (cents NOT dropped)', () => {
- expect(parse('1,23', 'EUR')).toBe(1.23);
- });
-
- it('EUR space thousands: 1 234,56 -> 1234.56', () => {
- expect(parse('1 234,56', 'EUR')).toBe(1234.56);
- });
-
- it('USD dot decimals with comma thousands: 1,234.56 -> 1234.56', () => {
- expect(parse('1,234.56', 'USD')).toBe(1234.56);
- });
-
- it('USD plain decimals: 42.00 -> 42', () => {
- expect(parse('42.00', 'USD')).toBe(42);
- });
-
- it('CHF apostrophe thousands with dot decimals: 1\'234.50 -> 1234.5', () => {
- expect(parse("1'234.50", 'CHF')).toBe(1234.5);
- });
-});
diff --git a/web/src/__tests__/phase-9/snippet-render.test.tsx b/web/src/__tests__/phase-9/snippet-render.test.tsx
index 15b901d..f5f525d 100644
--- a/web/src/__tests__/phase-9/snippet-render.test.tsx
+++ b/web/src/__tests__/phase-9/snippet-render.test.tsx
@@ -45,13 +45,13 @@ describe('EmailCard snippet rendering', () => {
const oneThread: EmailThread = {
id: 't1', subject: 'T', emails: [{ ...base, snippet: 'THREAD SNIPPET' }],
- participants: ['a@b.com'], lastMessageDate: base.date, firstMessageDate: base.date,
+ participants: ['a@b.com'], lastMessageDate: base.date!, firstMessageDate: base.date!,
messageCount: 1, unreadCount: 1, hasAttachments: false, isStarred: false,
};
const oneThreadNoSnippet: EmailThread = {
id: 't2', subject: 'T2', emails: [{ ...base, snippet: undefined }],
- participants: ['a@b.com'], lastMessageDate: base.date, firstMessageDate: base.date,
+ participants: ['a@b.com'], lastMessageDate: base.date!, firstMessageDate: base.date!,
messageCount: 1, unreadCount: 1, hasAttachments: false, isStarred: false,
};
@@ -60,7 +60,7 @@ const multiThread: EmailThread = {
{ ...base, id: 2, snippet: 'OLDER SNIPPET', body: 'older body' },
{ ...base, id: 3, snippet: 'LATEST SNIPPET', body: 'latest body' },
],
- participants: ['a@b.com'], lastMessageDate: base.date, firstMessageDate: base.date,
+ participants: ['a@b.com'], lastMessageDate: base.date!, firstMessageDate: base.date!,
messageCount: 2, unreadCount: 0, hasAttachments: false, isStarred: false,
};
diff --git a/web/src/__tests__/phase-9/subscriptionDetector.billing.test.ts b/web/src/__tests__/phase-9/subscriptionDetector.billing.test.ts
deleted file mode 100644
index 3920393..0000000
--- a/web/src/__tests__/phase-9/subscriptionDetector.billing.test.ts
+++ /dev/null
@@ -1,73 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { subscriptionDetector } from '../../services/subscriptionDetector';
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Your subscription renewal',
- sender: 'billing@netflix.com',
- senderName: 'Netflix',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: '',
- attachments: [],
- size: 1024,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('SubscriptionDetector billing context (issue 10)', () => {
- it('picks the billing amount, not an unrelated footer price', () => {
- const result = subscriptionDetector.detectSubscription(
- email({
- body: 'Your subscription renews. You will be charged $15.49 per month. Free shipping on orders over $0.00.',
- }),
- );
- expect(result.amount).toBe(15.49);
- });
-
- it('returns no amount when no billing-context phrase surrounds a price', () => {
- const result = subscriptionDetector.detectSubscription(
- email({
- body: 'Your subscription renewal is confirmed. Check out our store: hoodies from $0.00 today!',
- }),
- );
- expect(result.amount).toBeUndefined();
- });
-
- it('detects yearly only when the billing context says yearly', () => {
- const result = subscriptionDetector.detectSubscription(
- email({ body: 'Your subscription renews. You will be billed $99.00 per year.' }),
- );
- expect(result.frequency).toBe('yearly');
- });
-
- it('does NOT pick yearly from "billed monthly, save yearly"', () => {
- const result = subscriptionDetector.detectSubscription(
- email({ body: 'Recurring charge: $9.99 billed monthly. Switch and save 20% yearly!' }),
- );
- expect(result.frequency).toBe('monthly');
- });
-
- it('returns undefined frequency when there is no billing signal at all', () => {
- const result = subscriptionDetector.detectSubscription(
- email({ body: 'Your subscription renewal is confirmed. Enjoy the show.' }),
- );
- expect(result.frequency).toBeUndefined();
- });
-
- it('picks the billing-anchored price when a non-billing price appears first', () => {
- // The old first-match extractAmount would return 5.00 (first $ in body).
- // The fixed billing-anchored extractAmount skips $5.00 (no billing keyword
- // in its ±40-char window) and returns 12.99 (adjacent to "charged").
- const result = subscriptionDetector.detectSubscription(
- email({
- body: 'Limited-time offer: $5.00 off your next order! Your subscription will be charged $12.99 per month starting today.',
- }),
- );
- expect(result.amount).toBe(12.99);
- });
-});
diff --git a/web/src/__tests__/phase-9/subscriptionDetector.detect.test.ts b/web/src/__tests__/phase-9/subscriptionDetector.detect.test.ts
deleted file mode 100644
index 2584715..0000000
--- a/web/src/__tests__/phase-9/subscriptionDetector.detect.test.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { subscriptionDetector } from '../../services/subscriptionDetector';
-
-// Behavioral coverage for detectSubscription()'s end-to-end classification and
-// getKnownServices(). The existing subscriptionDetector.{billing,domain} tests
-// cover amount/frequency windowing and substring matching only.
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Hello',
- sender: 'someone@example.com',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'plain message',
- attachments: [],
- size: 100,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('SubscriptionDetector.detectSubscription', () => {
- it('detects a known service renewal with amount, frequency and category', () => {
- const result = subscriptionDetector.detectSubscription(
- email({
- sender: 'billing@spotify.com',
- subject: 'Your subscription renewal',
- body: 'Your subscription will auto-renew. You will be charged $9.99 per month.',
- })
- );
-
- expect(result.isSubscription).toBe(true);
- expect(result.serviceName).toBe('Spotify');
- expect(result.category).toBe('streaming');
- expect(result.amount).toBe(9.99);
- expect(result.currency).toBe('USD');
- expect(result.frequency).toBe('monthly');
- });
-
- it('detects a subscription from an unknown sender via body patterns and names it from the sender', () => {
- const result = subscriptionDetector.detectSubscription(
- email({
- sender: 'noreply@someservice.io',
- senderName: 'Some Service',
- subject: 'Receipt',
- body: 'Billing period: monthly\nYour next billing date: 2024-02-01\nRecurring charge: $5.00',
- })
- );
-
- expect(result.isSubscription).toBe(true);
- expect(result.serviceName).toBe('Some Service');
- });
-
- it('does NOT flag a regular personal email as a subscription', () => {
- const result = subscriptionDetector.detectSubscription(
- email({
- sender: 'friend@gmail.com',
- subject: 'hi',
- body: 'how are you doing today?',
- })
- );
-
- expect(result.isSubscription).toBe(false);
- expect(result.category).toBe('other');
- });
-});
-
-describe('SubscriptionDetector.getKnownServices', () => {
- it('returns the catalogue of known subscription services with domain + category', () => {
- const services = subscriptionDetector.getKnownServices();
- const netflix = services.find(s => s.domain === 'netflix.com');
-
- expect(services.length).toBeGreaterThan(0);
- expect(netflix).toEqual({ domain: 'netflix.com', name: 'Netflix', category: 'streaming' });
- });
-});
diff --git a/web/src/__tests__/phase-9/subscriptionDetector.domain.test.ts b/web/src/__tests__/phase-9/subscriptionDetector.domain.test.ts
deleted file mode 100644
index 0d90d80..0000000
--- a/web/src/__tests__/phase-9/subscriptionDetector.domain.test.ts
+++ /dev/null
@@ -1,35 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import type { Email } from '../../types';
-import { subscriptionDetector } from '../../services/subscriptionDetector';
-
-const email = (overrides: Partial = {}): Email => ({
- id: 1,
- subject: 'Your subscription renewal',
- sender: 'billing@maxwell.com',
- senderName: 'Maxwell',
- recipients: ['me@example.com'],
- date: new Date('2024-01-01'),
- body: 'Your subscription renews. Recurring charge: $9.99 per month.',
- attachments: [],
- size: 1024,
- isRead: true,
- isStarred: false,
- folderId: 'inbox',
- emailType: 'regular',
- ...overrides,
-});
-
-describe('SubscriptionDetector domain matching (issue 5)', () => {
- it('does NOT attribute maxwell.com to "Max" via base-word substring', () => {
- const result = subscriptionDetector.detectSubscription(email());
- // 'max.com' -> { name: 'Max' } exists in knownSubscriptions; maxwell.com must NOT match it
- expect(result.serviceName).not.toBe('Max');
- });
-
- it('still matches a real subdomain of a known service', () => {
- const result = subscriptionDetector.detectSubscription(
- email({ sender: 'no-reply@mail.netflix.com', senderName: '' }),
- );
- expect(result.serviceName).toBe('Netflix');
- });
-});
diff --git a/web/src/components/AttachmentGallery.tsx b/web/src/components/AttachmentGallery.tsx
index 294d6ab..9686927 100644
--- a/web/src/components/AttachmentGallery.tsx
+++ b/web/src/components/AttachmentGallery.tsx
@@ -67,7 +67,7 @@ export function AttachmentGallery({ emails }: AttachmentGalleryProps) {
// Sort by date (newest first)
filteredAttachments.sort(
- (a, b) => new Date(b.email.date).getTime() - new Date(a.email.date).getTime()
+ (a, b) => (b.email.date?.getTime() ?? -Infinity) - (a.email.date?.getTime() ?? -Infinity)
);
// Stable, content-based key of the image emails currently visible. Using a
diff --git a/web/src/components/ContactModal.tsx b/web/src/components/ContactModal.tsx
index aa9e208..267a4b7 100644
--- a/web/src/components/ContactModal.tsx
+++ b/web/src/components/ContactModal.tsx
@@ -85,7 +85,7 @@ export function ContactModal({ contact, isOpen, onClose, onSave }: ContactModalP
{contact.email}
- {contact.emailCount} emails • Last activity: {contact.lastEmailDate.toLocaleDateString()}
+ {contact.emailCount} emails • Last activity: {contact.lastEmailDate ? contact.lastEmailDate.toLocaleDateString() : 'Unknown date'}
diff --git a/web/src/components/EmailCard.tsx b/web/src/components/EmailCard.tsx
index f9acac9..1f08b2c 100644
--- a/web/src/components/EmailCard.tsx
+++ b/web/src/components/EmailCard.tsx
@@ -125,7 +125,7 @@ export const EmailCard = memo(function EmailCard({ email, onClick }: EmailCardPr
- {format(email.date, 'MMM d, yyyy')}
+ {email.date ? format(email.date, 'MMM d, yyyy') : 'Unknown date'}
diff --git a/web/src/components/ThreadView.tsx b/web/src/components/ThreadView.tsx
index 576b5e1..4417028 100644
--- a/web/src/components/ThreadView.tsx
+++ b/web/src/components/ThreadView.tsx
@@ -174,7 +174,7 @@ function SingleEmailView({ email, onClick, onToggleStar }: SingleEmailViewProps)
- {format(new Date(email.date), 'MMM d')}
+ {email.date ? format(email.date, 'MMM d') : 'Unknown date'}
- Signed up: {format(account.signupDate, 'MMM d, yyyy')}
+ Signed up: {account.signupDate ? format(account.signupDate, 'MMM d, yyyy') : 'Unknown date'}
{account.emailCount} email{account.emailCount !== 1 ? 's' : ''} from this service
diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx
index 04fb7be..c8a17fd 100644
--- a/web/src/pages/AnalyticsPage.tsx
+++ b/web/src/pages/AnalyticsPage.tsx
@@ -15,6 +15,7 @@ export function AnalyticsPage() {
const availableYears = useMemo(() => {
const years = new Set();
emails.forEach((email) => {
+ if (!email.date) return; // undated emails contribute no year
years.add(new Date(email.date).getFullYear());
});
return Array.from(years).sort((a, b) => b - a); // Most recent first
@@ -23,7 +24,7 @@ export function AnalyticsPage() {
// Filter data by selected year
const filteredEmails = useMemo(() => {
if (selectedYear === 'all') return emails;
- return emails.filter(e => new Date(e.date).getFullYear() === selectedYear);
+ return emails.filter(e => e.date != null && new Date(e.date).getFullYear() === selectedYear);
}, [emails, selectedYear]);
const filteredPurchases = useMemo(() => {
@@ -36,13 +37,16 @@ export function AnalyticsPage() {
const now = new Date();
const thirtyDaysAgo = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
- const recentEmails = filteredEmails.filter(e => new Date(e.date) >= thirtyDaysAgo);
+ const recentEmails = filteredEmails.filter(e => e.date != null && new Date(e.date) >= thirtyDaysAgo);
const uniqueSenders = new Set(filteredEmails.map(e => e.sender)).size;
// Calculate date range for avg emails/day (inclusive of both start and end days)
let dateRange = 1;
if (filteredEmails.length > 0) {
- const sortedDates = filteredEmails.map(e => new Date(e.date).getTime()).sort((a, b) => a - b);
+ const sortedDates = filteredEmails
+ .filter(e => e.date != null)
+ .map(e => new Date(e.date as Date).getTime())
+ .sort((a, b) => a - b);
const oldestDate = sortedDates[0];
const newestDate = sortedDates[sortedDates.length - 1];
// Add 1 for inclusive counting: emails from Jan 1 to Jan 2 span 2 calendar days
@@ -62,6 +66,7 @@ export function AnalyticsPage() {
const monthlyData: Record = {};
filteredEmails.forEach((email) => {
+ if (!email.date) return; // undated emails excluded from volume aggregation
const date = new Date(email.date);
const key = `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}`;
monthlyData[key] = (monthlyData[key] || 0) + 1;
@@ -127,6 +132,7 @@ export function AnalyticsPage() {
const hourlyData: number[][] = Array(7).fill(null).map(() => Array(24).fill(0));
filteredEmails.forEach((email) => {
+ if (!email.date) return; // undated emails excluded from activity heatmap
const date = new Date(email.date);
const day = date.getDay();
const hour = date.getHours();
diff --git a/web/src/pages/AttachmentsPage.tsx b/web/src/pages/AttachmentsPage.tsx
index 2ad69fb..acbbb99 100644
--- a/web/src/pages/AttachmentsPage.tsx
+++ b/web/src/pages/AttachmentsPage.tsx
@@ -108,7 +108,7 @@ export function AttachmentsPage() {
attachments.push({ ...attachment, email });
}
}
- return attachments.sort((a, b) => b.email.date.getTime() - a.email.date.getTime());
+ return attachments.sort((a, b) => (b.email.date?.getTime() ?? -Infinity) - (a.email.date?.getTime() ?? -Infinity));
}, [emails]);
// Filter attachments
@@ -478,7 +478,7 @@ function ListAttachmentRow({ att, isSelected, bodyCache, fetchData, onSelect, on
{att.filename}
- From: {att.email.sender} • {format(att.email.date, 'MMM d, yyyy')}
+ From: {att.email.sender} • {att.email.date ? format(att.email.date, 'MMM d, yyyy') : 'Unknown date'}
diff --git a/web/src/pages/ContactsPage.tsx b/web/src/pages/ContactsPage.tsx
index eeaab98..1304223 100644
--- a/web/src/pages/ContactsPage.tsx
+++ b/web/src/pages/ContactsPage.tsx
@@ -52,7 +52,7 @@ export function ContactsPage() {
case 'emailCount':
return b.emailCount - a.emailCount;
case 'lastActivity':
- return new Date(b.lastEmailDate).getTime() - new Date(a.lastEmailDate).getTime();
+ return (b.lastEmailDate?.getTime() ?? -Infinity) - (a.lastEmailDate?.getTime() ?? -Infinity);
default:
return 0;
}
@@ -221,7 +221,7 @@ export function ContactsPage() {
•
- {format(contact.lastEmailDate, 'MMM d, yyyy')}
+ {contact.lastEmailDate ? format(contact.lastEmailDate, 'MMM d, yyyy') : 'Unknown date'}
diff --git a/web/src/pages/EmailDetailPage.tsx b/web/src/pages/EmailDetailPage.tsx
index cf56751..7009d90 100644
--- a/web/src/pages/EmailDetailPage.tsx
+++ b/web/src/pages/EmailDetailPage.tsx
@@ -238,7 +238,7 @@ export function EmailDetailPage() {
)}
- {format(email.date, 'EEEE, MMMM d, yyyy \'at\' h:mm a')}
+ {email.date ? format(email.date, 'EEEE, MMMM d, yyyy \'at\' h:mm a') : 'Unknown date'}
{email.folderId !== SYSTEM_FOLDERS.INBOX && (
- Last: {format(nl.lastEmailDate, 'MMM d')}
+ Last: {nl.lastEmailDate ? format(nl.lastEmailDate, 'MMM d') : 'Unknown date'}
{nl.unsubscribeLink && (
diff --git a/web/src/pages/SenderEmailsPage.tsx b/web/src/pages/SenderEmailsPage.tsx
index 4e1e9f0..a5ae010 100644
--- a/web/src/pages/SenderEmailsPage.tsx
+++ b/web/src/pages/SenderEmailsPage.tsx
@@ -117,7 +117,7 @@ export function SenderEmailsPage() {
}
// Sort by date descending
- result.sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime());
+ result.sort((a, b) => (b.date?.getTime() ?? -Infinity) - (a.date?.getTime() ?? -Infinity));
return result;
}, [senderEmails, readFilter, folderFilter, typeFilter, searchQuery, searchTextMap]);
@@ -368,7 +368,7 @@ export function SenderEmailsPage() {
)}
- {format(email.date, 'MMM d, yyyy')}
+ {email.date ? format(email.date, 'MMM d, yyyy') : 'Unknown date'}
diff --git a/web/src/pages/SendersPage.tsx b/web/src/pages/SendersPage.tsx
index b3fd2eb..00220c5 100644
--- a/web/src/pages/SendersPage.tsx
+++ b/web/src/pages/SendersPage.tsx
@@ -121,7 +121,8 @@ export function SendersPage() {
const existing = groups.get(key);
if (existing) {
existing.emails.push(email);
- if (new Date(email.date) > existing.latestDate) {
+ // Only advance latestDate from emails that have a real date.
+ if (email.date && new Date(email.date) > existing.latestDate) {
existing.latestDate = new Date(email.date);
}
} else {
@@ -129,7 +130,8 @@ export function SendersPage() {
key,
displayName,
emails: [email],
- latestDate: new Date(email.date),
+ // Undated first email seeds the epoch (sorts last); real dates override later.
+ latestDate: email.date ? new Date(email.date) : new Date(0),
});
}
});
@@ -358,7 +360,7 @@ export function SendersPage() {
- {format(email.date, 'MMM d, yyyy')}
+ {email.date ? format(email.date, 'MMM d, yyyy') : 'Unknown date'}
diff --git a/web/src/pages/SubscriptionsPage.tsx b/web/src/pages/SubscriptionsPage.tsx
index 2aa6fc6..7a55a9a 100644
--- a/web/src/pages/SubscriptionsPage.tsx
+++ b/web/src/pages/SubscriptionsPage.tsx
@@ -218,7 +218,7 @@ export function SubscriptionsPage() {
- Last: {format(sub.lastRenewalDate, 'MMM d, yyyy')}
+ Last: {sub.lastRenewalDate ? format(sub.lastRenewalDate, 'MMM d, yyyy') : 'Unknown date'}
@@ -302,7 +302,7 @@ export function SubscriptionsPage() {
Last Payment
- {format(selectedSubscription.lastRenewalDate, 'MMM d, yyyy')}
+ {selectedSubscription.lastRenewalDate ? format(selectedSubscription.lastRenewalDate, 'MMM d, yyyy') : 'Unknown date'}
@@ -336,7 +336,7 @@ export function SubscriptionsPage() {
{email.subject || '(No Subject)'}
- {format(email.date, 'MMM d, yyyy')} • {email.sender}
+ {email.date ? format(email.date, 'MMM d, yyyy') : 'Unknown date'} • {email.sender}
diff --git a/web/src/services/__tests__/library-smoke.test.ts b/web/src/services/__tests__/library-smoke.test.ts
new file mode 100644
index 0000000..a37240a
--- /dev/null
+++ b/web/src/services/__tests__/library-smoke.test.ts
@@ -0,0 +1,20 @@
+import { describe, it, expect } from 'vitest';
+import { MBOXParser, AccountDetector } from '@technical-1/email-archive-parser';
+
+describe('library smoke', () => {
+ it('parses a trivial MBOX buffer and runs a detector', async () => {
+ const mbox = [
+ 'From a@x.com Mon Jan 1 00:00:00 2024',
+ 'From: Welcome ',
+ 'Subject: Welcome to Netflix!',
+ 'Date: Mon, 01 Jan 2024 00:00:00 +0000',
+ '',
+ 'Your account has been created.',
+ '',
+ ].join('\n');
+ const result = await new MBOXParser().parse(Buffer.from(mbox, 'utf-8'));
+ expect(result.emails.length).toBe(1);
+ const det = new AccountDetector().detect(result.emails[0]);
+ expect(det.type).toBe('account');
+ });
+});
diff --git a/web/src/services/accountDetector.ts b/web/src/services/accountDetector.ts
deleted file mode 100644
index b3b157d..0000000
--- a/web/src/services/accountDetector.ts
+++ /dev/null
@@ -1,343 +0,0 @@
-import type { Email, Account, DetectionResult } from '../types';
-import { stripHtml, extractDomain } from '../utils/emailUtils';
-import { isDomainMatch } from './domainMatch';
-
-class AccountDetector {
- // Strong subject line patterns for account signups (must be primary purpose of email)
- private readonly strongSubjectPatterns = [
- /^welcome to/i,
- /^verify your.*(?:email|account)/i,
- /^confirm your.*(?:email|account|registration)/i,
- /^activate your.*account/i,
- /^your.*account.*(?:has been |is )created/i,
- /^(?:complete|finish) your registration/i,
- /^thanks for (?:signing up|registering|joining)/i,
- /^you(?:'re| are) (?:in|registered)/i,
- /email verification/i,
- /account verification/i,
- ];
-
- // Strong body patterns (high confidence indicators)
- private readonly strongBodyPatterns = [
- /click.*(?:here|below|button).*(?:to )?verify your email/i,
- /confirm your email address/i,
- /complete your registration/i,
- /your account has been (?:successfully )?created/i,
- /welcome to .{2,50}[!.]/i,
- /thanks for (?:signing up|registering|creating an account)/i,
- /verification code[:\s]+\d{4,8}/i,
- /your verification code is/i,
- ];
-
- // Known service domains for reliable detection
- private readonly knownServices: Record = {
- 'netflix.com': { name: 'Netflix', type: 'streaming' },
- 'spotify.com': { name: 'Spotify', type: 'streaming' },
- 'hulu.com': { name: 'Hulu', type: 'streaming' },
- 'disneyplus.com': { name: 'Disney+', type: 'streaming' },
- 'hbomax.com': { name: 'HBO Max', type: 'streaming' },
- 'max.com': { name: 'Max', type: 'streaming' },
- 'peacocktv.com': { name: 'Peacock', type: 'streaming' },
- 'paramountplus.com': { name: 'Paramount+', type: 'streaming' },
- 'primevideo.com': { name: 'Prime Video', type: 'streaming' },
- 'crunchyroll.com': { name: 'Crunchyroll', type: 'streaming' },
- 'youtube.com': { name: 'YouTube', type: 'streaming' },
- 'twitch.tv': { name: 'Twitch', type: 'streaming' },
- 'amazon.com': { name: 'Amazon', type: 'ecommerce' },
- 'ebay.com': { name: 'eBay', type: 'ecommerce' },
- 'etsy.com': { name: 'Etsy', type: 'ecommerce' },
- 'shopify.com': { name: 'Shopify', type: 'ecommerce' },
- 'walmart.com': { name: 'Walmart', type: 'ecommerce' },
- 'target.com': { name: 'Target', type: 'ecommerce' },
- 'bestbuy.com': { name: 'Best Buy', type: 'ecommerce' },
- 'aliexpress.com': { name: 'AliExpress', type: 'ecommerce' },
- 'wish.com': { name: 'Wish', type: 'ecommerce' },
- 'facebook.com': { name: 'Facebook', type: 'social' },
- 'meta.com': { name: 'Meta', type: 'social' },
- 'instagram.com': { name: 'Instagram', type: 'social' },
- 'twitter.com': { name: 'Twitter', type: 'social' },
- 'x.com': { name: 'X', type: 'social' },
- 'linkedin.com': { name: 'LinkedIn', type: 'social' },
- 'tiktok.com': { name: 'TikTok', type: 'social' },
- 'reddit.com': { name: 'Reddit', type: 'social' },
- 'pinterest.com': { name: 'Pinterest', type: 'social' },
- 'snapchat.com': { name: 'Snapchat', type: 'social' },
- 'threads.net': { name: 'Threads', type: 'social' },
- 'github.com': { name: 'GitHub', type: 'development' },
- 'gitlab.com': { name: 'GitLab', type: 'development' },
- 'bitbucket.org': { name: 'Bitbucket', type: 'development' },
- 'atlassian.com': { name: 'Atlassian', type: 'development' },
- 'jetbrains.com': { name: 'JetBrains', type: 'development' },
- 'stackoverflow.com': { name: 'Stack Overflow', type: 'development' },
- 'heroku.com': { name: 'Heroku', type: 'development' },
- 'vercel.com': { name: 'Vercel', type: 'development' },
- 'netlify.com': { name: 'Netlify', type: 'development' },
- 'digitalocean.com': { name: 'DigitalOcean', type: 'development' },
- 'aws.amazon.com': { name: 'AWS', type: 'development' },
- 'cloud.google.com': { name: 'Google Cloud', type: 'development' },
- 'azure.microsoft.com': { name: 'Azure', type: 'development' },
- 'slack.com': { name: 'Slack', type: 'communication' },
- 'zoom.us': { name: 'Zoom', type: 'communication' },
- 'discord.com': { name: 'Discord', type: 'communication' },
- 'teams.microsoft.com': { name: 'Microsoft Teams', type: 'communication' },
- 'telegram.org': { name: 'Telegram', type: 'communication' },
- 'whatsapp.com': { name: 'WhatsApp', type: 'communication' },
- 'signal.org': { name: 'Signal', type: 'communication' },
- 'paypal.com': { name: 'PayPal', type: 'banking' },
- 'venmo.com': { name: 'Venmo', type: 'banking' },
- 'stripe.com': { name: 'Stripe', type: 'banking' },
- 'chase.com': { name: 'Chase', type: 'banking' },
- 'bankofamerica.com': { name: 'Bank of America', type: 'banking' },
- 'wellsfargo.com': { name: 'Wells Fargo', type: 'banking' },
- 'capitalone.com': { name: 'Capital One', type: 'banking' },
- 'citi.com': { name: 'Citibank', type: 'banking' },
- 'schwab.com': { name: 'Charles Schwab', type: 'banking' },
- 'fidelity.com': { name: 'Fidelity', type: 'banking' },
- 'robinhood.com': { name: 'Robinhood', type: 'banking' },
- 'coinbase.com': { name: 'Coinbase', type: 'banking' },
- 'dropbox.com': { name: 'Dropbox', type: 'other' },
- 'box.com': { name: 'Box', type: 'other' },
- 'notion.so': { name: 'Notion', type: 'other' },
- 'figma.com': { name: 'Figma', type: 'other' },
- 'canva.com': { name: 'Canva', type: 'other' },
- 'adobe.com': { name: 'Adobe', type: 'other' },
- 'microsoft.com': { name: 'Microsoft', type: 'other' },
- 'google.com': { name: 'Google', type: 'other' },
- 'apple.com': { name: 'Apple', type: 'other' },
- 'icloud.com': { name: 'iCloud', type: 'other' },
- 'uber.com': { name: 'Uber', type: 'other' },
- 'lyft.com': { name: 'Lyft', type: 'other' },
- 'doordash.com': { name: 'DoorDash', type: 'other' },
- 'grubhub.com': { name: 'Grubhub', type: 'other' },
- 'instacart.com': { name: 'Instacart', type: 'other' },
- 'airbnb.com': { name: 'Airbnb', type: 'other' },
- // Additional streaming services
- 'appletv.apple.com': { name: 'Apple TV+', type: 'streaming' },
- 'funimation.com': { name: 'Funimation', type: 'streaming' },
- 'showtime.com': { name: 'Showtime', type: 'streaming' },
- 'starz.com': { name: 'Starz', type: 'streaming' },
- 'discovery.com': { name: 'Discovery+', type: 'streaming' },
- 'espn.com': { name: 'ESPN+', type: 'streaming' },
- 'audible.com': { name: 'Audible', type: 'streaming' },
- 'pandora.com': { name: 'Pandora', type: 'streaming' },
- 'deezer.com': { name: 'Deezer', type: 'streaming' },
- 'tidal.com': { name: 'Tidal', type: 'streaming' },
- // Additional ecommerce
- 'newegg.com': { name: 'Newegg', type: 'ecommerce' },
- 'wayfair.com': { name: 'Wayfair', type: 'ecommerce' },
- 'zappos.com': { name: 'Zappos', type: 'ecommerce' },
- 'macys.com': { name: "Macy's", type: 'ecommerce' },
- 'nordstrom.com': { name: 'Nordstrom', type: 'ecommerce' },
- 'costco.com': { name: 'Costco', type: 'ecommerce' },
- 'homedepot.com': { name: 'Home Depot', type: 'ecommerce' },
- 'lowes.com': { name: "Lowe's", type: 'ecommerce' },
- 'sephora.com': { name: 'Sephora', type: 'ecommerce' },
- 'ulta.com': { name: 'Ulta', type: 'ecommerce' },
- 'chewy.com': { name: 'Chewy', type: 'ecommerce' },
- // Additional social
- 'tumblr.com': { name: 'Tumblr', type: 'social' },
- 'mastodon.social': { name: 'Mastodon', type: 'social' },
- 'bluesky.social': { name: 'Bluesky', type: 'social' },
- 'nextdoor.com': { name: 'Nextdoor', type: 'social' },
- 'quora.com': { name: 'Quora', type: 'social' },
- // Additional development/productivity
- 'trello.com': { name: 'Trello', type: 'development' },
- 'asana.com': { name: 'Asana', type: 'development' },
- 'monday.com': { name: 'Monday.com', type: 'development' },
- 'jira.com': { name: 'Jira', type: 'development' },
- 'confluence.com': { name: 'Confluence', type: 'development' },
- 'npm.com': { name: 'npm', type: 'development' },
- 'docker.com': { name: 'Docker', type: 'development' },
- 'cloudflare.com': { name: 'Cloudflare', type: 'development' },
- 'firebase.google.com': { name: 'Firebase', type: 'development' },
- 'render.com': { name: 'Render', type: 'development' },
- 'railway.app': { name: 'Railway', type: 'development' },
- 'supabase.com': { name: 'Supabase', type: 'development' },
- 'planetscale.com': { name: 'PlanetScale', type: 'development' },
- // Additional communication
- 'webex.com': { name: 'Webex', type: 'communication' },
- 'gotomeeting.com': { name: 'GoToMeeting', type: 'communication' },
- 'line.me': { name: 'LINE', type: 'communication' },
- 'viber.com': { name: 'Viber', type: 'communication' },
- // Additional banking/finance
- 'americanexpress.com': { name: 'American Express', type: 'banking' },
- 'discover.com': { name: 'Discover', type: 'banking' },
- 'usbank.com': { name: 'US Bank', type: 'banking' },
- 'pnc.com': { name: 'PNC', type: 'banking' },
- 'tdbank.com': { name: 'TD Bank', type: 'banking' },
- 'ally.com': { name: 'Ally Bank', type: 'banking' },
- 'marcus.com': { name: 'Marcus', type: 'banking' },
- 'sofi.com': { name: 'SoFi', type: 'banking' },
- 'chime.com': { name: 'Chime', type: 'banking' },
- 'cashapp.com': { name: 'Cash App', type: 'banking' },
- 'wealthfront.com': { name: 'Wealthfront', type: 'banking' },
- 'betterment.com': { name: 'Betterment', type: 'banking' },
- 'acorns.com': { name: 'Acorns', type: 'banking' },
- 'kraken.com': { name: 'Kraken', type: 'banking' },
- 'binance.com': { name: 'Binance', type: 'banking' },
- // Additional other services
- 'evernote.com': { name: 'Evernote', type: 'other' },
- 'todoist.com': { name: 'Todoist', type: 'other' },
- 'grammarly.com': { name: 'Grammarly', type: 'other' },
- '1password.com': { name: '1Password', type: 'other' },
- 'lastpass.com': { name: 'LastPass', type: 'other' },
- 'bitwarden.com': { name: 'Bitwarden', type: 'other' },
- 'dashlane.com': { name: 'Dashlane', type: 'other' },
- 'nordvpn.com': { name: 'NordVPN', type: 'other' },
- 'expressvpn.com': { name: 'ExpressVPN', type: 'other' },
- 'surfshark.com': { name: 'Surfshark', type: 'other' },
- 'duolingo.com': { name: 'Duolingo', type: 'other' },
- 'coursera.org': { name: 'Coursera', type: 'other' },
- 'udemy.com': { name: 'Udemy', type: 'other' },
- 'skillshare.com': { name: 'Skillshare', type: 'other' },
- 'masterclass.com': { name: 'MasterClass', type: 'other' },
- 'calm.com': { name: 'Calm', type: 'other' },
- 'headspace.com': { name: 'Headspace', type: 'other' },
- 'strava.com': { name: 'Strava', type: 'other' },
- 'peloton.com': { name: 'Peloton', type: 'other' },
- 'myfitnesspal.com': { name: 'MyFitnessPal', type: 'other' },
- 'fitbit.com': { name: 'Fitbit', type: 'other' },
- };
-
- detectAccountSignup(email: Email): DetectionResult {
- const subject = email.subject || '';
- const body = stripHtml(email.body || '');
- const sender = email.sender || '';
-
- let confidence = 0;
- let detectedService = '';
- let serviceType: Account['serviceType'] = 'other';
-
- // Check if sender is from a known service
- const domain = extractDomain(sender);
- const serviceInfo = this.findKnownService(domain);
-
- if (serviceInfo) {
- detectedService = serviceInfo.name;
- serviceType = serviceInfo.type;
- confidence += 40; // Known service gives base confidence
- }
-
- // Check strong subject patterns
- for (const pattern of this.strongSubjectPatterns) {
- if (pattern.test(subject)) {
- confidence += 40;
- break;
- }
- }
-
- // Check strong body patterns
- for (const pattern of this.strongBodyPatterns) {
- if (pattern.test(body)) {
- confidence += 30;
- break;
- }
- }
-
- // If we have strong patterns but no known service, try to extract service name
- if (confidence >= 40 && !detectedService) {
- const extracted = this.extractServiceName(subject);
- if (extracted) {
- detectedService = extracted;
- confidence += 10;
- } else {
- // Use domain as fallback service name (but only if patterns matched)
- detectedService = this.formatDomainAsServiceName(domain);
- }
- }
-
- // Require high confidence AND a service name
- if (confidence >= 70 && detectedService) {
- return {
- type: 'account',
- confidence,
- data: {
- serviceName: detectedService,
- serviceType,
- },
- };
- }
-
- return { type: 'none', confidence: 0 };
- }
-
- private findKnownService(domain: string): { name: string; type: Account['serviceType'] } | null {
- // Direct match
- if (this.knownServices[domain]) {
- return this.knownServices[domain];
- }
-
- // Exact or subdomain match against each known service domain
- for (const [serviceDomain, info] of Object.entries(this.knownServices)) {
- if (isDomainMatch(domain, serviceDomain)) {
- return info;
- }
- }
-
- return null;
- }
-
- private extractServiceName(subject: string): string {
- // Very strict patterns - only match clear service name mentions
- const patterns = [
- /^welcome to ([A-Z][a-zA-Z0-9]+(?:\s[A-Z][a-zA-Z0-9]+)?)[!.,]/i,
- /thanks for (?:signing up|joining|registering) (?:for |with )?([A-Z][a-zA-Z0-9]+(?:\s[A-Z][a-zA-Z0-9]+)?)[!.,]/i,
- /your ([A-Z][a-zA-Z0-9]+(?:\s[A-Z][a-zA-Z0-9]+)?) account (?:has been |is )?(?:created|ready)/i,
- ];
-
- // Check subject first (more reliable)
- for (const pattern of patterns) {
- const match = subject.match(pattern);
- if (match && match[1]) {
- const name = match[1].trim();
- // Validate it looks like a service name (2-30 chars, starts with letter)
- if (name.length >= 2 && name.length <= 30 && /^[A-Z]/i.test(name)) {
- return name;
- }
- }
- }
-
- return '';
- }
-
- private formatDomainAsServiceName(domain: string): string {
- if (!domain) return '';
-
- // Get the main part of the domain (before the TLD)
- const parts = domain.split('.');
- if (parts.length < 2) return '';
-
- // For subdomains, try to get the main domain
- let mainPart = parts.length > 2 ? parts[parts.length - 2] : parts[0];
-
- // Skip common email subdomains
- const skipWords = ['mail', 'email', 'noreply', 'no-reply', 'notifications', 'info', 'support', 'news', 'newsletter'];
- if (skipWords.includes(mainPart.toLowerCase())) {
- mainPart = parts.length > 2 ? parts[parts.length - 2] : parts[0];
- }
-
- // Capitalize first letter
- return mainPart.charAt(0).toUpperCase() + mainPart.slice(1);
- }
-
- getServiceType(domain: string): Account['serviceType'] {
- const serviceInfo = this.findKnownService(domain);
- return serviceInfo ? serviceInfo.type : 'other';
- }
-
- createAccountFromEmail(email: Email, serviceName: string, serviceType?: Account['serviceType']): Omit {
- const senderDomain = extractDomain(email.sender);
-
- return {
- serviceName,
- signupEmailId: email.id,
- signupDate: email.date,
- serviceType: serviceType || this.getServiceType(senderDomain),
- domain: senderDomain,
- lastActivityDate: email.date,
- emailCount: 1,
- };
- }
-}
-
-export const accountDetector = new AccountDetector();
diff --git a/web/src/services/backupService.ts b/web/src/services/backupService.ts
index 6015b2e..9902ae5 100644
--- a/web/src/services/backupService.ts
+++ b/web/src/services/backupService.ts
@@ -87,7 +87,7 @@ class BackupService {
if (options.dateRange) {
const startTime = options.dateRange.start.getTime();
const endTime = options.dateRange.end.getTime();
- emails = emails.filter((e) => e.date >= startTime && e.date <= endTime);
+ emails = emails.filter((e) => e.date != null && e.date >= startTime && e.date <= endTime);
}
// Apply folder filter
@@ -97,7 +97,7 @@ class BackupService {
backup.emails = emails.map((e) => ({
...e,
- date: new Date(e.date),
+ date: e.date == null ? null : new Date(e.date),
})) as unknown as Email[];
backup.metadata.emailCount = backup.emails.length;
@@ -113,7 +113,7 @@ class BackupService {
const accounts = await db.accounts.toArray();
backup.accounts = accounts.map((a) => ({
...a,
- signupDate: new Date(a.signupDate),
+ signupDate: a.signupDate == null ? null : new Date(a.signupDate),
})) as unknown as Account[];
backup.metadata.accountCount = backup.accounts.length;
}
@@ -133,7 +133,7 @@ class BackupService {
const contacts = await db.contacts.toArray();
backup.contacts = contacts.map((c) => ({
...c,
- lastEmailDate: new Date(c.lastEmailDate),
+ lastEmailDate: c.lastEmailDate == null ? null : new Date(c.lastEmailDate),
})) as unknown as Contact[];
backup.metadata.contactCount = backup.contacts.length;
}
@@ -164,7 +164,7 @@ class BackupService {
const subscriptions = await db.subscriptions.toArray();
backup.subscriptions = subscriptions.map((s) => ({
...s,
- lastRenewalDate: new Date(s.lastRenewalDate),
+ lastRenewalDate: s.lastRenewalDate == null ? null : new Date(s.lastRenewalDate),
nextRenewalDate: s.nextRenewalDate ? new Date(s.nextRenewalDate) : undefined,
emailIds: typeof s.emailIds === 'string' ? JSON.parse(s.emailIds) : (s.emailIds || []),
})) as unknown as Subscription[];
@@ -176,7 +176,7 @@ class BackupService {
const newsletters = await db.newsletters.toArray();
backup.newsletters = newsletters.map((n) => ({
...n,
- lastEmailDate: new Date(n.lastEmailDate),
+ lastEmailDate: n.lastEmailDate == null ? null : new Date(n.lastEmailDate),
})) as unknown as Newsletter[];
backup.metadata.newsletterCount = backup.newsletters.length;
}
diff --git a/web/src/services/domainMatch.ts b/web/src/services/domainMatch.ts
deleted file mode 100644
index 5942f94..0000000
--- a/web/src/services/domainMatch.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-/**
- * Returns true iff emailDomain is exactly serviceDomain or a subdomain of it.
- * Boundary-safe: 'maxwell.com' does NOT match 'max.com', 'pineapple.com' does
- * NOT match 'apple.com'. Comparison is case-insensitive and trimmed.
- */
-export function isDomainMatch(emailDomain: string, serviceDomain: string): boolean {
- const d = emailDomain.trim().toLowerCase();
- const s = serviceDomain.trim().toLowerCase();
- if (!d || !s) return false;
- return d === s || d.endsWith('.' + s);
-}
diff --git a/web/src/services/gmailTakeoutParser.ts b/web/src/services/gmailTakeoutParser.ts
deleted file mode 100644
index 9fbb272..0000000
--- a/web/src/services/gmailTakeoutParser.ts
+++ /dev/null
@@ -1,267 +0,0 @@
-import JSZip from 'jszip';
-import type { Email } from '../types';
-import { mboxParser, type EmailBatchCallback } from './mboxParser';
-import { logger } from '../utils/logger';
-
-/**
- * Parser for Google Takeout email archives
- * Handles the specific ZIP structure from Google Takeout
- *
- * Optimized for large files:
- * - Sequential MBOX processing to reduce memory pressure
- * - Explicit cleanup between files to allow garbage collection
- * - Streaming batch processing for each MBOX file
- */
-class GmailTakeoutParser {
- /**
- * Parse a Gmail Takeout ZIP file with streaming support
- * Processes MBOX files sequentially and calls onBatch for each batch of emails
- */
- async parseGmailTakeoutStreaming(
- file: File,
- onProgress?: (progress: number, message: string) => void,
- onBatch?: EmailBatchCallback
- ): Promise {
- let totalEmailsParsed = 0;
- let globalBatchNumber = 0;
- const seenEmailKeys = new Set();
-
- onProgress?.(0, 'Opening Gmail Takeout archive...');
-
- // Validate file size before loading (500MB compressed limit)
- const MAX_COMPRESSED_SIZE = 500 * 1024 * 1024;
- if (file.size > MAX_COMPRESSED_SIZE) {
- throw new Error(`File too large (${(file.size / 1024 / 1024).toFixed(0)}MB). Maximum supported size is 500MB.`);
- }
-
- const zip = await JSZip.loadAsync(file);
-
- // Check decompressed size to guard against zip bombs (2GB limit)
- // JSZip stores uncompressedSize in internal _data property (not in public types)
- const MAX_DECOMPRESSED_SIZE = 2 * 1024 * 1024 * 1024;
- let totalDecompressedSize = 0;
- for (const entry of Object.values(zip.files)) {
- if (!entry.dir) {
- const entryData = (entry as unknown as { _data?: { uncompressedSize?: number } })._data;
- if (entryData && typeof entryData.uncompressedSize === 'number') {
- totalDecompressedSize += entryData.uncompressedSize;
- }
- }
- }
- if (totalDecompressedSize > MAX_DECOMPRESSED_SIZE) {
- throw new Error(`Archive decompressed size exceeds 2GB limit. This may be a malicious file.`);
- }
-
- // Find all MBOX files in the archive
- const mboxFiles: string[] = [];
-
- zip.forEach((path, zipEntry) => {
- if (
- !zipEntry.dir &&
- (path.endsWith('.mbox') || path.includes('Takeout/Mail/'))
- ) {
- mboxFiles.push(path);
- }
- });
-
- onProgress?.(10, `Found ${mboxFiles.length} mail folders`);
-
- if (mboxFiles.length === 0) {
- throw new Error(
- 'No email archives found in this Takeout file. Make sure you selected Mail data during export.'
- );
- }
-
- // Process MBOX files SEQUENTIALLY (not in parallel) to reduce memory pressure
- for (let fileIndex = 0; fileIndex < mboxFiles.length; fileIndex++) {
- const mboxPath = mboxFiles[fileIndex];
-
- try {
- const zipEntry = zip.file(mboxPath);
- if (!zipEntry) continue;
-
- // Extract folder name from path
- const folderName = this.extractFolderName(mboxPath);
-
- onProgress?.(
- 10 + ((fileIndex + 0.5) / mboxFiles.length) * 80,
- `Processing ${folderName} (${fileIndex + 1}/${mboxFiles.length})...`
- );
-
- // Get file content - this is the memory-intensive part
- let content = await zipEntry.async('string');
-
- // Create a File object from the content
- const mboxFile = new File([content], `${folderName}.mbox`, {
- type: 'application/mbox',
- });
-
- // Clear the content string to free memory before parsing
- // @ts-expect-error - intentionally reassigning to help GC
- content = null;
-
- // Parse using streaming MBOX parser with deduplication
- const folderMappedBatchCallback: EmailBatchCallback = async (emails) => {
- // Deduplicate and add folder ID
- const uniqueEmails: Omit[] = [];
-
- for (const email of emails) {
- const key = email.threadId ||
- `${email.subject}|${email.sender}|${email.date.getTime()}`;
-
- if (!seenEmailKeys.has(key)) {
- seenEmailKeys.add(key);
- uniqueEmails.push({
- ...email,
- folderId: this.mapGmailFolderToId(folderName),
- });
- }
- }
-
- if (uniqueEmails.length > 0 && onBatch) {
- await onBatch(uniqueEmails, globalBatchNumber);
- globalBatchNumber++;
- }
-
- totalEmailsParsed += uniqueEmails.length;
- };
-
- // Use streaming parser
- await mboxParser.parseMBOXFileStreaming(
- mboxFile,
- (progress, message) => {
- // Combine progress from individual file with overall progress
- const baseProgress = 10 + (fileIndex / mboxFiles.length) * 80;
- const fileContribution = (80 / mboxFiles.length) * (progress / 100);
- onProgress?.(
- Math.round(baseProgress + fileContribution),
- `${folderName}: ${message}`
- );
- },
- folderMappedBatchCallback
- );
-
- // Update progress after each file
- onProgress?.(
- 10 + ((fileIndex + 1) / mboxFiles.length) * 80,
- `Completed ${folderName} (${fileIndex + 1}/${mboxFiles.length})`
- );
-
- // Yield to allow garbage collection between files
- await new Promise(resolve => setTimeout(resolve, 10));
-
- } catch (error) {
- logger.warn(`Failed to parse ${mboxPath}:`, error);
- }
- }
-
- onProgress?.(100, `Imported ${totalEmailsParsed} unique emails`);
-
- return totalEmailsParsed;
- }
-
- /**
- * Parse a Gmail Takeout ZIP file (legacy method for backwards compatibility)
- * For large files, prefer parseGmailTakeoutStreaming
- */
- async parseGmailTakeout(
- file: File,
- onProgress?: (progress: number, message: string) => void
- ): Promise[]> {
- const emails: Omit[] = [];
-
- await this.parseGmailTakeoutStreaming(
- file,
- onProgress,
- async (batch) => {
- emails.push(...batch);
- }
- );
-
- return emails;
- }
-
- /**
- * Extract folder name from file path
- */
- private extractFolderName(path: string): string {
- // Gmail Takeout structure: Takeout/Mail/Label Name.mbox
- const parts = path.split('/');
- const fileName = parts[parts.length - 1];
- return fileName.replace('.mbox', '').replace(/_/g, ' ');
- }
-
- /**
- * Map Gmail folder names to standard folder IDs
- */
- private mapGmailFolderToId(folderName: string): string {
- const lowerName = folderName.toLowerCase();
-
- // Standard Gmail folders
- if (lowerName.includes('inbox')) return 'inbox';
- if (lowerName.includes('sent')) return 'sent';
- if (lowerName.includes('draft')) return 'drafts';
- if (lowerName.includes('trash') || lowerName.includes('deleted'))
- return 'trash';
- if (lowerName.includes('spam') || lowerName.includes('junk')) return 'spam';
- if (lowerName.includes('archive') || lowerName === 'all mail')
- return 'archive';
- if (lowerName.includes('starred') || lowerName.includes('important'))
- return 'starred';
-
- // Custom labels become custom folders
- return `gmail-${folderName.toLowerCase().replace(/\s+/g, '-')}`;
- }
-
- /**
- * Check if a file is a Gmail Takeout archive
- */
- isGmailTakeout(file: File): boolean {
- return (
- file.type === 'application/zip' ||
- file.name.endsWith('.zip') ||
- file.name.toLowerCase().includes('takeout')
- );
- }
-
- /**
- * Validate Gmail Takeout structure
- */
- async validateTakeout(file: File): Promise<{
- valid: boolean;
- message: string;
- folderCount?: number;
- }> {
- try {
- const zip = await JSZip.loadAsync(file);
- let mboxCount = 0;
-
- zip.forEach((path) => {
- if (path.endsWith('.mbox') || path.includes('Takeout/Mail/')) {
- mboxCount++;
- }
- });
-
- if (mboxCount === 0) {
- return {
- valid: false,
- message:
- 'No email data found. Make sure you exported Mail data from Google Takeout.',
- };
- }
-
- return {
- valid: true,
- message: `Found ${mboxCount} mail folders ready to import`,
- folderCount: mboxCount,
- };
- } catch {
- return {
- valid: false,
- message: 'Could not read the archive. Make sure it is a valid ZIP file.',
- };
- }
- }
-}
-
-export const gmailTakeoutParser = new GmailTakeoutParser();
diff --git a/web/src/services/importPipeline.ts b/web/src/services/importPipeline.ts
index b729ace..7063705 100644
--- a/web/src/services/importPipeline.ts
+++ b/web/src/services/importPipeline.ts
@@ -30,11 +30,18 @@ import {
updateEmailFolder,
updateEmailTags,
} from '../db/database';
-import { accountDetector } from './accountDetector';
-import { purchaseDetector } from './purchaseDetector';
-import { subscriptionDetector } from './subscriptionDetector';
-import { newsletterDetector } from './newsletterDetector';
+import {
+ AccountDetector,
+ PurchaseDetector,
+ SubscriptionDetector,
+ NewsletterDetector,
+} from '@technical-1/email-archive-parser';
import { customRulesEngine } from './customRulesEngine';
import { extractDomain } from '../utils/emailUtils';
import { logger } from '../utils/logger';
+
+const accountDetector = new AccountDetector(); // detector instances are created once at module load and reused by runDetection()
+const purchaseDetector = new PurchaseDetector();
+const subscriptionDetector = new SubscriptionDetector();
+const newsletterDetector = new NewsletterDetector();
@@ -53,44 +60,52 @@ export function createImportCounts(): OLMProcessingResult {
/** Run all four detectors against a single (already-persisted) email. */
export async function runDetection(email: Email, counts: OLMProcessingResult): Promise {
// Account signups
- const accountResult = accountDetector.detectAccountSignup(email);
+ const accountResult = accountDetector.detect(email);
if (accountResult.type === 'account' && accountResult.data?.serviceName) {
const existingAccount = await getAccountByServiceName(accountResult.data.serviceName);
if (!existingAccount) {
- const accountData = accountDetector.createAccountFromEmail(
- email,
- accountResult.data.serviceName,
- accountResult.data.serviceType as Account['serviceType'],
- );
- await insertAccount(accountData);
+ await insertAccount({
+ serviceName: accountResult.data.serviceName,
+ signupEmailId: email.id,
+ signupDate: email.date,
+ serviceType: (accountResult.data.serviceType ?? 'other') as Account['serviceType'],
+ domain: extractDomain(email.sender),
+ lastActivityDate: email.date,
+ emailCount: 1,
+ });
counts.accounts++;
}
}
// Purchases
- const purchaseResult = purchaseDetector.detectPurchase(email);
- if (purchaseResult.type === 'purchase' && purchaseResult.data?.amount) {
+ const purchaseResult = purchaseDetector.detect(email);
+ // Purchases require a concrete date (Purchase.purchaseDate is non-nullable and
+ // the dedup window keys off it). Undated emails are skipped for purchase records.
+ if (purchaseResult.type === 'purchase' && purchaseResult.data?.amount && email.date) {
+ const purchaseDate = email.date;
const merchant = purchaseResult.data.merchant || 'Unknown';
const amount = purchaseResult.data.amount;
const orderNumber = purchaseResult.data.orderNumber;
const currency = purchaseResult.data.currency;
- const existingPurchase = await findDuplicatePurchase(merchant, amount, email.date, orderNumber);
+ const existingPurchase = await findDuplicatePurchase(merchant, amount, purchaseDate, orderNumber);
if (!existingPurchase) {
- const purchaseData = purchaseDetector.createPurchaseFromEmail(
- email,
+ await insertPurchase({
+ emailId: email.id,
merchant,
amount,
+ currency: currency || 'USD',
+ purchaseDate,
orderNumber,
- currency,
- );
- await insertPurchase(purchaseData);
+ items: [],
+ category: purchaseDetector.getCategory(merchant),
+ });
counts.purchases++;
}
}
// Subscriptions — dedupe by serviceName, then by sender domain.
- const subResult = subscriptionDetector.detectSubscription(email);
+ const subResult = subscriptionDetector.detect(email);
if (subResult.isSubscription && subResult.serviceName) {
const senderDomain = extractDomain(email.sender);
@@ -101,7 +116,7 @@ export async function runDetection(email: Email, counts: OLMProcessingResult): P
if (existingSub) {
const emailIds = [...new Set([...existingSub.emailIds, email.id!])];
- const isNewerEmail = email.date > existingSub.lastRenewalDate;
+ const isNewerEmail = !!email.date && (!existingSub.lastRenewalDate || email.date > existingSub.lastRenewalDate);
const shouldUpdateAmount = isNewerEmail && subResult.amount != null && subResult.amount > 0;
await updateSubscription(existingSub.id!, {
@@ -127,13 +142,13 @@ export async function runDetection(email: Email, counts: OLMProcessingResult): P
}
// Newsletters / promotional
- const nlResult = newsletterDetector.detectNewsletter(email);
+ const nlResult = newsletterDetector.detect(email);
if (nlResult.isNewsletter || nlResult.isPromotional) {
const existingNL = await getNewsletterBySender(email.sender);
if (existingNL) {
await updateNewsletter(existingNL.id!, {
emailCount: existingNL.emailCount + 1,
- lastEmailDate: email.date > existingNL.lastEmailDate ? email.date : existingNL.lastEmailDate,
+ lastEmailDate: email.date && (!existingNL.lastEmailDate || email.date > existingNL.lastEmailDate) ? email.date : existingNL.lastEmailDate,
unsubscribeLink: nlResult.unsubscribeLink || existingNL.unsubscribeLink,
});
} else {
@@ -218,7 +233,7 @@ export async function processEmailBatch(
if (batch.length === 0) return;
// Worker messages may serialize dates to strings; coerce defensively.
- const emails = batch.map((e) => ({ ...e, date: new Date(e.date) }));
+ const emails = batch.map((e) => ({ ...e, date: e.date == null ? null : new Date(e.date) }));
for (const e of emails) {
if (e.folderId) folderIds.add(e.folderId);
}
diff --git a/web/src/services/mboxParser.ts b/web/src/services/mboxParser.ts
deleted file mode 100644
index dc96a46..0000000
--- a/web/src/services/mboxParser.ts
+++ /dev/null
@@ -1,628 +0,0 @@
-import type { Email } from '../types';
-import { cleanEmailAddress, normalizeSubject } from '../utils/emailUtils';
-import { logger } from '../utils/logger';
-import { decodeQuotedPrintable, decodeRfc2047 } from './mimeUtils';
-
-/**
- * Callback for streaming email processing
- */
-export type EmailBatchCallback = (emails: Omit[], batchNumber: number) => Promise;
-
-/**
- * Parser for MBOX email archive format
- * Uses streaming/batched approach for memory efficiency with large files
- */
-class MBOXParser {
- private readonly CHUNK_SIZE = 5 * 1024 * 1024; // 5MB chunks
- private readonly BATCH_SIZE = 100; // Process 100 emails at a time
-
- /**
- * Parse an MBOX file with streaming batch processing
- * Calls onBatch with each batch of emails as they're parsed
- */
- async parseMBOXFileStreaming(
- file: File,
- onProgress?: (progress: number, message: string) => void,
- onBatch?: EmailBatchCallback
- ): Promise {
- const fileSize = file.size;
- let offset = 0;
- let leftover = '';
- let totalEmailsParsed = 0;
- let currentBatch: Omit[] = [];
- let batchNumber = 0;
-
- onProgress?.(0, `Processing ${(fileSize / 1024 / 1024).toFixed(1)}MB file...`);
-
- while (offset < fileSize) {
- const chunkEnd = Math.min(offset + this.CHUNK_SIZE, fileSize);
- const chunk = file.slice(offset, chunkEnd);
-
- let chunkText: string;
- try {
- chunkText = await chunk.text();
- } catch (e) {
- logger.error('Error reading chunk:', e);
- break;
- }
-
- const textToProcess = leftover + chunkText;
-
- // Find the last "From " line to know where to split
- const lastFromIndex = this.findLastFromLine(textToProcess);
-
- let processableText: string;
- if (lastFromIndex > 0 && chunkEnd < fileSize) {
- processableText = textToProcess.substring(0, lastFromIndex);
- leftover = textToProcess.substring(lastFromIndex);
- } else {
- processableText = textToProcess;
- leftover = '';
- }
-
- // Parse emails from this chunk
- const chunkEmails = this.parseEmailsFromText(processableText);
-
- for (const email of chunkEmails) {
- currentBatch.push(email);
-
- // When batch is full, process it
- if (currentBatch.length >= this.BATCH_SIZE) {
- if (onBatch) {
- await onBatch(currentBatch, batchNumber);
- }
- totalEmailsParsed += currentBatch.length;
- batchNumber++;
- currentBatch = []; // Clear batch from memory
-
- // Yield to UI
- await new Promise(resolve => setTimeout(resolve, 0));
- }
- }
-
- offset = chunkEnd;
- const progress = Math.round((offset / fileSize) * 95);
- onProgress?.(progress, `Parsed ${totalEmailsParsed + currentBatch.length} emails (${Math.round(offset / fileSize * 100)}% read)...`);
-
- // Yield to prevent UI blocking
- await new Promise(resolve => setTimeout(resolve, 0));
- }
-
- // Process any remaining text
- if (leftover.trim()) {
- const finalEmails = this.parseEmailsFromText(leftover);
- for (const email of finalEmails) {
- currentBatch.push(email);
- }
- }
-
- // Process final batch
- if (currentBatch.length > 0 && onBatch) {
- await onBatch(currentBatch, batchNumber);
- totalEmailsParsed += currentBatch.length;
- }
-
- onProgress?.(100, `Parsed ${totalEmailsParsed} emails successfully`);
- return totalEmailsParsed;
- }
-
- /**
- * Legacy method for backwards compatibility
- * For small files only - use parseMBOXFileStreaming for large files
- */
- async parseMBOXFile(
- file: File,
- onProgress?: (progress: number, message: string) => void
- ): Promise[]> {
- // For files under 20MB, use simple accumulator approach
- if (file.size < 20 * 1024 * 1024) {
- const emails: Omit[] = [];
- await this.parseMBOXFileStreaming(file, onProgress, async (batch) => {
- emails.push(...batch);
- });
- return emails;
- }
-
- // For larger files, warn and still use streaming but accumulate
- console.warn('Large file detected. Consider using parseMBOXFileStreaming for better memory efficiency.');
- const emails: Omit[] = [];
- await this.parseMBOXFileStreaming(file, onProgress, async (batch) => {
- emails.push(...batch);
- });
- return emails;
- }
-
- /**
- * Check if a line is a valid MBOX "From " line
- */
- private isFromLine(line: string): boolean {
- if (!line.startsWith('From ')) return false;
- const dayPattern = /(Mon|Tue|Wed|Thu|Fri|Sat|Sun)/;
- return dayPattern.test(line);
- }
-
- /**
- * Find the index of the last "From " line in text
- */
- private findLastFromLine(text: string): number {
- let lastIndex = -1;
- let searchStart = text.length - 1;
-
- // Search backwards for "\nFrom " or "\r\nFrom " pattern
- while (searchStart > 0) {
- // Try both CRLF and LF
- let idx = text.lastIndexOf('\r\nFrom ', searchStart);
- let offset = 2; // Skip \r\n
-
- if (idx === -1) {
- idx = text.lastIndexOf('\nFrom ', searchStart);
- offset = 1; // Skip \n
- }
-
- if (idx === -1) break;
-
- const lineStart = idx + offset;
- let lineEnd = text.indexOf('\n', lineStart);
- if (lineEnd === -1) lineEnd = text.length;
- let line = text.substring(lineStart, lineEnd);
- // Remove trailing \r if present
- if (line.endsWith('\r')) line = line.slice(0, -1);
-
- if (this.isFromLine(line)) {
- lastIndex = lineStart;
- break;
- }
- searchStart = idx - 1;
- }
-
- // Also check if text starts with "From "
- if (lastIndex === -1 && text.startsWith('From ')) {
- let lineEnd = text.indexOf('\n');
- if (lineEnd === -1) lineEnd = text.length;
- let line = text.substring(0, lineEnd);
- if (line.endsWith('\r')) line = line.slice(0, -1);
- if (this.isFromLine(line)) {
- lastIndex = 0;
- }
- }
-
- return lastIndex;
- }
-
- /**
- * Parse multiple emails from a text block
- */
- private parseEmailsFromText(text: string): Omit[] {
- const emails: Omit[] = [];
- // Normalize CRLF to LF, then split
- const normalizedText = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
- const lines = normalizedText.split('\n');
- let currentEmail: string[] = [];
-
- for (const line of lines) {
- if (this.isFromLine(line) && currentEmail.length > 0) {
- const email = this.parseEmailFromLines(currentEmail);
- if (email) {
- emails.push(email);
- }
- currentEmail = [];
- }
- currentEmail.push(line);
- }
-
- // Parse last email in chunk
- if (currentEmail.length > 0 && currentEmail.some(line => line.trim().length > 0)) {
- const email = this.parseEmailFromLines(currentEmail);
- if (email) {
- emails.push(email);
- }
- }
-
- return emails;
- }
-
- /**
- * Parse a single email from raw lines
- */
- private parseEmailFromLines(lines: string[]): Omit | null {
- try {
- if (lines.length < 2) return null;
-
- const headers: Record = {};
- let bodyStartIndex = 0;
- let inHeaders = true;
-
- // Parse headers (skip the "From " line)
- for (let i = 1; i < lines.length; i++) {
- const line = lines[i];
-
- if (line.trim() === '') {
- bodyStartIndex = i + 1;
- inHeaders = false;
- break;
- }
-
- if (inHeaders) {
- if (line.match(/^\s+/) && Object.keys(headers).length > 0) {
- const lastKey = Object.keys(headers).pop()!;
- headers[lastKey] += ' ' + line.trim();
- } else {
- const match = line.match(/^([^:]+):\s*(.*)$/);
- if (match) {
- const key = match[1].toLowerCase();
- headers[key] = match[2];
- }
- }
- }
- }
-
- if (inHeaders) {
- bodyStartIndex = lines.length;
- }
-
- // Extract body content
- const bodyLines = lines.slice(bodyStartIndex);
- const rawBody = bodyLines.join('\n');
-
- // Parse body based on content type
- const contentType = headers['content-type'] || 'text/plain';
- let body = '';
- let htmlBody: string | undefined;
-
- if (contentType.includes('multipart/')) {
- // Extract boundary from content-type
- const boundaryMatch = contentType.match(/boundary=["']?([^"';\s]+)["']?/i);
- if (boundaryMatch) {
- const boundary = boundaryMatch[1];
- const parts = this.parseMimeParts(rawBody, boundary);
- body = parts.text || '';
- htmlBody = parts.html;
- } else {
- body = rawBody;
- }
- } else {
- // Single part email
- body = rawBody;
- const encoding = headers['content-transfer-encoding']?.toLowerCase();
- if (encoding === 'quoted-printable') {
- body = this.decodeQuotedPrintable(body);
- } else if (encoding === 'base64') {
- try {
- body = atob(body.replace(/\s/g, ''));
- } catch {
- // Keep original if decode fails
- }
- }
-
- if (contentType.includes('text/html')) {
- htmlBody = body;
- }
- }
-
- const dateStr = headers['date'] || '';
- const date = this.parseDate(dateStr);
-
- const from = headers['from'] || '';
- const { email: sender, name: senderName } = this.parseEmailAddress(from);
-
- const to = headers['to'] || '';
- const recipients = this.parseRecipients(to);
-
- const subject = this.decodeHeaderValue(headers['subject'] || '(No Subject)');
-
- let threadId = headers['x-gm-thrid'] ||
- headers['thread-topic'] ||
- headers['references']?.split(/\s+/)[0] ||
- headers['in-reply-to'];
-
- if (!threadId) {
- const normalizedSubj = normalizeSubject(subject);
- if (normalizedSubj) {
- threadId = `subject:${normalizedSubj.toLowerCase().replace(/\s+/g, '-')}`;
- }
- }
-
- const gmailLabels = headers['x-gmail-labels'] || '';
- const folderId = this.mapGmailLabelsToFolder(gmailLabels);
- const isRead = !gmailLabels.toLowerCase().includes('unread');
- const isStarred = gmailLabels.toLowerCase().includes('starred');
-
- if (!sender && !subject) {
- return null;
- }
-
- // Sanitize field lengths to prevent memory issues with malformed emails
- const MAX_SUBJECT_LEN = 1000;
- const MAX_BODY_LEN = 10 * 1024 * 1024; // 10MB
- const MAX_EMAIL_LEN = 254; // RFC 5321
-
- const sanitizedSubject = subject.length > MAX_SUBJECT_LEN ? subject.slice(0, MAX_SUBJECT_LEN) : subject;
- const sanitizedBody = body.trim() || (htmlBody ? this.stripHtml(htmlBody) : '');
- const truncatedBody = sanitizedBody.length > MAX_BODY_LEN ? sanitizedBody.slice(0, MAX_BODY_LEN) : sanitizedBody;
- const truncatedHtmlBody = htmlBody && htmlBody.length > MAX_BODY_LEN ? htmlBody.slice(0, MAX_BODY_LEN) : htmlBody;
- const sanitizedSender = cleanEmailAddress(sender).slice(0, MAX_EMAIL_LEN);
- const sanitizedRecipients = recipients.map(r => r.slice(0, MAX_EMAIL_LEN)).slice(0, 1000);
-
- return {
- subject: sanitizedSubject,
- sender: sanitizedSender,
- senderName: senderName || undefined,
- recipients: sanitizedRecipients,
- date: date || new Date(),
- body: truncatedBody,
- htmlBody: truncatedHtmlBody,
- attachments: [],
- size: Math.min(lines.join('\n').length, 100000), // Cap size calculation
- isRead,
- isStarred,
- folderId,
- threadId,
- emailType: 'regular',
- };
- } catch (error) {
- console.warn('Failed to parse email:', error);
- return null;
- }
- }
-
- /**
- * Parse Gmail labels and return the primary folder ID
- * Priority: Inbox > Sent > Drafts > Spam > Trash > first custom label > Archive
- */
- private mapGmailLabelsToFolder(labels: string): string {
- const labelList = this.parseGmailLabels(labels);
-
- // Priority order for folder assignment
- if (labelList.includes('inbox')) return 'inbox';
- if (labelList.includes('sent') || labelList.includes('sent mail')) return 'sent';
- if (labelList.includes('draft') || labelList.includes('drafts')) return 'drafts';
- if (labelList.includes('spam')) return 'spam';
- if (labelList.includes('trash')) return 'trash';
-
- // Check for custom labels (not category/system labels)
- const customLabels = labelList.filter(l =>
- !l.startsWith('category ') &&
- !['opened', 'unread', 'starred', 'important', 'all mail'].includes(l)
- );
-
- if (customLabels.length > 0) {
- // Use first custom label as folder
- return this.labelToFolderId(customLabels[0]);
- }
-
- return 'archive';
- }
-
- /**
- * Parse the X-Gmail-Labels header into an array of label names
- */
- parseGmailLabels(labelsHeader: string): string[] {
- if (!labelsHeader) return [];
-
- // Labels are comma-separated, may be quoted if they contain special chars
- const labels: string[] = [];
- let current = '';
- let inQuotes = false;
-
- for (const char of labelsHeader) {
- if (char === '"') {
- inQuotes = !inQuotes;
- } else if (char === ',' && !inQuotes) {
- if (current.trim()) {
- labels.push(current.trim().toLowerCase());
- }
- current = '';
- } else {
- current += char;
- }
- }
-
- if (current.trim()) {
- labels.push(current.trim().toLowerCase());
- }
-
- return labels;
- }
-
- /**
- * Convert a label name to a valid folder ID
- */
- private labelToFolderId(label: string): string {
- return label
- .toLowerCase()
- .replace(/[^a-z0-9\s-]/g, '')
- .replace(/\s+/g, '-')
- .substring(0, 50); // Limit length
- }
-
- /**
- * Get all unique folder IDs that would be created from a labels header
- */
- getAllFolderIdsFromLabels(labelsHeader: string): string[] {
- const labels = this.parseGmailLabels(labelsHeader);
- const folderIds = new Set();
-
- // Add system folders if mentioned
- if (labels.includes('inbox')) folderIds.add('inbox');
- if (labels.includes('sent') || labels.includes('sent mail')) folderIds.add('sent');
- if (labels.includes('draft') || labels.includes('drafts')) folderIds.add('drafts');
- if (labels.includes('spam')) folderIds.add('spam');
- if (labels.includes('trash')) folderIds.add('trash');
-
- // Add custom labels as folders
- for (const label of labels) {
- if (!label.startsWith('category ') &&
- !['opened', 'unread', 'starred', 'important', 'all mail',
- 'inbox', 'sent', 'sent mail', 'draft', 'drafts', 'spam', 'trash'].includes(label)) {
- folderIds.add(this.labelToFolderId(label));
- }
- }
-
- return Array.from(folderIds);
- }
-
- /**
- * Parse MIME multipart content and extract text/html parts
- */
- private parseMimeParts(body: string, boundary: string): { text?: string; html?: string } {
- const result: { text?: string; html?: string } = {};
-
- // Split by boundary
- const boundaryMarker = '--' + boundary;
- const parts = body.split(boundaryMarker);
-
- for (const part of parts) {
- if (!part.trim() || part.trim() === '--') continue;
-
- // Split headers from content
- const headerEndIndex = part.indexOf('\n\n');
- if (headerEndIndex === -1) continue;
-
- const partHeaders = part.substring(0, headerEndIndex);
- let partContent = part.substring(headerEndIndex + 2);
-
- // Parse part headers
- const contentTypeMatch = partHeaders.match(/content-type:\s*([^;\n]+)/i);
- const encodingMatch = partHeaders.match(/content-transfer-encoding:\s*(\S+)/i);
-
- if (!contentTypeMatch) continue;
-
- const partContentType = contentTypeMatch[1].toLowerCase().trim();
- const partEncoding = encodingMatch?.[1]?.toLowerCase() || '7bit';
-
- // Handle nested multipart (multipart/alternative, etc.)
- if (partContentType.includes('multipart/')) {
- const nestedBoundaryMatch = partHeaders.match(/boundary=["']?([^"';\s\n]+)["']?/i);
- if (nestedBoundaryMatch) {
- const nestedResult = this.parseMimeParts(partContent, nestedBoundaryMatch[1]);
- if (nestedResult.text && !result.text) result.text = nestedResult.text;
- if (nestedResult.html && !result.html) result.html = nestedResult.html;
- }
- continue;
- }
-
- // Decode content
- partContent = partContent.trim();
- if (partEncoding === 'base64') {
- try {
- // Remove whitespace and decode
- const cleaned = partContent.replace(/\s/g, '');
- partContent = this.decodeBase64(cleaned);
- } catch {
- // Keep original if decode fails
- }
- } else if (partEncoding === 'quoted-printable') {
- partContent = this.decodeQuotedPrintable(partContent);
- }
-
- // Store based on content type
- if (partContentType.includes('text/plain') && !result.text) {
- result.text = partContent;
- } else if (partContentType.includes('text/html') && !result.html) {
- result.html = partContent;
- }
- }
-
- return result;
- }
-
- /**
- * Decode base64 with UTF-8 support
- */
- private decodeBase64(str: string): string {
- try {
- // Use TextDecoder for proper UTF-8 handling
- const binaryStr = atob(str);
- const bytes = new Uint8Array(binaryStr.length);
- for (let i = 0; i < binaryStr.length; i++) {
- bytes[i] = binaryStr.charCodeAt(i);
- }
- return new TextDecoder('utf-8').decode(bytes);
- } catch {
- // Fallback to simple atob
- try {
- return atob(str);
- } catch {
- return str;
- }
- }
- }
-
- /**
- * Strip HTML tags to create plain text
- */
- private stripHtml(html: string): string {
- return html
- .replace(/