From 235957e96ffd90117388c651a8436b9266586732 Mon Sep 17 00:00:00 2001 From: Amadeus Demarzi Date: Tue, 9 Jun 2026 16:25:06 -0700 Subject: [PATCH 1/3] add regression test for parsePatchFiles with sqp comment --- packages/diffs/test/parsePatchFiles.test.ts | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/packages/diffs/test/parsePatchFiles.test.ts b/packages/diffs/test/parsePatchFiles.test.ts index de5ade736..536175aaa 100644 --- a/packages/diffs/test/parsePatchFiles.test.ts +++ b/packages/diffs/test/parsePatchFiles.test.ts @@ -81,6 +81,40 @@ describe('parsePatchFiles', () => { } }); + test('parses deleted SQL comment lines as hunk content in unified patches', () => { + const result = parsePatchFiles( + [ + '--- sql/test.sql\n', + '+++ sql/test.sql\n', + '@@ -1,5 +1,4 @@\n', + ' -- This is a test sql file\n', + '--- This is an sql comment\n', + ' \n', + ' CREATE TABLE users (\n', + ' id BIGSERIAL PRIMARY KEY,\n', + ].join(''), + undefined, + true + ); + + const file = result[0]?.files[0]; + expect(result[0]?.files).toHaveLength(1); + expect(file?.name).toBe('sql/test.sql'); + expect(file?.deletionLines).toEqual([ + '-- This is a test sql file\n', + '-- This is an sql comment\n', + '\n', + 'CREATE TABLE users (\n', + 'id BIGSERIAL PRIMARY KEY,\n', + ]); + expect(file?.additionLines).toEqual([ + '-- This is a test sql file\n', + '\n', + 'CREATE TABLE users (\n', + 'id BIGSERIAL PRIMARY KEY,\n', + ]); + }); + test('preserves leading BOM characters in parsed hunk lines', () => { const result = parsePatchFiles( [ From f6a26f2be34f251d874b50bde6db9810adbdee64 Mon Sep 17 00:00:00 2001 From: Amadeus Demarzi Date: Tue, 9 Jun 2026 18:40:20 -0700 Subject: [PATCH 2/3] Fix buggy parsing Fix the original issue found from the bug report, but also harden parsing a bit more --- packages/diffs/src/utils/parsePatchFiles.ts | 176 +++++++++++++++++++- packages/diffs/test/parsePatchFiles.test.ts | 76 +++++++++ 2 files changed, 248 insertions(+), 4 deletions(-) diff --git a/packages/diffs/src/utils/parsePatchFiles.ts b/packages/diffs/src/utils/parsePatchFiles.ts index cd866fad7..515b612ea 100644 --- a/packages/diffs/src/utils/parsePatchFiles.ts +++ b/packages/diffs/src/utils/parsePatchFiles.ts @@ -5,7 +5,6 @@ import { FILENAME_HEADER_REGEX_GIT, GIT_DIFF_FILE_BREAK_REGEX, INDEX_LINE_METADATA, - UNIFIED_DIFF_FILE_BREAK_REGEX, } from '../constants'; import type { ChangeContent, @@ -46,8 +45,8 @@ function _processPatch( ): ParsedPatch { const isGitDiff = isGitDiffPatch(data); const rawFiles = isGitDiff - ? splitAtLinePrefix(data, 'diff --git') - : data.split(UNIFIED_DIFF_FILE_BREAK_REGEX); + ? splitGitDiffFiles(data) + : splitUnifiedDiffFiles(data); let patchMetadata: string | undefined; const files: FileDiffMetadata[] = []; for (const fileOrPatchMetadata of rawFiles) { @@ -69,7 +68,7 @@ function _processPatch( continue; } else if ( !isGitDiff && - !UNIFIED_DIFF_FILE_BREAK_REGEX.test(fileOrPatchMetadata) + !startsWithUnifiedDiffFileHeader(fileOrPatchMetadata) ) { if (patchMetadata == null) { patchMetadata = detachString(fileOrPatchMetadata); @@ -341,6 +340,13 @@ function _processFile( parsedDeletionLines >= hunkData.deletionCount && !rawLine.startsWith('\\') ) { + if ( + throwOnError && + isHunkBodyLine(rawLine) && + !isFormatPatchVersionSeparator(rawLine) + ) { + throw Error('parsePatchContent: hunk has more lines than expected'); + } break; } @@ -354,6 +360,9 @@ function _processFile( firstChar !== ' ' && firstChar !== '\\' ) { + if (throwOnError) { + throw Error('parsePatchContent: invalid hunk line'); + } console.error( `parseLineType: Invalid firstChar: "${firstChar}", full line: "${rawLine}"` ); @@ -363,6 +372,9 @@ function _processFile( const type = parseRawLineType(firstChar); if (type === 'addition') { + if (throwOnError && parsedAdditionLines >= hunkData.additionCount) { + throw Error('parsePatchContent: hunk has too many addition lines'); + } const line = getParsedLineContent(rawLine); if (currentContent == null || currentContent.type !== 'change') { currentContent = createContentGroup( @@ -381,6 +393,9 @@ function _processFile( additionLines++; lastLineType = 'addition'; } else if (type === 'deletion') { + if (throwOnError && parsedDeletionLines >= hunkData.deletionCount) { + throw Error('parsePatchContent: hunk has too many deletion lines'); + } const line = getParsedLineContent(rawLine); if (currentContent == null || currentContent.type !== 'change') { currentContent = createContentGroup( @@ -399,6 +414,13 @@ function _processFile( deletionLines++; lastLineType = 'deletion'; } else if (type === 'context') { + if ( + throwOnError && + (parsedDeletionLines >= hunkData.deletionCount || + parsedAdditionLines >= hunkData.additionCount) + ) { + throw Error('parsePatchContent: hunk has too many context lines'); + } const line = getParsedLineContent(rawLine); if (currentContent == null || currentContent.type !== 'context') { currentContent = createContentGroup( @@ -454,6 +476,14 @@ function _processFile( } } + if ( + throwOnError && + (parsedAdditionLines !== hunkData.additionCount || + parsedDeletionLines !== hunkData.deletionCount) + ) { + throw Error('parsePatchContent: hunk line count mismatch'); + } + hunkData.additionLines = additionLines; hunkData.deletionLines = deletionLines; @@ -488,6 +518,14 @@ function _processFile( if (currentFile == null) { return undefined; } + if ( + throwOnError && + isPartial && + !isGitDiff && + currentFile.hunks.length === 0 + ) { + throw Error('parsePatchContent: unified file has no hunks'); + } // Account for collapsed lines after the final hunk and increment the // split/unified counts properly @@ -618,6 +656,136 @@ function splitWithNewlines(contents: string): string[] { return lines; } +function splitGitDiffFiles(contents: string): string[] { + return splitAtLinePrefix(contents, 'diff --git'); +} + +function splitUnifiedDiffFiles(contents: string): string[] { + if (contents.length === 0) { + return ['']; + } + + const parts: string[] = []; + let partStartIndex = 0; + let lineStartIndex = 0; + let remainingDeletionLines = 0; + let remainingAdditionLines = 0; + + while (lineStartIndex < contents.length) { + const nextLineStartIndex = getNextLineStartIndex(contents, lineStartIndex); + if (remainingDeletionLines <= 0 && remainingAdditionLines <= 0) { + if (isUnifiedDiffFileHeaderAt(contents, lineStartIndex)) { + if (lineStartIndex > partStartIndex) { + parts.push(contents.slice(partStartIndex, lineStartIndex)); + } + partStartIndex = lineStartIndex; + lineStartIndex = getNextLineStartIndex(contents, nextLineStartIndex); + continue; + } + + if (contents.startsWith('@@ -', lineStartIndex)) { + const fileHeader = parseHunkHeader( + contents.slice(lineStartIndex, nextLineStartIndex) + ); + if (fileHeader != null) { + remainingDeletionLines = fileHeader.deletionCount; + remainingAdditionLines = fileHeader.additionCount; + } + } + lineStartIndex = nextLineStartIndex; + continue; + } + + const firstChar = contents[lineStartIndex]; + if (firstChar === '\\') { + lineStartIndex = nextLineStartIndex; + continue; + } + + if (firstChar === ' ') { + remainingDeletionLines = Math.max(remainingDeletionLines - 1, 0); + remainingAdditionLines = Math.max(remainingAdditionLines - 1, 0); + } else if (firstChar === '-') { + remainingDeletionLines = Math.max(remainingDeletionLines - 1, 0); + } else if (firstChar === '+') { + remainingAdditionLines = Math.max(remainingAdditionLines - 1, 0); + } + lineStartIndex = nextLineStartIndex; + } + + parts.push(contents.slice(partStartIndex)); + return parts; +} + +function startsWithUnifiedDiffFileHeader(contents: string): boolean { + return isUnifiedDiffFileHeaderAt(contents, 0); +} + +function isUnifiedDiffFileHeaderAt(contents: string, lineStartIndex: number) { + const nextLineStartIndex = getNextLineStartIndex(contents, lineStartIndex); + return ( + isUnifiedDiffHeaderLineAt(contents, lineStartIndex, '---') && + isUnifiedDiffHeaderLineAt(contents, nextLineStartIndex, '+++') + ); +} + +function isUnifiedDiffHeaderLineAt( + contents: string, + lineStartIndex: number, + prefix: '---' | '+++' +): boolean { + if (!contents.startsWith(prefix, lineStartIndex)) { + return false; + } + + const separator = contents[lineStartIndex + prefix.length]; + if (separator !== ' ' && separator !== '\t') { + return false; + } + + for ( + let index = lineStartIndex + prefix.length + 1; + index < contents.length; + index++ + ) { + const char = contents[index]; + if (char === '\n' || char === '\r') { + break; + } + if (char !== ' ' && char !== '\t') { + return true; + } + } + return false; +} + +function getNextLineStartIndex( + contents: string, + lineStartIndex: number +): number { + const newlineIndex = contents.indexOf('\n', lineStartIndex); + return newlineIndex === -1 ? contents.length : newlineIndex + 1; +} + +function isHunkBodyLine(line: string): boolean { + const firstChar = line[0]; + return firstChar === '+' || firstChar === '-' || firstChar === ' '; +} + +function isFormatPatchVersionSeparator(line: string): boolean { + if (!line.startsWith('--')) { + return false; + } + + for (let index = 2; index < line.length; index++) { + const char = line[index]; + if (char !== ' ' && char !== '\t' && char !== '\n' && char !== '\r') { + return false; + } + } + return true; +} + function parseHunkHeader(line: string): ParsedHunkHeader | undefined { if (!line.startsWith('@@ -')) { return undefined; diff --git a/packages/diffs/test/parsePatchFiles.test.ts b/packages/diffs/test/parsePatchFiles.test.ts index 536175aaa..58efdaee5 100644 --- a/packages/diffs/test/parsePatchFiles.test.ts +++ b/packages/diffs/test/parsePatchFiles.test.ts @@ -63,6 +63,62 @@ describe('parsePatchFiles', () => { } }); + test('throws in strict mode for malformed patch with bare newline in hunk', () => { + expect(() => parsePatchFiles(malformedPatch, undefined, true)).toThrow( + 'invalid hunk line' + ); + }); + + test('throws in strict mode when a hunk has too few lines', () => { + expect(() => + parsePatchFiles( + [ + '--- incomplete.txt\n', + '+++ incomplete.txt\n', + '@@ -1 +1 @@\n', + '-old line\n', + ].join(''), + undefined, + true + ) + ).toThrow('hunk line count mismatch'); + }); + + test('throws in strict mode when a hunk has extra content lines', () => { + expect(() => + parsePatchFiles( + [ + '--- extra.txt\n', + '+++ extra.txt\n', + '@@ -1 +1 @@\n', + '-old line\n', + '+new line\n', + '-extra old line\n', + ].join(''), + undefined, + true + ) + ).toThrow('hunk has more lines than expected'); + }); + + test('throws in strict mode when a fake unified header creates a file without hunks', () => { + expect(() => + parsePatchFiles( + [ + '--- markers.txt\n', + '+++ markers.txt\n', + '@@ -1 +1 @@\n', + '--- old marker\n', + '+++ new marker\n', + '--- fake-old-marker\n', + '+++ fake-new-marker\n', + ].join(''), + undefined, + true + ) + ).toThrow('unified file has no hunks'); + }); + test('ignores format-patch version trailers after the final hunk', () => { const consoleError = spyOn(console, 'error').mockImplementation(() => {}); try { @@ -115,6 +171,26 @@ describe('parsePatchFiles', () => { ]); }); + test('does not split hunk content that resembles unified file headers', () => { + const result = parsePatchFiles( + [ + '--- markers.txt\n', + '+++ markers.txt\n', + '@@ -1 +1 @@\n', + '--- old marker\n', + '+++ new marker\n', + ].join(''), + undefined, + true + ); + + const file = result[0]?.files[0]; + expect(result[0]?.files).toHaveLength(1); + expect(file?.name).toBe('markers.txt'); + expect(file?.deletionLines).toEqual(['-- old marker\n']); + expect(file?.additionLines).toEqual(['++ new marker\n']); + }); + test('preserves leading BOM characters in parsed hunk lines', () => { const result = parsePatchFiles( [ From 35ca8fcb132cd45abf199b278ec0bad64079f3d7 Mon Sep 17 00:00:00 2001 From: Amadeus Demarzi Date: Tue, 9 Jun 2026 19:00:01 -0700 Subject: [PATCH 3/3] Fix feedback from the bot --- packages/diffs/src/utils/parsePatchFiles.ts | 4 +++- packages/diffs/test/parsePatchFiles.test.ts | 23 +++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/packages/diffs/src/utils/parsePatchFiles.ts b/packages/diffs/src/utils/parsePatchFiles.ts index 515b612ea..20e0ad779 100644 --- a/packages/diffs/src/utils/parsePatchFiles.ts +++ b/packages/diffs/src/utils/parsePatchFiles.ts @@ -670,6 +670,7 @@ function splitUnifiedDiffFiles(contents: string): string[] { let lineStartIndex = 0; let remainingDeletionLines = 0; let remainingAdditionLines = 0; + let hasOpenedUnifiedFile = false; while (lineStartIndex < contents.length) { const nextLineStartIndex = getNextLineStartIndex(contents, lineStartIndex); @@ -679,11 +680,12 @@ function splitUnifiedDiffFiles(contents: string): string[] { parts.push(contents.slice(partStartIndex, lineStartIndex)); } partStartIndex = lineStartIndex; + hasOpenedUnifiedFile = true; lineStartIndex = getNextLineStartIndex(contents, nextLineStartIndex); continue; } - if (contents.startsWith('@@ -', lineStartIndex)) { + if (hasOpenedUnifiedFile && contents.startsWith('@@ -', lineStartIndex)) { const fileHeader = parseHunkHeader( contents.slice(lineStartIndex, nextLineStartIndex) ); diff --git a/packages/diffs/test/parsePatchFiles.test.ts b/packages/diffs/test/parsePatchFiles.test.ts index 58efdaee5..a013e8257 100644 --- a/packages/diffs/test/parsePatchFiles.test.ts +++ b/packages/diffs/test/parsePatchFiles.test.ts @@ -137,6 +137,29 @@ describe('parsePatchFiles', () => { } }); + test('ignores hunk-looking patch metadata before unified file headers', () => { + const result = parsePatchFiles( + [ + 'Patch metadata mentions @@ -1 +1 @@ before the file header.\n', + '@@ -1 +1 @@ is here.\n', + '\n', + '--- metadata.txt\n', + '+++ metadata.txt\n', + '@@ -1 +1 @@\n', + '-old line\n', + '+new line\n', + ].join(''), + undefined, + true + ); + + expect(result[0]?.patchMetadata).toBe( + 'Patch metadata mentions @@ -1 +1 @@ before the file header.\n@@ -1 +1 @@ is here.\n\n' + ); + expect(result[0]?.files).toHaveLength(1); + expect(result[0]?.files[0]?.name).toBe('metadata.txt'); + }); + test('parses deleted SQL comment lines as hunk content in unified patches', () => { const result = parsePatchFiles( [