From 235957e96ffd90117388c651a8436b9266586732 Mon Sep 17 00:00:00 2001
From: Amadeus Demarzi <amadeusdemarzi@gmail.com>
Date: Tue, 9 Jun 2026 16:25:06 -0700
Subject: [PATCH 1/3] add regression test for parsePatchFiles with sqp comment

---
 packages/diffs/test/parsePatchFiles.test.ts | 34 +++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/packages/diffs/test/parsePatchFiles.test.ts b/packages/diffs/test/parsePatchFiles.test.ts
index de5ade736..536175aaa 100644
--- a/packages/diffs/test/parsePatchFiles.test.ts
+++ b/packages/diffs/test/parsePatchFiles.test.ts
@@ -81,6 +81,40 @@ describe('parsePatchFiles', () => {
     }
   });
 
+  test('parses deleted SQL comment lines as hunk content in unified patches', () => {
+    const result = parsePatchFiles(
+      [
+        '--- sql/test.sql\n',
+        '+++ sql/test.sql\n',
+        '@@ -1,5 +1,4 @@\n',
+        ' -- This is a test sql file\n',
+        '--- This is an sql comment\n',
+        ' \n',
+        ' CREATE TABLE users (\n',
+        ' id BIGSERIAL PRIMARY KEY,\n',
+      ].join(''),
+      undefined,
+      true
+    );
+
+    const file = result[0]?.files[0];
+    expect(result[0]?.files).toHaveLength(1);
+    expect(file?.name).toBe('sql/test.sql');
+    expect(file?.deletionLines).toEqual([
+      '-- This is a test sql file\n',
+      '-- This is an sql comment\n',
+      '\n',
+      'CREATE TABLE users (\n',
+      'id BIGSERIAL PRIMARY KEY,\n',
+    ]);
+    expect(file?.additionLines).toEqual([
+      '-- This is a test sql file\n',
+      '\n',
+      'CREATE TABLE users (\n',
+      'id BIGSERIAL PRIMARY KEY,\n',
+    ]);
+  });
+
   test('preserves leading BOM characters in parsed hunk lines', () => {
     const result = parsePatchFiles(
       [

From f6a26f2be34f251d874b50bde6db9810adbdee64 Mon Sep 17 00:00:00 2001
From: Amadeus Demarzi <amadeusdemarzi@gmail.com>
Date: Tue, 9 Jun 2026 18:40:20 -0700
Subject: [PATCH 2/3] Fix buggy parsing

Fix the original issue found from the bug report, but also harden
parsing a bit more
---
 packages/diffs/src/utils/parsePatchFiles.ts | 176 +++++++++++++++++++-
 packages/diffs/test/parsePatchFiles.test.ts |  76 +++++++++
 2 files changed, 248 insertions(+), 4 deletions(-)

diff --git a/packages/diffs/src/utils/parsePatchFiles.ts b/packages/diffs/src/utils/parsePatchFiles.ts
index cd866fad7..515b612ea 100644
--- a/packages/diffs/src/utils/parsePatchFiles.ts
+++ b/packages/diffs/src/utils/parsePatchFiles.ts
@@ -5,7 +5,6 @@ import {
   FILENAME_HEADER_REGEX_GIT,
   GIT_DIFF_FILE_BREAK_REGEX,
   INDEX_LINE_METADATA,
-  UNIFIED_DIFF_FILE_BREAK_REGEX,
 } from '../constants';
 import type {
   ChangeContent,
@@ -46,8 +45,8 @@ function _processPatch(
 ): ParsedPatch {
   const isGitDiff = isGitDiffPatch(data);
   const rawFiles = isGitDiff
-    ? splitAtLinePrefix(data, 'diff --git')
-    : data.split(UNIFIED_DIFF_FILE_BREAK_REGEX);
+    ? splitGitDiffFiles(data)
+    : splitUnifiedDiffFiles(data);
   let patchMetadata: string | undefined;
   const files: FileDiffMetadata[] = [];
   for (const fileOrPatchMetadata of rawFiles) {
@@ -69,7 +68,7 @@ function _processPatch(
       continue;
     } else if (
       !isGitDiff &&
-      !UNIFIED_DIFF_FILE_BREAK_REGEX.test(fileOrPatchMetadata)
+      !startsWithUnifiedDiffFileHeader(fileOrPatchMetadata)
     ) {
       if (patchMetadata == null) {
         patchMetadata = detachString(fileOrPatchMetadata);
@@ -341,6 +340,13 @@ function _processFile(
         parsedDeletionLines >= hunkData.deletionCount &&
         !rawLine.startsWith('\\')
       ) {
+        if (
+          throwOnError &&
+          isHunkBodyLine(rawLine) &&
+          !isFormatPatchVersionSeparator(rawLine)
+        ) {
+          throw Error('parsePatchContent: hunk has more lines than expected');
+        }
         break;
       }
 
@@ -354,6 +360,9 @@ function _processFile(
         firstChar !== ' ' &&
         firstChar !== '\\'
       ) {
+        if (throwOnError) {
+          throw Error('parsePatchContent: invalid hunk line');
+        }
         console.error(
           `parseLineType: Invalid firstChar: "${firstChar}", full line: "${rawLine}"`
         );
@@ -363,6 +372,9 @@ function _processFile(
 
       const type = parseRawLineType(firstChar);
       if (type === 'addition') {
+        if (throwOnError && parsedAdditionLines >= hunkData.additionCount) {
+          throw Error('parsePatchContent: hunk has too many addition lines');
+        }
         const line = getParsedLineContent(rawLine);
         if (currentContent == null || currentContent.type !== 'change') {
           currentContent = createContentGroup(
@@ -381,6 +393,9 @@ function _processFile(
         additionLines++;
         lastLineType = 'addition';
       } else if (type === 'deletion') {
+        if (throwOnError && parsedDeletionLines >= hunkData.deletionCount) {
+          throw Error('parsePatchContent: hunk has too many deletion lines');
+        }
         const line = getParsedLineContent(rawLine);
         if (currentContent == null || currentContent.type !== 'change') {
           currentContent = createContentGroup(
@@ -399,6 +414,13 @@ function _processFile(
         deletionLines++;
         lastLineType = 'deletion';
       } else if (type === 'context') {
+        if (
+          throwOnError &&
+          (parsedDeletionLines >= hunkData.deletionCount ||
+            parsedAdditionLines >= hunkData.additionCount)
+        ) {
+          throw Error('parsePatchContent: hunk has too many context lines');
+        }
         const line = getParsedLineContent(rawLine);
         if (currentContent == null || currentContent.type !== 'context') {
           currentContent = createContentGroup(
@@ -454,6 +476,14 @@ function _processFile(
       }
     }
 
+    if (
+      throwOnError &&
+      (parsedAdditionLines !== hunkData.additionCount ||
+        parsedDeletionLines !== hunkData.deletionCount)
+    ) {
+      throw Error('parsePatchContent: hunk line count mismatch');
+    }
+
     hunkData.additionLines = additionLines;
     hunkData.deletionLines = deletionLines;
 
@@ -488,6 +518,14 @@ function _processFile(
   if (currentFile == null) {
     return undefined;
   }
+  if (
+    throwOnError &&
+    isPartial &&
+    !isGitDiff &&
+    currentFile.hunks.length === 0
+  ) {
+    throw Error('parsePatchContent: unified file has no hunks');
+  }
 
   // Account for collapsed lines after the final hunk and increment the
   // split/unified counts properly
@@ -618,6 +656,136 @@ function splitWithNewlines(contents: string): string[] {
   return lines;
 }
 
+function splitGitDiffFiles(contents: string): string[] {
+  return splitAtLinePrefix(contents, 'diff --git');
+}
+
+function splitUnifiedDiffFiles(contents: string): string[] {
+  if (contents.length === 0) {
+    return [''];
+  }
+
+  const parts: string[] = [];
+  let partStartIndex = 0;
+  let lineStartIndex = 0;
+  let remainingDeletionLines = 0;
+  let remainingAdditionLines = 0;
+
+  while (lineStartIndex < contents.length) {
+    const nextLineStartIndex = getNextLineStartIndex(contents, lineStartIndex);
+    if (remainingDeletionLines <= 0 && remainingAdditionLines <= 0) {
+      if (isUnifiedDiffFileHeaderAt(contents, lineStartIndex)) {
+        if (lineStartIndex > partStartIndex) {
+          parts.push(contents.slice(partStartIndex, lineStartIndex));
+        }
+        partStartIndex = lineStartIndex;
+        lineStartIndex = getNextLineStartIndex(contents, nextLineStartIndex);
+        continue;
+      }
+
+      if (contents.startsWith('@@ -', lineStartIndex)) {
+        const fileHeader = parseHunkHeader(
+          contents.slice(lineStartIndex, nextLineStartIndex)
+        );
+        if (fileHeader != null) {
+          remainingDeletionLines = fileHeader.deletionCount;
+          remainingAdditionLines = fileHeader.additionCount;
+        }
+      }
+      lineStartIndex = nextLineStartIndex;
+      continue;
+    }
+
+    const firstChar = contents[lineStartIndex];
+    if (firstChar === '\\') {
+      lineStartIndex = nextLineStartIndex;
+      continue;
+    }
+
+    if (firstChar === ' ') {
+      remainingDeletionLines = Math.max(remainingDeletionLines - 1, 0);
+      remainingAdditionLines = Math.max(remainingAdditionLines - 1, 0);
+    } else if (firstChar === '-') {
+      remainingDeletionLines = Math.max(remainingDeletionLines - 1, 0);
+    } else if (firstChar === '+') {
+      remainingAdditionLines = Math.max(remainingAdditionLines - 1, 0);
+    }
+    lineStartIndex = nextLineStartIndex;
+  }
+
+  parts.push(contents.slice(partStartIndex));
+  return parts;
+}
+
+function startsWithUnifiedDiffFileHeader(contents: string): boolean {
+  return isUnifiedDiffFileHeaderAt(contents, 0);
+}
+
+function isUnifiedDiffFileHeaderAt(contents: string, lineStartIndex: number) {
+  const nextLineStartIndex = getNextLineStartIndex(contents, lineStartIndex);
+  return (
+    isUnifiedDiffHeaderLineAt(contents, lineStartIndex, '---') &&
+    isUnifiedDiffHeaderLineAt(contents, nextLineStartIndex, '+++')
+  );
+}
+
+function isUnifiedDiffHeaderLineAt(
+  contents: string,
+  lineStartIndex: number,
+  prefix: '---' | '+++'
+): boolean {
+  if (!contents.startsWith(prefix, lineStartIndex)) {
+    return false;
+  }
+
+  const separator = contents[lineStartIndex + prefix.length];
+  if (separator !== ' ' && separator !== '\t') {
+    return false;
+  }
+
+  for (
+    let index = lineStartIndex + prefix.length + 1;
+    index < contents.length;
+    index++
+  ) {
+    const char = contents[index];
+    if (char === '\n' || char === '\r') {
+      break;
+    }
+    if (char !== ' ' && char !== '\t') {
+      return true;
+    }
+  }
+  return false;
+}
+
+function getNextLineStartIndex(
+  contents: string,
+  lineStartIndex: number
+): number {
+  const newlineIndex = contents.indexOf('\n', lineStartIndex);
+  return newlineIndex === -1 ? contents.length : newlineIndex + 1;
+}
+
+function isHunkBodyLine(line: string): boolean {
+  const firstChar = line[0];
+  return firstChar === '+' || firstChar === '-' || firstChar === ' ';
+}
+
+function isFormatPatchVersionSeparator(line: string): boolean {
+  if (!line.startsWith('--')) {
+    return false;
+  }
+
+  for (let index = 2; index < line.length; index++) {
+    const char = line[index];
+    if (char !== ' ' && char !== '\t' && char !== '\n' && char !== '\r') {
+      return false;
+    }
+  }
+  return true;
+}
+
 function parseHunkHeader(line: string): ParsedHunkHeader | undefined {
   if (!line.startsWith('@@ -')) {
     return undefined;
diff --git a/packages/diffs/test/parsePatchFiles.test.ts b/packages/diffs/test/parsePatchFiles.test.ts
index 536175aaa..58efdaee5 100644
--- a/packages/diffs/test/parsePatchFiles.test.ts
+++ b/packages/diffs/test/parsePatchFiles.test.ts
@@ -63,6 +63,62 @@ describe('parsePatchFiles', () => {
     }
   });
 
+  test('throws in strict mode for malformed patch with bare newline in hunk', () => {
+    expect(() => parsePatchFiles(malformedPatch, undefined, true)).toThrow(
+      'invalid hunk line'
+    );
+  });
+
+  test('throws in strict mode when a hunk has too few lines', () => {
+    expect(() =>
+      parsePatchFiles(
+        [
+          '--- incomplete.txt\n',
+          '+++ incomplete.txt\n',
+          '@@ -1 +1 @@\n',
+          '-old line\n',
+        ].join(''),
+        undefined,
+        true
+      )
+    ).toThrow('hunk line count mismatch');
+  });
+
+  test('throws in strict mode when a hunk has extra content lines', () => {
+    expect(() =>
+      parsePatchFiles(
+        [
+          '--- extra.txt\n',
+          '+++ extra.txt\n',
+          '@@ -1 +1 @@\n',
+          '-old line\n',
+          '+new line\n',
+          '-extra old line\n',
+        ].join(''),
+        undefined,
+        true
+      )
+    ).toThrow('hunk has more lines than expected');
+  });
+
+  test('throws in strict mode when a fake unified header creates a file without hunks', () => {
+    expect(() =>
+      parsePatchFiles(
+        [
+          '--- markers.txt\n',
+          '+++ markers.txt\n',
+          '@@ -1 +1 @@\n',
+          '--- old marker\n',
+          '+++ new marker\n',
+          '--- fake-old-marker\n',
+          '+++ fake-new-marker\n',
+        ].join(''),
+        undefined,
+        true
+      )
+    ).toThrow('unified file has no hunks');
+  });
+
   test('ignores format-patch version trailers after the final hunk', () => {
     const consoleError = spyOn(console, 'error').mockImplementation(() => {});
     try {
@@ -115,6 +171,26 @@ describe('parsePatchFiles', () => {
     ]);
   });
 
+  test('does not split hunk content that resembles unified file headers', () => {
+    const result = parsePatchFiles(
+      [
+        '--- markers.txt\n',
+        '+++ markers.txt\n',
+        '@@ -1 +1 @@\n',
+        '--- old marker\n',
+        '+++ new marker\n',
+      ].join(''),
+      undefined,
+      true
+    );
+
+    const file = result[0]?.files[0];
+    expect(result[0]?.files).toHaveLength(1);
+    expect(file?.name).toBe('markers.txt');
+    expect(file?.deletionLines).toEqual(['-- old marker\n']);
+    expect(file?.additionLines).toEqual(['++ new marker\n']);
+  });
+
   test('preserves leading BOM characters in parsed hunk lines', () => {
     const result = parsePatchFiles(
       [

From 35ca8fcb132cd45abf199b278ec0bad64079f3d7 Mon Sep 17 00:00:00 2001
From: Amadeus Demarzi <amadeusdemarzi@gmail.com>
Date: Tue, 9 Jun 2026 19:00:01 -0700
Subject: [PATCH 3/3] Fix feedback from the bot

---
 packages/diffs/src/utils/parsePatchFiles.ts |  4 +++-
 packages/diffs/test/parsePatchFiles.test.ts | 23 +++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/packages/diffs/src/utils/parsePatchFiles.ts b/packages/diffs/src/utils/parsePatchFiles.ts
index 515b612ea..20e0ad779 100644
--- a/packages/diffs/src/utils/parsePatchFiles.ts
+++ b/packages/diffs/src/utils/parsePatchFiles.ts
@@ -670,6 +670,7 @@ function splitUnifiedDiffFiles(contents: string): string[] {
   let lineStartIndex = 0;
   let remainingDeletionLines = 0;
   let remainingAdditionLines = 0;
+  let hasOpenedUnifiedFile = false;
 
   while (lineStartIndex < contents.length) {
     const nextLineStartIndex = getNextLineStartIndex(contents, lineStartIndex);
@@ -679,11 +680,12 @@ function splitUnifiedDiffFiles(contents: string): string[] {
           parts.push(contents.slice(partStartIndex, lineStartIndex));
         }
         partStartIndex = lineStartIndex;
+        hasOpenedUnifiedFile = true;
         lineStartIndex = getNextLineStartIndex(contents, nextLineStartIndex);
         continue;
       }
 
-      if (contents.startsWith('@@ -', lineStartIndex)) {
+      if (hasOpenedUnifiedFile && contents.startsWith('@@ -', lineStartIndex)) {
         const fileHeader = parseHunkHeader(
           contents.slice(lineStartIndex, nextLineStartIndex)
         );
diff --git a/packages/diffs/test/parsePatchFiles.test.ts b/packages/diffs/test/parsePatchFiles.test.ts
index 58efdaee5..a013e8257 100644
--- a/packages/diffs/test/parsePatchFiles.test.ts
+++ b/packages/diffs/test/parsePatchFiles.test.ts
@@ -137,6 +137,29 @@ describe('parsePatchFiles', () => {
     }
   });
 
+  test('ignores hunk-looking patch metadata before unified file headers', () => {
+    const result = parsePatchFiles(
+      [
+        'Patch metadata mentions @@ -1 +1 @@ before the file header.\n',
+        '@@ -1 +1 @@ is here.\n',
+        '\n',
+        '--- metadata.txt\n',
+        '+++ metadata.txt\n',
+        '@@ -1 +1 @@\n',
+        '-old line\n',
+        '+new line\n',
+      ].join(''),
+      undefined,
+      true
+    );
+
+    expect(result[0]?.patchMetadata).toBe(
+      'Patch metadata mentions @@ -1 +1 @@ before the file header.\n@@ -1 +1 @@ is here.\n\n'
+    );
+    expect(result[0]?.files).toHaveLength(1);
+    expect(result[0]?.files[0]?.name).toBe('metadata.txt');
+  });
+
   test('parses deleted SQL comment lines as hunk content in unified patches', () => {
     const result = parsePatchFiles(
       [