From 0e54fc34d382467cf7ec82097659d2087495407d Mon Sep 17 00:00:00 2001
From: Abdeali Chharchhoda <abdealiking786@gmail.com>
Date: Thu, 16 Apr 2026 11:20:50 +0530
Subject: [PATCH 1/4] fix: update README to include Anthropic model and correct
 model names

---
 README.md | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)
diff --git a/README.md b/README.md
index 000bdfa..f3669c8 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ Transaction Parser is an AI-powered add-on for ERPNext that automatically extrac
 
 ## Features
 
-**AI-Powered Extraction**: Uses advanced AI models (OpenAI, DeepSeek, Google Gemini) to extract structured data from PDFs
+* **AI-Powered Extraction**: Uses advanced AI models (OpenAI, DeepSeek, Google Gemini, Anthropic) to extract structured data from PDFs
 * **Multi-Document Support**: Handles Sales Orders and Purchase Invoices (Expenses)
 * **Regional Support**: Special handling for India-specific requirements (GSTIN, PAN, HSN codes)
 * **Email Integration**: Automatically processes documents from incoming emails
@@ -29,8 +29,10 @@ Navigate to **Transaction Parser Settings** and configure:
 * OpenAI gpt-4o
 * OpenAI gpt-4o-mini
 * OpenAI gpt-5
-* Google Gemini 2.5 pro
-* Google Gemini 2.5 flash
+* OpenAI gpt-5-mini
+* Google Gemini Pro-2.5
+* Google Gemini Flash-2.5
+* Claude Haiku-4.5
 
   <img width="773" height="291" alt="image" src="https://github.com/user-attachments/assets/fc40bea1-1e11-4ef3-bcdf-f6c1db8585c8" />
 
@@ -38,11 +40,12 @@ Navigate to **Transaction Parser Settings** and configure:
 
 Add your API keys for the AI services:
 
-| Service Provider | Models Supported                 |
-|:-----------------|:---------------------------------|
-| OpenAI           | gpt-4o, gpt-4o-mini , gpt-5      |
-| DeepSeek         | deepseek-chat, deepseek-reasoner |
-| Google           | gemini 2.5 pro, gemini 2.5 flash |
+| Service Provider | Models Supported                       |
+|:-----------------|:---------------------------------------|
+| OpenAI           | gpt-4o, gpt-4o-mini, gpt-5, gpt-5-mini |
+| DeepSeek         | deepseek-chat, deepseek-reasoner       |
+| Google           | gemini-2.5-pro, gemini-2.5-flash       |
+| Anthropic        | claude-haiku-4-5                       |
 
   <img width="800" height="148" alt="image" src="https://github.com/user-attachments/assets/77f30bd8-59a1-4b66-8bf4-964bc2347ce4" />
 
@@ -94,15 +97,17 @@ When enabled, the system automatically:
 
 ## Model Comparison
 
-| Model             | Provider | Best For                                                | Speed     | Cost        |
-|:------------------|:---------|:--------------------------------------------------------|:----------|:------------|
-| gpt-5             | OpenAI   | State-of-the-art accuracy, complex multi-page documents | Medium    | High        |
-| gpt-4o            | OpenAI   | Complex documents, high accuracy                        | Medium    | Medium-High |
-| gpt-4o-mini       | OpenAI   | Cost-effective, good accuracy                           | Fast      | Low         |
-| gemini-2.5-pro    | Google   | Advanced reasoning, large context window                | Medium    | Medium      |
-| gemini-2.5-flash  | Google   | Fast processing, bulk documents                         | Very Fast | Low         |
-| deepseek-chat     | DeepSeek | General purpose extraction                              | Fast      | Low         |
-| deepseek-reasoner | DeepSeek | Complex reasoning tasks                                 | Slow      | Medium      |
+| Model             | Provider  | Best For                                                | Speed     | Cost        |
+|:------------------|:----------|:--------------------------------------------------------|:----------|:------------|
+| gpt-5             | OpenAI    | State-of-the-art accuracy, complex multi-page documents | Medium    | High        |
+| gpt-5-mini        | OpenAI    | Efficient reasoning, cost-effective                     | Fast      | Medium      |
+| gpt-4o            | OpenAI    | Complex documents, high accuracy                        | Medium    | Medium-High |
+| gpt-4o-mini       | OpenAI    | Cost-effective, good accuracy                           | Fast      | Low         |
+| gemini-2.5-pro    | Google    | Advanced reasoning, large context window                | Medium    | Medium      |
+| gemini-2.5-flash  | Google    | Fast processing, bulk documents                         | Very Fast | Low         |
+| deepseek-chat     | DeepSeek  | General purpose extraction                              | Fast      | Low         |
+| deepseek-reasoner | DeepSeek  | Complex reasoning tasks                                 | Slow      | Medium      |
+| claude-haiku-4-5  | Anthropic | Fast, lightweight tasks                                 | Fast      | Low         |
 
 ## India-Specific Features
 

From 4a82d26f502a0868c53b7b338179dcdd4e48b96d Mon Sep 17 00:00:00 2001
From: Abdeali Chharchhoda <abdealiking786@gmail.com>
Date: Thu, 16 Apr 2026 11:23:06 +0530
Subject: [PATCH 2/4] fix: handle missing docling dependency with informative
 error message

---
 .../transaction_parser/utils/pdf_processor.py | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/transaction_parser/transaction_parser/utils/pdf_processor.py b/transaction_parser/transaction_parser/utils/pdf_processor.py
index 2d62457..83366da 100644
--- a/transaction_parser/transaction_parser/utils/pdf_processor.py
+++ b/transaction_parser/transaction_parser/utils/pdf_processor.py
@@ -132,12 +132,24 @@ def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str
 
     def _get_converter(self):
         if DoclingPDFProcessor._converter is None:
-            from docling.datamodel.base_models import InputFormat
-            from docling.datamodel.pipeline_options import (
-                EasyOcrOptions,
-                PdfPipelineOptions,
-            )
-            from docling.document_converter import DocumentConverter, PdfFormatOption
+            try:
+                from docling.datamodel.base_models import InputFormat
+                from docling.datamodel.pipeline_options import (
+                    EasyOcrOptions,
+                    PdfPipelineOptions,
+                )
+                from docling.document_converter import (
+                    DocumentConverter,
+                    PdfFormatOption,
+                )
+            except ImportError:
+                frappe.throw(
+                    title=_("Missing Dependency"),
+                    msg=_(
+                        "docling is not installed.<br>"
+                        "Install it with: <code>bench pip install transaction_parser[docling]</code>"
+                    ),
+                )
 
             pipeline_options = PdfPipelineOptions()
             pipeline_options.do_ocr = True

From 356e21377b5a54fcfb396af62a847cde0906e657 Mon Sep 17 00:00:00 2001
From: Abdeali Chharchhoda <abdealiking786@gmail.com>
Date: Thu, 16 Apr 2026 11:34:06 +0530
Subject: [PATCH 3/4] fix: enhance setup instructions for Docling, PDFtoText,
 and OCRMyPDF processors

---
 .../transaction_parser/utils/pdf_processor.py | 34 +++++++++++++------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/transaction_parser/transaction_parser/utils/pdf_processor.py b/transaction_parser/transaction_parser/utils/pdf_processor.py
index 83366da..f3df336 100644
--- a/transaction_parser/transaction_parser/utils/pdf_processor.py
+++ b/transaction_parser/transaction_parser/utils/pdf_processor.py
@@ -95,7 +95,10 @@ class DoclingPDFProcessor(PDFProcessor):
 
     _converter = None
 
-    # TODO: Give detail of install `docling` system dependency and opencv-python-headless for OCR
+    SETUP_URL = (
+        "https://github.com/resilient-tech/transaction-parser#3-docling-optional"
+    )
+
     def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
         try:
             from docling.datamodel.base_models import ConversionStatus, DocumentStream
@@ -104,8 +107,9 @@ def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str
                 title=_("Missing Dependency"),
                 msg=_(
                     "docling is not installed.<br>"
-                    "Install it with: <code>bench pip install transaction_parser[docling]</code>"
-                ),
+                    "Install it with: <code>bench pip install transaction_parser[docling]</code><br>"
+                    "See <a href='{0}'>setup instructions</a> for more details."
+                ).format(self.SETUP_URL),
             )
 
         file = self.get_sanitized_file(file, page_limit)
@@ -147,8 +151,9 @@ def _get_converter(self):
                     title=_("Missing Dependency"),
                     msg=_(
                         "docling is not installed.<br>"
-                        "Install it with: <code>bench pip install transaction_parser[docling]</code>"
-                    ),
+                        "Install it with: <code>bench pip install transaction_parser[docling]</code><br>"
+                        "See <a href='{0}'>setup instructions</a> for more details."
+                    ).format(self.SETUP_URL),
                 )
 
             pipeline_options = PdfPipelineOptions()
@@ -169,6 +174,10 @@ class PDFtoTextProcessor(PDFProcessor):
     PDF processor using pdftotext for layout-preserving text extraction.
     """
 
+    SETUP_URL = (
+        "https://github.com/resilient-tech/transaction-parser#1-pdftotext-default"
+    )
+
     def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
         file = self.get_sanitized_file(file, page_limit)
         return self.get_text(file)
@@ -183,8 +192,9 @@ def get_text(self, file: io.BytesIO) -> str:
                     "pdftotext is not installed.<br>"
                     "Install OS dependencies first if not already installed: "
                     "<code>sudo apt install build-essential libpoppler-cpp-dev pkg-config python3-dev</code>"
-                    "<br>Then run: <code>bench setup requirements</code>"
-                ),
+                    "<br>Then run: <code>bench setup requirements</code><br>"
+                    "See <a href='{0}'>setup instructions</a> for more details."
+                ).format(self.SETUP_URL),
             )
 
         pdf = pdftotext.PDF(file, physical=True)
@@ -197,13 +207,16 @@ class OCRMyPDFProcessor(PDFProcessor):
     PDF processor using PyMuPDF for text extraction and OCRMyPDF for OCR.
     """
 
+    SETUP_URL = (
+        "https://github.com/resilient-tech/transaction-parser#2-ocrmypdf-optional"
+    )
+
     def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
         file = self.get_sanitized_file(file, page_limit)
         file = self.apply_ocr(file)
 
         return self.get_text(file)
 
-    # TODO: Give detail of install `tesseract-ocr` system dependency
     def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
         try:
             import ocrmypdf
@@ -212,8 +225,9 @@ def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
                 title=_("Missing Dependency"),
                 msg=_(
                     "ocrmypdf is not installed.<br>"
-                    "Install it with: <code>bench pip install transaction_parser[ocrmypdf]</code>"
-                ),
+                    "Install it with: <code>bench pip install transaction_parser[ocrmypdf]</code><br>"
+                    "See <a href='{0}'>setup instructions</a> for more details."
+                ).format(self.SETUP_URL),
             )
 
         file.seek(0)

From 4ebb383394b5619e97e46f4a5d63eb5a6b113324 Mon Sep 17 00:00:00 2001
From: Abdeali Chharchhoda <abdealiking786@gmail.com>
Date: Thu, 16 Apr 2026 11:41:43 +0530
Subject: [PATCH 4/4] fix: add warning for PDF processor changes with setup
 instructions

---
 .../transaction_parser_settings.py              | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
index 0a931f6..192bbee 100644
--- a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
+++ b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
@@ -63,6 +63,23 @@ def validate(self):
         self.validate_incoming_email_accounts()
         self.validate_party_email()
         self.validate_json_fields()
+        self.warn_on_pdf_processor_change()
+
+    def warn_on_pdf_processor_change(self):
+        if not self.has_value_changed("pdf_processor"):
+            return
+
+        frappe.msgprint(
+            _(
+                "Make sure the required dependencies for {0} are installed.<br>"
+                "See {1} for setup instructions."
+            ).format(
+                frappe.bold(self.pdf_processor),
+                '<a href="https://github.com/resilient-tech/transaction-parser#pdf-processor-setup" target="_blank">PDF Processor Setup</a>',
+            ),
+            title=_("PDF Processor Changed"),
+            indicator="orange",
+        )
 
     def validate_lookback_count(self):
         if self.invoice_lookback_count <= 0: