resilient-tech · Abdeali099 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ Transaction Parser is an AI-powered add-on for ERPNext that automatically extrac
 
 ## Features
 
-**AI-Powered Extraction**: Uses advanced AI models (OpenAI, DeepSeek, Google Gemini) to extract structured data from PDFs
+* **AI-Powered Extraction**: Uses advanced AI models (OpenAI, DeepSeek, Google Gemini, Anthropic) to extract structured data from PDFs
 * **Multi-Document Support**: Handles Sales Orders and Purchase Invoices (Expenses)
 * **Regional Support**: Special handling for India-specific requirements (GSTIN, PAN, HSN codes)
 * **Email Integration**: Automatically processes documents from incoming emails
@@ -29,20 +29,23 @@ Navigate to **Transaction Parser Settings** and configure:
 * OpenAI gpt-4o
 * OpenAI gpt-4o-mini
 * OpenAI gpt-5
-* Google Gemini 2.5 pro
-* Google Gemini 2.5 flash
+* OpenAI gpt-5-mini
+* Google Gemini Pro-2.5
+* Google Gemini Flash-2.5
+* Claude Haiku-4.5
 
   <img width="773" height="291" alt="image" src="https://github.com/user-attachments/assets/fc40bea1-1e11-4ef3-bcdf-f6c1db8585c8" />
 
 3\. API Keys Setup
 
 Add your API keys for the AI services:
 
-| Service Provider | Models Supported                 |
-|:-----------------|:---------------------------------|
-| OpenAI           | gpt-4o, gpt-4o-mini , gpt-5      |
-| DeepSeek         | deepseek-chat, deepseek-reasoner |
-| Google           | gemini 2.5 pro, gemini 2.5 flash |
+| Service Provider | Models Supported                       |
+|:-----------------|:---------------------------------------|
+| OpenAI           | gpt-4o, gpt-4o-mini, gpt-5, gpt-5-mini |
+| DeepSeek         | deepseek-chat, deepseek-reasoner       |
+| Google           | gemini-2.5-pro, gemini-2.5-flash       |
+| Anthropic        | claude-haiku-4-5                       |
 
   <img width="800" height="148" alt="image" src="https://github.com/user-attachments/assets/77f30bd8-59a1-4b66-8bf4-964bc2347ce4" />
 
@@ -94,15 +97,17 @@ When enabled, the system automatically:
 
 ## Model Comparison
 
-| Model             | Provider | Best For                                                | Speed     | Cost        |
-|:------------------|:---------|:--------------------------------------------------------|:----------|:------------|
-| gpt-5             | OpenAI   | State-of-the-art accuracy, complex multi-page documents | Medium    | High        |
-| gpt-4o            | OpenAI   | Complex documents, high accuracy                        | Medium    | Medium-High |
-| gpt-4o-mini       | OpenAI   | Cost-effective, good accuracy                           | Fast      | Low         |
-| gemini-2.5-pro    | Google   | Advanced reasoning, large context window                | Medium    | Medium      |
-| gemini-2.5-flash  | Google   | Fast processing, bulk documents                         | Very Fast | Low         |
-| deepseek-chat     | DeepSeek | General purpose extraction                              | Fast      | Low         |
-| deepseek-reasoner | DeepSeek | Complex reasoning tasks                                 | Slow      | Medium      |
+| Model             | Provider  | Best For                                                | Speed     | Cost        |
+|:------------------|:----------|:--------------------------------------------------------|:----------|:------------|
+| gpt-5             | OpenAI    | State-of-the-art accuracy, complex multi-page documents | Medium    | High        |
+| gpt-5-mini        | OpenAI    | Efficient reasoning, cost-effective                     | Fast      | Medium      |
+| gpt-4o            | OpenAI    | Complex documents, high accuracy                        | Medium    | Medium-High |
+| gpt-4o-mini       | OpenAI    | Cost-effective, good accuracy                           | Fast      | Low         |
+| gemini-2.5-pro    | Google    | Advanced reasoning, large context window                | Medium    | Medium      |
+| gemini-2.5-flash  | Google    | Fast processing, bulk documents                         | Very Fast | Low         |
+| deepseek-chat     | DeepSeek  | General purpose extraction                              | Fast      | Low         |
+| deepseek-reasoner | DeepSeek  | Complex reasoning tasks                                 | Slow      | Medium      |
+| claude-haiku-4-5  | Anthropic | Fast, lightweight tasks                                 | Fast      | Low         |
 
 ## India-Specific Features
 

diff --git a/...ser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py b/...ser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
@@ -63,6 +63,23 @@ def validate(self):
         self.validate_incoming_email_accounts()
         self.validate_party_email()
         self.validate_json_fields()
+        self.warn_on_pdf_processor_change()
+
+    def warn_on_pdf_processor_change(self):
+        if not self.has_value_changed("pdf_processor"):
+            return
+
+        frappe.msgprint(
+            _(
+                "Make sure the required dependencies for {0} are installed.<br>"
+                "See {1} for setup instructions."
+            ).format(
+                frappe.bold(self.pdf_processor),
+                '<a href="https://github.com/resilient-tech/transaction-parser#pdf-processor-setup" target="_blank">PDF Processor Setup</a>',
+            ),
+            title=_("PDF Processor Changed"),
+            indicator="orange",
+        )
 
     def validate_lookback_count(self):
         if self.invoice_lookback_count <= 0:

diff --git a/transaction_parser/transaction_parser/utils/pdf_processor.py b/transaction_parser/transaction_parser/utils/pdf_processor.py
@@ -95,7 +95,10 @@ class DoclingPDFProcessor(PDFProcessor):
 
     _converter = None
 
-    # TODO: Give detail of install `docling` system dependency and opencv-python-headless for OCR
+    SETUP_URL = (
+        "https://github.com/resilient-tech/transaction-parser#3-docling-optional"
+    )
+
     def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
         try:
             from docling.datamodel.base_models import ConversionStatus, DocumentStream
@@ -104,8 +107,9 @@ def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str
                 title=_("Missing Dependency"),
                 msg=_(
                     "docling is not installed.<br>"
-                    "Install it with: <code>bench pip install transaction_parser[docling]</code>"
-                ),
+                    "Install it with: <code>bench pip install transaction_parser[docling]</code><br>"
+                    "See <a href='{0}'>setup instructions</a> for more details."
+                ).format(self.SETUP_URL),
             )
 
         file = self.get_sanitized_file(file, page_limit)
@@ -132,12 +136,25 @@ def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str
 
     def _get_converter(self):
         if DoclingPDFProcessor._converter is None:
-            from docling.datamodel.base_models import InputFormat
-            from docling.datamodel.pipeline_options import (
-                EasyOcrOptions,
-                PdfPipelineOptions,
-            )
-            from docling.document_converter import DocumentConverter, PdfFormatOption
+            try:
+                from docling.datamodel.base_models import InputFormat
+                from docling.datamodel.pipeline_options import (
+                    EasyOcrOptions,
+                    PdfPipelineOptions,
+                )
+                from docling.document_converter import (
+                    DocumentConverter,
+                    PdfFormatOption,
+                )
+            except ImportError:
+                frappe.throw(
+                    title=_("Missing Dependency"),
+                    msg=_(
+                        "docling is not installed.<br>"
+                        "Install it with: <code>bench pip install transaction_parser[docling]</code><br>"
+                        "See <a href='{0}'>setup instructions</a> for more details."
+                    ).format(self.SETUP_URL),
+                )
 
             pipeline_options = PdfPipelineOptions()
             pipeline_options.do_ocr = True
@@ -157,6 +174,10 @@ class PDFtoTextProcessor(PDFProcessor):
     PDF processor using pdftotext for layout-preserving text extraction.
     """
 
+    SETUP_URL = (
+        "https://github.com/resilient-tech/transaction-parser#1-pdftotext-default"
+    )
+
     def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
         file = self.get_sanitized_file(file, page_limit)
         return self.get_text(file)
@@ -171,8 +192,9 @@ def get_text(self, file: io.BytesIO) -> str:
                     "pdftotext is not installed.<br>"
                     "Install OS dependencies first if not already installed: "
                     "<code>sudo apt install build-essential libpoppler-cpp-dev pkg-config python3-dev</code>"
-                    "<br>Then run: <code>bench setup requirements</code>"
-                ),
+                    "<br>Then run: <code>bench setup requirements</code><br>"
+                    "See <a href='{0}'>setup instructions</a> for more details."
+                ).format(self.SETUP_URL),
             )
 
         pdf = pdftotext.PDF(file, physical=True)
@@ -185,13 +207,16 @@ class OCRMyPDFProcessor(PDFProcessor):
     PDF processor using PyMuPDF for text extraction and OCRMyPDF for OCR.
     """
 
+    SETUP_URL = (
+        "https://github.com/resilient-tech/transaction-parser#2-ocrmypdf-optional"
+    )
+
     def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
         file = self.get_sanitized_file(file, page_limit)
         file = self.apply_ocr(file)
 
         return self.get_text(file)
 
-    # TODO: Give detail of install `tesseract-ocr` system dependency
     def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
         try:
             import ocrmypdf
@@ -200,8 +225,9 @@ def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
                 title=_("Missing Dependency"),
                 msg=_(
                     "ocrmypdf is not installed.<br>"
-                    "Install it with: <code>bench pip install transaction_parser[ocrmypdf]</code>"
-                ),
+                    "Install it with: <code>bench pip install transaction_parser[ocrmypdf]</code><br>"
+                    "See <a href='{0}'>setup instructions</a> for more details."
+                ).format(self.SETUP_URL),
             )
 
         file.seek(0)