diff --git a/README.md b/README.md
index 000bdfa..f3669c8 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ Transaction Parser is an AI-powered add-on for ERPNext that automatically extrac
## Features
-**AI-Powered Extraction**: Uses advanced AI models (OpenAI, DeepSeek, Google Gemini) to extract structured data from PDFs
+**AI-Powered Extraction**: Uses advanced AI models (OpenAI, DeepSeek, Google Gemini, Anthropic) to extract structured data from PDFs
* **Multi-Document Support**: Handles Sales Orders and Purchase Invoices (Expenses)
* **Regional Support**: Special handling for India-specific requirements (GSTIN, PAN, HSN codes)
* **Email Integration**: Automatically processes documents from incoming emails
@@ -29,8 +29,10 @@ Navigate to **Transaction Parser Settings** and configure:
* OpenAI gpt-4o
* OpenAI gpt-4o-mini
* OpenAI gpt-5
-* Google Gemini 2.5 pro
-* Google Gemini 2.5 flash
+* OpenAI gpt-5-mini
+* Google Gemini Pro-2.5
+* Google Gemini Flash-2.5
+* Claude Haiku-4.5
@@ -38,11 +40,12 @@ Navigate to **Transaction Parser Settings** and configure:
Add your API keys for the AI services:
-| Service Provider | Models Supported |
-|:-----------------|:---------------------------------|
-| OpenAI | gpt-4o, gpt-4o-mini , gpt-5 |
-| DeepSeek | deepseek-chat, deepseek-reasoner |
-| Google | gemini 2.5 pro, gemini 2.5 flash |
+| Service Provider | Models Supported |
+|:-----------------|:---------------------------------------|
+| OpenAI | gpt-4o, gpt-4o-mini, gpt-5, gpt-5-mini |
+| DeepSeek | deepseek-chat, deepseek-reasoner |
+| Google | gemini-2.5-pro, gemini-2.5-flash |
+| Anthropic | claude-haiku-4-5 |
@@ -94,15 +97,17 @@ When enabled, the system automatically:
## Model Comparison
-| Model | Provider | Best For | Speed | Cost |
-|:------------------|:---------|:--------------------------------------------------------|:----------|:------------|
-| gpt-5 | OpenAI | State-of-the-art accuracy, complex multi-page documents | Medium | High |
-| gpt-4o | OpenAI | Complex documents, high accuracy | Medium | Medium-High |
-| gpt-4o-mini | OpenAI | Cost-effective, good accuracy | Fast | Low |
-| gemini-2.5-pro | Google | Advanced reasoning, large context window | Medium | Medium |
-| gemini-2.5-flash | Google | Fast processing, bulk documents | Very Fast | Low |
-| deepseek-chat | DeepSeek | General purpose extraction | Fast | Low |
-| deepseek-reasoner | DeepSeek | Complex reasoning tasks | Slow | Medium |
+| Model | Provider | Best For | Speed | Cost |
+|:------------------|:----------|:--------------------------------------------------------|:----------|:------------|
+| gpt-5 | OpenAI | State-of-the-art accuracy, complex multi-page documents | Medium | High |
+| gpt-5-mini | OpenAI | Efficient reasoning, cost-effective | Fast | Medium |
+| gpt-4o | OpenAI | Complex documents, high accuracy | Medium | Medium-High |
+| gpt-4o-mini | OpenAI | Cost-effective, good accuracy | Fast | Low |
+| gemini-2.5-pro | Google | Advanced reasoning, large context window | Medium | Medium |
+| gemini-2.5-flash | Google | Fast processing, bulk documents | Very Fast | Low |
+| deepseek-chat | DeepSeek | General purpose extraction | Fast | Low |
+| deepseek-reasoner | DeepSeek | Complex reasoning tasks | Slow | Medium |
+| claude-haiku-4-5 | Anthropic | Fast, lightweight tasks | Fast | Low |
## India-Specific Features
diff --git a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
index 0a931f6..192bbee 100644
--- a/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
+++ b/transaction_parser/transaction_parser/doctype/transaction_parser_settings/transaction_parser_settings.py
@@ -63,6 +63,23 @@ def validate(self):
self.validate_incoming_email_accounts()
self.validate_party_email()
self.validate_json_fields()
+ self.warn_on_pdf_processor_change()
+
+ def warn_on_pdf_processor_change(self):
+ if not self.has_value_changed("pdf_processor"):
+ return
+
+ frappe.msgprint(
+ _(
+ "Make sure the required dependencies for {0} are installed.<br>"
+ "See {1} for setup instructions."
+ ).format(
+ frappe.bold(self.pdf_processor),
+ 'PDF Processor Setup',
+ ),
+ title=_("PDF Processor Changed"),
+ indicator="orange",
+ )
def validate_lookback_count(self):
if self.invoice_lookback_count <= 0:
diff --git a/transaction_parser/transaction_parser/utils/pdf_processor.py b/transaction_parser/transaction_parser/utils/pdf_processor.py
index 2d62457..f3df336 100644
--- a/transaction_parser/transaction_parser/utils/pdf_processor.py
+++ b/transaction_parser/transaction_parser/utils/pdf_processor.py
@@ -95,7 +95,10 @@ class DoclingPDFProcessor(PDFProcessor):
_converter = None
- # TODO: Give detail of install `docling` system dependency and opencv-python-headless for OCR
+ SETUP_URL = (
+ "https://github.com/resilient-tech/transaction-parser#3-docling-optional"
+ )
+
def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
try:
from docling.datamodel.base_models import ConversionStatus, DocumentStream
@@ -104,8 +107,9 @@ def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str
title=_("Missing Dependency"),
msg=_(
"docling is not installed.
"
- "Install it with: bench pip install transaction_parser[docling]"
- ),
+ "Install it with: bench pip install transaction_parser[docling]<br>"
+ "See <a href='{0}'>setup instructions</a> for more details."
+ ).format(self.SETUP_URL),
)
file = self.get_sanitized_file(file, page_limit)
@@ -132,12 +136,25 @@ def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str
def _get_converter(self):
if DoclingPDFProcessor._converter is None:
- from docling.datamodel.base_models import InputFormat
- from docling.datamodel.pipeline_options import (
- EasyOcrOptions,
- PdfPipelineOptions,
- )
- from docling.document_converter import DocumentConverter, PdfFormatOption
+ try:
+ from docling.datamodel.base_models import InputFormat
+ from docling.datamodel.pipeline_options import (
+ EasyOcrOptions,
+ PdfPipelineOptions,
+ )
+ from docling.document_converter import (
+ DocumentConverter,
+ PdfFormatOption,
+ )
+ except ImportError:
+ frappe.throw(
+ title=_("Missing Dependency"),
+ msg=_(
+ "docling is not installed.<br>"
+ "Install it with: bench pip install transaction_parser[docling]<br>"
+ "See <a href='{0}'>setup instructions</a> for more details."
+ ).format(self.SETUP_URL),
+ )
pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = True
@@ -157,6 +174,10 @@ class PDFtoTextProcessor(PDFProcessor):
PDF processor using pdftotext for layout-preserving text extraction.
"""
+ SETUP_URL = (
+ "https://github.com/resilient-tech/transaction-parser#1-pdftotext-default"
+ )
+
def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
file = self.get_sanitized_file(file, page_limit)
return self.get_text(file)
@@ -171,8 +192,9 @@ def get_text(self, file: io.BytesIO) -> str:
"pdftotext is not installed.
"
"Install OS dependencies first if not already installed: "
"sudo apt install build-essential libpoppler-cpp-dev pkg-config python3-dev"
- "<br>Then run: bench setup requirements"
- ),
+ "<br>Then run: bench setup requirements<br>"
+ "See <a href='{0}'>setup instructions</a> for more details."
+ ).format(self.SETUP_URL),
)
pdf = pdftotext.PDF(file, physical=True)
@@ -185,13 +207,16 @@ class OCRMyPDFProcessor(PDFProcessor):
PDF processor using PyMuPDF for text extraction and OCRMyPDF for OCR.
"""
+ SETUP_URL = (
+ "https://github.com/resilient-tech/transaction-parser#2-ocrmypdf-optional"
+ )
+
def process(self, file: io.BytesIO | File, page_limit: int | None = None) -> str:
file = self.get_sanitized_file(file, page_limit)
file = self.apply_ocr(file)
return self.get_text(file)
- # TODO: Give detail of install `tesseract-ocr` system dependency
def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
try:
import ocrmypdf
@@ -200,8 +225,9 @@ def apply_ocr(self, file: io.BytesIO) -> io.BytesIO:
title=_("Missing Dependency"),
msg=_(
"ocrmypdf is not installed.
"
- "Install it with: bench pip install transaction_parser[ocrmypdf]"
- ),
+ "Install it with: bench pip install transaction_parser[ocrmypdf]<br>"
+ "See <a href='{0}'>setup instructions</a> for more details."
+ ).format(self.SETUP_URL),
)
file.seek(0)