Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/build_addon.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ jobs:
pip install --upgrade pip wheel
pip install -r requirements.txt
pip install -r requirements-libs.txt --target ./addon/globalPlugins/CaptionLocal/libs --platform win_amd64 --only-binary=:all: --no-binary=:none:
pip install miniqinference[cli]==0.1.2 --target ./addon/globalPlugins/CaptionLocal/libs --upgrade --platform win_amd64 --only-binary=:all: --no-binary=:none:

- name: Code checks
run: |
Expand Down
3 changes: 3 additions & 0 deletions addon/globalPlugins/CaptionLocal/captioner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,8 @@ def imageCaptionerFactory(
from .qwen import QwenImageCaptioner
modelDir = os.path.dirname(configPath)
return QwenImageCaptioner(modelDir)
elif modelArchitecture == "CustomEndpoint":
from .customEndpoint import CustomEndpointCaptioner
return CustomEndpointCaptioner.from_config(configPath)
else:
raise NotImplementedError(f"Unsupported model architecture: {modelArchitecture}")
137 changes: 137 additions & 0 deletions addon/globalPlugins/CaptionLocal/captioner/customEndpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# -*- coding: UTF-8 -*-
# A part of NonVisual Desktop Access (NVDA)
# Copyright (C) 2025 NV Access Limited, Tianze
# This file may be used under the terms of the GNU General Public License, version 2 or later, as modified by the NVDA license.
# For full terms and any additional permissions, see the NVDA license file: https://github.com/nvaccess/nvda/blob/master/copying.txt

import base64
import json
import requests
import io
from typing import Callable
from logHandler import log
from .base import ImageCaptioner

# NVDA normally provides the gettext function ``_``. When it is not defined
# (the bare name lookup below raises NameError), fall back to an identity
# function so that translatable strings are returned unchanged.
try:
    _
except NameError:
    def _(x):
        """Return *x* unchanged (no-op stand-in for gettext)."""
        return x

class CustomEndpointCaptioner(ImageCaptioner):
    """Captioner using custom OpenAI-compatible API endpoints.

    The image is base64-encoded into a ``data:`` URL and posted together with
    a text prompt to ``<endpoint>/chat/completions``. When a token callback is
    supplied, the reply is requested as a stream and forwarded piece by piece.
    """

    # Leading bytes used to label the uploaded image with a matching MIME type.
    _IMAGE_SIGNATURES = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
        (b"BM", "image/bmp"),
    )
    # Used when the image format is not recognised (the historical behaviour).
    _DEFAULT_MIME_TYPE = "image/jpeg"
    # Seconds passed to ``requests.post`` as ``timeout``.
    _REQUEST_TIMEOUT_SECONDS = 60

    def __init__(self, endpoint: str, api_key: str | None, model: str, prompt: str | None = None):
        """
        Initialize the custom endpoint captioner.

        :param endpoint: Base URL of the API; trailing slashes are removed.
        :param api_key: API key. When empty or None no Authorization header is sent.
        :param model: Model name.
        :param prompt: Custom prompt. Falls back to a translated default when empty or None.
        """
        self.endpoint = endpoint.rstrip('/')
        self.api_key = api_key
        self.model = model
        # Translators: default prompt for image captioning
        self.prompt = prompt or _("Please describe the picture in one sentence")

    @classmethod
    def from_config(cls, config_path: str) -> "CustomEndpointCaptioner":
        """
        Create a CustomEndpointCaptioner from a JSON config file.

        :param config_path: Path of a JSON file holding an object with the keys
            ``endpoint`` and ``model`` (required) and ``api_key`` and ``prompt`` (optional).
        :return: A captioner built from the file's values.
        :raises ValueError: If the file does not hold a JSON object or if
            ``endpoint`` or ``model`` is missing or empty.
        """
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        # Valid JSON can still be a list/string/number; report that as an
        # invalid configuration instead of failing with AttributeError.
        if not isinstance(config, dict):
            raise ValueError(f"Invalid custom endpoint configuration in {config_path}")

        endpoint = config.get("endpoint")
        model = config.get("model")
        if not (endpoint and model):
            raise ValueError(f"Invalid custom endpoint configuration in {config_path}")

        return cls(endpoint, config.get("api_key"), model, config.get("prompt"))

    @classmethod
    def _guessMimeType(cls, data: bytes) -> str:
        """Return the MIME type matching the leading bytes of *data*, defaulting to JPEG."""
        header = bytes(data[:12])
        for signature, mimeType in cls._IMAGE_SIGNATURES:
            if header.startswith(signature):
                return mimeType
        # WebP is a RIFF container: "RIFF" <4 size bytes> "WEBP".
        if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
            return "image/webp"
        return cls._DEFAULT_MIME_TYPE

    @staticmethod
    def _readStream(response, onToken: Callable[[str], None]) -> str:
        """Consume ``data:`` lines from a streamed response, forwarding each text piece to *onToken*.

        :return: All received pieces joined together, stripped of surrounding whitespace.
        """
        pieces = []
        for rawLine in response.iter_lines():
            if not rawLine:
                continue
            line = rawLine.decode('utf-8').strip()
            if not line.startswith("data:"):
                continue
            dataStr = line[len("data:"):].strip()
            if dataStr == "[DONE]":
                break
            try:
                event = json.loads(dataStr)
            except json.JSONDecodeError:
                # Best effort: skip chunks that are not valid JSON.
                continue
            if not isinstance(event, dict):
                continue
            choices = event.get('choices') or []
            if not choices or not isinstance(choices[0], dict):
                continue
            # "delta" and "content" may be present but null.
            delta = choices[0].get('delta') or {}
            content = delta.get('content') if isinstance(delta, dict) else None
            if not content:
                continue
            pieces.append(content)
            try:
                onToken(content)
            except Exception:
                # A failing callback must not abort the caption, but it
                # should not disappear silently either.
                log.exception("onToken callback failed")
        return "".join(pieces).strip()

    def generateCaption(
        self,
        image: str | bytes,
        maxLength: int | None = None,
        onToken: Callable[[str], None] | None = None,
    ) -> str:
        """
        Generate caption via custom endpoint.

        :param image: Path of an image file, or the raw image bytes.
        :param maxLength: When truthy, sent to the endpoint as ``max_tokens``.
        :param onToken: When given, the reply is streamed and every received
            text piece is passed to this callback.
        :return: The caption text with surrounding whitespace removed.
        :raises Exception: Any failure (file access, HTTP error, malformed
            reply) is logged and re-raised.
        """
        try:
            if isinstance(image, str):
                with open(image, "rb") as f:
                    imageData = f.read()
            else:
                imageData = image

            encodedImage = base64.b64encode(imageData).decode('utf-8')
            mimeType = self._guessMimeType(imageData)
            streaming = bool(onToken)

            headers = {
                "Content-Type": "application/json"
            }
            if self.api_key:
                headers["Authorization"] = f"Bearer {self.api_key}"

            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self.prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mimeType};base64,{encodedImage}"
                                }
                            }
                        ]
                    }
                ],
                "stream": streaming
            }

            if maxLength:
                payload["max_tokens"] = maxLength

            response = requests.post(
                f"{self.endpoint}/chat/completions",
                headers=headers,
                json=payload,
                stream=streaming,
                timeout=self._REQUEST_TIMEOUT_SECONDS
            )
            try:
                response.raise_for_status()
                if streaming:
                    return self._readStream(response, onToken)
                data = response.json()
                content = data['choices'][0]['message']['content']
                # "content" may be null; treat it as an empty caption.
                return (content or "").strip()
            finally:
                # With stream=True the connection is only released once the
                # body is fully consumed or the response is closed; the stream
                # loop may stop early, so always close explicitly.
                response.close()
        except Exception as e:
            log.exception(f"Custom endpoint API request failed: {e}")
            raise
99 changes: 99 additions & 0 deletions addon/globalPlugins/CaptionLocal/customEndpointConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# -*- coding: UTF-8 -*-
# A part of NonVisual Desktop Access (NVDA)
# Copyright (C) 2025 NV Access Limited, Tianze
# This file may be used under the terms of the GNU General Public License, version 2 or later, as modified by the NVDA license.
# For full terms and any additional permissions, see the NVDA license file: https://github.com/nvaccess/nvda/blob/master/copying.txt

import wx
import json
import os
from logHandler import log

# NVDA normally provides the gettext function ``_``. When it is not defined
# (the bare name lookup below raises NameError), fall back to an identity
# function so that translatable strings are returned unchanged.
try:
    _
except NameError:
    def _(x):
        """Return *x* unchanged (no-op stand-in for gettext)."""
        return x

class CustomEndpointConfigDialog(wx.Dialog):
    """Dialog for configuring custom API endpoints.

    Shows four text fields (endpoint URL, API key, model name, optional
    prompt) pre-filled from the JSON file at ``config_path`` when it exists.
    The dialog itself never writes the file; callers read the entered values
    through :meth:`get_config`.
    """

    def __init__(self, parent, config_path):
        """
        :param parent: Parent window passed to ``wx.Dialog``.
        :param config_path: Path of the JSON config file used to pre-fill the fields.
        """
        super().__init__(parent, title=_("Configure Custom Endpoint"), size=(500, 350))
        self.config_path = config_path
        self.config_data = {}
        self._load_existing_config()
        self._initUI()

    def _load_existing_config(self):
        """Load ``self.config_data`` from disk; on any problem keep it empty and log."""
        if not os.path.exists(self.config_path):
            return
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except Exception:
            log.exception(f"Failed to load config from {self.config_path}")
            return
        # Valid JSON can still be a list/string/number; only an object can
        # be queried with .get() by the UI code.
        if isinstance(loaded, dict):
            self.config_data = loaded
        else:
            log.error(f"Ignoring config that is not a JSON object: {self.config_path}")

    def _configText(self, key, default=""):
        """Return the stored string for *key*, or *default* when it is absent or not a string.

        get_config() stores an empty prompt as ``None`` (JSON ``null``);
        passing that straight to ``wx.TextCtrl(value=...)`` raises TypeError.
        """
        value = self.config_data.get(key)
        return value if isinstance(value, str) else default

    def _addTextRow(self, sizer, label, value, style=0):
        """Append a label plus an expanding text control to *sizer* and return the control."""
        sizer.Add(wx.StaticText(self, label=label), 0, wx.ALIGN_CENTER_VERTICAL)
        ctrl = wx.TextCtrl(self, value=value, style=style)
        sizer.Add(ctrl, 1, wx.EXPAND)
        return ctrl

    def _initUI(self):
        """Build the form (one row per setting) and the OK/Cancel buttons."""
        mainSizer = wx.BoxSizer(wx.VERTICAL)

        flexSizer = wx.FlexGridSizer(rows=4, cols=2, vgap=10, hgap=10)
        flexSizer.AddGrowableCol(1)

        # Endpoint URL
        self.endpointCtrl = self._addTextRow(
            flexSizer,
            _("Endpoint URL:"),
            self._configText("endpoint", "https://api.openai.com/v1"),
        )

        # API Key
        self.apiKeyCtrl = self._addTextRow(
            flexSizer,
            _("API Key:"),
            self._configText("api_key"),
            style=wx.TE_PASSWORD,
        )

        # Model Name
        self.modelCtrl = self._addTextRow(
            flexSizer,
            _("Model Name:"),
            self._configText("model", "gpt-4o-mini"),
        )

        # Custom Prompt
        self.promptCtrl = self._addTextRow(
            flexSizer,
            _("Custom Prompt (Optional):"),
            self._configText("prompt"),
        )

        mainSizer.Add(flexSizer, 1, wx.ALL | wx.EXPAND, 15)

        # Buttons
        btnSizer = self.CreateButtonSizer(wx.OK | wx.CANCEL)
        mainSizer.Add(btnSizer, 0, wx.ALL | wx.CENTER, 10)

        self.SetSizer(mainSizer)

    def get_config(self):
        """Return the entered values as a dict ready to be dumped as JSON.

        All values are whitespace-stripped; an empty prompt is stored as ``None``.
        """
        return {
            "architectures": ["CustomEndpoint"],
            "endpoint": self.endpointCtrl.GetValue().strip(),
            "api_key": self.apiKeyCtrl.GetValue().strip(),
            "model": self.modelCtrl.GetValue().strip(),
            "prompt": self.promptCtrl.GetValue().strip() or None
        }

def show_config_dialog(parent, config_path):
    """Show the config dialog and save if OK is pressed.

    :param parent: Parent window for the dialog.
    :param config_path: Path of the JSON file to pre-fill from and write to.
    :return: True when the user confirmed and the file was written, False when cancelled.
    :raises OSError: If the directory or file cannot be created or written.
    """
    dlg = CustomEndpointConfigDialog(parent, config_path)
    try:
        if dlg.ShowModal() != wx.ID_OK:
            return False
        config = dlg.get_config()
    finally:
        # Modal dialogs are not destroyed automatically when closed.
        dlg.Destroy()

    # Ensure directory exists. dirname is "" for a bare file name, and
    # os.makedirs("") raises, so only create it when there is one.
    configDir = os.path.dirname(config_path)
    if configDir:
        os.makedirs(configDir, exist_ok=True)
    # NOTE(review): the API key is written to this file in plain text.
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=4, ensure_ascii=False)
    return True

def is_config_valid(config_path):
    """Check if the config file exists and has usable required fields.

    A config is valid when the file can be read, parses as a JSON object, and
    both ``endpoint`` and ``model`` are non-blank strings. Checking only that
    the keys are present is not enough: a config with empty values would pass
    here and then be rejected when the captioner is created from it.

    :param config_path: Path of the JSON config file.
    :return: True when the config is usable, False otherwise.
    """
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file, invalid JSON, or undecodable bytes
        # (JSONDecodeError and UnicodeDecodeError are ValueError subclasses).
        return False

    if not isinstance(config, dict):
        return False

    return all(
        isinstance(config.get(key), str) and bool(config[key].strip())
        for key in ("endpoint", "model")
    )
Loading
Loading