diff --git a/.github/workflows/huggingface-nightly.yml b/.github/workflows/huggingface-nightly.yml index 3ef74b3ec2..8c7976acd4 100644 --- a/.github/workflows/huggingface-nightly.yml +++ b/.github/workflows/huggingface-nightly.yml @@ -7,13 +7,12 @@ on: workflow_dispatch: inputs: upload_to: - description: "Where to upload (none/testpypi/pypi)" + description: "Where to upload (none/pypi)" required: true - default: "testpypi" + default: "pypi" type: choice options: - none - - testpypi - pypi skip_existing: description: "Skip already-uploaded versions" @@ -90,12 +89,15 @@ jobs: - id: nemotron-page-elements-v3 url: https://huggingface.co/nvidia/nemotron-page-elements-v3 project_subdir: "" + nightly_base_version: "3.0.2" - id: nemotron-table-structure-v1 url: https://huggingface.co/nvidia/nemotron-table-structure-v1 project_subdir: "" + nightly_base_version: "1.0.1" - id: nemotron-graphic-elements-v1 url: https://huggingface.co/nvidia/nemotron-graphic-elements-v1 project_subdir: "" + nightly_base_version: "1.0.1" steps: - name: Checkout orchestrator repo @@ -117,8 +119,8 @@ jobs: shell: bash run: | set -euo pipefail - # Default for scheduled runs: testpypi - upload_to="testpypi" + # Default for scheduled runs: pypi + upload_to="pypi" skip_existing="true" if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then upload_to="${{ inputs.upload_to }}" @@ -130,25 +132,19 @@ jobs: - name: Build (and maybe upload) env: NIGHTLY_DATE_SUFFIX: ${{ needs.nightly_coordinate.outputs.nightly_date_suffix }} - TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} shell: bash run: | set -euo pipefail upload_flag="" - repo_url="https://test.pypi.org/legacy/" - token_env="TEST_PYPI_API_TOKEN" + repo_url="https://upload.pypi.org/legacy/" + token_env="PYPI_API_TOKEN" if [[ "${{ steps.target.outputs.upload_to }}" == "none" ]]; then upload_flag="" else upload_flag="--upload" fi - if [[ "${{ steps.target.outputs.upload_to }}" == "pypi" ]]; then - repo_url="https://upload.pypi.org/legacy/" - token_env="PYPI_API_TOKEN" - fi - skip_existing_flag="" if [[ "${{ steps.target.outputs.skip_existing }}" == "true" ]]; then skip_existing_flag="--skip-existing" @@ -160,6 +156,7 @@ jobs: --work-dir ".work" \ --dist-dir "dist-out" \ --project-subdir "${{ matrix.repo.project_subdir }}" \ + --nightly-base-version "${{ matrix.repo.nightly_base_version }}" \ ${upload_flag} \ --repository-url "${repo_url}" \ --token-env "${token_env}" \ @@ -228,8 +225,8 @@ jobs: shell: bash run: | set -euo pipefail - # Default for scheduled runs: testpypi - upload_to="testpypi" + # Default for scheduled runs: pypi + upload_to="pypi" skip_existing="true" if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then upload_to="${{ inputs.upload_to }}" @@ -241,7 +238,6 @@ jobs: - name: Build ${{ matrix.ocr.id }} (and maybe upload) env: NIGHTLY_DATE_SUFFIX: ${{ needs.nightly_coordinate.outputs.nightly_date_suffix }} - TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} CUDA_HOME: /usr/local/cuda BUILD_CPP_EXTENSION: "1" @@ -257,18 +253,13 @@ jobs: set -euo pipefail upload_flag="" - repo_url="https://test.pypi.org/legacy/" - token_env="TEST_PYPI_API_TOKEN" + repo_url="https://upload.pypi.org/legacy/" + token_env="PYPI_API_TOKEN" if [[ "${{ steps.target.outputs.upload_to }}" == "none" ]]; then upload_flag="" else upload_flag="--upload" fi - if [[ "${{ steps.target.outputs.upload_to }}" == "pypi" ]]; then - repo_url="https://upload.pypi.org/legacy/" - token_env="PYPI_API_TOKEN" - fi - skip_existing_flag="" if [[ "${{ steps.target.outputs.skip_existing }}" == "true" ]]; then skip_existing_flag="--skip-existing" @@ -428,7 +419,7 @@ jobs: "-linux_aarch64.whl" ): raise SystemExit( - "Wheel still has a bare linux_* tag; TestPyPI rejects these. " + "Wheel still has a bare linux_* tag; PyPI rejects these. " "auditwheel repair should emit manylinux_*. Got: " f"{wheel.name}" ) diff --git a/.github/workflows/pypi-nightly-publish.yml b/.github/workflows/pypi-nightly-publish.yml index 06f5509f16..2dcc55e1d6 100644 --- a/.github/workflows/pypi-nightly-publish.yml +++ b/.github/workflows/pypi-nightly-publish.yml @@ -25,13 +25,12 @@ on: - dev - release upload_to: - description: 'Where to upload (none/testpypi/pypi)' + description: 'Where to upload (none/pypi)' required: true - default: testpypi + default: pypi type: choice options: - none - - testpypi - pypi skip_existing: description: 'Skip already-uploaded versions' @@ -39,6 +38,9 @@ on: default: true type: boolean +permissions: + contents: read + jobs: build: runs-on: linux-large-disk @@ -48,8 +50,8 @@ jobs: shell: bash run: | set -euo pipefail - # Default for scheduled runs: testpypi - upload_to="testpypi" + # Default for scheduled runs: pypi + upload_to="pypi" skip_existing="true" if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then upload_to="${{ inputs.upload_to }}" @@ -124,20 +126,13 @@ jobs: - name: Publish wheels env: - TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} - PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} shell: bash run: | set -euo pipefail upload_to="${{ steps.target.outputs.upload_to }}" - repository_url="https://test.pypi.org/legacy/" - token="${TEST_PYPI_API_TOKEN:-}" - if [[ "${upload_to}" == "pypi" ]]; then - repository_url="https://upload.pypi.org/legacy/" - token="${PYPI_API_TOKEN:-}" - fi - + repository_url="https://upload.pypi.org/legacy/" if [[ "${upload_to}" == "none" ]]; then echo "upload_to=none; skipping package upload." exit 0 @@ -148,4 +143,4 @@ jobs: skip_existing_flag="--skip-existing" fi - twine upload ${skip_existing_flag} --repository-url "${repository_url}" -u __token__ -p "${token}" nemo_retriever/dist/* + twine upload ${skip_existing_flag} --repository-url "${repository_url}" -u __token__ nemo_retriever/dist/* diff --git a/ci/scripts/nightly_build_publish.py b/ci/scripts/nightly_build_publish.py index 85d996c475..ee2a076d01 100644 --- a/ci/scripts/nightly_build_publish.py +++ b/ci/scripts/nightly_build_publish.py @@ -650,7 +650,7 @@ def _build( def _auditwheel_repair_dist_dir(dist_dir: Path, *, exclude_libs: list[str] | None = None) -> None: """ - Rewrite linux_* wheels to manylinux_* so TestPyPI/PyPI accept the upload. + Rewrite linux_* wheels to manylinux_* so PyPI accepts the upload. Requires ``patchelf`` on PATH (e.g. apt install patchelf). *exclude_libs* is a list of shared library basenames (e.g. ``libtorch_cpu.so``) @@ -804,8 +804,8 @@ def main() -> int: "before building (repeatable; useful for ABI-coupled deps like torch)", ) ap.add_argument("--upload", action="store_true", help="Upload built dists via twine") - ap.add_argument("--repository-url", default="https://test.pypi.org/legacy/", help="Twine repository URL") - ap.add_argument("--token-env", default="TEST_PYPI_API_TOKEN", help="Env var containing API token") + ap.add_argument("--repository-url", default="https://upload.pypi.org/legacy/", help="Twine repository URL") + ap.add_argument("--token-env", default="PYPI_API_TOKEN", help="Env var containing API token") ap.add_argument("--skip-existing", action="store_true", help="Pass --skip-existing to twine") ap.add_argument( "--twine-verbose", @@ -821,7 +821,7 @@ def main() -> int: ap.add_argument( "--auditwheel-repair", action="store_true", - help="Run auditwheel repair on built wheels (manylinux tag; needed for PyPI/TestPyPI)", + help="Run auditwheel repair on built wheels (manylinux tag; needed for PyPI)", ) ap.add_argument( "--auditwheel-exclude", diff --git a/ci/tests/test_huggingface_release_workflow.py b/ci/tests/test_huggingface_release_workflow.py index 55e17cd44b..c156b755d3 100644 --- a/ci/tests/test_huggingface_release_workflow.py +++ b/ci/tests/test_huggingface_release_workflow.py @@ -113,3 +113,21 @@ def test_huggingface_workflow_has_manual_stable_ocr_release_controls() -> None: assert "--release-version" in workflow assert 'expected_version="${INPUT_RELEASE_VERSION}"' in workflow assert "Built wheel metadata does not declare expected version" in workflow + + +def test_huggingface_non_ocr_nightlies_are_versioned_after_current_stable() -> None: + workflow = (REPO_ROOT / ".github" / "workflows" / "huggingface-nightly.yml").read_text(encoding="utf-8") + + assert '--nightly-base-version "${{ matrix.repo.nightly_base_version }}"' in workflow + assert "id: nemotron-page-elements-v3" in workflow + assert 'nightly_base_version: "3.0.2"' in workflow + assert "id: nemotron-table-structure-v1" in workflow + assert workflow.count('nightly_base_version: "1.0.1"') == 2 + assert "id: nemotron-graphic-elements-v1" in workflow + + +def test_huggingface_nightly_builder_defaults_to_public_pypi() -> None: + script = (REPO_ROOT / "ci" / "scripts" / "nightly_build_publish.py").read_text(encoding="utf-8") + + assert 'default="https://upload.pypi.org/legacy/"' in script + assert 'default="PYPI_API_TOKEN"' in script diff --git a/nemo_retriever/README.md b/nemo_retriever/README.md index 94f34b2f82..da5e4f9f4f 100644 --- a/nemo_retriever/README.md +++ b/nemo_retriever/README.md @@ -47,6 +47,14 @@ source retriever/bin/activate uv pip install "nemo-retriever[local]==26.05-RC1" ``` +The `[local]` extra resolves stable Nemotron extraction packages by default. To +try prerelease/nightly Nemotron packages from PyPI within the same supported +major-version windows, opt in with `--pre`: + +```bash +uv pip install --pre "nemo-retriever[local]==26.05-RC1" +``` + Install matching **ingestion client** and **ingestion runtime** wheels at the same version when your workflow expects them (see the [NeMo Retriever Library prerequisites](https://docs.nvidia.com/nemo/retriever/latest/extraction/overview/) for the exact PyPI coordinates for your release). For **remote NIM inference only** (no local GPU required), the base package is sufficient: diff --git a/nemo_retriever/pyproject.toml b/nemo_retriever/pyproject.toml index 577974ffcf..2f996e0424 100644 --- a/nemo_retriever/pyproject.toml +++ b/nemo_retriever/pyproject.toml @@ -102,9 +102,8 @@ service = [ ] # ── Local model inference (GPU assumed; torch resolves to CUDA on Linux) ───── -# Adds HuggingFace transformers, torch, nemotron models, GPU monitoring, and vLLM. +# Stable Nemotron extraction package selection for published local installs. local = [ - "glom", "transformers>=4.57.6,<5", "tokenizers>=0.21.1", @@ -123,9 +122,9 @@ local = [ "scikit-learn>=1.6.0", "timm==1.0.22", "albumentations==2.0.8", - "nemotron-page-elements-v3==3.0.1", - "nemotron-graphic-elements-v1==1.0.0", - "nemotron-table-structure-v1==1.0.0", + "nemotron-page-elements-v3>=3.0.1,<4", + "nemotron-graphic-elements-v1>=1.0.0,<2", + "nemotron-table-structure-v1>=1.0.0,<2", "nemotron-ocr>=2.0.0,<3; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", "nvidia-ml-py", "apscheduler>=3.10", @@ -197,6 +196,19 @@ all = [ retriever = "nemo_retriever.__main__:main" retriever-harness = "nemo_retriever.harness:main" +# uv-only developer install: include every published extra plus the published +# dev extra, then overlay the prerelease Nemotron selection without exposing it +# in wheel metadata. +[dependency-groups] +dev = [ + "nemo_retriever[all]", + "nemo_retriever[dev]", + "nemotron-page-elements-v3>=3.0.1.dev0,!=3.0.1,!=3.0.2,<4", + "nemotron-graphic-elements-v1>=1.0.1.dev0,!=1.0.1,<2", + "nemotron-table-structure-v1>=1.0.1.dev0,!=1.0.1,<2", + "nemotron-ocr>=2.0.1.dev0,!=2.0.1,<3; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", +] + [tool.setuptools.dynamic] version = {attr = "nemo_retriever.version.get_build_version"} @@ -209,11 +221,6 @@ torch = [ torchvision = [ { index = "pytorch-cu130", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] -[[tool.uv.index]] -name = "test-pypi" -url = "https://test.pypi.org/simple/" -explicit = true - [[tool.uv.index]] name = "pytorch-cu130" url = "https://download.pytorch.org/whl/cu130" diff --git a/nemo_retriever/tests/test_ci_workflows.py b/nemo_retriever/tests/test_ci_workflows.py index ee72ec8d14..a4b2a252ad 100644 --- a/nemo_retriever/tests/test_ci_workflows.py +++ b/nemo_retriever/tests/test_ci_workflows.py @@ -127,6 +127,40 @@ def test_legacy_ghcr_push_publish_workflow_is_removed(): assert not (WORKFLOWS / "docker-build-publish-retriever.yml").exists() +@requires_workflows +def test_public_nightly_python_publish_workflows_do_not_target_testpypi(): + workflow_names = ("pypi-nightly-publish.yml", "huggingface-nightly.yml") + + for workflow_name in workflow_names: + workflow = (WORKFLOWS / workflow_name).read_text(encoding="utf-8") + + assert "testpypi" not in workflow.lower(), workflow_name + assert "test.pypi.org" not in workflow.lower(), workflow_name + assert "https://upload.pypi.org/legacy/" in workflow, workflow_name + assert "PYPI_API_TOKEN" in workflow, workflow_name + + +@requires_workflows +def test_public_nightly_python_publish_workflows_use_read_only_token_permissions(): + workflow_names = ("pypi-nightly-publish.yml", "huggingface-nightly.yml") + + for workflow_name in workflow_names: + workflow = _load_workflow(workflow_name) + + assert workflow["permissions"] == {"contents": "read"}, workflow_name + + +@requires_workflows +def test_pypi_nightly_publish_uses_twine_password_env(): + workflow = _load_workflow("pypi-nightly-publish.yml") + steps = workflow["jobs"]["build"]["steps"] + publish_step = next(step for step in steps if step.get("name") == "Publish wheels") + + assert publish_step["env"] == {"TWINE_PASSWORD": "${{ secrets.PYPI_API_TOKEN }}"} + assert ' -p "${token}"' not in publish_step["run"] + assert "PYPI_API_TOKEN" not in publish_step["run"] + + def test_legacy_nv_ingest_root_compose_stack_is_removed(): legacy_paths = ( "docker-compose.yaml", diff --git a/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py b/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py index fd41e6bb11..7ffc9892eb 100644 --- a/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py +++ b/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py @@ -65,28 +65,86 @@ def __init__(self, **kwargs: object) -> None: return captured_kwargs -def test_local_extra_accepts_stable_ocr_2_and_newer_dev_releases() -> None: +def _requirement(dependencies: list[str], name: str) -> Requirement: + return next(Requirement(dep) for dep in dependencies if Requirement(dep).name == name) + + +def test_local_extra_is_stable_and_uv_dev_group_uses_nightly_nemotron_specs() -> None: pyproject = tomllib.loads((PROJECT_ROOT / "pyproject.toml").read_text(encoding="utf-8")) - local_deps = pyproject["project"]["optional-dependencies"]["local"] + optional_deps = pyproject["project"]["optional-dependencies"] + dependency_groups = pyproject["dependency-groups"] + local_deps = optional_deps["local"] + dev_group = dependency_groups["dev"] + all_deps = optional_deps["all"] uv_tool = pyproject["tool"]["uv"] uv_sources = uv_tool["sources"] - - ocr_dep = next(dep for dep in local_deps if dep.startswith("nemotron-ocr")) - ocr_requirement = Requirement(ocr_dep) - - assert ocr_requirement.specifier.contains("2.0.0") - assert not ocr_requirement.specifier.contains("3.0.0") - assert ocr_requirement.specifier.contains("2.0.1") - assert not ocr_requirement.specifier.contains("1.0.1") - assert str(ocr_requirement.marker) == ( + uv_indexes = uv_tool["index"] + + assert "local-nightly" not in optional_deps + assert "local-base" not in optional_deps + assert "conflicts" not in uv_tool + assert "nemo_retriever[all]" in dev_group + assert "nemo_retriever[dev]" in dev_group + assert "uv-local" not in dependency_groups + assert {"include-group": "uv-local"} not in dev_group + assert "build>=1.2.2" not in dev_group + assert "pytest>=8.0.2" not in dev_group + assert "tritonclient" in local_deps + dev_nemotron_deps = [dep for dep in dev_group if isinstance(dep, str) and dep.startswith("nemotron-")] + assert len(dev_nemotron_deps) == 4 + assert any("local" in dep for dep in all_deps) + assert not any("local-nightly" in dep or "uv-local" in dep for dep in all_deps) + + stable_page_requirement = _requirement(local_deps, "nemotron-page-elements-v3") + assert stable_page_requirement.specifier.contains("3.0.1") + assert stable_page_requirement.specifier.contains("3.5.0") + assert stable_page_requirement.specifier.contains("3.0.2.dev1", prereleases=True) + assert not stable_page_requirement.specifier.contains("4.0.0") + + for package in ("nemotron-graphic-elements-v1", "nemotron-table-structure-v1"): + stable_requirement = _requirement(local_deps, package) + assert stable_requirement.specifier.contains("1.0.0") + assert stable_requirement.specifier.contains("1.5.0") + assert stable_requirement.specifier.contains("1.0.1.dev1", prereleases=True) + assert not stable_requirement.specifier.contains("2.0.0") + + stable_ocr_requirement = _requirement(local_deps, "nemotron-ocr") + assert stable_ocr_requirement.specifier.contains("2.0.0") + assert stable_ocr_requirement.specifier.contains("2.5.0") + assert stable_ocr_requirement.specifier.contains("2.0.1.dev1", prereleases=True) + assert not stable_ocr_requirement.specifier.contains("3.0.0") + assert not stable_ocr_requirement.specifier.contains("1.0.1") + assert str(stable_ocr_requirement.marker) == ( 'sys_platform == "linux" and (platform_machine == "x86_64" or platform_machine == "aarch64")' ) - assert not any(dep.startswith("nemotron-ocr-v2") for dep in local_deps) + + nightly_page_requirement = _requirement(dev_nemotron_deps, "nemotron-page-elements-v3") + assert nightly_page_requirement.specifier.contains("3.0.1.dev1", prereleases=True) + assert nightly_page_requirement.specifier.contains("3.0.2.dev1", prereleases=True) + assert not nightly_page_requirement.specifier.contains("3.0.1", prereleases=True) + assert not nightly_page_requirement.specifier.contains("3.0.2", prereleases=True) + + for package in ("nemotron-graphic-elements-v1", "nemotron-table-structure-v1"): + nightly_requirement = _requirement(dev_nemotron_deps, package) + assert not nightly_requirement.specifier.contains("1.0.0.dev1", prereleases=True) + assert nightly_requirement.specifier.contains("1.0.1.dev1", prereleases=True) + assert not nightly_requirement.specifier.contains("1.0.0", prereleases=True) + assert not nightly_requirement.specifier.contains("1.0.1", prereleases=True) + + nightly_ocr_requirement = _requirement(dev_nemotron_deps, "nemotron-ocr") + assert nightly_ocr_requirement.specifier.contains("2.0.1.dev1", prereleases=True) + assert not nightly_ocr_requirement.specifier.contains("2.0.0", prereleases=True) + assert not nightly_ocr_requirement.specifier.contains("2.0.1", prereleases=True) + assert str(nightly_ocr_requirement.marker) == str(stable_ocr_requirement.marker) + + assert not any(dep.startswith("nemotron-ocr-v2") for dep in local_deps + dev_nemotron_deps) assert "nemotron-ocr" in uv_tool["no-build-package"] assert "nemotron-ocr-v2" not in uv_tool["no-build-package"] assert "nemotron-ocr" not in uv_sources assert "nemotron-ocr-v2" not in uv_sources + assert "test-pypi" not in {index["name"] for index in uv_indexes} + assert all("test.pypi.org" not in index["url"] for index in uv_indexes) def test_local_ocr_v2_wrapper_uses_original_namespace_and_package_lang_selectors() -> None: diff --git a/nemo_retriever/uv.lock b/nemo_retriever/uv.lock index 0090d174e8..99922eb8e8 100644 --- a/nemo_retriever/uv.lock +++ b/nemo_retriever/uv.lock @@ -2,14 +2,14 @@ version = 1 revision = 3 requires-python = "==3.12.*" resolution-markers = [ - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine != 's390x' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine != 's390x' and sys_platform == 'win32'", "platform_machine == 's390x' and sys_platform == 'win32'", - "platform_machine != 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", - "platform_machine == 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine != 's390x' and sys_platform == 'darwin'", + "platform_machine == 's390x' and sys_platform == 'darwin'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] [manifest] @@ -386,7 +386,7 @@ name = "build" version = "1.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "os_name == 'nt' and sys_platform != 'linux'" }, + { name = "colorama", marker = "os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux'" }, { name = "packaging" }, { name = "pyproject-hooks" }, ] @@ -2078,8 +2078,10 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'win32'", "platform_machine == 's390x' and sys_platform == 'win32'", - "platform_machine != 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", - "platform_machine == 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine != 's390x' and sys_platform == 'darwin'", + "platform_machine == 's390x' and sys_platform == 'darwin'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" } wheels = [ @@ -2093,10 +2095,8 @@ name = "llvmlite" version = "0.47.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine != 's390x' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/01/88/a8952b6d5c21e74cbf158515b779666f692846502623e9e3c39d8e8ba25f/llvmlite-0.47.0.tar.gz", hash = "sha256:62031ce968ec74e95092184d4b0e857e444f8fdff0b8f9213707699570c33ccc", size = 193614, upload-time = "2026-03-31T18:29:53.497Z" } wheels = [ @@ -2571,6 +2571,15 @@ tabular = [ { name = "neo4j" }, ] +[package.dev-dependencies] +dev = [ + { name = "nemo-retriever", extra = ["all", "dev"] }, + { name = "nemotron-graphic-elements-v1" }, + { name = "nemotron-ocr", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "nemotron-page-elements-v3" }, + { name = "nemotron-table-structure-v1" }, +] + [package.metadata] requires-dist = [ { name = "accelerate", marker = "extra == 'local'", specifier = "==1.12.0" }, @@ -2608,10 +2617,10 @@ requires-dist = [ { name = "litellm", marker = "extra == 'llm'", specifier = ">=1.86.0,<2" }, { name = "markitdown" }, { name = "nemo-retriever", extras = ["benchmarks", "llm", "local", "multimedia", "nemotron-parse", "service", "tabular"], marker = "extra == 'all'" }, - { name = "nemotron-graphic-elements-v1", marker = "extra == 'local'", specifier = "==1.0.0" }, + { name = "nemotron-graphic-elements-v1", marker = "extra == 'local'", specifier = ">=1.0.0,<2" }, { name = "nemotron-ocr", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'local') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'local')", specifier = ">=2.0.0,<3" }, - { name = "nemotron-page-elements-v3", marker = "extra == 'local'", specifier = "==3.0.1" }, - { name = "nemotron-table-structure-v1", marker = "extra == 'local'", specifier = "==1.0.0" }, + { name = "nemotron-page-elements-v3", marker = "extra == 'local'", specifier = ">=3.0.1,<4" }, + { name = "nemotron-table-structure-v1", marker = "extra == 'local'", specifier = ">=1.0.0,<2" }, { name = "neo4j", marker = "extra == 'tabular'", specifier = ">=5.0" }, { name = "nltk", specifier = "==3.9.4" }, { name = "numpy", specifier = ">=1.26.0" }, @@ -2658,9 +2667,19 @@ requires-dist = [ ] provides-extras = ["service", "local", "multimedia", "nemotron-parse", "tabular", "benchmarks", "llm", "dev", "all"] +[package.metadata.requires-dev] +dev = [ + { name = "nemo-retriever", extras = ["all"] }, + { name = "nemo-retriever", extras = ["dev"] }, + { name = "nemotron-graphic-elements-v1", specifier = ">=1.0.1.dev0,!=1.0.1,<2" }, + { name = "nemotron-ocr", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')", specifier = ">=2.0.1.dev0,!=2.0.1,<3" }, + { name = "nemotron-page-elements-v3", specifier = ">=3.0.1.dev0,!=3.0.1,!=3.0.2,<4" }, + { name = "nemotron-table-structure-v1", specifier = ">=1.0.1.dev0,!=1.0.1,<2" }, +] + [[package]] name = "nemotron-graphic-elements-v1" -version = "1.0.0" +version = "1.0.1.dev20260608195023" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -2671,31 +2690,32 @@ dependencies = [ { name = "torch", version = "2.11.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/da/91/cd578f94cf70708ee65a3342e7954a683757332b231cc7378806ddfcc79f/nemotron_graphic_elements_v1-1.0.1.dev20260608195023.tar.gz", hash = "sha256:7c7412632d01e28af35cb2b1a8671e10dd2e13ca93cd311e6dc81ed82ebd396c", size = 40143, upload-time = "2026-06-08T19:50:56.548Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/79/13/6d9b9c06aa58fe9c558dabb6d50532dbcfe98eba32e2e8975da2f83d01b7/nemotron_graphic_elements_v1-1.0.0-py3-none-any.whl", hash = "sha256:806b37f4fd740786105cf160769dd5506ca5dce8b4c65847b656e0ffd9cff5d6", size = 28738, upload-time = "2025-12-19T16:26:48.803Z" }, + { url = "https://files.pythonhosted.org/packages/20/27/49cbae1da591806231cb802dcc0640a999718747c73de1826d4b1856a4c8/nemotron_graphic_elements_v1-1.0.1.dev20260608195023-py3-none-any.whl", hash = "sha256:9ebc965bee30de76ed28a90e251e42d5e0f0eafe4b9da124d6202e750751fca5", size = 34239, upload-time = "2026-06-08T19:50:55.649Z" }, ] [[package]] name = "nemotron-ocr" -version = "2.0.0" +version = "2.0.1.dev20260608195023" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "pillow", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "shapely", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "torchvision", version = "0.26.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "huggingface-hub", marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, + { name = "numpy", marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, + { name = "pillow", marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, + { name = "shapely", marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, + { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.26.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b7/ef/9dbba22f5de348a5f9c3af0488bf61258872926c40b7d513d71ef465b418/nemotron_ocr-2.0.0.tar.gz", hash = "sha256:84eb64f8af2ae12fbd83e38e482348ecce6a932b30946c873f8b8a95afae7355", size = 155817, upload-time = "2026-05-21T00:06:36.975Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/56/746c4c70b76e008b529e711d785fb0f0864f6eb3e9b778e949957dfb7468/nemotron_ocr-2.0.1.dev20260608195023.tar.gz", hash = "sha256:5877235ccddd60d1ebb856c8c2524dc232e3fd5c7b9665559edf65da1d3f1868", size = 155961, upload-time = "2026-06-08T20:00:32.987Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/03/1d487d3bef63df377bd5f81311963ce24c4182984d47387bd8bf70f8ed20/nemotron_ocr-2.0.0-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:8bd3afc1dbfaae67cf20ec06b95d48056db8372e66fb46212cc302775734cb54", size = 36094927, upload-time = "2026-05-21T00:07:05.772Z" }, - { url = "https://files.pythonhosted.org/packages/1d/89/547df1d8c4a7fd97b49fc662078707d1f8b5740ce29fbb94db4cc6a3abd1/nemotron_ocr-2.0.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:fd5cf31259e236dd213edd36a4cdace2d4afc1972a5fad26e457804b1752d7de", size = 36806333, upload-time = "2026-05-21T00:06:34.545Z" }, + { url = "https://files.pythonhosted.org/packages/6d/8a/d48edafe9db5ae63e4339b4c34a4dbcf77692da5ea90d7eb58c0944d3b0a/nemotron_ocr-2.0.1.dev20260608195023-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:c1fa421678ea93676701eee61c1779928cca9a2ab07d57df13d64d44e1e0d3b1", size = 36095213, upload-time = "2026-06-08T20:00:30.522Z" }, + { url = "https://files.pythonhosted.org/packages/c8/29/cfda79f914b49f73504d8aef8cc639562090f37918e5bcb67b8342b26ac6/nemotron_ocr-2.0.1.dev20260608195023-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:83c9a2303f738eaa08363a35dcd86c8d85c725ae3fbdfaf9c7d9e5e7ada3fd0a", size = 36806594, upload-time = "2026-06-08T20:01:35.977Z" }, ] [[package]] name = "nemotron-page-elements-v3" -version = "3.0.1" +version = "3.0.2.dev20260608195023" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -2715,13 +2735,14 @@ dependencies = [ { name = "torchvision", version = "0.26.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "tqdm" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/1b/61/205fe9dfc0881e26b014a1cbef1892f73bb3f4db0595b755c7dce0321fb5/nemotron_page_elements_v3-3.0.2.dev20260608195023.tar.gz", hash = "sha256:d7f80a3346cbdd4ca10dfedec2eee1c22da50130c76554844772a4771fe9e03e", size = 44767, upload-time = "2026-06-08T19:51:02.209Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/e1/25e7c782b97113fc4a6bcedc8ec98899d9ee8e72f4320f524c93fd29747c/nemotron_page_elements_v3-3.0.1-py3-none-any.whl", hash = "sha256:d29c47e19594ae2c546634bfa5ceaeb17262752c3a0510137d6dec501cf29d99", size = 32761, upload-time = "2025-12-19T17:03:59.787Z" }, + { url = "https://files.pythonhosted.org/packages/c6/e3/f29c97f63862eb6b9f877e8ec3a6ac9539930301721999e41b785d34c425/nemotron_page_elements_v3-3.0.2.dev20260608195023-py3-none-any.whl", hash = "sha256:dbea7243eefe4b84b21debcd92d047d9f373400a60869fbeda831687ab0a33d7", size = 40034, upload-time = "2026-06-08T19:51:01.054Z" }, ] [[package]] name = "nemotron-table-structure-v1" -version = "1.0.0" +version = "1.0.1.dev20260608195023" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -2732,8 +2753,9 @@ dependencies = [ { name = "torch", version = "2.11.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/05/10/3f048a60f6cbd74e3d3865a10717d96e18373e022ae04fa1ba6c17ace173/nemotron_table_structure_v1-1.0.1.dev20260608195023.tar.gz", hash = "sha256:b77fc177a8ebfb6b5aac806bf401199a82835f62b3bbec85acc5a37dcbf7b506", size = 48628, upload-time = "2026-06-08T19:51:05.425Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/58/be/17551a3321df07138f8637e1481360e5f85407e3061af89a988da9f02f25/nemotron_table_structure_v1-1.0.0-py3-none-any.whl", hash = "sha256:e65b9fc66da9e7df30ef823ace23df36b377f27131c266a8adec005a775af3e3", size = 31832, upload-time = "2025-12-19T16:36:23.667Z" }, + { url = "https://files.pythonhosted.org/packages/ff/ed/12d25e22b3a0a988f4d8f0f96df4a7bc95bea4d3d86af66857d9f9261982/nemotron_table_structure_v1-1.0.1.dev20260608195023-py3-none-any.whl", hash = "sha256:6fb120d0a8920ee99d681441672cfb8b665cafa48e094415adc1109a64139406", size = 39015, upload-time = "2026-06-08T19:51:04.535Z" }, ] [[package]] @@ -2805,8 +2827,10 @@ source = { registry = "https://pypi.org/simple" } resolution-markers = [ "platform_machine != 's390x' and sys_platform == 'win32'", "platform_machine == 's390x' and sys_platform == 'win32'", - "platform_machine != 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", - "platform_machine == 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine != 's390x' and sys_platform == 'darwin'", + "platform_machine == 's390x' and sys_platform == 'darwin'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ { name = "llvmlite", version = "0.44.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux'" }, @@ -2824,10 +2848,8 @@ name = "numba" version = "0.65.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine != 's390x' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", ] dependencies = [ { name = "llvmlite", version = "0.47.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, @@ -3111,12 +3133,12 @@ name = "onnxruntime" version = "1.25.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine != 's390x' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", - "platform_machine != 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", - "platform_machine == 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine != 's390x' and sys_platform == 'darwin'", + "platform_machine == 's390x' and sys_platform == 'darwin'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ { name = "flatbuffers", marker = "sys_platform != 'win32'" }, @@ -3230,7 +3252,7 @@ name = "opencv-python" version = "4.13.0.92" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, + { name = "numpy", marker = "sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'" }, ] [[package]] @@ -4445,7 +4467,7 @@ name = "shapely" version = "2.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "numpy", marker = "platform_machine != 's390x' and sys_platform == 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } wheels = [ @@ -4825,8 +4847,10 @@ name = "torch" version = "2.11.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine != 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", - "platform_machine == 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine != 's390x' and sys_platform == 'darwin'", + "platform_machine == 's390x' and sys_platform == 'darwin'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ { name = "filelock", marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, @@ -4846,10 +4870,8 @@ name = "torch" version = "2.11.0+cu130" source = { registry = "https://download.pytorch.org/whl/cu130" } resolution-markers = [ - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine != 's390x' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine != 's390x' and sys_platform == 'win32'", "platform_machine == 's390x' and sys_platform == 'win32'", ] @@ -4902,8 +4924,10 @@ name = "torchvision" version = "0.26.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "platform_machine != 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", - "platform_machine == 's390x' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine != 's390x' and sys_platform == 'darwin'", + "platform_machine == 's390x' and sys_platform == 'darwin'", + "platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "platform_machine == 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ { name = "numpy", marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, @@ -4919,10 +4943,8 @@ name = "torchvision" version = "0.26.0+cu130" source = { registry = "https://download.pytorch.org/whl/cu130" } resolution-markers = [ - "platform_machine != 'aarch64' and platform_machine != 's390x' and platform_machine != 'x86_64' and sys_platform == 'linux'", + "platform_machine != 's390x' and sys_platform == 'linux'", "platform_machine == 's390x' and sys_platform == 'linux'", - "platform_machine == 'x86_64' and sys_platform == 'linux'", - "platform_machine == 'aarch64' and sys_platform == 'linux'", "platform_machine != 's390x' and sys_platform == 'win32'", "platform_machine == 's390x' and sys_platform == 'win32'", ]