From 3a444d1eea38e6f4cc33057fcd75aa9ac96e7e97 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 11 Jun 2026 16:03:54 -0400 Subject: [PATCH 1/9] chore: deprecate overture_releases.yaml, migrate fetch script to STAC - Replace fetch-releases-from-s3.py with fetch_releases_from_stac.py driven by https://stac.overturemaps.org/catalog.json instead of S3 listing - Drop obstore dependency from requirements.txt - Add deprecation header to overture_releases.yaml (kept for existing consumers) - Add index.html landing page for labs.overturemaps.org/data (was 404) - Add unit tests (19) for fetch_releases_from_stac.py - Add test.yml CI workflow (runs on push + PR) - Update Python 3.11 -> 3.12 in both workflows Published bundle before/after: releases.json S3 listing (~30 entries) -> STAC catalog (~2 active releases) latest.ddb unchanged latest.dbb unchanged (legacy alias, now marked deprecated) registry-manifest unchanged overture_releases.yaml unchanged data, gains deprecation comment index.html new: directory landing page (was 404) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/build-manifests.yml | 11 +- .github/workflows/test.yml | 35 ++++ .gitignore | 3 + index.html | 193 +++++++++++++++++++ overture_releases.yaml | 3 + utils/fetch-releases-from-s3.py | 89 --------- utils/fetch_releases_from_stac.py | 76 ++++++++ utils/requirements-test.txt | 2 + utils/requirements.txt | 1 - utils/tests/__init__.py | 0 utils/tests/test_fetch_releases_from_stac.py | 168 ++++++++++++++++ 11 files changed, 486 insertions(+), 95 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 index.html delete mode 100644 utils/fetch-releases-from-s3.py create mode 100644 utils/fetch_releases_from_stac.py create mode 100644 utils/requirements-test.txt create mode 100644 utils/tests/__init__.py create mode 100644 utils/tests/test_fetch_releases_from_stac.py diff --git 
a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 51a9d8a..67cd9cc 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -1,4 +1,4 @@ -name: Fetch releases from S3 +name: Build and publish releases artifacts on: push: @@ -33,10 +33,10 @@ jobs: with: persist-credentials: false - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | @@ -44,15 +44,16 @@ jobs: cd utils pip install -r requirements.txt - - name: Build releases.json and latest.dbb + - name: Build releases.json and latest.ddb run: | cd utils - python3 fetch-releases-from-s3.py + python3 fetch_releases_from_stac.py python3 simple-registry-manifest.py - name: Copy output to publish directory run: | mkdir publish + cp index.html publish/ cp utils/releases.json publish/ cp utils/registry-manifest.json publish/ cp utils/latest.ddb publish/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..2688aeb --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,35 @@ +name: Test utils + +on: + push: + branches: main + pull_request: + +permissions: + contents: read + +jobs: + test: + name: Run unit tests + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Python 3.12 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd utils + pip install -r requirements-test.txt + + - name: Run tests + run: | + cd utils + python -m pytest tests/ -v diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..41c33c8 --- /dev/null +++ b/.gitignore @@ 
-0,0 +1,3 @@ + +__pycache__/ +*.pyc diff --git a/index.html b/index.html new file mode 100644 index 0000000..664210a --- /dev/null +++ b/index.html @@ -0,0 +1,193 @@ + + + + + + Overture Maps Data + + + + + + + +
+

Overture Maps Foundation

+

labs.overturemaps.org/data: published release artifacts

+
+ +
+

Available Files

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FileDescription
releases.json deprecatedRelease list with latest pointer, supplanted by STAC (SpatioTemporal Asset Catalog)
latest.ddbDuckDB database with views pointing to the latest Overture release on S3
latest.dbb deprecatedAlias for latest.ddb (legacy filename)
registry-manifest.jsonParquet file manifest for the Overture GERS registry
overture_releases.yaml deprecatedHistorical release list, no longer maintained. Use STAC (SpatioTemporal Asset Catalog) instead
+ +

+ For authoritative release discovery, use the + STAC (SpatioTemporal Asset Catalog). + Full documentation at docs.overturemaps.org. +

+
+ + + + + diff --git a/overture_releases.yaml b/overture_releases.yaml index 05da21c..fcca45c 100644 --- a/overture_releases.yaml +++ b/overture_releases.yaml @@ -1,3 +1,6 @@ +# DEPRECATED: This file is no longer maintained and will be removed in a future release. +# Use the Overture STAC catalog for authoritative release discovery: +# https://stac.overturemaps.org/catalog.json - schema: "1.16.0" release: "2026-04-15.0" - schema: "1.16.0" diff --git a/utils/fetch-releases-from-s3.py b/utils/fetch-releases-from-s3.py deleted file mode 100644 index 56d3ee2..0000000 --- a/utils/fetch-releases-from-s3.py +++ /dev/null @@ -1,89 +0,0 @@ -import duckdb, json -from obstore.store import S3Store - -store = S3Store("overturemaps-us-west-2", region="us-west-2", skip_signature=True) - -releases = store.list_with_delimiter("release/") - -output = {} - -for idx, release in enumerate(sorted(releases.get("common_prefixes"), reverse=True)): - path = release.split("/")[1] - if idx == 0: - output["latest"] = path - output["releases"] = [] - output["releases"].append(path) - - print(f" - {path}") - -with open("releases.json", "w") as output_file: - output_file.write(json.dumps(output, indent=4)) - -conn = duckdb.connect("latest.ddb") - -conn.sql( - f""" -INSTALL spatial; -LOAD spatial; - -CREATE OR REPLACE VIEW address AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=addresses/type=address/*.parquet') -); - -CREATE OR REPLACE VIEW bathymetry AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=bathymetry/*.parquet') -); - -CREATE OR REPLACE VIEW building AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=buildings/type=building/*.parquet') -); - -CREATE OR REPLACE VIEW building_part AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=buildings/type=building_part/*.parquet') -); - 
-CREATE OR REPLACE VIEW connector AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=transportation/type=connector/*.parquet') -); - -CREATE OR REPLACE VIEW division AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division/*.parquet') -); - -CREATE OR REPLACE VIEW division_area AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division_area/*.parquet') -); - -CREATE OR REPLACE VIEW division_boundary AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=divisions/type=division_boundary/*.parquet') -); - -CREATE OR REPLACE VIEW infrastructure AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=infrastructure/*.parquet') -); - -CREATE OR REPLACE VIEW land AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land/*.parquet') -); - -CREATE OR REPLACE VIEW land_cover AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land_cover/*.parquet') -); - -CREATE OR REPLACE VIEW land_use AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=land_use/*.parquet') -); - -CREATE OR REPLACE VIEW place AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=places/type=place/*.parquet') -); - -CREATE OR REPLACE VIEW segment AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=transportation/type=segment/*.parquet') -); - -CREATE OR REPLACE VIEW water AS ( - SELECT * FROM read_parquet('s3://overturemaps-us-west-2/release/{output.get("latest")}/theme=base/type=water/*.parquet') -); -""" -) diff --git a/utils/fetch_releases_from_stac.py 
b/utils/fetch_releases_from_stac.py new file mode 100644 index 0000000..295153a --- /dev/null +++ b/utils/fetch_releases_from_stac.py @@ -0,0 +1,76 @@ +import json +import urllib.request + +import duckdb + +STAC_CATALOG = "https://stac.overturemaps.org/catalog.json" +S3_BASE = "s3://overturemaps-us-west-2/release" + +VIEWS = [ + ("address", "addresses", "address"), + ("bathymetry", "base", "bathymetry"), + ("building", "buildings", "building"), + ("building_part", "buildings", "building_part"), + ("connector", "transportation", "connector"), + ("division", "divisions", "division"), + ("division_area", "divisions", "division_area"), + ("division_boundary", "divisions", "division_boundary"), + ("infrastructure", "base", "infrastructure"), + ("land", "base", "land"), + ("land_cover", "base", "land_cover"), + ("land_use", "base", "land_use"), + ("place", "places", "place"), + ("segment", "transportation", "segment"), + ("water", "base", "water"), +] + + +def fetch_catalog(url: str) -> dict: + with urllib.request.urlopen(url) as response: + return json.loads(response.read()) + + +def parse_releases(catalog: dict) -> dict: + latest = catalog["latest"] + releases = sorted( + [ + link["href"].split("/")[1] + for link in catalog["links"] + if link["rel"] == "child" + ], + reverse=True, + ) + return {"latest": latest, "releases": releases} + + +def build_views_sql(latest: str, s3_base: str = S3_BASE) -> str: + stmts = ["INSTALL spatial;", "LOAD spatial;"] + for view_name, theme, type_ in VIEWS: + path = f"{s3_base}/{latest}/theme={theme}/type={type_}/*.parquet" + stmts.append( + f"CREATE OR REPLACE VIEW {view_name} AS (\n" + f" SELECT * FROM read_parquet('{path}')\n);" + ) + return "\n\n".join(stmts) + + +def create_duckdb_views(db_path: str, latest: str, s3_base: str = S3_BASE) -> None: + conn = duckdb.connect(db_path) + conn.sql(build_views_sql(latest, s3_base)) + + +def main(): + catalog = fetch_catalog(STAC_CATALOG) + output = parse_releases(catalog) + + for release in 
output["releases"]: + print(f" - {release}") + + with open("releases.json", "w") as f: + f.write(json.dumps(output, indent=4)) + + create_duckdb_views("latest.ddb", output["latest"]) + + +if __name__ == "__main__": + main() diff --git a/utils/requirements-test.txt b/utils/requirements-test.txt new file mode 100644 index 0000000..13f6026 --- /dev/null +++ b/utils/requirements-test.txt @@ -0,0 +1,2 @@ +-r requirements.txt +pytest>=8.0.0 diff --git a/utils/requirements.txt b/utils/requirements.txt index 2718dd1..7c43c30 100644 --- a/utils/requirements.txt +++ b/utils/requirements.txt @@ -1,3 +1,2 @@ -obstore>=0.7.0 duckdb==1.3.2 pyarrow>=20.0.0 diff --git a/utils/tests/__init__.py b/utils/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/tests/test_fetch_releases_from_stac.py b/utils/tests/test_fetch_releases_from_stac.py new file mode 100644 index 0000000..e053f3d --- /dev/null +++ b/utils/tests/test_fetch_releases_from_stac.py @@ -0,0 +1,168 @@ +import json +import os +import sys +import tempfile +from io import BytesIO +from unittest.mock import MagicMock, patch + +import duckdb +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from fetch_releases_from_stac import ( + VIEWS, + build_views_sql, + create_duckdb_views, + fetch_catalog, + parse_releases, +) + +SAMPLE_CATALOG = { + "type": "Catalog", + "id": "Overture Releases", + "stac_version": "1.1.0", + "description": "All Overture Releases", + "links": [ + {"rel": "root", "href": "./catalog.json", "type": "application/json"}, + { + "rel": "child", + "href": "./2026-05-20.0/catalog.json", + "type": "application/json", + "latest": True, + }, + { + "rel": "child", + "href": "./2026-04-15.0/catalog.json", + "type": "application/json", + }, + ], + "latest": "2026-05-20.0", +} + + +class TestFetchCatalog: + def test_returns_parsed_json(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = 
MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response): + result = fetch_catalog("https://stac.overturemaps.org/catalog.json") + + assert result == SAMPLE_CATALOG + + def test_uses_provided_url(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://custom.example.com/catalog.json") + mock_open.assert_called_once_with("https://custom.example.com/catalog.json") + + +class TestParseReleases: + def test_extracts_latest(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["latest"] == "2026-05-20.0" + + def test_extracts_child_releases(self): + result = parse_releases(SAMPLE_CATALOG) + assert "2026-05-20.0" in result["releases"] + assert "2026-04-15.0" in result["releases"] + + def test_excludes_root_link(self): + result = parse_releases(SAMPLE_CATALOG) + # root link href is "./catalog.json" — split("/")[1] would be "catalog.json" + # but more importantly rel="root" should be excluded + assert "catalog.json" not in result["releases"] + assert len(result["releases"]) == 2 + + def test_releases_sorted_descending(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["releases"] == sorted(result["releases"], reverse=True) + + def test_returns_dict_with_expected_keys(self): + result = parse_releases(SAMPLE_CATALOG) + assert set(result.keys()) == {"latest", "releases"} + + def test_empty_links(self): + catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"} + result = parse_releases(catalog) + assert result["latest"] == "2026-05-20.0" + assert result["releases"] == [] + + def test_single_release(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + {"rel": "child", 
"href": "./2026-05-20.0/catalog.json"}, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + +class TestBuildViewsSql: + def test_contains_install_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "INSTALL spatial" in sql + + def test_contains_load_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "LOAD spatial" in sql + + def test_all_views_present(self): + sql = build_views_sql("2026-05-20.0") + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql + + def test_latest_release_in_paths(self): + release = "2026-05-20.0" + sql = build_views_sql(release) + assert release in sql + + def test_custom_s3_base(self): + sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release") + assert "s3://my-bucket/release" in sql + + def test_correct_theme_type_mapping(self): + sql = build_views_sql("2026-05-20.0") + assert "theme=addresses/type=address" in sql + assert "theme=buildings/type=building_part" in sql + assert "theme=transportation/type=segment" in sql + assert "theme=divisions/type=division_boundary" in sql + + def test_view_count_matches_views_constant(self): + sql = build_views_sql("2026-05-20.0") + count = sql.count("CREATE OR REPLACE VIEW") + assert count == len(VIEWS) + + +class TestCreateDuckdbViews: + def test_creates_all_views(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", "2026-05-20.0") + mock_conn.sql.assert_called_once() + sql_arg = mock_conn.sql.call_args[0][0] + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg + + def test_views_reference_correct_release(self): + release = "2026-05-20.0" + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", release) + sql_arg = mock_conn.sql.call_args[0][0] + assert release in sql_arg + + def 
test_connects_to_provided_path(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn) as mock_connect: + create_duckdb_views("some/path/latest.ddb", "2026-05-20.0") + mock_connect.assert_called_once_with("some/path/latest.ddb") From 8a213d9b51bcd043af758d859740e7ec3cfcb121 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 11 Jun 2026 16:08:00 -0400 Subject: [PATCH 2/9] fix: add concurrency setting to test workflow Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2688aeb..37795b6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,6 +8,10 @@ on: permissions: contents: read +concurrency: + group: "test-${{ github.ref }}" + cancel-in-progress: true + jobs: test: name: Run unit tests From 5c98db902e2a9c69cd23cebc417b969241e40b60 Mon Sep 17 00:00:00 2001 From: John McCall Date: Thu, 11 Jun 2026 16:23:41 -0400 Subject: [PATCH 3/9] fix: address PR review feedback - Add 30s timeout + User-Agent header to fetch_catalog - Robust href parsing via urlparse (handles absolute + relative hrefs) - Close DuckDB connection in finally block - Remove unused test imports (tempfile, BytesIO, duckdb, pytest) - Update test_uses_provided_url to assert on Request.full_url + timeout - Add tests for absolute href and no-./-prefix relative href Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- utils/fetch_releases_from_stac.py | 19 +- utils/tests/test_fetch_releases_from_stac.py | 367 ++++++++++--------- 2 files changed, 214 insertions(+), 172 deletions(-) diff --git a/utils/fetch_releases_from_stac.py b/utils/fetch_releases_from_stac.py index 295153a..a7cf5a0 100644 --- a/utils/fetch_releases_from_stac.py +++ b/utils/fetch_releases_from_stac.py @@ -1,10 +1,12 @@ import json import 
urllib.request +from urllib.parse import urlparse import duckdb STAC_CATALOG = "https://stac.overturemaps.org/catalog.json" S3_BASE = "s3://overturemaps-us-west-2/release" +_USER_AGENT = "overturemaps-data/1.0" VIEWS = [ ("address", "addresses", "address"), @@ -25,16 +27,22 @@ ] -def fetch_catalog(url: str) -> dict: - with urllib.request.urlopen(url) as response: +def fetch_catalog(url: str, timeout: int = 30) -> dict: + req = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT}) + with urllib.request.urlopen(req, timeout=timeout) as response: return json.loads(response.read()) +def _release_id_from_href(href: str) -> str: + parts = [p for p in urlparse(href).path.split("/") if p and p != "."] + return parts[0] + + def parse_releases(catalog: dict) -> dict: latest = catalog["latest"] releases = sorted( [ - link["href"].split("/")[1] + _release_id_from_href(link["href"]) for link in catalog["links"] if link["rel"] == "child" ], @@ -56,7 +64,10 @@ def build_views_sql(latest: str, s3_base: str = S3_BASE) -> str: def create_duckdb_views(db_path: str, latest: str, s3_base: str = S3_BASE) -> None: conn = duckdb.connect(db_path) - conn.sql(build_views_sql(latest, s3_base)) + try: + conn.sql(build_views_sql(latest, s3_base)) + finally: + conn.close() def main(): diff --git a/utils/tests/test_fetch_releases_from_stac.py b/utils/tests/test_fetch_releases_from_stac.py index e053f3d..e482282 100644 --- a/utils/tests/test_fetch_releases_from_stac.py +++ b/utils/tests/test_fetch_releases_from_stac.py @@ -1,168 +1,199 @@ -import json -import os -import sys -import tempfile -from io import BytesIO -from unittest.mock import MagicMock, patch - -import duckdb -import pytest - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) - -from fetch_releases_from_stac import ( - VIEWS, - build_views_sql, - create_duckdb_views, - fetch_catalog, - parse_releases, -) - -SAMPLE_CATALOG = { - "type": "Catalog", - "id": "Overture Releases", - "stac_version": "1.1.0", - 
"description": "All Overture Releases", - "links": [ - {"rel": "root", "href": "./catalog.json", "type": "application/json"}, - { - "rel": "child", - "href": "./2026-05-20.0/catalog.json", - "type": "application/json", - "latest": True, - }, - { - "rel": "child", - "href": "./2026-04-15.0/catalog.json", - "type": "application/json", - }, - ], - "latest": "2026-05-20.0", -} - - -class TestFetchCatalog: - def test_returns_parsed_json(self): - mock_response = MagicMock() - mock_response.__enter__ = lambda s: s - mock_response.__exit__ = MagicMock(return_value=False) - mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() - - with patch("urllib.request.urlopen", return_value=mock_response): - result = fetch_catalog("https://stac.overturemaps.org/catalog.json") - - assert result == SAMPLE_CATALOG - - def test_uses_provided_url(self): - mock_response = MagicMock() - mock_response.__enter__ = lambda s: s - mock_response.__exit__ = MagicMock(return_value=False) - mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() - - with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: - fetch_catalog("https://custom.example.com/catalog.json") - mock_open.assert_called_once_with("https://custom.example.com/catalog.json") - - -class TestParseReleases: - def test_extracts_latest(self): - result = parse_releases(SAMPLE_CATALOG) - assert result["latest"] == "2026-05-20.0" - - def test_extracts_child_releases(self): - result = parse_releases(SAMPLE_CATALOG) - assert "2026-05-20.0" in result["releases"] - assert "2026-04-15.0" in result["releases"] - - def test_excludes_root_link(self): - result = parse_releases(SAMPLE_CATALOG) - # root link href is "./catalog.json" — split("/")[1] would be "catalog.json" - # but more importantly rel="root" should be excluded - assert "catalog.json" not in result["releases"] - assert len(result["releases"]) == 2 - - def test_releases_sorted_descending(self): - result = parse_releases(SAMPLE_CATALOG) - 
assert result["releases"] == sorted(result["releases"], reverse=True) - - def test_returns_dict_with_expected_keys(self): - result = parse_releases(SAMPLE_CATALOG) - assert set(result.keys()) == {"latest", "releases"} - - def test_empty_links(self): - catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"} - result = parse_releases(catalog) - assert result["latest"] == "2026-05-20.0" - assert result["releases"] == [] - - def test_single_release(self): - catalog = { - **SAMPLE_CATALOG, - "links": [ - {"rel": "child", "href": "./2026-05-20.0/catalog.json"}, - ], - "latest": "2026-05-20.0", - } - result = parse_releases(catalog) - assert result["releases"] == ["2026-05-20.0"] - - -class TestBuildViewsSql: - def test_contains_install_spatial(self): - sql = build_views_sql("2026-05-20.0") - assert "INSTALL spatial" in sql - - def test_contains_load_spatial(self): - sql = build_views_sql("2026-05-20.0") - assert "LOAD spatial" in sql - - def test_all_views_present(self): - sql = build_views_sql("2026-05-20.0") - for view_name, _, _ in VIEWS: - assert f"CREATE OR REPLACE VIEW {view_name}" in sql - - def test_latest_release_in_paths(self): - release = "2026-05-20.0" - sql = build_views_sql(release) - assert release in sql - - def test_custom_s3_base(self): - sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release") - assert "s3://my-bucket/release" in sql - - def test_correct_theme_type_mapping(self): - sql = build_views_sql("2026-05-20.0") - assert "theme=addresses/type=address" in sql - assert "theme=buildings/type=building_part" in sql - assert "theme=transportation/type=segment" in sql - assert "theme=divisions/type=division_boundary" in sql - - def test_view_count_matches_views_constant(self): - sql = build_views_sql("2026-05-20.0") - count = sql.count("CREATE OR REPLACE VIEW") - assert count == len(VIEWS) - - -class TestCreateDuckdbViews: - def test_creates_all_views(self): - mock_conn = MagicMock() - with patch("duckdb.connect", 
return_value=mock_conn): - create_duckdb_views(":memory:", "2026-05-20.0") - mock_conn.sql.assert_called_once() - sql_arg = mock_conn.sql.call_args[0][0] - for view_name, _, _ in VIEWS: - assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg - - def test_views_reference_correct_release(self): - release = "2026-05-20.0" - mock_conn = MagicMock() - with patch("duckdb.connect", return_value=mock_conn): - create_duckdb_views(":memory:", release) - sql_arg = mock_conn.sql.call_args[0][0] - assert release in sql_arg - - def test_connects_to_provided_path(self): - mock_conn = MagicMock() - with patch("duckdb.connect", return_value=mock_conn) as mock_connect: - create_duckdb_views("some/path/latest.ddb", "2026-05-20.0") - mock_connect.assert_called_once_with("some/path/latest.ddb") +import json +import sys +import os +from unittest.mock import MagicMock, patch + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from fetch_releases_from_stac import ( + VIEWS, + build_views_sql, + create_duckdb_views, + fetch_catalog, + parse_releases, +) + +SAMPLE_CATALOG = { + "type": "Catalog", + "id": "Overture Releases", + "stac_version": "1.1.0", + "description": "All Overture Releases", + "links": [ + {"rel": "root", "href": "./catalog.json", "type": "application/json"}, + { + "rel": "child", + "href": "./2026-05-20.0/catalog.json", + "type": "application/json", + "latest": True, + }, + { + "rel": "child", + "href": "./2026-04-15.0/catalog.json", + "type": "application/json", + }, + ], + "latest": "2026-05-20.0", +} + + +class TestFetchCatalog: + def test_returns_parsed_json(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response): + result = fetch_catalog("https://stac.overturemaps.org/catalog.json") + + assert result == SAMPLE_CATALOG + + def 
test_uses_provided_url(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://custom.example.com/catalog.json") + req = mock_open.call_args[0][0] + assert req.full_url == "https://custom.example.com/catalog.json" + + def test_applies_timeout(self): + mock_response = MagicMock() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + mock_response.read.return_value = json.dumps(SAMPLE_CATALOG).encode() + + with patch("urllib.request.urlopen", return_value=mock_response) as mock_open: + fetch_catalog("https://stac.overturemaps.org/catalog.json", timeout=10) + assert mock_open.call_args[1]["timeout"] == 10 + + +class TestParseReleases: + def test_extracts_latest(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["latest"] == "2026-05-20.0" + + def test_extracts_child_releases(self): + result = parse_releases(SAMPLE_CATALOG) + assert "2026-05-20.0" in result["releases"] + assert "2026-04-15.0" in result["releases"] + + def test_excludes_root_link(self): + result = parse_releases(SAMPLE_CATALOG) + # root link href is "./catalog.json" — parsed as a release ID it would yield "catalog.json", + # but more importantly rel="root" links must be excluded from the release list + assert "catalog.json" not in result["releases"] + assert len(result["releases"]) == 2 + + def test_releases_sorted_descending(self): + result = parse_releases(SAMPLE_CATALOG) + assert result["releases"] == sorted(result["releases"], reverse=True) + + def test_returns_dict_with_expected_keys(self): + result = parse_releases(SAMPLE_CATALOG) + assert set(result.keys()) == {"latest", "releases"} + + def test_empty_links(self): + catalog = {**SAMPLE_CATALOG, "links": [], "latest": "2026-05-20.0"} + result = parse_releases(catalog) + assert 
result["latest"] == "2026-05-20.0" + assert result["releases"] == [] + + def test_single_release(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + {"rel": "child", "href": "./2026-05-20.0/catalog.json"}, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + def test_absolute_href_parsed_correctly(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + { + "rel": "child", + "href": "https://stac.overturemaps.org/2026-05-20.0/catalog.json", + }, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + def test_relative_href_without_dotslash_parsed_correctly(self): + catalog = { + **SAMPLE_CATALOG, + "links": [ + {"rel": "child", "href": "2026-05-20.0/catalog.json"}, + ], + "latest": "2026-05-20.0", + } + result = parse_releases(catalog) + assert result["releases"] == ["2026-05-20.0"] + + + def test_contains_install_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "INSTALL spatial" in sql + + def test_contains_load_spatial(self): + sql = build_views_sql("2026-05-20.0") + assert "LOAD spatial" in sql + + def test_all_views_present(self): + sql = build_views_sql("2026-05-20.0") + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql + + def test_latest_release_in_paths(self): + release = "2026-05-20.0" + sql = build_views_sql(release) + assert release in sql + + def test_custom_s3_base(self): + sql = build_views_sql("2026-05-20.0", s3_base="s3://my-bucket/release") + assert "s3://my-bucket/release" in sql + + def test_correct_theme_type_mapping(self): + sql = build_views_sql("2026-05-20.0") + assert "theme=addresses/type=address" in sql + assert "theme=buildings/type=building_part" in sql + assert "theme=transportation/type=segment" in sql + assert "theme=divisions/type=division_boundary" in sql + + def test_view_count_matches_views_constant(self): + sql = 
build_views_sql("2026-05-20.0") + count = sql.count("CREATE OR REPLACE VIEW") + assert count == len(VIEWS) + + +class TestCreateDuckdbViews: + def test_creates_all_views(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", "2026-05-20.0") + mock_conn.sql.assert_called_once() + sql_arg = mock_conn.sql.call_args[0][0] + for view_name, _, _ in VIEWS: + assert f"CREATE OR REPLACE VIEW {view_name}" in sql_arg + + def test_views_reference_correct_release(self): + release = "2026-05-20.0" + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", release) + sql_arg = mock_conn.sql.call_args[0][0] + assert release in sql_arg + + def test_closes_connection(self): + mock_conn = MagicMock() + with patch("duckdb.connect", return_value=mock_conn): + create_duckdb_views(":memory:", "2026-05-20.0") + mock_conn.close.assert_called_once() + From 8904283e9d316b448f1aa32b089dd3fb4c6f8be8 Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 15 Jun 2026 09:56:39 -0400 Subject: [PATCH 4/9] chore: remove latest.dbb (legacy alias) from published bundle Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/build-manifests.yml | 1 - index.html | 4 ---- 2 files changed, 5 deletions(-) diff --git a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 67cd9cc..0421e1e 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -57,7 +57,6 @@ jobs: cp utils/releases.json publish/ cp utils/registry-manifest.json publish/ cp utils/latest.ddb publish/ - cp utils/latest.ddb publish/latest.dbb cp overture_releases.yaml publish/ - name: Setup Pages diff --git a/index.html b/index.html index 664210a..0dbb472 100644 --- a/index.html +++ b/index.html @@ -163,10 +163,6 @@

Available Files

latest.ddb DuckDB database with views pointing to the latest Overture release on S3 - - latest.dbb deprecated - Alias for latest.ddb (legacy filename) - registry-manifest.json Parquet file manifest for the Overture GERS registry From 412a6bc03f65c2ce01478385d55a6db873c44072 Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 15 Jun 2026 11:11:36 -0400 Subject: [PATCH 5/9] chore: remove index.html, add deprecation notice to README Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/build-manifests.yml | 1 - README.md | 6 + index.html | 189 -------------------------- 3 files changed, 6 insertions(+), 190 deletions(-) delete mode 100644 index.html diff --git a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 0421e1e..5f8acd7 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -53,7 +53,6 @@ jobs: - name: Copy output to publish directory run: | mkdir publish - cp index.html publish/ cp utils/releases.json publish/ cp utils/registry-manifest.json publish/ cp utils/latest.ddb publish/ diff --git a/README.md b/README.md index 68b70d8..47c84ed 100644 --- a/README.md +++ b/README.md @@ -49,3 +49,9 @@ for each theme with the following enhancements: Data Release Feedback --- We are very interested in feedback on the Overture data. Please use the [Discussion](https://github.com/OvertureMaps/data/discussions) section of this repo to comment. Tagging it with the release and relevant theme name (Places, Transportation) will help direct your ideas. Please include as much detail as possible. The associated Task Force will carefully review each submission and offer feedback where required. + +Release Discovery +--- +Use the [Overture STAC catalog](https://stac.overturemaps.org/catalog.json) (SpatioTemporal Asset Catalog) for authoritative release discovery. 
+ +> **Deprecated:** `overture_releases.yaml` and `releases.json` are no longer maintained and will be removed in a future release. Use the STAC catalog instead. diff --git a/index.html b/index.html deleted file mode 100644 index 0dbb472..0000000 --- a/index.html +++ /dev/null @@ -1,189 +0,0 @@ - - - - - - Overture Maps Data - - - - - - - -
-

Overture Maps Foundation

-

labs.overturemaps.org/data: published release artifacts

-
- -
-

Available Files

- - - - - - - - - - - - - - - - - - - - - - - - - -
File | Description
releases.json deprecated | Release list with latest pointer, supplanted by STAC (SpatioTemporal Asset Catalog)
latest.ddb | DuckDB database with views pointing to the latest Overture release on S3
registry-manifest.json | Parquet file manifest for the Overture GERS registry
overture_releases.yaml deprecated | Historical release list, no longer maintained. Use STAC (SpatioTemporal Asset Catalog) instead
- -

- For authoritative release discovery, use the - STAC (SpatioTemporal Asset Catalog). - Full documentation at docs.overturemaps.org. -

-
- - - - - From 24cda7f6037d1c183efb69fcac742ddbf405a3d8 Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 15 Jun 2026 11:13:50 -0400 Subject: [PATCH 6/9] chore: remove simple-registry-manifest.py and registry-manifest.json output Registry manifest is available via the STAC catalog. Also drop pyarrow dependency which was only needed by this script. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/build-manifests.yml | 2 - utils/requirements.txt | 1 - utils/simple-registry-manifest.py | 142 -------------------------- 3 files changed, 145 deletions(-) delete mode 100644 utils/simple-registry-manifest.py diff --git a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 5f8acd7..4143052 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -48,13 +48,11 @@ jobs: run: | cd utils python3 fetch_releases_from_stac.py - python3 simple-registry-manifest.py - name: Copy output to publish directory run: | mkdir publish cp utils/releases.json publish/ - cp utils/registry-manifest.json publish/ cp utils/latest.ddb publish/ cp overture_releases.yaml publish/ diff --git a/utils/requirements.txt b/utils/requirements.txt index 7c43c30..348ac20 100644 --- a/utils/requirements.txt +++ b/utils/requirements.txt @@ -1,2 +1 @@ duckdb==1.3.2 -pyarrow>=20.0.0 diff --git a/utils/simple-registry-manifest.py b/utils/simple-registry-manifest.py deleted file mode 100644 index 381134f..0000000 --- a/utils/simple-registry-manifest.py +++ /dev/null @@ -1,142 +0,0 @@ -import json -import logging - -import pyarrow.dataset as ds -import pyarrow.fs as fs - -# Set up logging -logging.basicConfig() -logger = logging.getLogger("registry-manifest") -logger.setLevel(logging.INFO) - - -def create_registry_manifest(): - """ - Simple script to read all parquet files in s3://overturemaps-us-west-2/registry/ - and create a JSON manifest with min/max IDs and file 
paths. - """ - - # Initialize S3 filesystem (following the pattern from registry-manifest.py) - filesystem = fs.S3FileSystem(anonymous=True, region="us-west-2") - - # Registry path (without s3:// prefix for filesystem operations) - registry_path = "overturemaps-us-west-2/registry" - - logger.info(f"Scanning registry path: {registry_path}") - - # Get all files in the registry directory using proper FileSelector - registry_selector = fs.FileSelector(registry_path, recursive=True) - all_files = filesystem.get_file_info(registry_selector) - - # Filter for parquet files only - parquet_files = [ - f - for f in all_files - if f.path.endswith(".parquet") and f.type == fs.FileType.File - ] - - logger.info(f"Found {len(parquet_files)} parquet files in registry") - - # Simple arrays for bounds and files - bounds = [] - files = [] - - # Process each parquet file - for file_info in parquet_files: - logger.info(f"Processing: {file_info.path}") - - try: - # Create dataset for this single file - file_dataset = ds.dataset( - file_info.path, filesystem=filesystem, format="parquet" - ) - - # Get fragments - fragments = list(file_dataset.get_fragments()) - - if not fragments: - logger.warning(f"No fragments found for {file_info.path}") - continue - - # Use first fragment to get schema - fragment = fragments[0] - schema = fragment.metadata.schema.to_arrow_schema() - - # Check if 'id' column exists - has_id_column = "id" in [field.name for field in schema] - - if has_id_column: - # Since ID field is always sorted, we only need first and last row group - min_id = None - max_id = None - - # Find the ID column index - id_column_index = next( - i for i, field in enumerate(schema) if field.name == "id" - ) - - # Get min from first row group and max from last row group - first_fragment = fragments[0] - last_fragment = fragments[-1] - - # Get min from first row group of first fragment - first_metadata = first_fragment.metadata - if first_metadata.num_row_groups > 0: - first_row_group = 
first_metadata.row_group(0) - first_id_column = first_row_group.column(id_column_index) - - if ( - first_id_column.statistics - and first_id_column.statistics.has_min_max - ): - min_id = first_id_column.statistics.min - if isinstance(min_id, bytes): - min_id = min_id.decode("utf-8") - - # Get max from last row group of last fragment - last_metadata = last_fragment.metadata - if last_metadata.num_row_groups > 0: - last_row_group = last_metadata.row_group( - last_metadata.num_row_groups - 1 - ) - last_id_column = last_row_group.column(id_column_index) - - if ( - last_id_column.statistics - and last_id_column.statistics.has_min_max - ): - max_id = last_id_column.statistics.max - if isinstance(max_id, bytes): - max_id = max_id.decode("utf-8") - - filename = file_info.path.replace( - "overturemaps-us-west-2/registry/", "" - ) - bounds.append([min_id, max_id]) - files.append(filename) - - logger.info( - f"Successfully processed {file_info.path}: {min_id} - {max_id}" - ) - else: - logger.warning(f"No 'id' column found in {file_info.path}") - - except Exception as e: - logger.error(f"Error processing {file_info.path}: {e}") - - # Create simple manifest - manifest = {"bounds": bounds, "files": files} - - # Write manifest to JSON file (compact format for smallest size) - output_file = "registry-manifest.json" - with open(output_file, "w") as f: - json.dump(manifest, f, separators=(",", ":")) - - print(f"Registry manifest written to {output_file}") - print(f"Total files processed: {len(manifest['files'])}") - - return manifest - - -if __name__ == "__main__": - manifest = create_registry_manifest() From 6b87737e2d94dc034407d68c90725fa7bea32c51 Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 15 Jun 2026 11:16:10 -0400 Subject: [PATCH 7/9] docs: add registry-manifest.json to README deprecation notice Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/README.md b/README.md index 47c84ed..483c20b 100644 --- a/README.md +++ b/README.md @@ -54,4 +54,4 @@ Release Discovery --- Use the [Overture STAC catalog](https://stac.overturemaps.org/catalog.json) (SpatioTemporal Asset Catalog) for authoritative release discovery. -> **Deprecated:** `overture_releases.yaml` and `releases.json` are no longer maintained and will be removed in a future release. Use the STAC catalog instead. +> **Deprecated:** `overture_releases.yaml`, `releases.json`, and `registry-manifest.json` (previously at `labs.overturemaps.org/data/`) are no longer maintained and will be removed in a future release. Use the STAC catalog instead. From 8e2dba683b468bbd473f3d93e95e65088117c826 Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 15 Jun 2026 11:17:47 -0400 Subject: [PATCH 8/9] chore: add custom 404 page redirecting to README and STAC catalog Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- .github/workflows/build-manifests.yml | 1 + 404.html | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 404.html diff --git a/.github/workflows/build-manifests.yml b/.github/workflows/build-manifests.yml index 4143052..4a1acd6 100644 --- a/.github/workflows/build-manifests.yml +++ b/.github/workflows/build-manifests.yml @@ -52,6 +52,7 @@ jobs: - name: Copy output to publish directory run: | mkdir publish + cp 404.html publish/ cp utils/releases.json publish/ cp utils/latest.ddb publish/ cp overture_releases.yaml publish/ diff --git a/404.html b/404.html new file mode 100644 index 0000000..c83e898 --- /dev/null +++ b/404.html @@ -0,0 +1,26 @@ + + + + + + Moved | Overture Maps Data + + + +

This resource has moved

+

+ The file you requested is no longer published here. Release discovery is now handled + by the Overture STAC catalog + (SpatioTemporal Asset Catalog). +

+

+ See the data repo README + for details. Redirecting in 5 seconds… +

+ + From 7c7d147802e0201e5a4dd06c15d51baa7449c3d4 Mon Sep 17 00:00:00 2001 From: John McCall Date: Mon, 15 Jun 2026 11:19:16 -0400 Subject: [PATCH 9/9] chore: apply OMF brand styling to 404 page Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: John McCall --- 404.html | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/404.html b/404.html index c83e898..bb19ca2 100644 --- a/404.html +++ b/404.html @@ -4,23 +4,96 @@ Moved | Overture Maps Data + + + +

This resource has moved

+
+

- The file you requested is no longer published here. Release discovery is now handled - by the Overture STAC catalog + The file you requested is no longer published here. Release discovery is + now handled by the + Overture STAC catalog (SpatioTemporal Asset Catalog).

See the data repo README for details. Redirecting in 5 seconds…

+ + +