Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pr-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: integration-test-logs-${{ matrix.os }}
path: tests/integration/results/integration_test/logs
path: tests/integration/resources/geo_boundaries/logs
if-no-files-found: ignore
retention-days: 30
- name: Fail if integration or linting failed
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ Data processing steps:
- Country area data: [GADM](https://gadm.org/), [Overture Maps](https://overturemaps.org/) and [NUTS](https://ec.europa.eu/eurostat/web/gisco/geodata/statistical-units/territorial-units-statistics) divisions are supported.
- Exclusive Economic Zone (EEZ) data: [Marine regions](https://www.marineregions.org/).
2. Individual country files are downloaded and harmonised to fit a standardised schema.
Contested regions are removed at this stage.
3. Land is clipped using maritime Exclusive Economic Zones (EEZ).
4. Each polygon is clipped using its neighbours to minimise overlapping polygons.
- Contested regions are removed at this stage.
- Land is clipped using maritime Exclusive Economic Zones (EEZ).
- Optionally, a Voronoi algorithm is run to separate EEZ areas to fit subnational regions.
3. Country files are combined, and each polygon is then clipped using its neighbours to minimise overlapping polygons.

> [!TIP]
> The `subtype` naming matches that of the source database. For example, NUTS uses 0, 1, 2 and 3 (NUTS0, NUTS1, NUTS2, etc.).
Expand Down
4 changes: 4 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# A minimal example of how to configure this module
overture_release: "latest"
voronoi_eez:
enabled: True
sample_spacing: 10000 # sample every 10 km
crs:
projected: "epsg:3857"
geographic: "epsg:4326"
Expand All @@ -26,3 +29,4 @@ countries:
source: "nuts"
resolution: 01M
year: 2024
extra_eez: 8364
3 changes: 3 additions & 0 deletions config/europe_example.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# A minimal example of how to configure this module
overture_release: "latest"
voronoi_eez:
enabled: True
sample_spacing: 10000
crs:
projected: "epsg:3035"
geographic: "epsg:4326"
Expand Down
14 changes: 14 additions & 0 deletions config/usa_example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Example of regional disaggregation of a large country
# USA has multiple marine zones, which can be appended as extras
overture_release: "latest"
voronoi_eez:
enabled: True
sample_spacing: 10000 # sample every 10 km
crs:
projected: "epsg:3857"
geographic: "epsg:4326"
countries:
"USA":
subtype: "1"
source: "gadm"
extra_eez: [8463, 8453]
Binary file modified figures/rulegraph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified figures/shapes.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
211 changes: 62 additions & 149 deletions pixi.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion tests/local_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ def module_path():
return Path(__file__).parent.parent


@pytest.mark.parametrize("scenario", ["config", "china_example", "europe_example"])
@pytest.mark.parametrize(
"scenario", ["config", "china_example", "europe_example", "usa_example"]
)
def test_config_example(module_path, scenario):
"""Example files should result in a successful run."""
result_file = "results/shapes.parquet"
Expand Down
2 changes: 2 additions & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ with open(workflow.source_path("internal/settings.yaml"), "r") as f:


# Add all your includes here.
include: "rules/_utils.smk"
include: "rules/automatic.smk"
include: "rules/build.smk"


# Add all files to be delivered alongside the workflow here
workflow.source_path("scripts/_schemas.py")
workflow.source_path("scripts/_utils.py")


rule all:
Expand Down
43 changes: 36 additions & 7 deletions workflow/internal/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@ $defs:
description: "Schema for user-provided configuration files."
type: object
properties:
overture_release:
description: |
Overture data release to use. Two options are possible:
- A specific release string in the form of 'yyyy-mm-dd.x'.
- A request for the latest release in the form of 'latest'.
type: string
pattern: "^(?:latest|(?:\\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\\d|3[01])\\.\\d+)$"
crs:
description: CRS codes (i.e., 'epsg:3035', 'EPSG:4326', 8857).
type: object
Expand Down Expand Up @@ -60,6 +53,15 @@ properties:
year:
description: "Year for NUTS source (required if source is 'nuts')."
type: integer
extra_eez:
description: >
Optional. Additional EEZ zones to append to this country.
default: []
anyOf:
- type: integer
- type: array
items:
type: integer
required:
- subtype
- source
Expand All @@ -80,6 +82,33 @@ properties:
- year
minProperties: 1
additionalProperties: false
overture_release:
description: |
Overture data release to use. Two options are possible:
- A specific release string in the form of 'yyyy-mm-dd.x'.
- A request for the latest release in the form of 'latest'.
type: string
pattern: "^(?:latest|(?:\\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\\d|3[01])\\.\\d+)$"
voronoi_eez:
description: >
Optional configuration for the Voronoi EEZ splitting algorithm.
type: object
required:
- enabled
- sample_spacing
properties:
enabled:
description: Activation of the splitting algorithm.
type: boolean
default: false
sample_spacing:
description: >
Distance between sample points for shorelines.
Lower values give finer splits at the cost of slower processing.
Matches the unit of the projected CRS.
type: integer
default: 10000
minimum: 1
required:
- countries
- crs
Expand Down
3 changes: 3 additions & 0 deletions workflow/internal/settings.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Module settings that users cannot modify.
nuts:
epsg: 3035 # will be converted later to the user-requested epsg
voronoi_eez: # defaults for Voronoi EEZ splitting
enabled: false
sample_spacing: 10000
File renamed without changes.
3 changes: 3 additions & 0 deletions workflow/report/build_country.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Built combined file for {{ snakemake.wildcards.country }}.

- Contested EEZ regions are removed.
- Optionally, EEZ regions are broken down using a Voronoi algorithm.
15 changes: 15 additions & 0 deletions workflow/rules/_utils.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Utility functions for snakemake rule handling."""


def get_country_filename(country: str):
    """Return the unique file stem of a configured country's land dataset.

    The stem always joins source, country and subtype with underscores.
    Entries whose source is ``nuts`` additionally carry year and resolution,
    so that requests differing only in those settings get distinct files.
    """
    settings = config["countries"][country]
    pieces = [settings["source"], country, settings["subtype"]]
    if settings["source"] == "nuts":
        # Resolution is read before year, but year comes first in the name.
        resolution = settings["resolution"]
        pieces += [settings["year"], resolution]
    return "_".join(str(piece) for piece in pieces)
28 changes: 13 additions & 15 deletions workflow/rules/automatic.smk
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Small transformations might be performed to make the data easier to work with.

rule download_country_overture:
output:
path="<resources>/automatic/countries/overture_{country}_{subtype}.parquet",
path="<resources>/automatic/land/overture_{country}_{subtype}.parquet",
log:
"<logs>/{country}/download_country_overture_{subtype}.log",
conda:
Expand All @@ -21,9 +21,7 @@ rule download_country_overture:

rule download_country_gadm:
output:
path=temp(
"<resources>/automatic/countries/raw_gadm_{country}_{subtype}.parquet"
),
path=temp("<resources>/automatic/land/raw_gadm_{country}_{subtype}.parquet"),
log:
"<logs>/{country}/download_country_gadm_{subtype}.log",
conda:
Expand All @@ -36,9 +34,9 @@ rule download_country_gadm:

rule standardise_country_gadm:
input:
raw="<resources>/automatic/countries/raw_gadm_{country}_{subtype}.parquet",
raw=rules.download_country_gadm.output.path,
output:
standardised="<resources>/automatic/countries/gadm_{country}_{subtype}.parquet",
standardised="<resources>/automatic/land/gadm_{country}_{subtype}.parquet",
log:
"<logs>/{country}/standardise_country_gadm_{subtype}.log",
conda:
Expand All @@ -54,32 +52,30 @@ rule standardise_country_gadm:

rule download_nuts:
output:
path="<resources>/automatic/nuts/nuts_{resolution}_{year}_{level}.parquet",
path="<resources>/automatic/nuts/nuts_{subtype}_{resolution}_{year}.parquet",
log:
"<logs>/download_nuts_{resolution}_{year}_{level}.log",
"<logs>/download_nuts_{subtype}_{resolution}_{year}.log",
conda:
"../envs/shape.yaml"
params:
epsg=internal["nuts"]["epsg"],
message:
"Download '{wildcards.resolution}_{wildcards.year}_{wildcards.level}' from NUTS."
"Download '{wildcards.subtype}_{wildcards.resolution}_{wildcards.year}' from NUTS."
script:
"../scripts/download_nuts.py"


rule standardise_country_nuts:
input:
raw=lambda wc: f"<resources>/automatic/nuts/nuts_{config["countries"][wc.country]["resolution"]}_{config["countries"][wc.country]["year"]}_{wc.subtype}.parquet",
raw=rules.download_nuts.output.path,
output:
path="<resources>/automatic/countries/nuts_{country}_{subtype}.parquet",
path="<resources>/automatic/land/nuts_{country}_{subtype}_{year}_{resolution}.parquet",
log:
"<logs>/{country}/standardise_country_nuts_{subtype}.log",
"<logs>/{country}/standardise_country_nuts_{subtype}_{year}_{resolution}.log",
conda:
"../envs/shape.yaml"
params:
year=lambda wc: config["countries"][wc.country]["year"],
message:
"Standardise '{wildcards.country}_{wildcards.subtype}' NUTS dataset."
"Standardise '{wildcards.country}' NUTS dataset for '{wildcards.subtype}_{wildcards.resolution}_{wildcards.year}'."
script:
"../scripts/standardise_country_nuts.py"

Expand All @@ -92,6 +88,8 @@ rule download_marine_eez_area:
"<logs>/{country}/download_marine_eez_area.log",
conda:
"../envs/shape.yaml"
params:
extra_eez=lambda wc: config["countries"][wc.country].get("extra_eez", []),
message:
"Download and standardise '{wildcards.country}' EEZ dataset."
script:
Expand Down
36 changes: 29 additions & 7 deletions workflow/rules/build.smk
Original file line number Diff line number Diff line change
@@ -1,22 +1,44 @@
"""Rules used to construct the final dataset."""


# Combine the land and maritime (EEZ) polygons of a single country.
rule build_country:
    input:
        # The land file stem depends on the configured source; it is built by
        # get_country_filename (rules/_utils.smk).
        land=lambda wc: f"<resources>/automatic/land/{get_country_filename(wc.country)}.parquet",
        maritime="<resources>/automatic/eez/{country}.parquet",
    output:
        country="<resources>/automatic/country/{country}.parquet",
        # Per-country figure, registered in the Snakemake report.
        plot=report(
            "<resources>/automatic/country/{country}.png",
            caption="../report/build_country.rst",
            category="Module Geo-Boundaries",
            subcategory="Combined countries",
        ),
    log:
        "<logs>/{country}/build_country.log",
    conda:
        "../envs/shape.yaml"
    params:
        crs=config["crs"],
        # Internal defaults come first; keys present in the user configuration
        # override them (right-hand operand of the dict union wins).
        voronoi=internal["voronoi_eez"] | config.get("voronoi_eez", {}),
    message:
        "{wildcards.country}: build combined land and marine polygons."
    script:
        "../scripts/build_country.py"


rule build_combined_area:
input:
countries=[
f"<resources>/automatic/countries/{data['source']}_{country}_{data['subtype']}.parquet"
for country, data in config["countries"].items()
],
marine=[
f"<resources>/automatic/eez/{country}.parquet"
f"<resources>/automatic/country/{country}.parquet"
for country in config["countries"]
],
output:
combined="<shapes>",
plot=report(
"<results>/shapes.png",
caption="../report/results.rst",
category="Combined area",
caption="../report/build_combined_area.rst",
category="Module Geo-Boundaries",
subcategory="Combined area",
),
log:
"<logs>/build_combined_area.log",
Expand Down
14 changes: 10 additions & 4 deletions workflow/scripts/_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
class ShapesSchema(pa.DataFrameModel):
"""Schema for geographic shapes."""

class Config:
coerce = True
strict = True

shape_id: Series[str] = pa.Field(unique=True)
"A unique identifier for this shape."
country_id: Series[str]
Expand Down Expand Up @@ -38,7 +42,9 @@ def fix_geometries(cls, df):
def check_geometries(cls, geom):
return (geom is not None) and (not geom.is_empty) and geom.is_valid

class Config:
# top-level schema options from your YAML
coerce = True
strict = True

class EEZSchema(ShapesSchema):
    """Schema for marine (EEZ) shapes.

    Extends ``ShapesSchema`` with a boolean column flagging contested zones.
    """

    contested: Series[bool]
    """Specifies if the EEZ is contested."""
27 changes: 27 additions & 0 deletions workflow/scripts/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Reusable utility functions."""

import geopandas as gpd
from matplotlib import pyplot as plt
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from pyproj import CRS


def check_crs_config(crs: dict[str, int | str]) -> dict[str, CRS]:
    """Parse the configured CRS entries and validate their kind.

    Every value in ``crs`` is converted with ``CRS.from_user_input``. The
    ``projected`` entry must be a projected CRS and the ``geographic`` entry a
    geographic one; otherwise a ``ValueError`` naming the offending input is
    raised. The parsed mapping is returned under the same keys.
    """
    parsed = {}
    for name, user_value in crs.items():
        parsed[name] = CRS.from_user_input(user_value)

    # The projected entry is validated first, then the geographic one.
    projected_ok = parsed["projected"].is_projected
    if not projected_ok:
        raise ValueError(f"CRS must be projected. Got {crs['projected']!r}.")

    geographic_ok = parsed["geographic"].is_geographic
    if not geographic_ok:
        raise ValueError(f"CRS must be geographic. Got {crs['geographic']!r}.")

    return parsed


def plot_shapes(shapes: gpd.GeoDataFrame, crs: str | int | CRS) -> tuple[Figure, Axes]:
    """Plot shapes coloured by ``shape_class`` with thin black outlines.

    Args:
        shapes: geodataframe with a ``shape_class`` column. It is not modified.
        crs: CRS the shapes are reprojected to before plotting.

    Returns:
        The figure and the axes holding the plot, with ticks and axis labels
        removed.
    """
    # ``to_crs`` returns a reprojected copy by default (``inplace=False``), so
    # an explicit ``copy()`` beforehand would only duplicate the data twice.
    projected = shapes.to_crs(crs)
    fig, ax = plt.subplots(layout="constrained")
    # Outlines go on the axes first; the filled polygons share the same axes.
    projected.boundary.plot(ax=ax, color="black", lw=0.5)
    ax = projected.plot(ax=ax, column="shape_class", legend=False)
    ax.set(xticks=[], yticks=[], xlabel="", ylabel="")
    return fig, ax
Loading
Loading