diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index 728b9c5..8360155 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -33,7 +33,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: integration-test-logs-${{ matrix.os }} - path: tests/integration/results/integration_test/logs + path: tests/integration/resources/geo_boundaries/logs if-no-files-found: ignore retention-days: 30 - name: Fail if integration or linting failed diff --git a/README.md b/README.md index 05d3844..832f249 100644 --- a/README.md +++ b/README.md @@ -30,9 +30,10 @@ Data processing steps: - Country area data: [GADM](https://gadm.org/), [Overture Maps](https://overturemaps.org/) and [NUTS](https://ec.europa.eu/eurostat/web/gisco/geodata/statistical-units/territorial-units-statistics) divisions are supported. - Exclusive Economic Zone (EEZ) data: [Marine regions](https://www.marineregions.org/). 2. Individual country files are downloaded and harmonised to fit a standardised schema. -Contested regions are removed at this stage. -3. Land is clipped using maritime Exclusive Economic Zones (EEZ). -4. Each polygon is clipped using its neighbours to minimise overlapping polygons. + - Contested regions are removed at this stage. + - Land is clipped using maritime Exclusive Economic Zones (EEZ). + - Optionally, a Voronoi algorithm is run to separate EEZ areas to fit subnational regions. +3. Each country file is combined and then clipped using its neighbours to minimise overlapping polygons. > [!TIP] > The `subtype` naming matches that of the source database. For example, NUTS uses 0, 1, 2 and 3 (NUTS0, NUTS1, NUTS2, etc.). diff --git a/config/config.yaml b/config/config.yaml index f4a45bd..0d67e1d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,5 +1,8 @@ # A minimal example of how to configure this module overture_release: "latest" +voronoi_eez: + enabled: True + sample_spacing: 10000 # sample every 10 km crs: projected: "epsg:3857" geographic: "epsg:4326" @@ -26,3 +29,4 @@ countries: source: "nuts" resolution: 01M year: 2024 + extra_eez: 8364 diff --git a/config/europe_example.yaml b/config/europe_example.yaml index f91a7ea..10ce4d8 100644 --- a/config/europe_example.yaml +++ b/config/europe_example.yaml @@ -1,5 +1,8 @@ # A minimal example of how to configure this module overture_release: "latest" +voronoi_eez: + enabled: True + sample_spacing: 10000 crs: projected: "epsg:3035" geographic: "epsg:4326" diff --git a/config/usa_example.yaml b/config/usa_example.yaml new file mode 100644 index 0000000..1510184 --- /dev/null +++ b/config/usa_example.yaml @@ -0,0 +1,14 @@ +# Example of regional disaggregation of a large country +# USA has multiple marine zones, which can be appended as extras +overture_release: "latest" +voronoi_eez: + enabled: True + sample_spacing: 10000 # sample every 10 km +crs: + projected: "epsg:3857" + geographic: "epsg:4326" +countries: + "USA": + subtype: "1" + source: "gadm" + extra_eez: [8463, 8453] diff --git a/figures/rulegraph.png b/figures/rulegraph.png index 349d38f..98b1249 100644 Binary files a/figures/rulegraph.png and b/figures/rulegraph.png differ diff --git a/figures/shapes.png b/figures/shapes.png index abfb2bf..892a5d3 100644 Binary files a/figures/shapes.png and b/figures/shapes.png differ diff --git a/pixi.lock b/pixi.lock index 304222c..17edf2a 100644 --- a/pixi.lock +++ b/pixi.lock @@ -104,14 +104,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.7.0-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h0aef613_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libarchive-3.8.5-gpl_hc2c16d8_100.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libblas-3.11.0-5_h4a7cf45_openblas.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.11.0-5_h0358290_openblas.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-hb8b1518_5.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.17.0-h4e3cde8_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h7a8fb5f_6.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.19.0-hcf29cc6_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.25-h17f619e_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.125-hb03c661_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda @@ -147,7 +147,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hb9d3cd8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.53-h421ea60_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/librsvg-2.60.0-h61e6d4b_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.20-h4ab18f5_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.21-h280c20c_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsolv-0.7.35-h9463b59_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.1-hf4e2dac_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda @@ -282,7 +282,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h280c20c_3.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/yaml-cpp-0.8.0-h3f2d84a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/yte-1.9.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h387f397_9.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h41580af_10.conda - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.25.0-py312h5253ce2_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda @@ -379,12 +379,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.7.0-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyhc90fa1f_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.22.2-h385eeb1_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-hd64df32_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libarchive-3.8.5-gpl_h6fbacd7_100.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libblas-3.11.0-5_h51639a9_openblas.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcblas-3.11.0-5_hb0561ab_openblas.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.17.0-hdece5d2_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.19.0-hd5a2499_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-21.1.8-hf598326_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libdeflate-1.25-hc11a715_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libedit-3.1.20250104-pl5321hafb1f1b_0.conda @@ -411,7 +411,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libopenblas-0.3.30-openmp_ha158390_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libpng-1.6.53-hfab5511_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/librsvg-2.60.0-h5c55ec3_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.20-h99b78c6_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.21-h1a92334_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsolv-0.7.35-h5f525b2_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.51.1-h1b79a29_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libssh2-1.11.1-h1590b86_0.conda @@ -521,7 +521,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/yaml-0.2.5-h925e9cb_3.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/yaml-cpp-0.8.0-ha1acc90_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/yte-1.9.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-h888dc83_9.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-h4818236_10.conda - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstandard-0.25.0-py313h9734d34_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.7-hbf9d68e_6.conda @@ -610,12 +610,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2025.9.1-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_client-8.7.0-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_core-5.9.1-pyh6dadd2b_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.3-hdf4eb48_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/krb5-1.22.2-h0ea6238_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h6470a55_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libarchive-3.8.5-gpl_he24518a_100.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libblas-3.11.0-5_hf2e6a31_mkl.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libcblas-3.11.0-5_h2a3cdd5_mkl.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/libcurl-8.17.0-h43ecb02_1.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libcurl-8.19.0-h8206538_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.25-h51727cc_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libexpat-2.7.3-hac47afa_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libffi-3.5.2-h52bdfb6_0.conda @@ -635,7 +635,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/libmambapy-2.4.0-py313haf3ff99_2.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libmpdec-4.0.0-h2466b09_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.53-h7351971_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/libsodium-1.0.20-hc70643c_0.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/libsodium-1.0.21-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libsolv-0.7.35-h8883371_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.51.1-hf5d6505_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/libssh2-1.11.1-h9aa295b_0.conda @@ -761,7 +761,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-0.2.5-h6a83c73_3.conda - conda: https://conda.anaconda.org/conda-forge/win-64/yaml-cpp-0.8.0-he0c23c2_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/yte-1.9.4-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/zeromq-4.3.5-h5bddc39_9.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/zeromq-4.3.5-h507cc87_10.conda - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstandard-0.25.0-py313h5fd188c_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.7-h534d264_6.conda @@ -5214,20 +5214,6 @@ packages: license_family: BSD size: 73548 timestamp: 1773067061126 -- conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda - sha256: 99df692f7a8a5c27cd14b5fb1374ee55e756631b9c3d659ed3ee60830249b238 - md5: 3f43953b7d3fb3aaa1d0d0723d91e368 - depends: - - keyutils >=1.6.1,<2.0a0 - - libedit >=3.1.20191231,<3.2.0a0 - - libedit >=3.1.20191231,<4.0a0 - - libgcc-ng >=12 - - libstdcxx-ng >=12 - - openssl >=3.3.1,<4.0a0 - license: MIT - license_family: MIT - size: 1370023 - timestamp: 1719463201255 - conda: https://conda.anaconda.org/conda-forge/linux-64/krb5-1.22.2-ha1258a1_0.conda sha256: 3e307628ca3527448dd1cb14ad7bb9d04d1d28c7d4c5f97ba196ae984571dd25 md5: fb53fb07ce46a575c5d004bbc96032c2 @@ -5243,19 +5229,6 @@ packages: license_family: MIT size: 1386730 timestamp: 1769769569681 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.21.3-h237132a_0.conda - sha256: 4442f957c3c77d69d9da3521268cad5d54c9033f1a73f99cde0a3658937b159b - md5: c6dc8a0fdec13a0565936655c33069a1 - depends: - - __osx >=11.0 - - libcxx >=16 - - libedit >=3.1.20191231,<3.2.0a0 - - libedit >=3.1.20191231,<4.0a0 - - openssl >=3.3.1,<4.0a0 - license: MIT - license_family: MIT - size: 1155530 - timestamp: 1719463474401 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/krb5-1.22.2-h385eeb1_0.conda sha256: c0a0bf028fe7f3defcdcaa464e536cf1b202d07451e18ad83fdd169d15bef6ed md5: e446e1822f4da8e5080a9de93474184d @@ -5269,18 +5242,6 @@ packages: license_family: MIT size: 1160828 timestamp: 1769770119811 -- conda: https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.3-hdf4eb48_0.conda - sha256: 18e8b3430d7d232dad132f574268f56b3eb1a19431d6d5de8c53c29e6c18fa81 - md5: 31aec030344e962fbd7dbbbbd68e60a9 - depends: - - openssl >=3.3.1,<4.0a0 - - ucrt >=10.0.20348.0 - - vc >=14.2,<15 - - vc14_runtime >=14.29.30139 - license: MIT - license_family: MIT - size: 712034 - timestamp: 1719463874284 - conda: https://conda.anaconda.org/conda-forge/win-64/krb5-1.22.2-h0ea6238_0.conda sha256: eb60f1ad8b597bcf95dee11bc11fe71a8325bc1204cf51d2bb1f2120ffd77761 md5: 4432f52dc0c8eb6a7a6abc00a037d93c @@ -6139,35 +6100,19 @@ packages: license_family: BSD size: 25694 timestamp: 1633684287072 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-hb8b1518_5.conda - sha256: cb83980c57e311783ee831832eb2c20ecb41e7dee6e86e8b70b8cef0e43eab55 - md5: d4a250da4737ee127fb1fa6452a9002e +- conda: https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h7a8fb5f_6.conda + sha256: 205c4f19550f3647832ec44e35e6d93c8c206782bdd620c1d7cf66237580ff9c + md5: 49c553b47ff679a6a1e9fc80b9c5a2d4 depends: - __glibc >=2.17,<3.0.a0 - - krb5 >=1.21.3,<1.22.0a0 - - libgcc >=13 - - libstdcxx >=13 + - krb5 >=1.22.2,<1.23.0a0 + - libgcc >=14 + - libstdcxx >=14 - libzlib >=1.3.1,<2.0a0 license: Apache-2.0 license_family: Apache - size: 4523621 - timestamp: 1749905341688 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.17.0-h4e3cde8_1.conda - sha256: 2d7be2fe0f58a0945692abee7bb909f8b19284b518d958747e5ff51d0655c303 - md5: 117499f93e892ea1e57fdca16c2e8351 - depends: - - __glibc >=2.17,<3.0.a0 - - krb5 >=1.21.3,<1.22.0a0 - - libgcc >=14 - - libnghttp2 >=1.67.0,<2.0a0 - - libssh2 >=1.11.1,<2.0a0 - - libzlib >=1.3.1,<2.0a0 - - openssl >=3.5.4,<4.0a0 - - zstd >=1.5.7,<1.6.0a0 - license: curl - license_family: MIT - size: 459417 - timestamp: 1765379027010 + size: 4518030 + timestamp: 1770902209173 - conda: https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.19.0-hcf29cc6_0.conda sha256: a0390fd0536ebcd2244e243f5f00ab8e76ab62ed9aa214cd54470fe7496620f4 md5: d50608c443a30c341c24277d28290f76 @@ -6184,21 +6129,6 @@ packages: license_family: MIT size: 466704 timestamp: 1773218522665 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.17.0-hdece5d2_1.conda - sha256: 1a8a958448610ca3f8facddfe261fdbb010e7029a1571b84052ec9770fc0a36e - md5: 1d6e791c6e264ae139d469ce011aab51 - depends: - - __osx >=11.0 - - krb5 >=1.21.3,<1.22.0a0 - - libnghttp2 >=1.67.0,<2.0a0 - - libssh2 >=1.11.1,<2.0a0 - - libzlib >=1.3.1,<2.0a0 - - openssl >=3.5.4,<4.0a0 - - zstd >=1.5.7,<1.6.0a0 - license: curl - license_family: MIT - size: 394471 - timestamp: 1765379821294 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcurl-8.19.0-hd5a2499_0.conda sha256: c4d581b067fa60f9dc0e1c5f18b756760ff094a03139e6b206eb98d185ae2bb1 md5: 9fc7771fc8104abed9119113160be15a @@ -6214,20 +6144,6 @@ packages: license_family: MIT size: 399616 timestamp: 1773219210246 -- conda: https://conda.anaconda.org/conda-forge/win-64/libcurl-8.17.0-h43ecb02_1.conda - sha256: 5ebab5c980c09d31b35a25095b295124d89fd8bdffdb3487604218ad56512885 - md5: c02248f96a0073904bb085a437143895 - depends: - - krb5 >=1.21.3,<1.22.0a0 - - libssh2 >=1.11.1,<2.0a0 - - libzlib >=1.3.1,<2.0a0 - - ucrt >=10.0.20348.0 - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - license: curl - license_family: MIT - size: 379189 - timestamp: 1765379273605 - conda: https://conda.anaconda.org/conda-forge/win-64/libcurl-8.19.0-h8206538_0.conda sha256: 6b2143ba5454b399dab4471e9e1d07352a2f33b569975e6b8aedc2d9bf51cbb0 md5: ed181e29a7ebf0f60b84b98d6140a340 @@ -8370,32 +8286,33 @@ packages: license_family: GPL size: 404359 timestamp: 1755880940428 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.20-h4ab18f5_0.conda - sha256: 0105bd108f19ea8e6a78d2d994a6d4a8db16d19a41212070d2d1d48a63c34161 - md5: a587892d3c13b6621a6091be690dbca2 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libsodium-1.0.21-h280c20c_3.conda + sha256: 64e5c80cbce4680a2d25179949739a6def695d72c40ca28f010711764e372d97 + md5: 7af961ef4aa2c1136e11dd43ded245ab depends: - - libgcc-ng >=12 + - libgcc >=14 + - __glibc >=2.17,<3.0.a0 license: ISC - size: 205978 - timestamp: 1716828628198 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.20-h99b78c6_0.conda - sha256: fade8223e1e1004367d7101dd17261003b60aa576df6d7802191f8972f7470b1 - md5: a7ce36e284c5faaf93c220dfc39e3abd + size: 277661 + timestamp: 1772479381288 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsodium-1.0.21-h1a92334_3.conda + sha256: df603472ea1ebd8e7d4fb71e4360fe48d10b11c240df51c129de1da2ff9e8227 + md5: 7cc5247987e6d115134ebab15186bc13 depends: - __osx >=11.0 license: ISC - size: 164972 - timestamp: 1716828607917 -- conda: https://conda.anaconda.org/conda-forge/win-64/libsodium-1.0.20-hc70643c_0.conda - sha256: 7bcb3edccea30f711b6be9601e083ecf4f435b9407d70fc48fbcf9e5d69a0fc6 - md5: 198bb594f202b205c7d18b936fa4524f + size: 248039 + timestamp: 1772479570912 +- conda: https://conda.anaconda.org/conda-forge/win-64/libsodium-1.0.21-h6a83c73_3.conda + sha256: d915f4fa8ebbf237c7a6e511ed458f2cfdc7c76843a924740318a15d0dd33d6d + md5: da2aa614d16a795b3007b6f4a1318a81 depends: + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 - ucrt >=10.0.20348.0 - - vc >=14.2,<15 - - vc14_runtime >=14.29.30139 license: ISC - size: 202344 - timestamp: 1716828757533 + size: 276860 + timestamp: 1772479407566 - conda: https://conda.anaconda.org/conda-forge/linux-64/libsolv-0.7.35-h9463b59_0.conda sha256: 2fc2cdc8ea4dfd9277ae910fa3cfbf342d7890837a2002cf427fd306a869150b md5: 21769ce326958ec230cdcbd0f2ad97eb @@ -13947,48 +13864,44 @@ packages: license_family: MIT size: 16215 timestamp: 1764250734338 -- conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h387f397_9.conda - sha256: 47cfe31255b91b4a6fa0e9dbaf26baa60ac97e033402dbc8b90ba5fee5ffe184 - md5: 8035e5b54c08429354d5d64027041cad +- conda: https://conda.anaconda.org/conda-forge/linux-64/zeromq-4.3.5-h41580af_10.conda + sha256: 325d370b28e2b9cc1f765c5b4cdb394c91a5d958fbd15da1a14607a28fee09f6 + md5: 755b096086851e1193f3b10347415d7c depends: - - libstdcxx >=14 - libgcc >=14 - __glibc >=2.17,<3.0.a0 - - libgcc >=14 - - libsodium >=1.0.20,<1.0.21.0a0 - - krb5 >=1.21.3,<1.22.0a0 + - libstdcxx >=14 + - krb5 >=1.22.2,<1.23.0a0 + - libsodium >=1.0.21,<1.0.22.0a0 license: MPL-2.0 license_family: MOZILLA - size: 310648 - timestamp: 1757370847287 -- conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-h888dc83_9.conda - sha256: b6f9c130646e5971f6cad708e1eee278f5c7eea3ca97ec2fdd36e7abb764a7b8 - md5: 26f39dfe38a2a65437c29d69906a0f68 + size: 311150 + timestamp: 1772476812121 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/zeromq-4.3.5-h4818236_10.conda + sha256: 2705360c72d4db8de34291493379ffd13b09fd594d0af20c9eefa8a3f060d868 + md5: e85dcd3bde2b10081cdcaeae15797506 depends: - __osx >=11.0 - libcxx >=19 - - libsodium >=1.0.20,<1.0.21.0a0 - - krb5 >=1.21.3,<1.22.0a0 + - krb5 >=1.22.2,<1.23.0a0 + - libsodium >=1.0.21,<1.0.22.0a0 license: MPL-2.0 license_family: MOZILLA - size: 244772 - timestamp: 1757371008525 -- conda: https://conda.anaconda.org/conda-forge/win-64/zeromq-4.3.5-h5bddc39_9.conda - sha256: 690cf749692c8ea556646d1a47b5824ad41b2f6dfd949e4cdb6c44a352fcb1aa - md5: a6c8f8ee856f7c3c1576e14b86cd8038 + size: 245246 + timestamp: 1772476886668 +- conda: https://conda.anaconda.org/conda-forge/win-64/zeromq-4.3.5-h507cc87_10.conda + sha256: b8568dfde46edf3455458912ea6ffb760e4456db8230a0cf34ecbc557d3c275f + md5: 1ab0237036bfb14e923d6107473b0021 depends: - vc >=14.3,<15 - vc14_runtime >=14.44.35208 - ucrt >=10.0.20348.0 - - vc >=14.3,<15 - - vc14_runtime >=14.44.35208 - - ucrt >=10.0.20348.0 - - libsodium >=1.0.20,<1.0.21.0a0 - - krb5 >=1.21.3,<1.22.0a0 + - libsodium >=1.0.21,<1.0.22.0a0 + - krb5 >=1.22.2,<1.23.0a0 license: MPL-2.0 license_family: MOZILLA - size: 265212 - timestamp: 1757370864284 + size: 265665 + timestamp: 1772476832995 - conda: https://conda.anaconda.org/conda-forge/noarch/zipp-3.23.0-pyhcf101f3_1.conda sha256: b4533f7d9efc976511a73ef7d4a2473406d7f4c750884be8e8620b0ce70f4dae md5: 30cd29cb87d819caead4d55184c1d115 diff --git a/tests/local_test.py b/tests/local_test.py index a25c04f..8c1fe56 100644 --- a/tests/local_test.py +++ b/tests/local_test.py @@ -19,7 +19,9 @@ def module_path(): return Path(__file__).parent.parent -@pytest.mark.parametrize("scenario", ["config", "china_example", "europe_example"]) +@pytest.mark.parametrize( + "scenario", ["config", "china_example", "europe_example", "usa_example"] +) def test_config_example(module_path, scenario): """Example files should result in a successful run.""" result_file = "results/shapes.parquet" diff --git a/workflow/Snakefile b/workflow/Snakefile index dfb4785..2493883 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -26,12 +26,14 @@ with open(workflow.source_path("internal/settings.yaml"), "r") as f: # Add all your includes here. +include: "rules/_utils.smk" include: "rules/automatic.smk" include: "rules/build.smk" # Add all files to be delivered alongside the workflow here workflow.source_path("scripts/_schemas.py") +workflow.source_path("scripts/_utils.py") rule all: diff --git a/workflow/internal/config.schema.yaml b/workflow/internal/config.schema.yaml index d9f90fc..c317e60 100644 --- a/workflow/internal/config.schema.yaml +++ b/workflow/internal/config.schema.yaml @@ -12,13 +12,6 @@ $defs: description: "Schema for user-provided configuration files." type: object properties: - overture_release: - description: | - Overture data release to use. Two options are possible: - - A specific release string in the form of 'yyyy-mm-dd.x'. - - A request for the latest release in the form of 'latest'. - type: string - pattern: "^(?:latest|(?:\\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\\d|3[01])\\.\\d+)$" crs: description: CRS codes (i.e., 'epsg:3035', 'EPSG:4326', 8857). type: object @@ -60,6 +53,15 @@ properties: year: description: "Year for NUTS source (required if source is 'nuts')." type: integer + extra_eez: + description: > + Optional. Additional EEZ zones to append to this country. + default: [] + anyOf: + - type: integer + - type: array + items: + type: integer required: - subtype - source @@ -80,6 +82,33 @@ properties: - year minProperties: 1 additionalProperties: false + overture_release: + description: | + Overture data release to use. Two options are possible: + - A specific release string in the form of 'yyyy-mm-dd.x'. + - A request for the latest release in the form of 'latest'. + type: string + pattern: "^(?:latest|(?:\\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12]\\d|3[01])\\.\\d+)$" + voronoi_eez: + description: > + Optional configuration for the Voronoi EEZ splitting algorithm. + type: object + required: + - enabled + - sample_spacing + properties: + enabled: + description: Activation of the splitting algorithm. + type: boolean + default: false + sample_spacing: + description: > + Distance between sample points for shorelines. + Higher for finer splits at the cost of slower processing. + Matches the unit of the projected CRS. + type: integer + default: 10000 + minimum: 1 required: - countries - crs diff --git a/workflow/internal/settings.yaml b/workflow/internal/settings.yaml index da3cf3c..44f62ef 100644 --- a/workflow/internal/settings.yaml +++ b/workflow/internal/settings.yaml @@ -1,3 +1,6 @@ # Module settings that users cannot modify. nuts: epsg: 3035 # will be converted later to the user-requested epsg +voronoi_eez: # defaults for Voronoi EEZ splitting + enabled: false + sample_spacing: 10000 diff --git a/workflow/report/results.rst b/workflow/report/build_combined_area.rst similarity index 100% rename from workflow/report/results.rst rename to workflow/report/build_combined_area.rst diff --git a/workflow/report/build_country.rst b/workflow/report/build_country.rst new file mode 100644 index 0000000..6fbf999 --- /dev/null +++ b/workflow/report/build_country.rst @@ -0,0 +1,3 @@ +Built combined file for {{ snakemake.wildcards.country }}. +- Contested EEZ regions are removed. +- Optionally, EEZ regions are broken down using a Voronoi algorithm. diff --git a/workflow/rules/_utils.smk b/workflow/rules/_utils.smk new file mode 100644 index 0000000..417517d --- /dev/null +++ b/workflow/rules/_utils.smk @@ -0,0 +1,15 @@ +"""Utility functions for snakemake rule handling.""" + + +def get_country_filename(country: str): + """Build unique file names to avoid overwriting source files.""" + source = config["countries"][country]["source"] + subtype = config["countries"][country]["subtype"] + + filename = f"{source}_{country}_{subtype}" + if source == "nuts": + resolution = config["countries"][country]["resolution"] + year = config["countries"][country]["year"] + + filename += f"_{year}_{resolution}" + return filename diff --git a/workflow/rules/automatic.smk b/workflow/rules/automatic.smk index c4045df..eebf8ac 100644 --- a/workflow/rules/automatic.smk +++ b/workflow/rules/automatic.smk @@ -6,7 +6,7 @@ Small transformations might be performed to make the data easier to work with. rule download_country_overture: output: - path="/automatic/countries/overture_{country}_{subtype}.parquet", + path="/automatic/land/overture_{country}_{subtype}.parquet", log: "/{country}/download_country_overture_{subtype}.log", conda: @@ -21,9 +21,7 @@ rule download_country_overture: rule download_country_gadm: output: - path=temp( - "/automatic/countries/raw_gadm_{country}_{subtype}.parquet" - ), + path=temp("/automatic/land/raw_gadm_{country}_{subtype}.parquet"), log: "/{country}/download_country_gadm_{subtype}.log", conda: @@ -36,9 +34,9 @@ rule download_country_gadm: rule standardise_country_gadm: input: - raw="/automatic/countries/raw_gadm_{country}_{subtype}.parquet", + raw=rules.download_country_gadm.output.path, output: - standardised="/automatic/countries/gadm_{country}_{subtype}.parquet", + standardised="/automatic/land/gadm_{country}_{subtype}.parquet", log: "/{country}/standardise_country_gadm_{subtype}.log", conda: @@ -54,32 +52,30 @@ rule standardise_country_gadm: rule download_nuts: output: - path="/automatic/nuts/nuts_{resolution}_{year}_{level}.parquet", + path="/automatic/nuts/nuts_{subtype}_{resolution}_{year}.parquet", log: - "/download_nuts_{resolution}_{year}_{level}.log", + "/download_nuts_{subtype}_{resolution}_{year}.log", conda: "../envs/shape.yaml" params: epsg=internal["nuts"]["epsg"], message: - "Download '{wildcards.resolution}_{wildcards.year}_{wildcards.level}' from NUTS." + "Download '{wildcards.subtype}_{wildcards.resolution}_{wildcards.year}' from NUTS." script: "../scripts/download_nuts.py" rule standardise_country_nuts: input: - raw=lambda wc: f"/automatic/nuts/nuts_{config["countries"][wc.country]["resolution"]}_{config["countries"][wc.country]["year"]}_{wc.subtype}.parquet", + raw=rules.download_nuts.output.path, output: - path="/automatic/countries/nuts_{country}_{subtype}.parquet", + path="/automatic/land/nuts_{country}_{subtype}_{year}_{resolution}.parquet", log: - "/{country}/standardise_country_nuts_{subtype}.log", + "/{country}/standardise_country_nuts_{subtype}_{year}_{resolution}.log", conda: "../envs/shape.yaml" - params: - year=lambda wc: config["countries"][wc.country]["year"], message: - "Standardise '{wildcards.country}_{wildcards.subtype}' NUTS dataset." + "Standardise '{wildcards.country}' NUTS dataset for '{wildcards.subtype}_{wildcards.resolution}_{wildcards.year}'." script: "../scripts/standardise_country_nuts.py" @@ -92,6 +88,8 @@ rule download_marine_eez_area: "/{country}/download_marine_eez_area.log", conda: "../envs/shape.yaml" + params: + extra_eez=lambda wc: config["countries"][wc.country].get("extra_eez", []), message: "Download and standardise '{wildcards.country}' EEZ dataset." script: diff --git a/workflow/rules/build.smk b/workflow/rules/build.smk index c5dac70..c2b7c55 100644 --- a/workflow/rules/build.smk +++ b/workflow/rules/build.smk @@ -1,22 +1,44 @@ """Rules used to construct the final dataset.""" +rule build_country: + input: + land=lambda wc: f"/automatic/land/{get_country_filename(wc.country)}.parquet", + maritime="/automatic/eez/{country}.parquet", + output: + country="/automatic/country/{country}.parquet", + plot=report( + "/automatic/country/{country}.png", + caption="../report/build_country.rst", + category="Module Geo-Boundaries", + subcategory="Combined countries", + ), + log: + "/{country}/build_country.log", + conda: + "../envs/shape.yaml" + params: + crs=config["crs"], + voronoi=internal["voronoi_eez"] | config.get("voronoi_eez", {}), + message: + "{wildcards.country}: build combined land and marine polygons." + script: + "../scripts/build_country.py" + + rule build_combined_area: input: countries=[ - f"/automatic/countries/{data['source']}_{country}_{data['subtype']}.parquet" - for country, data in config["countries"].items() - ], - marine=[ - f"/automatic/eez/{country}.parquet" + f"/automatic/country/{country}.parquet" for country in config["countries"] ], output: combined="", plot=report( "/shapes.png", - caption="../report/results.rst", - category="Combined area", + caption="../report/build_combined_area.rst", + category="Module Geo-Boundaries", + subcategory="Combined area", ), log: "/build_combined_area.log", diff --git a/workflow/scripts/_schemas.py b/workflow/scripts/_schemas.py index cc9d7c5..38b69b6 100644 --- a/workflow/scripts/_schemas.py +++ b/workflow/scripts/_schemas.py @@ -7,6 +7,10 @@ class ShapesSchema(pa.DataFrameModel): """Schema for geographic shapes.""" + class Config: + coerce = True + strict = True + shape_id: Series[str] = pa.Field(unique=True) "A unique identifier for this shape." country_id: Series[str] @@ -38,7 +42,9 @@ def fix_geometries(cls, df): def check_geometries(cls, geom): return (geom is not None) and (not geom.is_empty) and geom.is_valid - class Config: - # top-level schema options from your YAML - coerce = True - strict = True + +class EEZSchema(ShapesSchema): + """Schema for marine shapes.""" + + contested: Series[bool] + """Specifies if the EEZ is contested.""" diff --git a/workflow/scripts/_utils.py b/workflow/scripts/_utils.py new file mode 100644 index 0000000..6868249 --- /dev/null +++ b/workflow/scripts/_utils.py @@ -0,0 +1,27 @@ +"""Reusable utility functions.""" + +import geopandas as gpd +from matplotlib import pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from pyproj import CRS + + +def check_crs_config(crs: dict[str, int | str]) -> dict[str, CRS]: + """Check the crs configuration settings.""" + result = {k: CRS.from_user_input(v) for k, v in crs.items()} + if not result["projected"].is_projected: + raise ValueError(f"CRS must be projected. Got {crs['projected']!r}.") + if not result["geographic"].is_geographic: + raise ValueError(f"CRS must be geographic. Got {crs['geographic']!r}.") + return result + + +def plot_shapes(shapes: gpd.GeoDataFrame, crs: str | int | CRS) -> tuple[Figure, Axes]: + """Generate a nice figure of dataframes that fit the module's schema.""" + gdf = shapes.copy().to_crs(crs) + fig, ax = plt.subplots(layout="constrained") + gdf.boundary.plot(ax=ax, color="black", lw=0.5) + ax = gdf.plot(ax=ax, column="shape_class", legend=False) + ax.set(xticks=[], yticks=[], xlabel="", ylabel="") + return fig, ax diff --git a/workflow/scripts/build_combined_area.py b/workflow/scripts/build_combined_area.py index 05a7a50..c3b8d51 100644 --- a/workflow/scripts/build_combined_area.py +++ b/workflow/scripts/build_combined_area.py @@ -1,151 +1,89 @@ """Combine country shapes and marine regions into one harmonized dataset.""" import sys +from collections import defaultdict from typing import TYPE_CHECKING, Any import _schemas +import _utils import geopandas as gpd -import matplotlib.pyplot as plt import pandas as pd +import shapely from pyproj import CRS if TYPE_CHECKING: snakemake: Any -def plot_combined_area(combined_file: str, path: str, crs: str): - """Generate a nice figure of the resulting file.""" - gdf = gpd.read_parquet(combined_file).to_crs(crs) - fig, ax = plt.subplots(figsize=(7, 7), layout="constrained") - ax = gdf.plot(ax=ax, column="shape_class", legend=False) - ax.set(xticks=[], yticks=[], xlabel="", ylabel="") - ax.set_title("Combined regions") - fig.savefig(path, dpi=200, bbox_inches="tight") +def remove_overlaps(gdf: gpd.GeoDataFrame, crs: dict[str, CRS]) -> gpd.GeoDataFrame: + """Remove overlaps deterministically by keeping earlier rows and clipping later rows. + The first row wins. Any area shared with an earlier kept geometry is removed + from the later geometry. + """ + projected = gdf.to_crs(crs["projected"]).copy() -def _remove_overlaps(gdf: gpd.GeoDataFrame, projected_crs: str) -> gpd.GeoDataFrame: - """Remove overlaps between regional shapes and clip shapes using neighbors. + left_idx, right_idx = projected.sindex.query( + projected.geometry, predicate="intersects" + ) - Args: - gdf (gpd.GeoDataFrame): dataframe with regional shapes. - projected_crs (str): CRS to use. Must be projected. + overlaps_by_right: dict[int, list[int]] = defaultdict(list) + for left, right in zip(left_idx, right_idx, strict=True): + if left >= right: + continue + overlaps_by_right[right].append(left) - Returns: - gpd.GeoDataFrame: dataframe in the projected CRS. - """ - # Buffering requires a projected CRS - assert CRS(projected_crs).is_projected - projected = gdf.to_crs(projected_crs) + geoms = projected.geometry.to_list() + for right in range(len(geoms)): + geom = geoms[right] + if geom is None or geom.is_empty: + continue - # A buffer of 0 resolves floating point mismatches that occur during geospatial operations - buffered = projected.buffer(0) + cutters = [ + geoms[left] + for left in overlaps_by_right.get(right, []) + if geoms[left] is not None + and not geoms[left].is_empty + and geom.intersects(geoms[left]) + ] + if not cutters: + continue - for index, row in projected.iterrows(): - minx, miny, maxx, maxy = row.geometry.bounds + geom = geom.difference(shapely.unary_union(cutters)) + if geom is not None and not geom.is_empty and not geom.is_valid: + geom = shapely.make_valid(geom) - # Find neighbouring regions and only use those for the calculation - neighbours = buffered.cx[minx:maxx, miny:maxy] - neighbours = neighbours[neighbours.index != index] - new_geometry = row["geometry"].difference(neighbours.union_all()) + geoms[right] = geom - if not new_geometry.is_valid: - new_geometry = new_geometry.buffer(0) - assert new_geometry.is_valid, "Invalid bowties could not be corrected." - assert new_geometry is not None - projected.loc[index, "geometry"] = new_geometry - return projected + projected["geometry"] = geoms + projected = projected.loc[projected.geometry.notna() & ~projected.geometry.is_empty] + result = projected.to_crs(crs["geographic"]) + result.geometry = result.geometry.buffer(0) -def _combine_shapes( - country_files: list[str], eez: gpd.GeoDataFrame, geographic_crs: str -) -> gpd.GeoDataFrame: - """Merge all countries and maritime boundaries into one file. + return result - Args: - country_files (list[str]): List of standardised country files to combine. - eez (gpd.GeoDataFrame): Standardised EEZ shapes. - geographic_crs (str): CRS to use. Must be geographic. - Raises: - ValueError: Country file is not a unique country. +def main() -> None: + """Main snakemake process.""" + crs = _utils.check_crs_config(snakemake.params.crs) - Returns: - gpd.GeoDataFrame: Combined dataframe using the given CRS. - """ - assert CRS(geographic_crs).is_geographic - - frames = [] - if not eez.empty: - eez = eez.to_crs(geographic_crs) - # No contested or ambiguous EEZs - eez = eez[eez["contested"].eq(False)].drop(columns="contested") - - # Combine land and marine boundary for each country - for file in country_files: - # Fetch the country file and ensure crs is compatible - country_land = gpd.read_parquet(file).to_crs(geographic_crs) - country_id = country_land["country_id"].unique() - if len(country_id) != 1: - raise ValueError( - f"Country file {file} should be a single country. Found {country_id}." - ) - country_id = country_id[0] - - country_marine = eez[eez["country_id"] == country_id] - if not country_marine.empty: - marine_geom = country_marine.geometry.union_all() - # clip land with maritime boundaries - country_land = country_land.copy() - country_land.geometry = country_land.geometry.difference(marine_geom) - - # only add uncontested maritime boundaries - frames.extend([country_land, country_marine]) - else: - frames.append(country_land) - - combined = gpd.GeoDataFrame( - pd.concat(frames, ignore_index=True), crs=geographic_crs + country_list = [ + gpd.read_parquet(i).to_crs(crs["geographic"]) for i in snakemake.input.countries + ] + combined = _schemas.ShapesSchema.validate( + pd.concat(country_list, ignore_index=True) ) - return combined - - -def _combine_eez(eez_files: list[str]) -> gpd.GeoDataFrame: - """Merge all eez files into a big dataset to be processed further.""" - frames: list[gpd.GeoDataFrame] = [] - for path in eez_files: - gdf = gpd.read_parquet(path) - frames.append(gdf) - return pd.concat(frames, axis="rows", ignore_index=True) - - -def build_combined_area( - country_files: list[str], - marine_files: list[str], - crs: dict[str, str], - combined_file: str, -) -> None: - """Create a single file with the requested geographical scope.""" - eezs = _combine_eez(marine_files) - combined = _combine_shapes(country_files, eezs, crs["geographic"]) - combined = _remove_overlaps(combined, crs["projected"]) - - combined = combined.to_crs(crs["geographic"]) - # A buffer of 0 resolves floating point mismatches that occur during CRS conversion - combined["geometry"] = combined.buffer(0) + + combined = remove_overlaps(combined, crs) combined = _schemas.ShapesSchema.validate(combined) - combined.reset_index(drop=True).to_parquet(combined_file) + combined.to_parquet(snakemake.output.combined) + + fig, _ = _utils.plot_shapes(combined, crs["projected"]) + fig.suptitle("Combined shapes") + fig.savefig(snakemake.output.plot, dpi=200, bbox_inches="tight") if __name__ == "__main__": sys.stderr = open(snakemake.log[0], "w") - build_combined_area( - country_files=snakemake.input.countries, - marine_files=snakemake.input.marine, - crs=snakemake.params.crs, - combined_file=snakemake.output.combined, - ) - plot_combined_area( - combined_file=snakemake.output.combined, - path=snakemake.output.plot, - crs=snakemake.params.crs["projected"], - ) + main() diff --git a/workflow/scripts/build_country.py b/workflow/scripts/build_country.py new file mode 100644 index 0000000..1198197 --- /dev/null +++ b/workflow/scripts/build_country.py @@ -0,0 +1,229 @@ +"""Combine country shapes and marine regions into one harmonized dataset.""" + +import math +import sys +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any + +import _schemas +import _utils +import geopandas as gpd +import pandas as pd +from pyproj import CRS +from shapely import voronoi_polygons +from shapely.geometry import ( + GeometryCollection, + LineString, + MultiLineString, + MultiPoint, + Point, +) +from shapely.geometry.base import BaseGeometry + +if TYPE_CHECKING: + snakemake: Any + +ROUND_DECIMALS: int = 3 + + +def _iter_lines(geom: BaseGeometry) -> Iterator[LineString]: + if geom.is_empty: + return + if isinstance(geom, LineString): + yield geom + return + if isinstance(geom, MultiLineString): + yield from geom.geoms + return + if isinstance(geom, GeometryCollection): + for part in geom.geoms: + yield from _iter_lines(part) + + +def _sample_line_midpoints(line: LineString, spacing: float) -> list[Point]: + if line.length == 0: + return [] + n = max(1, math.ceil(line.length / spacing)) + return [line.interpolate((i + 0.5) * line.length / n) for i in range(n)] + + +def _split_one_maritime( + maritime_row, + land: gpd.GeoDataFrame, + *, + crs: int | str, + sample_spacing: float, + coverage_area_tolerance: float, +) -> gpd.GeoDataFrame: + if land.empty: + raise ValueError( + f"No land shapes found for country_id={maritime_row.country_id!r}." + ) + + # Sample through the shoreline + boundary = maritime_row.geometry.boundary + candidate_land = land.iloc[land.sindex.query(boundary, predicate="intersects")] + + seeds: list[dict[str, object]] = [] + for land_row in candidate_land.itertuples(index=False): + shoreline = land_row.geometry.boundary.intersection(boundary) + + for line in _iter_lines(shoreline): + for point in _sample_line_midpoints(line, sample_spacing): + seeds.append( + {"assigned_shape_id": land_row.shape_id, "geometry": point} + ) + if not seeds: + raise ValueError( + f"No touching shoreline regions found for maritime shape {maritime_row.shape_id!r}." + ) + seed_gdf = gpd.GeoDataFrame(seeds, geometry="geometry", crs=crs) + + # Drop seeds too close to each other + xy = seed_gdf.geometry.map( + lambda p: (round(p.x, ROUND_DECIMALS), round(p.y, ROUND_DECIMALS)) + ) + seed_gdf = seed_gdf.loc[~xy.duplicated()].reset_index(drop=True) + + base_row = maritime_row._asdict() + assigned_ids = seed_gdf["assigned_shape_id"].unique() + if len(assigned_ids) == 1: + # Nothing to do + pieces = gpd.GeoDataFrame([base_row.copy()], geometry="geometry", crs=crs) + else: + # Run Voronoi + cells = gpd.GeoDataFrame( + geometry=list( + voronoi_polygons( + MultiPoint(list(seed_gdf.geometry)), + extend_to=maritime_row.geometry.envelope.buffer(sample_spacing * 2), + ).geoms + ), + crs=crs, + ) + # Identify where each voronoi cell belongs + cells = gpd.sjoin( + cells, + seed_gdf[["assigned_shape_id", "geometry"]], + how="left", + predicate="contains", + ).drop_duplicates() + if cells["assigned_shape_id"].isna().any(): + raise RuntimeError( + f"Could not assign every Voronoi cell for maritime shape {maritime_row.shape_id!r}." + ) + + # Keep only cells on the maritime area and test for completeness + cells["geometry"] = cells.geometry.intersection(maritime_row.geometry) + cells = cells.loc[~cells.geometry.is_empty & (cells.geometry.area > 0)] + pieces = ( + cells[["assigned_shape_id", "geometry"]] + .dissolve(by="assigned_shape_id", as_index=False) + .rename(columns={"assigned_shape_id": "_assigned_shape_id"}) + ) + uncovered_area = maritime_row.geometry.difference( + pieces.geometry.union_all() + ).area + if uncovered_area > coverage_area_tolerance: + raise RuntimeError( + f"Voronoi split did not fully cover maritime shape {maritime_row.shape_id!r}. " + f"Uncovered area: {uncovered_area}" + ) + + # Re-apply row values, keeping the new geometry and a new id + piece_rows = [] + for assigned_id, geometry in zip( + pieces["_assigned_shape_id"], pieces.geometry, strict=True + ): + piece = base_row.copy() + piece["shape_id"] = f"{maritime_row.shape_id}_to_{assigned_id}" + piece["geometry"] = geometry + piece_rows.append(piece) + pieces = gpd.GeoDataFrame(piece_rows, geometry="geometry", crs=crs) + + return pieces + + +def split_maritime_by_shoreline_voronoi( + shapes: gpd.GeoDataFrame, + *, + crs: dict[str, CRS], + sample_spacing: float = 10_000.0, + coverage_area_tolerance: float = 1.0, +) -> gpd.GeoDataFrame: + """Split EEZ zones to fit shoreline land regions.""" + shapes = shapes.copy().to_crs(crs["projected"]) + + land = shapes.loc[shapes["shape_class"].eq("land")] + maritime = shapes.loc[shapes["shape_class"].eq("maritime")] + + if maritime.empty: + return ValueError("Requested voronoi without maritime shapes.") + + split_maritime = [ + _split_one_maritime( + maritime_row, + land.loc[land["country_id"].eq(maritime_row.country_id)], + sample_spacing=sample_spacing, + coverage_area_tolerance=coverage_area_tolerance, + crs=shapes.crs, + ) + for maritime_row in maritime.itertuples(index=False) + ] + + result = pd.concat([land, *split_maritime], ignore_index=True) + result = result.to_crs(crs["geographic"]) + result.geometry = result.geometry.buffer(0) + return result + + +def combine_shapes( + land: gpd.GeoDataFrame, maritime: gpd.GeoDataFrame, geo_crs: CRS +) -> gpd.GeoDataFrame: + """Combine land and marine shapes.""" + combined = land.copy().to_crs(geo_crs) + if not maritime.empty: + # remove contested zones and clip to give priority to maritime polygons + eez = maritime.copy().to_crs(geo_crs) + eez = eez[eez["contested"].eq(False)].drop(columns="contested") + combined.geometry = combined.geometry.difference(eez.geometry.union_all()) + combined = pd.concat([combined, eez], ignore_index=True) + + # Resolve floating point mismatches that occur during CRS conversion + combined.geometry = combined.geometry.buffer(0) + return combined + + +def main() -> None: + """Main snakemake process.""" + crs = _utils.check_crs_config(snakemake.params.crs) + + country = snakemake.wildcards.country + land = _schemas.ShapesSchema.validate(gpd.read_parquet(snakemake.input.land)) + maritime = _schemas.EEZSchema.validate(gpd.read_parquet(snakemake.input.maritime)) + + country_ids = set(land["country_id"]) | set(maritime["country_id"]) + if set(country_ids) - set([country]): + raise ValueError( + f"Country processing mismatch for {country!r}. Found {country_ids!r}." + ) + + shapes = combine_shapes(land, maritime, crs["geographic"]) + shapes = _schemas.ShapesSchema.validate(shapes) + + voronoi = snakemake.params.voronoi + if not maritime.empty and voronoi["enabled"]: + shapes = split_maritime_by_shoreline_voronoi( + shapes, crs=crs, sample_spacing=voronoi["sample_spacing"] + ) + shapes = _schemas.ShapesSchema.validate(shapes) + shapes.to_parquet(snakemake.output.country) + + fig, _ = _utils.plot_shapes(shapes, crs["projected"]) + fig.suptitle(f"{country} shapes") + fig.savefig(snakemake.output.plot, dpi=200, bbox_inches="tight") + + +if __name__ == "__main__": + sys.stderr = open(snakemake.log[0], "w") + main() diff --git a/workflow/scripts/download_marine_eez_area.py b/workflow/scripts/download_marine_eez_area.py index e3abedd..1924eaf 100644 --- a/workflow/scripts/download_marine_eez_area.py +++ b/workflow/scripts/download_marine_eez_area.py @@ -8,6 +8,7 @@ import _schemas import geopandas as gpd +import pandas as pd import requests from matplotlib import pyplot as plt @@ -36,12 +37,10 @@ def _raise_for_geoserver_exception(response: requests.Response) -> None: ) -def get_country_eez_by_iso3( - iso3: str, id_col: str, *, srs_name: str = "EPSG:4326", timeout: int = 180 +def get_eez_by_cql( + cql_filter: str, *, srs_name: str = "EPSG:4326", timeout: int = 180 ) -> gpd.GeoDataFrame | None: - """Fetch EEZ polygons by ISO3 code using WFS + CQL.""" - cql_filter = f"{id_col}='{iso3}'" - + """Fetch EEZ polygons using a raw WFS CQL filter.""" params = { "service": "WFS", "version": WFS_VERSION, @@ -71,18 +70,40 @@ def get_country_eez_by_iso3( if data["features"]: # Let geopandas build the frame, CRS will match request result = gpd.GeoDataFrame.from_features(data["features"], crs=srs_name) + if result.empty: + result = None return result -def transform_to_clio(gdf: gpd.GeoDataFrame, id_col: str) -> gpd.GeoDataFrame: - """Transform the MarineRegions dataset for better clio compatibility. +def get_country_eez_by_iso3( + iso3: str, + *, + id_col: str = "iso_ter1", + srs_name: str = "EPSG:4326", + timeout: int = 180, +) -> gpd.GeoDataFrame | None: + """Fetch EEZ polygons by ISO3 code using WFS + CQL.""" + return get_eez_by_cql(f"{id_col}='{iso3}'", srs_name=srs_name, timeout=timeout) + + +def get_country_eez_by_mrgid( + mrgid: int, *, srs_name: str = "EPSG:4326", timeout: int = 180 +) -> gpd.GeoDataFrame | None: + """Fetch EEZ polygons by Marine Regions ID.""" + return get_eez_by_cql(f"mrgid={mrgid}", srs_name=srs_name, timeout=timeout) + + +def transform_to_schema( + gdf: gpd.GeoDataFrame | None, country_id: str +) -> gpd.GeoDataFrame: + """Transform the MarineRegions dataset for better compatibility. - Removes geopolitically contested areas - Adds common naming conventions. Args: gdf (gpd.GeoDataFrame): A marine regions geo-dataframe. - id_col (str): Name of the column used for ID fetching. + country_id (str): ISO3 country id from the workflow. Returns: gpd.GeoDataFrame: standardised dataframe. @@ -90,33 +111,26 @@ def transform_to_clio(gdf: gpd.GeoDataFrame, id_col: str) -> gpd.GeoDataFrame: if gdf is not None: standardised = gpd.GeoDataFrame( { - "shape_id": gdf.apply( - lambda x: "_".join( - [str(x[id_col]), "marineregions", str(x["mrgid"])] - ), - axis="columns", + "shape_id": gdf["mrgid"].apply( + lambda mrgid: "_".join([country_id, "marineregions", str(mrgid)]) ), - "country_id": gdf[id_col], + "country_id": country_id, "shape_class": "maritime", "geometry": gdf["geometry"], "parent": "marineregions", "parent_subtype": "eez", "parent_id": gdf["mrgid"], "parent_name": gdf["geoname"], + "contested": gdf["pol_type"].apply( + lambda x: x in ["Joint regime", "Overlapping claim"] + ), } ) # Remove cases without territorial ISO code standardised = standardised[~standardised["country_id"].isna()] - # Check that the base columns fit the schema - standardised = _schemas.ShapesSchema.validate(standardised) - # Extra: identify contested areas and potential attribution conflicts - standardised["contested"] = gdf["pol_type"].apply( - lambda x: True if x in ["Joint regime", "Overlapping claim"] else False - ) + standardised = _schemas.EEZSchema.validate(standardised) else: - standardised = gpd.GeoDataFrame( - columns=_schemas.ShapesSchema.to_schema().columns - ) + standardised = gpd.GeoDataFrame(columns=_schemas.EEZSchema.to_schema().columns) return standardised @@ -125,9 +139,17 @@ def plot(gdf: gpd.GeoDataFrame, country: str): fig, ax = plt.subplots(layout="constrained") if gdf.empty: ax.set_title(f"{country!r} has no EEZ") - ax.set_axis_off() else: - gdf.plot("contested", ax=ax, legend=True, legend_kwds={"title": "contested"}) + gdf.plot( + "contested", + ax=ax, + legend=True, + legend_kwds={ + "title": "contested", + "bbox_to_anchor": (1, 1), + "loc": "upper left", + }, + ) ax.set_title(f"{country!r}: EEZ") return fig, ax @@ -135,10 +157,27 @@ def plot(gdf: gpd.GeoDataFrame, country: str): def main() -> None: """Main snakemake process.""" iso3 = snakemake.wildcards.country - # Fetch by territory ID rather than sovereign ID. - id_col = "iso_ter1" - gdf = get_country_eez_by_iso3(iso3, id_col) - gdf = transform_to_clio(gdf, id_col) + extra_eez = snakemake.params.extra_eez + if not isinstance(extra_eez, list): + extra_eez = [extra_eez] + + eez_gdfs = [] + + iso3_eez = get_country_eez_by_iso3(iso3) + if iso3_eez is not None: + eez_gdfs.append(iso3_eez) + + for mrgid in extra_eez: + extra_gdf = get_country_eez_by_mrgid(int(mrgid)) + if extra_gdf is None: + raise RuntimeError(f"Configured extra_eez {mrgid!r} returned no features") + eez_gdfs.append(extra_gdf) + + combined_gdf = None + if eez_gdfs: + combined_gdf = pd.concat([i for i in eez_gdfs if i is not None]) + + gdf = transform_to_schema(combined_gdf, iso3) gdf.to_parquet(snakemake.output.path) fig, _ = plot(gdf, iso3) diff --git a/workflow/scripts/download_nuts.py b/workflow/scripts/download_nuts.py index 249b4e2..938bd52 100644 --- a/workflow/scripts/download_nuts.py +++ b/workflow/scripts/download_nuts.py @@ -24,7 +24,7 @@ def download_nuts_version(year: int, resolution: str, level: str, epsg: str, pat download_nuts_version( year=snakemake.wildcards.year, resolution=snakemake.wildcards.resolution, - level=snakemake.wildcards.level, + level=snakemake.wildcards.subtype, epsg=snakemake.params.epsg, path=snakemake.output.path, ) diff --git a/workflow/scripts/standardise_country_nuts.py b/workflow/scripts/standardise_country_nuts.py index 4107aa4..2f1d049 100644 --- a/workflow/scripts/standardise_country_nuts.py +++ b/workflow/scripts/standardise_country_nuts.py @@ -64,7 +64,7 @@ def standardise_country_nuts( standardise_country_nuts( raw_file=snakemake.input.raw, country_id=snakemake.wildcards.country, - year=snakemake.params.year, + year=snakemake.wildcards.year, subtype=snakemake.wildcards.subtype, output_path=snakemake.output.path, )