From c7ed211c3119a8b92ad693bfb2d858a29866bab8 Mon Sep 17 00:00:00 2001
From: Jan Streffing <jan.streffing@awi.de>
Date: Mon, 22 Dec 2025 12:43:51 +0100
Subject: [PATCH 1/3] Add ocean grid name to slt filenames for
 configuration-specific LPJ-GUESS files

- Modified create_slt_output_for_lpjg() to include ocean grid name in output filenames
- Output format now: slt_{RESOLUTION}_{OCEAN_GRID}.nc (e.g., slt_TCO95_CORE2.nc)
- Ensures correct soil type files are used for each ocean grid configuration
- Different ocean grids (CORE2, CORE3, DARS2) have different coastlines and land-sea masks
- Added TCO95_CORE3 configuration file
---
 configs/TCO95_CORE3.yaml |  38 ++++
 ocp_tool/lpjg_state.py   | 385 +++++++++++++++++++++++++++++++++++++++
 ocp_tool/lsm.py          |   6 +-
 3 files changed, 426 insertions(+), 3 deletions(-)
 create mode 100644 configs/TCO95_CORE3.yaml
 create mode 100644 ocp_tool/lpjg_state.py

diff --git a/configs/TCO95_CORE3.yaml b/configs/TCO95_CORE3.yaml
new file mode 100644
index 0000000..e6675e6
--- /dev/null
+++ b/configs/TCO95_CORE3.yaml
@@ -0,0 +1,38 @@
+# OCP-Tool Configuration
+# OpenIFS Coupling Preparation Tool
+# Configuration: TCO319 + CORE3 (with ice cavities)
+
+# Atmosphere grid settings
+atmosphere:
+  resolution_list: [95]  # TCO319
+  truncation_type: "cubic-octahedral"
+  experiment_name: "ab45"  # IMPORTANT: abns for TCO319, ab45 for TCO95
+
+# Ocean grid settings
+ocean:
+  grid_name: "CORE3"
+  has_ice_cavities: true  # CORE3 has ice cavities
+  mesh_file: "/work/ab0246/a270092/input/fesom2/core3/mesh.nc"
+
+# Basin modifications for runoff
+runoff:
+  manual_basin_removal:
+    - "caspian-sea"
+
+# Paths (relative to auto-detected root_dir, or absolute)
+# Output dirs auto-computed as: output/{TCO|TL}{resolution}_{ocean_grid}/
+paths:
+  input:
+    fesom_mesh: "input/fesom_mesh/"
+    gaussian_grids_full: "input/gaussian_grids_full/"
+    gaussian_grids_octahedral_reduced: "input/gaussian_grids_octahedral_reduced/"
+    gaussian_grids_linear_reduced: "input/gaussian_grids_linear_reduced/"
+    openifs_default: "input/openifs_input_default/"
+    runoff_default: "input/runoff_map_default/"
+    lpj_guess: "input/lpj-guess/"
+
+# Processing options
+options:
+  verbose: true
+  parallel_workers: 4
+  use_dask: true
diff --git a/ocp_tool/lpjg_state.py b/ocp_tool/lpjg_state.py
new file mode 100644
index 0000000..a05f512
--- /dev/null
+++ b/ocp_tool/lpjg_state.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+LPJ-GUESS State File Interpolation
+
+This module handles reading, interpolating, and writing LPJ-GUESS binary
+state files. It treats gridcell state blobs as opaque binary data and
+uses nearest-neighbor interpolation to remap between resolutions.
+
+State file format:
+- meta.bin: num_processes, vegmode, npft, pft_names
+- N.state: gridcell blobs + index (coords + file positions)
+"""
+
+import os
+import struct
+import numpy as np
+from pathlib import Path
+from dataclasses import dataclass
+from typing import List, Dict, Tuple, Optional
+from scipy.spatial import cKDTree
+
+
+@dataclass
+class LPJGMetadata:
+    """Metadata from meta.bin file."""
+    num_processes: int
+    vegmode: int
+    npft: int
+    pft_names: List[str]
+
+
+@dataclass
+class GridcellBlob:
+    """A single gridcell's state as opaque binary data."""
+    lon: float
+    lat: float
+    data: bytes
+
+
+def read_meta_bin(state_dir: Path) -> LPJGMetadata:
+    """
+    Read the meta.bin file from an LPJ-GUESS state directory.
+    
+    Args:
+        state_dir: Path to the state directory containing meta.bin
+        
+    Returns:
+        LPJGMetadata with process count, vegetation mode, and PFT info
+    """
+    meta_path = state_dir / "meta.bin"
+    
+    with open(meta_path, 'rb') as f:
+        # Read header
+        num_processes = struct.unpack('i', f.read(4))[0]
+        vegmode = struct.unpack('i', f.read(4))[0]
+        npft = struct.unpack('i', f.read(4))[0]
+        
+        # Read PFT names
+        pft_names = []
+        for _ in range(npft):
+            name_len = struct.unpack('Q', f.read(8))[0]  # unsigned long
+            name = f.read(name_len).decode('ascii')
+            pft_names.append(name)
+    
+    return LPJGMetadata(
+        num_processes=num_processes,
+        vegmode=vegmode,
+        npft=npft,
+        pft_names=pft_names
+    )
+
+
+def write_meta_bin(state_dir: Path, metadata: LPJGMetadata, num_processes: int) -> None:
+    """
+    Write a meta.bin file with updated process count.
+    
+    Args:
+        state_dir: Path to output state directory
+        metadata: Original metadata (vegmode, npft, pft_names preserved)
+        num_processes: New number of processes for output
+    """
+    meta_path = state_dir / "meta.bin"
+    
+    with open(meta_path, 'wb') as f:
+        f.write(struct.pack('i', num_processes))
+        f.write(struct.pack('i', metadata.vegmode))
+        f.write(struct.pack('i', metadata.npft))
+        
+        for name in metadata.pft_names:
+            name_bytes = name.encode('ascii')
+            f.write(struct.pack('Q', len(name_bytes)))
+            f.write(name_bytes)
+
+
+def read_state_file(state_path: Path) -> List[GridcellBlob]:
+    """
+    Read a single .state file and extract all gridcell blobs.
+    
+    Args:
+        state_path: Path to a .state file
+        
+    Returns:
+        List of GridcellBlob objects with coordinates and binary data
+    """
+    with open(state_path, 'rb') as f:
+        # Read entire file
+        f.seek(0, 2)  # Seek to end
+        file_size = f.tell()
+        
+        # Read number of elements from end of file
+        f.seek(file_size - 8)  # size_t is 8 bytes
+        num_elements = struct.unpack('Q', f.read(8))[0]
+        
+        if num_elements == 0:
+            return []
+        
+        # Calculate index start position
+        # Each index entry: 2 doubles (lon, lat) + 1 streamsize (8 bytes)
+        index_entry_size = 8 + 8 + 8  # lon, lat, file_position
+        index_size = num_elements * index_entry_size + 8  # +8 for num_elements
+        index_start = file_size - index_size
+        
+        # Read index
+        f.seek(index_start)
+        index = []
+        for _ in range(num_elements):
+            lon = struct.unpack('d', f.read(8))[0]
+            lat = struct.unpack('d', f.read(8))[0]
+            file_pos = struct.unpack('q', f.read(8))[0]  # streamsize is signed
+            index.append((lon, lat, file_pos))
+        
+        # Sort index by file position to read blobs sequentially
+        index_sorted = sorted(enumerate(index), key=lambda x: x[1][2])
+        
+        # Read blobs
+        blobs = [None] * num_elements
+        for i, (orig_idx, (lon, lat, file_pos)) in enumerate(index_sorted):
+            # Determine blob size
+            if i + 1 < len(index_sorted):
+                next_pos = index_sorted[i + 1][1][2]
+            else:
+                next_pos = index_start
+            
+            blob_size = next_pos - file_pos
+            
+            f.seek(file_pos)
+            data = f.read(blob_size)
+            
+            blobs[orig_idx] = GridcellBlob(lon=lon, lat=lat, data=data)
+        
+        return blobs
+
+
+def read_all_state_files(state_dir: Path) -> Tuple[LPJGMetadata, List[GridcellBlob]]:
+    """
+    Read all state files from a directory.
+    
+    Args:
+        state_dir: Path to state directory
+        
+    Returns:
+        Tuple of (metadata, list of all gridcell blobs)
+    """
+    metadata = read_meta_bin(state_dir)
+    
+    all_blobs = []
+    for rank in range(metadata.num_processes):
+        state_path = state_dir / f"{rank}.state"
+        if state_path.exists():
+            blobs = read_state_file(state_path)
+            all_blobs.extend(blobs)
+            print(f"  Read {len(blobs)} gridcells from {rank}.state")
+    
+    print(f"  Total: {len(all_blobs)} gridcells from {metadata.num_processes} state files")
+    return metadata, all_blobs
+
+
+def write_state_file(state_path: Path, blobs: List[GridcellBlob]) -> None:
+    """
+    Write a single .state file with the given gridcell blobs.
+    
+    Args:
+        state_path: Output path for the .state file
+        blobs: List of GridcellBlob objects to write
+    """
+    with open(state_path, 'wb') as f:
+        # Write blobs and track positions
+        index = []
+        for blob in blobs:
+            file_pos = f.tell()
+            f.write(blob.data)
+            index.append((blob.lon, blob.lat, file_pos))
+        
+        # Sort index by coordinates (as LPJ-GUESS does)
+        index_sorted = sorted(index)
+        
+        # Write index
+        for lon, lat, file_pos in index_sorted:
+            f.write(struct.pack('d', lon))
+            f.write(struct.pack('d', lat))
+            f.write(struct.pack('q', file_pos))
+        
+        # Write number of elements
+        f.write(struct.pack('Q', len(blobs)))
+
+
+def interpolate_lpjg_state(
+    source_dir: Path,
+    target_dir: Path,
+    target_lons: np.ndarray,
+    target_lats: np.ndarray,
+    target_land_mask: np.ndarray,
+    num_output_files: int = 1,
+    verbose: bool = True
+) -> Dict[str, any]:
+    """
+    Interpolate LPJ-GUESS state from source to target grid using nearest-neighbor.
+    
+    Uses the same KDTree-based NN approach as ocp-tool's other interpolations.
+    
+    Args:
+        source_dir: Path to source state directory
+        target_dir: Path to output state directory (will be created)
+        target_lons: 1D array of target grid longitudes
+        target_lats: 1D array of target grid latitudes
+        target_land_mask: Boolean array, True = land (needs vegetation)
+        num_output_files: Number of output .state files (MPI ranks)
+        verbose: Print progress information
+        
+    Returns:
+        Dictionary with statistics about the interpolation
+    """
+    if verbose:
+        print(f"\n{'='*60}")
+        print(" LPJ-GUESS State Interpolation")
+        print(f"{'='*60}")
+        print(f"  Source: {source_dir}")
+        print(f"  Target: {target_dir}")
+    
+    # Create output directory
+    target_dir = Path(target_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Read source state
+    if verbose:
+        print("\nStep 1: Reading source state files...")
+    metadata, source_blobs = read_all_state_files(source_dir)
+    
+    if len(source_blobs) == 0:
+        raise ValueError("No gridcells found in source state directory")
+    
+    # Extract source coordinates
+    source_lons = np.array([b.lon for b in source_blobs])
+    source_lats = np.array([b.lat for b in source_blobs])
+    
+    # Get target land points
+    target_lons = np.asarray(target_lons).flatten()
+    target_lats = np.asarray(target_lats).flatten()
+    target_land_mask = np.asarray(target_land_mask).flatten().astype(bool)
+    
+    land_lons = target_lons[target_land_mask]
+    land_lats = target_lats[target_land_mask]
+    n_target_land = len(land_lons)
+    
+    if verbose:
+        print(f"\nStep 2: Building KDTree for nearest-neighbor interpolation...")
+        print(f"  Source land cells: {len(source_blobs)}")
+        print(f"  Target land cells: {n_target_land}")
+    
+    # Build KDTree (reusing ocp-tool pattern)
+    src_points = np.column_stack((source_lons, source_lats))
+    target_points = np.column_stack((land_lons, land_lats))
+    tree = cKDTree(src_points)
+    distances, indices = tree.query(target_points)
+    
+    if verbose:
+        print(f"  Max NN distance: {distances.max():.4f} degrees")
+        print(f"  Mean NN distance: {distances.mean():.4f} degrees")
+    
+    # Create target blobs by copying source blobs with new coordinates
+    if verbose:
+        print(f"\nStep 3: Creating interpolated gridcells...")
+    
+    target_blobs = []
+    for i, (lon, lat, src_idx) in enumerate(zip(land_lons, land_lats, indices)):
+        # Copy blob data from nearest source, but use target coordinates
+        src_blob = source_blobs[src_idx]
+        target_blobs.append(GridcellBlob(
+            lon=lon,
+            lat=lat,
+            data=src_blob.data  # Binary data copied unchanged
+        ))
+    
+    # Distribute blobs across output files
+    if verbose:
+        print(f"\nStep 4: Writing {num_output_files} output state files...")
+    
+    blobs_per_file = len(target_blobs) // num_output_files
+    remainder = len(target_blobs) % num_output_files
+    
+    start_idx = 0
+    for rank in range(num_output_files):
+        # Distribute remainder evenly
+        n_blobs = blobs_per_file + (1 if rank < remainder else 0)
+        end_idx = start_idx + n_blobs
+        
+        file_blobs = target_blobs[start_idx:end_idx]
+        state_path = target_dir / f"{rank}.state"
+        write_state_file(state_path, file_blobs)
+        
+        if verbose:
+            print(f"  Wrote {len(file_blobs)} gridcells to {rank}.state")
+        
+        start_idx = end_idx
+    
+    # Write metadata
+    write_meta_bin(target_dir, metadata, num_output_files)
+    if verbose:
+        print(f"  Wrote meta.bin (vegmode={metadata.vegmode}, npft={metadata.npft})")
+    
+    # Compute statistics
+    stats = {
+        'source_gridcells': len(source_blobs),
+        'target_gridcells': n_target_land,
+        'target_total_points': len(target_lons),
+        'land_fraction': n_target_land / len(target_lons),
+        'max_nn_distance': float(distances.max()),
+        'mean_nn_distance': float(distances.mean()),
+        'output_files': num_output_files,
+        'pft_names': metadata.pft_names,
+    }
+    
+    if verbose:
+        print(f"\n{'='*60}")
+        print(" Interpolation Complete")
+        print(f"{'='*60}")
+        print(f"  Source → Target: {stats['source_gridcells']} → {stats['target_gridcells']} gridcells")
+        print(f"  Land fraction: {stats['land_fraction']:.2%}")
+        print(f"  Max NN distance: {stats['max_nn_distance']:.4f}°")
+    
+    return stats
+
+
+def interpolate_lpjg_state_with_config(
+    source_dir: Path,
+    target_dir: Path,
+    grid,  # GaussianGrid from ocp-tool
+    lsm_data,  # LSMData from ocp-tool
+    num_output_files: int = 1,
+    verbose: bool = True
+) -> Dict[str, any]:
+    """
+    Convenience wrapper that uses ocp-tool's grid and LSM data.
+    
+    This integrates with the existing ocp-tool workflow, using the
+    polygon-method land mask already computed.
+    
+    Args:
+        source_dir: Path to source LPJ-GUESS state directory
+        target_dir: Path to output state directory
+        grid: GaussianGrid object with target grid coordinates
+        lsm_data: LSMData object with land-sea mask (1=land, 0=ocean)
+        num_output_files: Number of output .state files
+        verbose: Print progress
+        
+    Returns:
+        Dictionary with interpolation statistics
+    """
+    # Extract coordinates and land mask from ocp-tool objects
+    target_lons = np.squeeze(grid.center_lons)
+    target_lats = np.squeeze(grid.center_lats)
+    
+    # lsm_binary_land: 1 = land, 0 = ocean
+    land_mask = np.squeeze(lsm_data.lsm_binary_land) == 1
+    
+    return interpolate_lpjg_state(
+        source_dir=Path(source_dir),
+        target_dir=Path(target_dir),
+        target_lons=target_lons,
+        target_lats=target_lats,
+        target_land_mask=land_mask,
+        num_output_files=num_output_files,
+        verbose=verbose
+    )
diff --git a/ocp_tool/lsm.py b/ocp_tool/lsm.py
index 6856cb0..cca71ef 100644
--- a/ocp_tool/lsm.py
+++ b/ocp_tool/lsm.py
@@ -342,11 +342,11 @@ def create_slt_output_for_lpjg(
     
     input_file = config.get_icmgg_output_file()
     
-    # Generate output filename
+    # Generate output filename with ocean grid name
     if config.atmosphere.truncation_type == 'cubic-octahedral':
-        slt_output_name = f'slt_TCO{resolution}.nc'
+        slt_output_name = f'slt_TCO{resolution}_{config.ocean.grid_name}.nc'
     elif config.atmosphere.truncation_type == 'linear':
-        slt_output_name = f'slt_TL{resolution}.nc'
+        slt_output_name = f'slt_TL{resolution}_{config.ocean.grid_name}.nc'
     else:
         raise ValueError(f"Unknown truncation type: {config.atmosphere.truncation_type}")
     

From 201cca6964628e9e2757df46befc2619d268e566 Mon Sep 17 00:00:00 2001
From: Jan Streffing <jan.streffing@awi.de>
Date: Wed, 21 Jan 2026 11:49:48 +0100
Subject: [PATCH 2/3] get the right grib ids for co2 coupling in ICMGG????INIT
 file

---
 ocp_tool/field_interpolation.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/ocp_tool/field_interpolation.py b/ocp_tool/field_interpolation.py
index 7ad41c5..f80017e 100644
--- a/ocp_tool/field_interpolation.py
+++ b/ocp_tool/field_interpolation.py
@@ -112,6 +112,12 @@ def interpolate_co2_emissions(input_grib_file, icmgg_init_file, output_file, var
         # Second pass: read, interpolate and prepare each variable for writing
         interpolated_emissions = {}
         
+        # Parameter ID mapping for CO2 emissions
+        # Map input paramIds to desired output paramIds
+        param_id_mapping = {
+            228083: 210068,  # fco2nee -> co2nbf
+        }
+        
         with open(input_grib_file, 'rb') as f:
             while True:
                 gid = eccodes.codes_grib_new_from_file(f)
@@ -129,10 +135,14 @@ def interpolate_co2_emissions(input_grib_file, icmgg_init_file, output_file, var
                     # Create source points for interpolation
                     source_points = np.column_stack((lons, lats))
                     
+                    # Get the original paramId and apply mapping if necessary
+                    original_param_id = eccodes.codes_get(gid, 'paramId')
+                    mapped_param_id = param_id_mapping.get(original_param_id, original_param_id)
+                    
                     # Store the original GRIB message information for template
                     template_info = {
                         'short_name': short_name,
-                        'param_id': eccodes.codes_get(gid, 'paramId'),
+                        'param_id': mapped_param_id,
                         'level_type': eccodes.codes_get(gid, 'typeOfLevel'),
                         'level': eccodes.codes_get(gid, 'level'),
                         'grid_type': eccodes.codes_get(gid, 'gridType')

From f0efcb76dd061d427fb92572de185184fd6d22a7 Mon Sep 17 00:00:00 2001
From: Jan Streffing <jan.streffing@awi.de>
Date: Mon, 2 Feb 2026 12:39:47 +0100
Subject: [PATCH 3/3] Delete ocp_tool/lpjg_state.py

---
 ocp_tool/lpjg_state.py | 385 -----------------------------------------
 1 file changed, 385 deletions(-)
 delete mode 100644 ocp_tool/lpjg_state.py

diff --git a/ocp_tool/lpjg_state.py b/ocp_tool/lpjg_state.py
deleted file mode 100644
index a05f512..0000000
--- a/ocp_tool/lpjg_state.py
+++ /dev/null
@@ -1,385 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-LPJ-GUESS State File Interpolation
-
-This module handles reading, interpolating, and writing LPJ-GUESS binary
-state files. It treats gridcell state blobs as opaque binary data and
-uses nearest-neighbor interpolation to remap between resolutions.
-
-State file format:
-- meta.bin: num_processes, vegmode, npft, pft_names
-- N.state: gridcell blobs + index (coords + file positions)
-"""
-
-import os
-import struct
-import numpy as np
-from pathlib import Path
-from dataclasses import dataclass
-from typing import List, Dict, Tuple, Optional
-from scipy.spatial import cKDTree
-
-
-@dataclass
-class LPJGMetadata:
-    """Metadata from meta.bin file."""
-    num_processes: int
-    vegmode: int
-    npft: int
-    pft_names: List[str]
-
-
-@dataclass
-class GridcellBlob:
-    """A single gridcell's state as opaque binary data."""
-    lon: float
-    lat: float
-    data: bytes
-
-
-def read_meta_bin(state_dir: Path) -> LPJGMetadata:
-    """
-    Read the meta.bin file from an LPJ-GUESS state directory.
-    
-    Args:
-        state_dir: Path to the state directory containing meta.bin
-        
-    Returns:
-        LPJGMetadata with process count, vegetation mode, and PFT info
-    """
-    meta_path = state_dir / "meta.bin"
-    
-    with open(meta_path, 'rb') as f:
-        # Read header
-        num_processes = struct.unpack('i', f.read(4))[0]
-        vegmode = struct.unpack('i', f.read(4))[0]
-        npft = struct.unpack('i', f.read(4))[0]
-        
-        # Read PFT names
-        pft_names = []
-        for _ in range(npft):
-            name_len = struct.unpack('Q', f.read(8))[0]  # unsigned long
-            name = f.read(name_len).decode('ascii')
-            pft_names.append(name)
-    
-    return LPJGMetadata(
-        num_processes=num_processes,
-        vegmode=vegmode,
-        npft=npft,
-        pft_names=pft_names
-    )
-
-
-def write_meta_bin(state_dir: Path, metadata: LPJGMetadata, num_processes: int) -> None:
-    """
-    Write a meta.bin file with updated process count.
-    
-    Args:
-        state_dir: Path to output state directory
-        metadata: Original metadata (vegmode, npft, pft_names preserved)
-        num_processes: New number of processes for output
-    """
-    meta_path = state_dir / "meta.bin"
-    
-    with open(meta_path, 'wb') as f:
-        f.write(struct.pack('i', num_processes))
-        f.write(struct.pack('i', metadata.vegmode))
-        f.write(struct.pack('i', metadata.npft))
-        
-        for name in metadata.pft_names:
-            name_bytes = name.encode('ascii')
-            f.write(struct.pack('Q', len(name_bytes)))
-            f.write(name_bytes)
-
-
-def read_state_file(state_path: Path) -> List[GridcellBlob]:
-    """
-    Read a single .state file and extract all gridcell blobs.
-    
-    Args:
-        state_path: Path to a .state file
-        
-    Returns:
-        List of GridcellBlob objects with coordinates and binary data
-    """
-    with open(state_path, 'rb') as f:
-        # Read entire file
-        f.seek(0, 2)  # Seek to end
-        file_size = f.tell()
-        
-        # Read number of elements from end of file
-        f.seek(file_size - 8)  # size_t is 8 bytes
-        num_elements = struct.unpack('Q', f.read(8))[0]
-        
-        if num_elements == 0:
-            return []
-        
-        # Calculate index start position
-        # Each index entry: 2 doubles (lon, lat) + 1 streamsize (8 bytes)
-        index_entry_size = 8 + 8 + 8  # lon, lat, file_position
-        index_size = num_elements * index_entry_size + 8  # +8 for num_elements
-        index_start = file_size - index_size
-        
-        # Read index
-        f.seek(index_start)
-        index = []
-        for _ in range(num_elements):
-            lon = struct.unpack('d', f.read(8))[0]
-            lat = struct.unpack('d', f.read(8))[0]
-            file_pos = struct.unpack('q', f.read(8))[0]  # streamsize is signed
-            index.append((lon, lat, file_pos))
-        
-        # Sort index by file position to read blobs sequentially
-        index_sorted = sorted(enumerate(index), key=lambda x: x[1][2])
-        
-        # Read blobs
-        blobs = [None] * num_elements
-        for i, (orig_idx, (lon, lat, file_pos)) in enumerate(index_sorted):
-            # Determine blob size
-            if i + 1 < len(index_sorted):
-                next_pos = index_sorted[i + 1][1][2]
-            else:
-                next_pos = index_start
-            
-            blob_size = next_pos - file_pos
-            
-            f.seek(file_pos)
-            data = f.read(blob_size)
-            
-            blobs[orig_idx] = GridcellBlob(lon=lon, lat=lat, data=data)
-        
-        return blobs
-
-
-def read_all_state_files(state_dir: Path) -> Tuple[LPJGMetadata, List[GridcellBlob]]:
-    """
-    Read all state files from a directory.
-    
-    Args:
-        state_dir: Path to state directory
-        
-    Returns:
-        Tuple of (metadata, list of all gridcell blobs)
-    """
-    metadata = read_meta_bin(state_dir)
-    
-    all_blobs = []
-    for rank in range(metadata.num_processes):
-        state_path = state_dir / f"{rank}.state"
-        if state_path.exists():
-            blobs = read_state_file(state_path)
-            all_blobs.extend(blobs)
-            print(f"  Read {len(blobs)} gridcells from {rank}.state")
-    
-    print(f"  Total: {len(all_blobs)} gridcells from {metadata.num_processes} state files")
-    return metadata, all_blobs
-
-
-def write_state_file(state_path: Path, blobs: List[GridcellBlob]) -> None:
-    """
-    Write a single .state file with the given gridcell blobs.
-    
-    Args:
-        state_path: Output path for the .state file
-        blobs: List of GridcellBlob objects to write
-    """
-    with open(state_path, 'wb') as f:
-        # Write blobs and track positions
-        index = []
-        for blob in blobs:
-            file_pos = f.tell()
-            f.write(blob.data)
-            index.append((blob.lon, blob.lat, file_pos))
-        
-        # Sort index by coordinates (as LPJ-GUESS does)
-        index_sorted = sorted(index)
-        
-        # Write index
-        for lon, lat, file_pos in index_sorted:
-            f.write(struct.pack('d', lon))
-            f.write(struct.pack('d', lat))
-            f.write(struct.pack('q', file_pos))
-        
-        # Write number of elements
-        f.write(struct.pack('Q', len(blobs)))
-
-
-def interpolate_lpjg_state(
-    source_dir: Path,
-    target_dir: Path,
-    target_lons: np.ndarray,
-    target_lats: np.ndarray,
-    target_land_mask: np.ndarray,
-    num_output_files: int = 1,
-    verbose: bool = True
-) -> Dict[str, any]:
-    """
-    Interpolate LPJ-GUESS state from source to target grid using nearest-neighbor.
-    
-    Uses the same KDTree-based NN approach as ocp-tool's other interpolations.
-    
-    Args:
-        source_dir: Path to source state directory
-        target_dir: Path to output state directory (will be created)
-        target_lons: 1D array of target grid longitudes
-        target_lats: 1D array of target grid latitudes
-        target_land_mask: Boolean array, True = land (needs vegetation)
-        num_output_files: Number of output .state files (MPI ranks)
-        verbose: Print progress information
-        
-    Returns:
-        Dictionary with statistics about the interpolation
-    """
-    if verbose:
-        print(f"\n{'='*60}")
-        print(" LPJ-GUESS State Interpolation")
-        print(f"{'='*60}")
-        print(f"  Source: {source_dir}")
-        print(f"  Target: {target_dir}")
-    
-    # Create output directory
-    target_dir = Path(target_dir)
-    target_dir.mkdir(parents=True, exist_ok=True)
-    
-    # Read source state
-    if verbose:
-        print("\nStep 1: Reading source state files...")
-    metadata, source_blobs = read_all_state_files(source_dir)
-    
-    if len(source_blobs) == 0:
-        raise ValueError("No gridcells found in source state directory")
-    
-    # Extract source coordinates
-    source_lons = np.array([b.lon for b in source_blobs])
-    source_lats = np.array([b.lat for b in source_blobs])
-    
-    # Get target land points
-    target_lons = np.asarray(target_lons).flatten()
-    target_lats = np.asarray(target_lats).flatten()
-    target_land_mask = np.asarray(target_land_mask).flatten().astype(bool)
-    
-    land_lons = target_lons[target_land_mask]
-    land_lats = target_lats[target_land_mask]
-    n_target_land = len(land_lons)
-    
-    if verbose:
-        print(f"\nStep 2: Building KDTree for nearest-neighbor interpolation...")
-        print(f"  Source land cells: {len(source_blobs)}")
-        print(f"  Target land cells: {n_target_land}")
-    
-    # Build KDTree (reusing ocp-tool pattern)
-    src_points = np.column_stack((source_lons, source_lats))
-    target_points = np.column_stack((land_lons, land_lats))
-    tree = cKDTree(src_points)
-    distances, indices = tree.query(target_points)
-    
-    if verbose:
-        print(f"  Max NN distance: {distances.max():.4f} degrees")
-        print(f"  Mean NN distance: {distances.mean():.4f} degrees")
-    
-    # Create target blobs by copying source blobs with new coordinates
-    if verbose:
-        print(f"\nStep 3: Creating interpolated gridcells...")
-    
-    target_blobs = []
-    for i, (lon, lat, src_idx) in enumerate(zip(land_lons, land_lats, indices)):
-        # Copy blob data from nearest source, but use target coordinates
-        src_blob = source_blobs[src_idx]
-        target_blobs.append(GridcellBlob(
-            lon=lon,
-            lat=lat,
-            data=src_blob.data  # Binary data copied unchanged
-        ))
-    
-    # Distribute blobs across output files
-    if verbose:
-        print(f"\nStep 4: Writing {num_output_files} output state files...")
-    
-    blobs_per_file = len(target_blobs) // num_output_files
-    remainder = len(target_blobs) % num_output_files
-    
-    start_idx = 0
-    for rank in range(num_output_files):
-        # Distribute remainder evenly
-        n_blobs = blobs_per_file + (1 if rank < remainder else 0)
-        end_idx = start_idx + n_blobs
-        
-        file_blobs = target_blobs[start_idx:end_idx]
-        state_path = target_dir / f"{rank}.state"
-        write_state_file(state_path, file_blobs)
-        
-        if verbose:
-            print(f"  Wrote {len(file_blobs)} gridcells to {rank}.state")
-        
-        start_idx = end_idx
-    
-    # Write metadata
-    write_meta_bin(target_dir, metadata, num_output_files)
-    if verbose:
-        print(f"  Wrote meta.bin (vegmode={metadata.vegmode}, npft={metadata.npft})")
-    
-    # Compute statistics
-    stats = {
-        'source_gridcells': len(source_blobs),
-        'target_gridcells': n_target_land,
-        'target_total_points': len(target_lons),
-        'land_fraction': n_target_land / len(target_lons),
-        'max_nn_distance': float(distances.max()),
-        'mean_nn_distance': float(distances.mean()),
-        'output_files': num_output_files,
-        'pft_names': metadata.pft_names,
-    }
-    
-    if verbose:
-        print(f"\n{'='*60}")
-        print(" Interpolation Complete")
-        print(f"{'='*60}")
-        print(f"  Source → Target: {stats['source_gridcells']} → {stats['target_gridcells']} gridcells")
-        print(f"  Land fraction: {stats['land_fraction']:.2%}")
-        print(f"  Max NN distance: {stats['max_nn_distance']:.4f}°")
-    
-    return stats
-
-
-def interpolate_lpjg_state_with_config(
-    source_dir: Path,
-    target_dir: Path,
-    grid,  # GaussianGrid from ocp-tool
-    lsm_data,  # LSMData from ocp-tool
-    num_output_files: int = 1,
-    verbose: bool = True
-) -> Dict[str, any]:
-    """
-    Convenience wrapper that uses ocp-tool's grid and LSM data.
-    
-    This integrates with the existing ocp-tool workflow, using the
-    polygon-method land mask already computed.
-    
-    Args:
-        source_dir: Path to source LPJ-GUESS state directory
-        target_dir: Path to output state directory
-        grid: GaussianGrid object with target grid coordinates
-        lsm_data: LSMData object with land-sea mask (1=land, 0=ocean)
-        num_output_files: Number of output .state files
-        verbose: Print progress
-        
-    Returns:
-        Dictionary with interpolation statistics
-    """
-    # Extract coordinates and land mask from ocp-tool objects
-    target_lons = np.squeeze(grid.center_lons)
-    target_lats = np.squeeze(grid.center_lats)
-    
-    # lsm_binary_land: 1 = land, 0 = ocean
-    land_mask = np.squeeze(lsm_data.lsm_binary_land) == 1
-    
-    return interpolate_lpjg_state(
-        source_dir=Path(source_dir),
-        target_dir=Path(target_dir),
-        target_lons=target_lons,
-        target_lats=target_lats,
-        target_land_mask=land_mask,
-        num_output_files=num_output_files,
-        verbose=verbose
-    )