Skip to content
Open
4 changes: 4 additions & 0 deletions deployment/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ srgan-hpc submit patch \

srgan-hpc submit grid \
--config deployment/configs/runtime.default.yaml \
--output-root /data/srgan_new_area \
--project-name srgan_new_area \
--lat1 52.3 --lon1 12.9 \
--lat2 52.7 --lon2 13.8 \
--start-date 2025-07-01 \
Expand Down Expand Up @@ -62,4 +64,6 @@ AOI submission accepts either a `.shp` file or a directory containing exactly on

`staging.item_strategy: mosaic_valid` is the default for STAC staging. When a Cubo cutout intersects multiple Sentinel-2 tiles, the launcher ranks candidate items by valid-data coverage near the cutout center, then fills remaining nodata pixels from the other candidates before inference. Use `item_strategy: fixed_index` only when you explicitly want legacy `staging.image_index` behavior.

For repeated runs with the same settings, keep one standard config file and override only the destination at submit time with `--output-root`; use `--project-name` when you also want a readable run-name prefix.

`deliver-bbox` merges patch outputs per run and writes clipped GeoTIFFs for sharing in GIS tools.
4 changes: 4 additions & 0 deletions deployment/srgan_hpc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,8 @@ def bundled_slurm_entrypoint() -> Path:
return Path(__file__).resolve().parent / "slurm" / "slurm_task_entrypoint.sh"


def bundled_slurm_collect_entrypoint() -> Path:
    """Return the absolute path of the collect entrypoint script shipped in this package.

    The script is looked up in the ``slurm`` directory that sits next to this
    module; the path is built, not checked for existence.
    """
    package_dir = Path(__file__).resolve().parent
    return package_dir.joinpath("slurm", "slurm_collect_entrypoint.sh")


__version__ = get_version()
109 changes: 102 additions & 7 deletions deployment/srgan_hpc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,62 @@ def build_parser() -> argparse.ArgumentParser:

def _add_submit_common_args(parser: argparse.ArgumentParser) -> None:
    """Register the command-line options common to the submit commands.

    ``--config``, ``--start-date`` and ``--end-date`` are mandatory. The
    ``--output-root`` and ``--project-name`` options are optional per-run
    overrides, ``--script-path`` is an optional string, and ``--dry-run`` /
    ``--verbose`` are boolean switches that default to ``False``.
    """
    register = parser.add_argument

    register("--config", required=True)

    # Optional overrides of values that otherwise come from the config file.
    register(
        "--output-root",
        help="Override config.output_root for this run without creating a new runtime YAML.",
    )
    register(
        "--project-name",
        help="Override config.project_name for this run without creating a new runtime YAML.",
    )

    for required_flag in ("--start-date", "--end-date"):
        register(required_flag, required=True)

    register("--script-path")

    for switch in ("--dry-run", "--verbose"):
        register(switch, action="store_true")


def _submit_config_overrides(args: argparse.Namespace) -> dict[str, str]:
    """Collect the config overrides that were supplied on the command line.

    Only ``output_root`` and ``project_name`` are considered, and a value is
    included only when it is truthy (so ``None`` and ``""`` are left out).
    """
    requested = {
        "output_root": args.output_root,
        "project_name": args.project_name,
    }
    return {key: value for key, value in requested.items() if value}


def _load_submit_config(args: argparse.Namespace):
    """Load the runtime config named by ``args.config`` with CLI overrides applied.

    The overrides are whatever ``_submit_config_overrides`` extracts from
    ``args``; they are handed to ``load_runtime_config`` via ``overrides=``.
    """
    # Imported lazily, matching the other handlers in this module.
    from deployment.srgan_hpc.config import load_runtime_config

    config_path = args.config
    cli_overrides = _submit_config_overrides(args)
    return load_runtime_config(config_path, overrides=cli_overrides)


def _write_and_print_summary(
    *,
    run_dir: Path,
    config,
    submission,
    request: dict[str, object],
    start_date: str,
    end_date: str,
) -> dict[str, str]:
    """Write the submission summary for a run and echo it to stdout.

    All arguments are forwarded unchanged to ``write_submission_summary``.
    The formatted summary is printed, and the two summary file paths it
    returned are handed back as strings under the keys ``"json"`` and
    ``"text"``.
    """
    from deployment.srgan_hpc.submission_summary import (
        format_submission_summary,
        write_submission_summary,
    )

    summary_inputs = {
        "run_dir": run_dir,
        "config": config,
        "submission": submission,
        "request": request,
        "start_date": start_date,
        "end_date": end_date,
    }
    summary, json_path, text_path = write_submission_summary(**summary_inputs)

    rendered = format_submission_summary(summary)
    print(rendered)

    return {"json": str(json_path), "text": str(text_path)}


def _resolve_script_path(script_path: str | None) -> Path:
if script_path is None:
return bundled_slurm_entrypoint().resolve()
Expand Down Expand Up @@ -98,13 +147,12 @@ def _handle_validate(args: argparse.Namespace) -> int:


def _handle_submit_patch(args: argparse.Namespace) -> int:
from deployment.srgan_hpc.config import load_runtime_config
from deployment.srgan_hpc.logging_utils import configure_logging
from deployment.srgan_hpc.patching import Patch
from deployment.srgan_hpc.submit import submit_patch_run

logger = configure_logging(verbose=args.verbose)
config = load_runtime_config(args.config)
config = _load_submit_config(args)
patch = Patch(
patch_id="patch_000001",
latitude=args.lat,
Expand All @@ -124,23 +172,41 @@ def _handle_submit_patch(args: argparse.Namespace) -> int:
dry_run=args.dry_run,
)
logger.info("submitted patch run_id=%s run_dir=%s", run_id, run_dir)
summary_paths = _write_and_print_summary(
run_dir=run_dir,
config=config,
submission=submission,
request={
"type": "patch",
"lat": args.lat,
"lon": args.lon,
"planned_patch_count": 1,
},
start_date=args.start_date,
end_date=args.end_date,
)
print(
json.dumps(
{"run_id": run_id, "run_dir": str(run_dir), "submission": submission},
{
"run_id": run_id,
"run_dir": str(run_dir),
"submission": submission,
"summary": summary_paths,
},
indent=2,
)
)
return 0


def _handle_submit_grid(args: argparse.Namespace) -> int:
from deployment.srgan_hpc.config import load_runtime_config, patch_resolution
from deployment.srgan_hpc.config import patch_resolution
from deployment.srgan_hpc.logging_utils import configure_logging
from deployment.srgan_hpc.patching import build_patches
from deployment.srgan_hpc.submit import submit_grid_run

logger = configure_logging(verbose=args.verbose)
config = load_runtime_config(args.config)
config = _load_submit_config(args)
patches = build_patches(
args.lat1,
args.lon1,
Expand All @@ -162,13 +228,29 @@ def _handle_submit_grid(args: argparse.Namespace) -> int:
logger.info(
"submitted grid run_id=%s run_dir=%s patches=%d", run_id, run_dir, len(patches)
)
summary_paths = _write_and_print_summary(
run_dir=run_dir,
config=config,
submission=submission,
request={
"type": "grid",
"lat1": args.lat1,
"lon1": args.lon1,
"lat2": args.lat2,
"lon2": args.lon2,
"planned_patch_count": len(patches),
},
start_date=args.start_date,
end_date=args.end_date,
)
print(
json.dumps(
{
"run_id": run_id,
"run_dir": str(run_dir),
"patches": len(patches),
"submission": submission,
"summary": summary_paths,
},
indent=2,
)
Expand All @@ -178,12 +260,12 @@ def _handle_submit_grid(args: argparse.Namespace) -> int:

def _handle_submit_aoi(args: argparse.Namespace) -> int:
from deployment.srgan_hpc.aoi import select_aoi_patches
from deployment.srgan_hpc.config import load_runtime_config, patch_resolution
from deployment.srgan_hpc.config import patch_resolution
from deployment.srgan_hpc.logging_utils import configure_logging
from deployment.srgan_hpc.submit import submit_aoi_run

logger = configure_logging(verbose=args.verbose)
config = load_runtime_config(args.config)
config = _load_submit_config(args)
aoi_path = args.aoi_path or config.aoi.path
if aoi_path is None:
raise ValueError("AOI path must be provided via --aoi-path or config.aoi.path")
Expand Down Expand Up @@ -222,6 +304,19 @@ def _handle_submit_aoi(args: argparse.Namespace) -> int:
}
if selection.aoi_layer is not None:
payload["aoi_layer"] = selection.aoi_layer
payload["summary"] = _write_and_print_summary(
run_dir=run_dir,
config=config,
submission=submission,
request={
"type": "aoi",
"aoi_path": str(selection.aoi_path),
"aoi_layer": selection.aoi_layer,
"planned_patch_count": len(selection.patches),
},
start_date=args.start_date,
end_date=args.end_date,
)
print(json.dumps(payload, indent=2))
return 0

Expand Down
8 changes: 4 additions & 4 deletions deployment/srgan_hpc/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
def collect_outputs(run_dir: Path, destination: Path | None = None) -> tuple[Path, int]:
    """Move every patch ``.tif`` output of a run into one collection directory.

    Files matching ``patches/<patch_id>/outputs/*.tif`` below ``run_dir`` are
    moved (not copied) to ``<destination>/<patch_id>/<file name>``, so the
    source files no longer exist afterwards.

    Args:
        run_dir: Run directory that contains the ``patches`` tree.
        destination: Directory to collect into; defaults to
            ``run_dir / "collected"``. It is created if missing.

    Returns:
        A ``(destination, moved)`` tuple where ``moved`` is the number of
        files that were relocated.
    """
    destination = destination or run_dir / "collected"
    destination.mkdir(parents=True, exist_ok=True)
    moved = 0
    # sorted() materializes the glob before the first move, so relocating
    # files cannot disturb the iteration, and the order is deterministic.
    for tif_path in sorted(run_dir.glob("patches/*/outputs/*.tif")):
        patch_id = tif_path.parent.parent.name
        patch_destination = destination / patch_id
        patch_destination.mkdir(parents=True, exist_ok=True)
        shutil.move(str(tif_path), str(patch_destination / tif_path.name))
        moved += 1
    return destination, moved
8 changes: 6 additions & 2 deletions deployment/srgan_hpc/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class SlurmJobSpec:
slurm: SlurmConfig
environment: EnvironmentConfig
array: str | None = None
dependency: str | None = None
request_gpus: bool = True


def build_sbatch_command(spec: SlurmJobSpec) -> list[str]:
Expand All @@ -40,9 +42,9 @@ def build_sbatch_command(spec: SlurmJobSpec) -> list[str]:
]
if spec.slurm.partition:
cmd.append(f"--partition={spec.slurm.partition}")
if spec.slurm.gres:
if spec.request_gpus and spec.slurm.gres:
cmd.append(f"--gres={spec.slurm.gres}")
elif spec.slurm.gpus:
elif spec.request_gpus and spec.slurm.gpus:
if spec.slurm.gpu_type:
cmd.append(f"--gpus={spec.slurm.gpu_type}:{spec.slurm.gpus}")
else:
Expand All @@ -53,6 +55,8 @@ def build_sbatch_command(spec: SlurmJobSpec) -> list[str]:
cmd.append(f"--qos={spec.slurm.qos}")
if spec.array:
cmd.append(f"--array={spec.array}")
if spec.dependency:
cmd.append(f"--dependency={spec.dependency}")
cmd.extend(spec.slurm.extra_args)
cmd.append(str(spec.script_path))
cmd.append(str(spec.manifest_path))
Expand Down
25 changes: 25 additions & 0 deletions deployment/srgan_hpc/slurm/slurm_collect_entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env bash
#
# Slurm entrypoint for the collect step.
#
# Usage: slurm_collect_entrypoint.sh <manifest path>
#
# Optionally loads environment modules and activates a conda environment,
# then runs `deployment.srgan_hpc.cli collect` on the directory that contains
# the manifest.
#
# Environment variables read:
#   SRGAN_HPC_PYTHON     Python executable to run (default: python).
#   SRGAN_HPC_MODULES    Comma-separated module names to `module load`.
#   SRGAN_HPC_CONDA_ENV  Conda environment to activate.

set -euo pipefail

MANIFEST_PATH="${1:?manifest path required}"
PYTHON_BIN="${SRGAN_HPC_PYTHON:-python}"
# The run directory is the directory the manifest lives in.
RUN_DIR="$(dirname "${MANIFEST_PATH}")"

if [[ -n "${SRGAN_HPC_MODULES:-}" ]]; then
  if command -v module >/dev/null 2>&1; then
    IFS=',' read -r -a MODULE_LIST <<< "${SRGAN_HPC_MODULES}"
    for module_name in "${MODULE_LIST[@]}"; do
      module load "${module_name}"
    done
  else
    # Still best-effort (no abort), but no longer silent: the requested
    # modules were not loaded, which explains later "command not found" errors.
    echo "warning: SRGAN_HPC_MODULES is set but the 'module' command is unavailable; skipping module loads" >&2
  fi
fi

if [[ -n "${SRGAN_HPC_CONDA_ENV:-}" ]]; then
  # Activation scripts (conda's own and per-package activate.d hooks) may read
  # variables that are unset in a non-interactive batch shell. Relax nounset
  # only while activating; errexit stays on, so a failed activation still aborts.
  set +u
  if command -v conda >/dev/null 2>&1; then
    eval "$(conda shell.bash hook)"
    conda activate "${SRGAN_HPC_CONDA_ENV}"
  else
    # Fallback for setups where only the `activate` script is on PATH.
    source activate "${SRGAN_HPC_CONDA_ENV}"
  fi
  set -u
fi

# exec replaces this shell, so the job's exit status is the collect command's.
exec "${PYTHON_BIN}" -m deployment.srgan_hpc.cli collect --run-dir "${RUN_DIR}"
Loading