Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
986470a
Initial commit for adding NERSC IRI-API support alongside SFAPI for j…
davramov Mar 16, 2026
0512e58
Adding an abstraction for _submit_job() and _wait_for_job() that use …
davramov Mar 16, 2026
fe27519
moving NERSCLoginMethod(Enum) to the job_controller.py module
davramov Mar 17, 2026
eaf02fe
Removed NERSCLoginMethod(Enum) from nersc.py. Created a temporary tes…
davramov Mar 17, 2026
be2c571
Updating pytests
davramov Mar 17, 2026
cf15c20
Updating multires() method to use the generic _submit_job() and _wait…
davramov Mar 17, 2026
d0e8068
successfully ran reconstruction using the IRI-API
davramov Mar 30, 2026
6b8c843
removing token.py and moving the logic to get_globus_token.py
davramov Apr 1, 2026
27ea5b2
moving get_globus_token.py to orchestration/globus/ to be used as a m…
davramov Apr 1, 2026
bad1db5
Cleaning up nersc.py
davramov Apr 1, 2026
da16341
cleaning up old commented code
davramov Apr 1, 2026
d1d65ad
Updating unit tests
davramov Apr 1, 2026
dda78c5
updating login script
davramov Apr 7, 2026
596106a
Rebasing and including segmentation flows as part of iri/sfapi abstra…
davramov Apr 7, 2026
9da5e6e
commenting out petiole segmentation prune block for now, while testing
davramov Apr 13, 2026
ef227af
Making reconstruction run as a task
davramov Apr 13, 2026
b4558be
Making IRIAPI the default login method for now
davramov Apr 13, 2026
241c889
adjusting queue name and account
davramov Apr 14, 2026
c9e7b14
Making the IRI job submission read sbatch settings
davramov Apr 14, 2026
698d243
Switching to debug queue/2 nodes for the IRI demo
davramov Apr 14, 2026
6e01f8f
check globus token expiration before minting a new one. avoids race c…
davramov Apr 14, 2026
f4388e8
Fixing IRIAPI bugs, also commenting out Globus transfers for now
davramov Apr 14, 2026
a490bfe
removing IRIAPI client ID from nersc.py, since it is only used in glo…
davramov Apr 15, 2026
041f336
Updating logger comments
davramov Apr 23, 2026
863b24e
connecting to AmSC MLflow service
davramov Apr 24, 2026
0144f52
removing old commented code
davramov Apr 24, 2026
0ad03ac
updating pytest
davramov Apr 24, 2026
9d8e2c1
linting
davramov Apr 24, 2026
e4f4e08
adjusting import in pytest to avoid error on github that did not occu…
davramov Apr 24, 2026
98c7064
Getting NERSC reservations working with IRI API
davramov May 7, 2026
f9200fc
Updating pytests
davramov May 7, 2026
4f4a3d8
launch jobs with IRI API and a reservation
davramov May 7, 2026
2ff3876
fixing dino extra_flags bug
davramov May 7, 2026
405b197
fixing globus token race condition when jobs are launch simultaneously
davramov May 7, 2026
33c3d95
Adding frontend/prefect_runner.html
davramov May 10, 2026
81bf47d
updating html page with a timer and collapsible logs
davramov May 12, 2026
273593f
updating config with confab reservation
davramov May 12, 2026
b7f57e1
Separated out general MLflow tests (non-specific to beamlines)
davramov May 20, 2026
ff8a45f
removing quotes around enum
davramov May 20, 2026
d95abcf
moving nersc iri/sf-api resource definitions to config (no longer glo…
davramov May 20, 2026
b333e8e
Updating nersc.py to pull iri/sf-api parameters from the config, rath…
davramov May 20, 2026
b7a0fa3
removing redundant logging setLevel
davramov May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
BEAMLINE=8.3.2
GLOBUS_CLIENT_ID=<globus_client_id>
GLOBUS_CLIENT_SECRET=<globus_client_secret>
PREFECT_API_URL=<url_of_prefect_server>
PREFECT_API_KEY=<prefect_client_secret>
PUSHGATEWAY_URL=<url_of_pushgateway_server>
JOB_NAME=<jobname_for_pushgateway>
INSTANCE_LABEL=<label_for_pushgateway>
INSTANCE_LABEL=<label_for_pushgateway>
PATH_NERSC_CLIENT_ID=<path_to_nersc_client_id>
PATH_NERSC_PRI_KEY=<path_to_nersc_priv_key>
NERSC_USERNAME=<nersc_username>
AMSC_API_KEY=<amsc_api_key> # found here: https://profile.american-science-cloud.org/
59 changes: 42 additions & 17 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,37 +173,62 @@ mlflow:
staging:
tracking_uri: https://mlflow-staging.computing.als.lbl.gov
registry_uri: https://mlflow-staging.computing.als.lbl.gov
amsc:
tracking_uri: https://mlflow.american-science-cloud.org/
registry_uri: https://mlflow.american-science-cloud.org/
experiment_name: als-bl832-models

nersc_resources:
iri:
api_base_url: https://api.iri.nersc.gov
compute_resource: "compute"
# Perlmutter compute
perlmutter_compute: "94351904-6dba-4c16-b5cd-fbd280d8615b"
perlmutter_login: "e525a224-61c1-419f-9642-91168c792e39"
perlmutter_realtime: "3776417d-747c-4753-895a-6323c17b9c98"
perlmutter_job_submit: "3cf3c048-855e-4dd8-a189-065a483954bb"
# Storage
scratch: "43d8f6c0-f900-48ce-b267-73714103f4ac"
homes: "65b28619-c3b6-4942-8da1-044a3b3a2a9e"
common: "7e07a611-f927-4a39-a44d-b1d6e307accd"
cfs: "59e80c79-4dfd-4c53-9c07-7405685fcd37"
archive: "f4916c65-9001-49c2-b0bf-6fe4276b564c"
# Services
globus: "0a207df3-4bec-45b8-9060-13505d269da9"
dtns: "a762cbdc-af7a-4b2b-9463-67f0189dd2ae"
sfapi:
api_base_url: https://api.nersc.gov/api/v1.2

hpc_submission_settings832:
# ── RECON + MULTIRES SETTINGS ───────────────────────────────────────────────
nersc_reconstruction:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: realtime
account: als
reservation: "_CAP_TOMO_MOON_CPU"
qos: regular
account: amsc006
reservation: "_CAP_SYNAPS_LIVEDEMO_CPU2"
num_nodes: 16
cpus-per-task: 128
walltime: "0:30:00"
nersc_multiresolution:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: realtime
qos: debug
account: als
reservation: ""
reservation: "_CAP_SYNAPS_LIVEDEMO_CPU2"
cpus-per-task: 128
walltime: "0:15:00"

# ── PETIOLE SEGMENTATION SETTINGS ───────────────────────────────────────────
nersc_segmentation_sam3:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
account: als
account: amsc006
constraint: gpu
reservation: ""
num_nodes: 4
reservation: "_CAP_SYNAPS_LIVEDEMO_GPU2"
num_nodes: 32
ntasks-per-node: 1
gpus-per-node: 4
cpus-per-task: 128
walltime: "00:59:00"
walltime: "00:30:00"
# ── Inference parameters ──────────────────────────────────────────────────
script_name: "src/inference_v6.py"
batch_size: 1
Expand All @@ -227,15 +252,15 @@ hpc_submission_settings832:
nersc_segmentation_dinov3:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
account: als
account: amsc006
constraint: gpu
reservation: ""
num_nodes: 4
reservation: "_CAP_SYNAPS_LIVEDEMO_GPU2"
num_nodes: 8
ntasks-per-node: 1
nproc_per_node: 4
gpus-per-node: 4
cpus-per-task: 128
walltime: "00:59:00"
walltime: "00:30:00"
# ── Inference parameters ──────────────────────────────────────────────────
script_name: "src.inference_dino_v1"
batch_size: 4
Expand All @@ -247,13 +272,13 @@ hpc_submission_settings832:
nersc_combine_segmentations:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
account: als
account: amsc006
constraint: cpu
reservation: ""
reservation: "_CAP_SYNAPS_LIVEDEMO_CPU2"
num_nodes: 4
ntasks: 128
cpus-per-task: 1
walltime: "01:00:00"
walltime: "00:30:00"
# ── Combination parameters ────────────────────────────────────────────────
script_name: "src.combine_sam_dino_v3"
dilate_px: 5
Expand All @@ -268,7 +293,7 @@ hpc_submission_settings832:
qos: regular
account: als
constraint: gpu
reservation: "_CAP_TOMO_MOON_GPU"
reservation: "_CAP_TOMO_MOON_GPU2"
num_nodes: 4
ntasks-per-node: 1
nproc_per_node: 4
Expand Down
Loading
Loading