From 7b8dc3f706fd42075532a99594ce9894046ef866 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Thu, 28 May 2026 17:03:45 +0200 Subject: [PATCH 01/16] Move store spawns to block producer. --- Cargo.lock | 8 +- Cargo.toml | 3 +- bin/node/Cargo.toml | 1 + bin/node/src/commands/block_producer.rs | 4 +- bin/node/src/commands/lifecycle.rs | 4 +- bin/node/src/commands/mod.rs | 4 +- bin/node/src/commands/modes.rs | 165 +++++++-- bin/stress-test/src/seeding/mod.rs | 7 +- crates/block-producer/Cargo.toml | 2 + .../src/block_prover.rs} | 14 +- crates/block-producer/src/errors.rs | 14 + crates/block-producer/src/lib.rs | 9 +- .../src}/proof_scheduler.rs | 83 ++--- crates/block-producer/src/rpc_sync.rs | 142 +++++++ crates/block-producer/src/server/mod.rs | 124 ++++--- crates/block-producer/src/server/tests.rs | 19 +- crates/rpc/src/lib.rs | 2 +- crates/rpc/src/tests.rs | 5 +- crates/store/Cargo.toml | 5 - crates/store/src/data_directory.rs | 33 ++ crates/store/src/db/mod.rs | 15 + crates/store/src/errors.rs | 13 - crates/store/src/lib.rs | 8 +- crates/store/src/server/mod.rs | 347 ------------------ crates/store/src/server/replica_sync.rs | 161 -------- crates/store/src/state/bootstrap.rs | 43 +++ crates/store/src/state/disk_monitor.rs | 100 +++++ crates/store/src/state/mod.rs | 62 ++-- 28 files changed, 663 insertions(+), 734 deletions(-) rename crates/{store/src/server/block_prover_client.rs => block-producer/src/block_prover.rs} (82%) rename crates/{store/src/server => block-producer/src}/proof_scheduler.rs (76%) create mode 100644 crates/block-producer/src/rpc_sync.rs create mode 100644 crates/store/src/data_directory.rs delete mode 100644 crates/store/src/server/mod.rs delete mode 100644 crates/store/src/server/replica_sync.rs create mode 100644 crates/store/src/state/bootstrap.rs create mode 100644 crates/store/src/state/disk_monitor.rs diff --git a/Cargo.lock b/Cargo.lock index bab9671bd..031f69ce1 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -3334,6 +3334,7 @@ dependencies = [ "miden-node-utils", "miden-protocol", "tokio", + "tonic", "url", ] @@ -3345,6 +3346,7 @@ dependencies = [ "assert_matches", "futures", "itertools 0.14.0", + "miden-block-prover", "miden-node-proto", "miden-node-store", "miden-node-utils", @@ -3359,6 +3361,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", + "tokio-stream", "tonic", "tracing", "url", @@ -3472,7 +3475,6 @@ version = "0.15.0" dependencies = [ "anyhow", "assert_matches", - "async-trait", "build-rs", "criterion", "deadpool", @@ -3483,7 +3485,6 @@ dependencies = [ "indexmap", "libsqlite3-sys", "miden-agglayer", - "miden-block-prover", "miden-crypto", "miden-large-smt-backend-rocksdb", "miden-node-db", @@ -3491,7 +3492,6 @@ dependencies = [ "miden-node-test-macro", "miden-node-utils", "miden-protocol", - "miden-remote-prover-client", "miden-standards", "pretty_assertions", "rand 0.9.4", @@ -3502,9 +3502,7 @@ dependencies = [ "tokio", "tokio-stream", "toml", - "tonic", "tracing", - "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 36f8d2246..c3f172c3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -133,8 +133,7 @@ url = { features = ["serde"], version = "2.5" } # libsqlite3-sys is kept to control the bundled SQLite linkage. # tonic-prost is used by generated gRPC code rather than handwritten Rust. # -# TODO(mirko): remove rpc again once we've completed the refactoring. -ignored = ["libsqlite3-sys", "miden-node-rpc", "tonic-prost"] +ignored = ["libsqlite3-sys", "tonic-prost"] # Lints are set to warn for development, which are promoted to errors in CI. 
[workspace.lints.clippy] diff --git a/bin/node/Cargo.toml b/bin/node/Cargo.toml index fc716f8e5..314bdaa73 100644 --- a/bin/node/Cargo.toml +++ b/bin/node/Cargo.toml @@ -29,6 +29,7 @@ miden-node-store = { workspace = true } miden-node-utils = { workspace = true } miden-protocol = { workspace = true } tokio = { features = ["macros", "net", "rt-multi-thread"], workspace = true } +tonic = { default-features = false, workspace = true } url = { workspace = true } [dev-dependencies] diff --git a/bin/node/src/commands/block_producer.rs b/bin/node/src/commands/block_producer.rs index 03d441392..5c159f5dd 100644 --- a/bin/node/src/commands/block_producer.rs +++ b/bin/node/src/commands/block_producer.rs @@ -5,9 +5,9 @@ use miden_node_block_producer::{ DEFAULT_BATCH_INTERVAL, DEFAULT_BLOCK_INTERVAL, DEFAULT_MAX_BATCHES_PER_BLOCK, + DEFAULT_MAX_CONCURRENT_PROOFS, DEFAULT_MAX_TXS_PER_BATCH, }; -use miden_node_store::DEFAULT_MAX_CONCURRENT_PROOFS; use miden_node_utils::clap::duration_to_human_readable_string; use url::Url; @@ -74,7 +74,7 @@ mod tests { block: BlockOptions { interval: DEFAULT_BLOCK_INTERVAL, max_batches, - max_concurrent_proofs: miden_node_store::DEFAULT_MAX_CONCURRENT_PROOFS, + max_concurrent_proofs: miden_node_block_producer::DEFAULT_MAX_CONCURRENT_PROOFS, }, block_prover: BlockProverOptions { url: None }, mempool: MempoolOptions { diff --git a/bin/node/src/commands/lifecycle.rs b/bin/node/src/commands/lifecycle.rs index ee3be1a3d..f53ba833b 100644 --- a/bin/node/src/commands/lifecycle.rs +++ b/bin/node/src/commands/lifecycle.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use anyhow::Context; use miden_node_store::genesis::GenesisBlock; -use miden_node_store::{DataDirectory, Db, Store}; +use miden_node_store::{DataDirectory, Db, State}; use miden_node_utils::fs::ensure_empty_directory; use miden_protocol::block::SignedBlock; use miden_protocol::utils::serde::Deserializable; @@ -38,7 +38,7 @@ pub fn bootstrap_store(data_directory: &Path, genesis_block_path: 
&Path) -> anyh let genesis_block = GenesisBlock::try_from(signed_block).context("genesis block validation failed")?; - Store::bootstrap(genesis_block, data_directory) + State::bootstrap(genesis_block, data_directory) } // MIGRATE diff --git a/bin/node/src/commands/mod.rs b/bin/node/src/commands/mod.rs index 425a76c64..fab6d0b0e 100644 --- a/bin/node/src/commands/mod.rs +++ b/bin/node/src/commands/mod.rs @@ -65,8 +65,8 @@ impl Command { match self { Command::Bootstrap(bootstrap_command) => bootstrap_command.handle(), Command::Migrate(migrate_command) => migrate_command.handle().await, - Command::Sequencer(sequencer_command) => sequencer_command.handle(), - Command::Full(full_node_command) => full_node_command.handle(), + Command::Sequencer(sequencer_command) => sequencer_command.handle().await, + Command::Full(full_node_command) => full_node_command.handle().await, } } } diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 8382ff7c5..12b55edc2 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -1,9 +1,18 @@ +use std::sync::Arc; + +use anyhow::Context; +use miden_node_block_producer::{RpcSync, Sequencer}; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; +use miden_node_rpc::{NetworkTxAuth, Rpc, RpcMode}; +use miden_node_store::{ApplyBlockError, State}; +use tokio::net::TcpListener; +use tokio::task::JoinError; +use tonic::metadata::AsciiMetadataValue; use url::Url; use super::block_producer::BlockProducerOptions; use super::rpc::SyncOptions; -use super::runtime::RuntimeOptions; +use super::runtime::{RuntimeConfig, RuntimeOptions}; use super::store::StoreOptions; // RUNTIME MODES @@ -25,27 +34,51 @@ pub struct SequencerCommand { } impl SequencerCommand { - pub fn handle(self) -> anyhow::Result<()> { + pub async fn handle(self) -> anyhow::Result<()> { let runtime = self.runtime.runtime_config(&self.store); self.block_producer.validate()?; - let validator = 
self.external_services.validator_client(); - let ntx_builder = self.external_services.ntx_builder_client(); - let _ = ( - runtime.rpc_listen, - runtime.data_directory, - validator, - ntx_builder, - self.block_producer.block_prover.url, - runtime.database_options, - runtime.external_grpc_options, - runtime.storage_options, - self.block_producer.block.max_concurrent_proofs, - ); - - anyhow::bail!( - "sequencer mode runtime composition is not implemented yet; this stage only defines \ - the CLI" - ) + let network_tx_auth = self.runtime.rpc.network_tx_auth()?; + let (state, mut termination_signal) = load_state(&runtime).await?; + let _disk_monitor = state.spawn_disk_monitor(); + + let sequencer = Sequencer { + store: Arc::clone(&state), + validator_url: self.external_services.validator_url.clone(), + batch_prover_url: self.block_producer.batch.prover_url, + block_prover_url: self.block_producer.block_prover.url, + batch_interval: self.block_producer.batch.interval, + block_interval: self.block_producer.block.interval, + max_txs_per_batch: self.block_producer.batch.max_txs, + max_batches_per_block: self.block_producer.block.max_batches, + max_concurrent_proofs: self.block_producer.block.max_concurrent_proofs, + mempool_tx_capacity: self.block_producer.mempool.tx_capacity, + } + .spawn() + .await + .context("failed to spawn sequencer")?; + let block_producer = sequencer.api(); + + let rpc = Rpc { + listener: bind_rpc(runtime.rpc_listen).await?, + store: state, + mode: RpcMode::sequencer(block_producer, self.external_services.validator_client()), + ntx_builder: Some(self.external_services.ntx_builder_client()), + grpc_options: runtime.external_grpc_options, + network_tx_auth, + }; + let rpc_task = tokio::spawn(rpc.serve()); + + tokio::select! 
{ + Some(err) = termination_signal.recv() => { + Err(anyhow::anyhow!("received termination signal").context(err)) + }, + result = sequencer.wait() => { + result.context("sequencer task stopped") + }, + result = rpc_task => { + task_result("RPC server", result) + }, + } } } @@ -95,22 +128,39 @@ pub struct FullNodeCommand { } impl FullNodeCommand { - pub fn handle(self) -> anyhow::Result<()> { + pub async fn handle(self) -> anyhow::Result<()> { let runtime = self.runtime.runtime_config(&self.store); let source_rpc = self.sync.source_rpc_client(); - let _ = ( - runtime.rpc_listen, - runtime.data_directory, - runtime.database_options, - runtime.external_grpc_options, - runtime.storage_options, - source_rpc, - ); - - anyhow::bail!( - "full node mode block-stream sync is not implemented yet; this stage only defines the \ - CLI" - ) + let network_tx_auth = self.runtime.rpc.network_tx_auth()?; + let (state, mut termination_signal) = load_state(&runtime).await?; + let _disk_monitor = state.spawn_disk_monitor(); + + let sync_task = RpcSync { + state: Arc::clone(&state), + source_rpc: source_rpc.clone(), + } + .spawn(); + let rpc = Rpc { + listener: bind_rpc(runtime.rpc_listen).await?, + store: state, + mode: RpcMode::full_node(source_rpc), + ntx_builder: None, + grpc_options: runtime.external_grpc_options, + network_tx_auth, + }; + let rpc_task = tokio::spawn(rpc.serve()); + + tokio::select! 
{ + Some(err) = termination_signal.recv() => { + Err(anyhow::anyhow!("received termination signal").context(err)) + }, + result = sync_task => { + task_result("RPC sync", result) + }, + result = rpc_task => { + task_result("RPC server", result) + }, + } } } @@ -125,3 +175,50 @@ impl SyncOptions { .connect_lazy::() } } + +impl super::rpc::RpcOptions { + fn network_tx_auth(&self) -> anyhow::Result> { + self.network_tx_auth_header_value + .as_deref() + .map(|value| { + value + .parse::() + .map(NetworkTxAuth) + .context("invalid rpc.network-tx-auth-header-value") + }) + .transpose() + } +} + +async fn load_state( + runtime: &RuntimeConfig, +) -> anyhow::Result<(Arc, tokio::sync::mpsc::Receiver)> { + let (termination_ask, termination_signal) = tokio::sync::mpsc::channel::(1); + let state = State::load_with_database_options( + &runtime.data_directory, + runtime.storage_options.clone(), + runtime.database_options, + termination_ask, + ) + .await + .context("failed to load state")?; + + Ok((Arc::new(state), termination_signal)) +} + +async fn bind_rpc(listen: std::net::SocketAddr) -> anyhow::Result { + TcpListener::bind(listen) + .await + .with_context(|| format!("failed to bind RPC listener to {listen}")) +} + +fn task_result( + task: &'static str, + result: Result, JoinError>, +) -> anyhow::Result<()> { + match result { + Ok(Ok(())) => Err(anyhow::anyhow!("{task} exited unexpectedly")), + Ok(Err(err)) => Err(err).with_context(|| format!("{task} fatal error")), + Err(err) => Err(err).with_context(|| format!("{task} panicked")), + } +} diff --git a/bin/stress-test/src/seeding/mod.rs b/bin/stress-test/src/seeding/mod.rs index 099001a83..1c0fe2e4a 100644 --- a/bin/stress-test/src/seeding/mod.rs +++ b/bin/stress-test/src/seeding/mod.rs @@ -5,8 +5,7 @@ use std::time::Instant; use metrics::SeedingMetrics; use miden_node_proto::domain::batch::BatchInputs; -use miden_node_store::state::State; -use miden_node_store::{DataDirectory, GenesisState, Store}; +use 
miden_node_store::{DataDirectory, GenesisState, State}; use miden_node_utils::clap::StorageOptions; use miden_protocol::account::auth::AuthScheme; use miden_protocol::account::delta::AccountUpdateDetails; @@ -122,7 +121,7 @@ pub async fn seed_store( .clone() .into_block(&signer) .expect("genesis block should be created"); - Store::bootstrap(genesis_block, &data_directory).expect("store should bootstrap"); + State::bootstrap(genesis_block, &data_directory).expect("store should bootstrap"); let store_state = load_state(data_directory.clone()).await; @@ -839,7 +838,7 @@ pub async fn start_store(data_directory: PathBuf) -> Arc { async fn load_state(data_directory: PathBuf) -> Arc { let (termination_ask, _termination_signal) = tokio::sync::mpsc::channel(1); - let (state, _) = State::load(&data_directory, StorageOptions::bench(), termination_ask) + let state = State::load(&data_directory, StorageOptions::bench(), termination_ask) .await .expect("store state should load"); Arc::new(state) diff --git a/crates/block-producer/Cargo.toml b/crates/block-producer/Cargo.toml index b0d1f2a5b..cdc127ed9 100644 --- a/crates/block-producer/Cargo.toml +++ b/crates/block-producer/Cargo.toml @@ -25,6 +25,7 @@ tracing-forest = ["miden-node-utils/tracing-forest"] anyhow = { workspace = true } futures = { workspace = true } itertools = { workspace = true } +miden-block-prover = { workspace = true } miden-node-proto = { workspace = true } miden-node-store = { workspace = true } miden-node-utils = { features = ["testing"], workspace = true } @@ -34,6 +35,7 @@ miden-tx-batch-prover = { workspace = true } rand = { workspace = true } thiserror = { workspace = true } tokio = { features = ["macros", "net", "rt-multi-thread"], workspace = true } +tokio-stream = { workspace = true } tonic = { default-features = true, features = ["transport"], workspace = true } tracing = { workspace = true } url = { workspace = true } diff --git a/crates/store/src/server/block_prover_client.rs 
b/crates/block-producer/src/block_prover.rs similarity index 82% rename from crates/store/src/server/block_prover_client.rs rename to crates/block-producer/src/block_prover.rs index f34bba685..005457bf2 100644 --- a/crates/store/src/server/block_prover_client.rs +++ b/crates/block-producer/src/block_prover.rs @@ -1,4 +1,4 @@ -use miden_block_prover::{BlockProverError, LocalBlockProver}; +use miden_block_prover::{BlockProverError as LocalBlockProverError, LocalBlockProver}; use miden_node_utils::spawn::spawn_blocking_in_current_span; use miden_protocol::batch::OrderedBatches; use miden_protocol::block::{BlockHeader, BlockInputs, BlockProof}; @@ -8,9 +8,9 @@ use tracing::instrument; use crate::COMPONENT; #[derive(Debug, thiserror::Error)] -pub enum StoreProverError { +pub enum ProverError { #[error("local proving failed")] - LocalProvingFailed(#[source] BlockProverError), + LocalProvingFailed(#[source] LocalBlockProverError), #[error("remote proving failed")] RemoteProvingFailed(#[source] RemoteProverClientError), #[error("local proving task join error")] @@ -44,7 +44,7 @@ impl BlockProver { tx_batches: OrderedBatches, block_inputs: BlockInputs, block_header: &BlockHeader, - ) -> Result { + ) -> Result { match self { Self::Local(prover) => { let prover = prover.clone(); @@ -53,15 +53,15 @@ impl BlockProver { spawn_blocking_in_current_span(move || { prover .prove(tx_batches, &block_header, block_inputs) - .map_err(StoreProverError::LocalProvingFailed) + .map_err(ProverError::LocalProvingFailed) }) .await - .map_err(StoreProverError::LocalProvingTaskJoin)? + .map_err(ProverError::LocalProvingTaskJoin)? 
}, Self::Remote(prover) => Ok(prover .prove(tx_batches, block_header, block_inputs) .await - .map_err(StoreProverError::RemoteProvingFailed)?), + .map_err(ProverError::RemoteProvingFailed)?), } } } diff --git a/crates/block-producer/src/errors.rs b/crates/block-producer/src/errors.rs index 651093a6e..596f4329f 100644 --- a/crates/block-producer/src/errors.rs +++ b/crates/block-producer/src/errors.rs @@ -10,6 +10,7 @@ use miden_node_store::{ use miden_protocol::Word; use miden_protocol::account::AccountId; use miden_protocol::block::BlockNumber; +use miden_protocol::crypto::utils::DeserializationError; use miden_protocol::errors::{ProposedBatchError, ProposedBlockError, ProvenBatchError}; use miden_protocol::note::Nullifier; use miden_remote_prover_client::RemoteProverClientError; @@ -40,6 +41,19 @@ pub enum BlockProducerError { }, } +// Proof scheduler errors +// ================================================================================================= + +#[derive(Debug, Error)] +pub enum ProofSchedulerError { + #[error("no proving inputs found for block {0}")] + MissingProvingInputs(BlockNumber), + #[error("failed to deserialize proving inputs for block")] + DeserializationFailed(#[source] DeserializationError), + #[error("invalid remote prover endpoint: {0}")] + InvalidProverEndpoint(String), +} + // Add transaction and add user batch errors // ================================================================================================= diff --git a/crates/block-producer/src/lib.rs b/crates/block-producer/src/lib.rs index a8ced3923..35691a6e6 100644 --- a/crates/block-producer/src/lib.rs +++ b/crates/block-producer/src/lib.rs @@ -7,8 +7,11 @@ pub mod test_utils; mod batch_builder; mod block_builder; +mod block_prover; mod domain; mod mempool; +mod proof_scheduler; +mod rpc_sync; pub mod store; mod validator; @@ -19,13 +22,15 @@ mod errors; pub mod server; pub use errors::MempoolSubmissionError; +pub use proof_scheduler::DEFAULT_MAX_CONCURRENT_PROOFS; 
+pub use rpc_sync::RpcSync; pub use server::{ - BlockProducer, BlockProducerApi, BlockProducerApiConfig, - BlockProducerRuntime, BlockProducerStatus, MempoolStats, + Sequencer, + SequencerHandle, }; // CONSTANTS diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/block-producer/src/proof_scheduler.rs similarity index 76% rename from crates/store/src/server/proof_scheduler.rs rename to crates/block-producer/src/proof_scheduler.rs index 6b6fc9668..32ef7411b 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/block-producer/src/proof_scheduler.rs @@ -1,6 +1,6 @@ //! Background task that drives deferred block proving. //! -//! The [`proof_scheduler`] is spawned as an internal Store task. It: +//! The scheduler: //! //! 1. Tracks `chain_tip` via a [`watch::Receiver`]. //! 2. Maintains up to `max_concurrent_proofs` in-flight proving jobs via a [`JoinSet`]. @@ -17,22 +17,19 @@ use std::sync::Arc; use std::time::Duration; use anyhow::Context; -use miden_crypto::utils::Serializable; use miden_node_proto::BlockProofRequest; +use miden_node_store::state::{Finality, State}; use miden_protocol::block::{BlockNumber, BlockProof}; -use miden_protocol::utils::serde::Deserializable; +use miden_protocol::utils::serde::{Deserializable, Serializable}; use miden_remote_prover_client::RemoteProverClientError; use thiserror::Error; use tokio::sync::watch; -use tokio::task::{JoinHandle, JoinSet}; +use tokio::task::JoinSet; use tracing::{Instrument, info, instrument}; use crate::COMPONENT; -use crate::blocks::BlockStore; +use crate::block_prover::{BlockProver, ProverError}; use crate::errors::ProofSchedulerError; -use crate::proven_tip::ProvenTipWriter; -use crate::server::block_prover_client::{BlockProver, StoreProverError}; -use crate::state::{ProofCache, ProofNotification}; // CONSTANTS // ================================================================================================ @@ -65,14 +62,13 @@ impl ProofTaskJoinSet { /// Spawns a new task to prove 
a block. fn spawn( &mut self, - block_store: &Arc, + state: &Arc, block_prover: &Arc, block_num: BlockNumber, ) { - let block_store = Arc::clone(block_store); + let state = Arc::clone(state); let block_prover = Arc::clone(block_prover); - self.0 - .spawn(async move { prove_block(&block_store, &block_prover, block_num).await }); + self.0.spawn(async move { prove_block(&state, &block_prover, block_num).await }); } /// Returns the result of the next completed task, or pends forever if the set is empty. @@ -93,33 +89,6 @@ impl ProofTaskJoinSet { // PROOF SCHEDULER // ================================================================================================ -/// Spawns the proof scheduler as a background tokio task. -/// -/// The scheduler uses `chain_tip_rx` to learn about newly committed blocks and checks the -/// block store for proving inputs files to determine which blocks need proving. After each proof -/// is saved, the result is pushed into `proof_cache` and the proven tip watch and file are -/// updated so replica subscribers are notified. -/// -/// Returns a [`JoinHandle`] that resolves when the scheduler encounters a fatal error or -/// completes unexpectedly. -pub fn spawn( - block_prover: Arc, - block_store: Arc, - chain_tip_rx: watch::Receiver, - proven_tip: ProvenTipWriter, - max_concurrent_proofs: NonZeroUsize, - proof_cache: ProofCache, -) -> JoinHandle> { - tokio::spawn(run( - block_prover, - block_store, - chain_tip_rx, - proven_tip, - max_concurrent_proofs, - proof_cache, - )) -} - /// Main loop of the proof scheduler. /// /// Maintains a pool of concurrent proving jobs via [`JoinSet`], fills them up to @@ -130,13 +99,11 @@ pub fn spawn( /// /// Returns `Err` on irrecoverable errors (missing proving inputs, I/O failures). /// Transient errors are retried internally. 
-async fn run( +pub(crate) async fn run( block_prover: Arc, - block_store: Arc, + state: Arc, mut chain_tip_rx: watch::Receiver, - proven_tip: ProvenTipWriter, max_concurrent_proofs: NonZeroUsize, - proof_cache: ProofCache, ) -> anyhow::Result<()> { info!(target: COMPONENT, "Proof scheduler started"); @@ -145,7 +112,7 @@ async fn run( // Next block number to schedule. Initialized from the proven tip's child so we skip // already-proven blocks on restart. - let mut next_to_prove = proven_tip.read().child(); + let mut next_to_prove = state.chain_tip(Finality::Proven).await.child(); // Completed proofs waiting to be committed in order. let mut pending: BTreeMap> = BTreeMap::new(); @@ -154,28 +121,26 @@ async fn run( // Schedule blocks up to chain_tip that haven't been scheduled yet. let chain_tip = *chain_tip_rx.borrow(); while proving_tasks.len() < max_concurrent_proofs.get() && next_to_prove <= chain_tip { - proving_tasks.spawn(&block_store, &block_prover, next_to_prove); + proving_tasks.spawn(&state, &block_prover, next_to_prove); next_to_prove = next_to_prove.child(); } // Wait for either a job to complete or the chain tip to advance. tokio::select! { - // Proving a block has completed — cache and commit the proof. + // Proving a block has completed - cache and commit the proof. proving_result = proving_tasks.join_next() => { let (block_num, proof_bytes) = proving_result?; pending.insert(block_num, proof_bytes); // Drain completed proofs in ascending order so the proven tip advances without // gaps. - let mut next = proven_tip.read().child(); + let mut next = state.chain_tip(Finality::Proven).await.child(); while let Some(proof_bytes) = pending.remove(&next) { - block_store.commit_proof(next, &proof_bytes).await?; - proof_cache.push(next, ProofNotification::new(next, proof_bytes)); - proven_tip.advance(next); + state.apply_proof(next, proof_bytes).await?; next = next.child(); } }, - // New chain tip received — re-enter the scheduling loop on next iteration. 
+ // New chain tip received - re-enter the scheduling loop on next iteration. result = chain_tip_rx.changed() => { if result.is_err() { info!(target: COMPONENT, "Chain tip channel closed, proof scheduler exiting"); @@ -193,7 +158,7 @@ async fn run( #[instrument(target = COMPONENT, name = "prove_block", skip_all, fields(block.number=block_num.as_u32()), err)] async fn prove_block( - block_store: &BlockStore, + state: &State, block_prover: &BlockProver, block_num: BlockNumber, ) -> anyhow::Result<(BlockNumber, Vec)> { @@ -211,7 +176,7 @@ async fn prove_block( let result = tokio::time::timeout( BLOCK_PROVE_ATTEMPT_TIMEOUT, - generate_block_proof(block_store, block_prover, block_num), + generate_block_proof(state, block_prover, block_num), ) .instrument(attempt_span.clone()) .await; @@ -241,11 +206,11 @@ async fn prove_block( /// Generates a block proof by loading inputs from the block store and invoking the block prover. #[instrument(target = COMPONENT, name = "prove_block.generate", skip_all, fields(block.number=block_num.as_u32()), err)] async fn generate_block_proof( - block_store: &BlockStore, + state: &State, block_prover: &BlockProver, block_num: BlockNumber, ) -> Result { - let bytes = block_store + let bytes = state .load_proving_inputs(block_num) .await .map_err(|e| ProveBlockError::Transient(e.into()))? 
@@ -279,11 +244,11 @@ enum ProveBlockError { } impl ProveBlockError { - fn from_prover_error(err: StoreProverError) -> Self { + fn from_prover_error(err: ProverError) -> Self { match err { - StoreProverError::RemoteProvingFailed(RemoteProverClientError::InvalidEndpoint( - uri, - )) => Self::Fatal(ProofSchedulerError::InvalidProverEndpoint(uri)), + ProverError::RemoteProvingFailed(RemoteProverClientError::InvalidEndpoint(uri)) => { + Self::Fatal(ProofSchedulerError::InvalidProverEndpoint(uri)) + }, _ => Self::Transient(err.into()), } } diff --git a/crates/block-producer/src/rpc_sync.rs b/crates/block-producer/src/rpc_sync.rs new file mode 100644 index 000000000..757fefcd2 --- /dev/null +++ b/crates/block-producer/src/rpc_sync.rs @@ -0,0 +1,142 @@ +use std::sync::Arc; +use std::time::Duration; + +use anyhow::Context; +use miden_node_proto::clients::RpcClient; +use miden_node_proto::generated::rpc::{BlockSubscriptionRequest, ProofSubscriptionRequest}; +use miden_node_store::state::{Finality, State}; +use miden_protocol::block::{BlockNumber, SignedBlock}; +use miden_protocol::utils::serde::Deserializable; +use tokio_stream::StreamExt; +use tracing::{info, warn}; + +pub(crate) const RECONNECT_DELAY: Duration = Duration::from_secs(5); + +// RPC SYNC +// ================================================================================================ + +/// Synchronizes local state from an upstream RPC service. +pub struct RpcSync { + pub state: Arc, + pub source_rpc: RpcClient, +} + +impl RpcSync { + /// Spawns the block and proof synchronization loops as a supervised Tokio task. + pub fn spawn(self) -> tokio::task::JoinHandle> { + tokio::spawn(async move { + let block_sync = BlockSync { + state: Arc::clone(&self.state), + source_rpc: self.source_rpc.clone(), + }; + let proof_sync = ProofSync { + state: self.state, + source_rpc: self.source_rpc, + }; + + let block_handle = block_sync.spawn(); + let proof_handle = proof_sync.spawn(); + + tokio::select! 
{ + result = block_handle => result?, + result = proof_handle => result?, + } + }) + } +} + +// SYNC LOOP +// ================================================================================================ + +struct BlockSync { + state: Arc, + source_rpc: RpcClient, +} + +struct ProofSync { + state: Arc, + source_rpc: RpcClient, +} + +impl BlockSync { + fn spawn(self) -> tokio::task::JoinHandle> { + tokio::spawn(self.run()) + } + + async fn run(self) -> anyhow::Result<()> { + loop { + let err = self + .sync() + .await + .and_then(|_| Err::<(), _>(anyhow::anyhow!("unexpected end of stream"))) + .unwrap_err(); + warn!( + err = %format!("{err:#}"), + retry.delay = %RECONNECT_DELAY.as_secs(), + "Block sync failed, retrying", + ); + tokio::time::sleep(RECONNECT_DELAY).await; + } + } + + async fn sync(&self) -> anyhow::Result<()> { + let block_from = self.state.chain_tip(Finality::Committed).await.child().as_u32(); + info!(block_from, "Connecting to upstream RPC for blocks"); + + let mut client = self.source_rpc.clone(); + let mut stream = client + .block_subscription(BlockSubscriptionRequest { block_from }) + .await? 
+ .into_inner(); + + while let Some(result) = stream.next().await { + let event = result?; + let block = SignedBlock::read_from_bytes(&event.block) + .context("failed to deserialize block from upstream")?; + self.state.apply_block(block).await?; + } + + Ok(()) + } +} + +impl ProofSync { + fn spawn(self) -> tokio::task::JoinHandle> { + tokio::spawn(self.run()) + } + + async fn run(self) -> anyhow::Result<()> { + loop { + let err = self + .sync() + .await + .and_then(|_| Err::<(), _>(anyhow::anyhow!("unexpected end of stream"))) + .unwrap_err(); + warn!( + err = %format!("{err:#}"), + retry.delay = %RECONNECT_DELAY.as_secs(), + "Proof sync failed, retrying", + ); + tokio::time::sleep(RECONNECT_DELAY).await; + } + } + + async fn sync(&self) -> anyhow::Result<()> { + let block_from = self.state.chain_tip(Finality::Proven).await.as_u32().saturating_add(1); + info!(block_from, "Connecting to upstream RPC for proofs"); + + let mut client = self.source_rpc.clone(); + let mut stream = client + .proof_subscription(ProofSubscriptionRequest { block_from }) + .await? 
+ .into_inner(); + + while let Some(result) = stream.next().await { + let event = result?; + let block_num = BlockNumber::from(event.block_num); + self.state.apply_proof(block_num, event.proof).await?; + } + + Ok(()) + } +} diff --git a/crates/block-producer/src/server/mod.rs b/crates/block-producer/src/server/mod.rs index 60f172b54..fa723143a 100644 --- a/crates/block-producer/src/server/mod.rs +++ b/crates/block-producer/src/server/mod.rs @@ -10,17 +10,23 @@ use miden_protocol::batch::ProposedBatch; use miden_protocol::block::BlockNumber; use miden_protocol::transaction::ProvenTransaction; use tokio::sync::{Mutex, RwLock}; -use tokio::task::{Id, JoinSet}; +use tokio::task::{Id, JoinHandle, JoinSet}; use tracing::{debug, info, instrument}; use url::Url; use crate::batch_builder::BatchBuilder; use crate::block_builder::BlockBuilder; +use crate::block_prover::BlockProver; use crate::domain::transaction::AuthenticatedTransaction; use crate::errors::{BlockProducerError, MempoolSubmissionError}; use crate::mempool::{BatchBudget, BlockBudget, Mempool, MempoolConfig, SharedMempool}; use crate::validator::BlockProducerValidatorClient; -use crate::{CACHED_MEMPOOL_STATS_UPDATE_INTERVAL, COMPONENT, SERVER_NUM_BATCH_BUILDERS}; +use crate::{ + CACHED_MEMPOOL_STATS_UPDATE_INTERVAL, + COMPONENT, + SERVER_NUM_BATCH_BUILDERS, + proof_scheduler, +}; #[cfg(test)] mod tests; @@ -60,16 +66,18 @@ impl BlockProducerApiConfig { } } -/// The block producer runtime. +/// The sequencer runtime configuration. /// /// Specifies how to connect to the batch prover and block prover components. -pub struct BlockProducer { +pub struct Sequencer { /// The store state shared with the block producer. pub store: Arc, /// The address of the validator component. pub validator_url: Url, /// The address of the batch prover component. pub batch_prover_url: Option, + /// The address of the block prover component. + pub block_prover_url: Option, /// The interval at which to produce batches. 
pub batch_interval: Duration, /// The interval at which to produce blocks. @@ -78,6 +86,8 @@ pub struct BlockProducer { pub max_txs_per_batch: usize, /// The maximum number of batches per block. pub max_batches_per_block: usize, + /// The maximum number of concurrent block proofs to schedule. + pub max_concurrent_proofs: NonZeroUsize, /// The maximum number of inflight transactions allowed in the mempool at once. pub mempool_tx_capacity: NonZeroUsize, @@ -86,18 +96,15 @@ pub struct BlockProducer { // BLOCK PRODUCER // ================================================================================================ -impl BlockProducer { - /// Starts the block producer and returns its in-process API. - /// - /// The returned handle owns the batch and block builder tasks. Dropping the handle stops those - /// tasks. - pub async fn start(self) -> Result { - info!(target: COMPONENT, "Initializing block producer"); +impl Sequencer { + /// Spawns the sequencer tasks and returns its in-process API. + pub async fn spawn(self) -> Result { + info!(target: COMPONENT, "Initializing sequencer"); let store = self.store; let validator = BlockProducerValidatorClient::new(self.validator_url.clone()); let chain_tip = store.chain_tip(Finality::Committed).await; - info!(target: COMPONENT, "Block producer initialized"); + info!(target: COMPONENT, "Sequencer initialized"); let block_builder = BlockBuilder::new(Arc::clone(&store), validator, self.block_interval); let batch_builder = BatchBuilder::new( @@ -113,8 +120,15 @@ impl BlockProducer { }; let mempool = Mempool::shared(chain_tip, api_config.mempool_config()); let api = BlockProducerApi::from_shared_mempool(mempool.clone(), store); + let block_prover = if let Some(url) = self.block_prover_url { + Arc::new(BlockProver::remote(url)) + } else { + Arc::new(BlockProver::local()) + }; + let chain_tip_rx = api.store.subscribe_committed_tip(); - // Spawn batch and block builders. These communicate indirectly via a shared mempool. 
+ // Spawn batch builder, block builder, and proof scheduler. The builders communicate + // indirectly via a shared mempool. // // These should run forever, so we combine them into a joinset so that if // any complete or fail, we can shutdown the rest (somewhat) gracefully. @@ -132,65 +146,83 @@ impl BlockProducer { async { block_builder.run(mempool).await } }) .id(); + let proof_scheduler_id = tasks + .spawn({ + let store = Arc::clone(&api.store); + async move { + proof_scheduler::run( + block_prover, + store, + chain_tip_rx, + self.max_concurrent_proofs, + ) + .await + } + }) + .id(); let task_ids = HashMap::from([ (batch_builder_id, "batch-builder"), (block_builder_id, "block-builder"), + (proof_scheduler_id, "proof-scheduler"), ]); + let task = tokio::spawn(wait_for_tasks(tasks, task_ids)); - Ok(BlockProducerRuntime { api, tasks, task_ids }) + Ok(SequencerHandle { api, task }) } - /// Serves the block producer's batch-builder and block-builder tasks. + /// Serves the sequencer tasks. /// /// Executes in place (i.e. not spawned) and will run indefinitely until a fatal error is /// encountered. pub async fn serve(self) -> anyhow::Result<()> { - self.start().await?.wait().await + self.spawn().await?.wait().await } } -/// Running block producer tasks plus the API used to submit work to them. -pub struct BlockProducerRuntime { +/// Running sequencer tasks plus the API used to submit work to them. +pub struct SequencerHandle { api: BlockProducerApi, - tasks: JoinSet>, - task_ids: HashMap, + task: JoinHandle>, } -impl BlockProducerRuntime { +impl SequencerHandle { /// Returns a cloneable handle to the block producer API. pub fn api(&self) -> BlockProducerApi { self.api.clone() } - /// Waits for the block producer runtime to end. - /// - /// The batch and block builder tasks should run indefinitely, so this returns an error when any - /// task completes. - pub async fn wait(mut self) -> anyhow::Result<()> { - // Wait for any task to end. 
They should run indefinitely, so this is an unexpected result. - // - // SAFETY: The JoinSet is definitely not empty. - let task_result = self.tasks.join_next_with_id().await.unwrap(); - - let task_id = match &task_result { - Ok((id, _)) => *id, - Err(err) => err.id(), - }; - let task = self.task_ids.get(&task_id).copied().unwrap_or("unknown"); - - // We could abort the other tasks here, but not much point as we're probably crashing the - // node. - task_result - .map_err(|source| BlockProducerError::JoinError { task, source }) - .map(|(_, result)| match result { - Ok(_) => Err(BlockProducerError::UnexpectedTaskCompletion { task }), - Err(source) => Err(BlockProducerError::TaskError { task, source }), - }) - .and_then(|x| x)? + /// Waits for the sequencer tasks to end. + pub async fn wait(self) -> anyhow::Result<()> { + self.task.await? } } +async fn wait_for_tasks( + mut tasks: JoinSet>, + task_ids: HashMap, +) -> anyhow::Result<()> { + // Wait for any task to end. They should run indefinitely, so this is an unexpected result. + // + // SAFETY: The JoinSet is definitely not empty. + let task_result = tasks.join_next_with_id().await.unwrap(); + + let task_id = match &task_result { + Ok((id, _)) => *id, + Err(err) => err.id(), + }; + let task = task_ids.get(&task_id).copied().unwrap_or("unknown"); + + // We could abort the other tasks here, but not much point as we're probably crashing the node. + task_result + .map_err(|source| BlockProducerError::JoinError { task, source }) + .map(|(_, result)| match result { + Ok(_) => Err(BlockProducerError::UnexpectedTaskCompletion { task }), + Err(source) => Err(BlockProducerError::TaskError { task, source }), + }) + .and_then(|x| x)? 
+} + // BLOCK PRODUCER API // ================================================================================================ diff --git a/crates/block-producer/src/server/tests.rs b/crates/block-producer/src/server/tests.rs index 1f315f7eb..6fb9b0c82 100644 --- a/crates/block-producer/src/server/tests.rs +++ b/crates/block-producer/src/server/tests.rs @@ -2,15 +2,20 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; +use miden_node_store::GenesisState; use miden_node_store::state::State; -use miden_node_store::{GenesisState, Store}; use miden_node_utils::clap::StorageOptions; use miden_node_utils::fee::test_fee_params; use miden_protocol::block::BlockNumber; use miden_protocol::testing::random_secret_key::random_secret_key; use url::Url; -use crate::{BlockProducer, DEFAULT_MAX_BATCHES_PER_BLOCK, DEFAULT_MAX_TXS_PER_BATCH}; +use crate::{ + DEFAULT_MAX_BATCHES_PER_BLOCK, + DEFAULT_MAX_CONCURRENT_PROOFS, + DEFAULT_MAX_TXS_PER_BATCH, + Sequencer, +}; #[tokio::test] async fn block_producer_starts_with_store_state() { @@ -18,17 +23,19 @@ async fn block_producer_starts_with_store_state() { bootstrap_store(data_directory.path()); let store = load_state(data_directory.path()).await; - let block_producer = BlockProducer { + let block_producer = Sequencer { store, validator_url: Url::parse("http://127.0.0.1:0").unwrap(), batch_prover_url: None, + block_prover_url: None, batch_interval: Duration::from_secs(3600), block_interval: Duration::from_secs(3600), max_txs_per_batch: DEFAULT_MAX_TXS_PER_BATCH, max_batches_per_block: DEFAULT_MAX_BATCHES_PER_BLOCK, + max_concurrent_proofs: DEFAULT_MAX_CONCURRENT_PROOFS, mempool_tx_capacity: NonZeroUsize::new(100).unwrap(), } - .start() + .spawn() .await .unwrap(); @@ -42,12 +49,12 @@ fn bootstrap_store(path: &std::path::Path) { let genesis_state = GenesisState::new(vec![], test_fee_params(), 1, 1, signer.public_key()); let genesis_block = genesis_state.into_block(&signer).expect("genesis block should be 
created"); - Store::bootstrap(genesis_block, path).expect("store should bootstrap"); + State::bootstrap(genesis_block, path).expect("store should bootstrap"); } async fn load_state(path: &std::path::Path) -> Arc { let (termination_ask, _termination_signal) = tokio::sync::mpsc::channel(1); - let (state, _) = State::load(path, StorageOptions::default(), termination_ask) + let state = State::load(path, StorageOptions::default(), termination_ask) .await .expect("state should load"); Arc::new(state) diff --git a/crates/rpc/src/lib.rs b/crates/rpc/src/lib.rs index 4254d631e..f47b64583 100644 --- a/crates/rpc/src/lib.rs +++ b/crates/rpc/src/lib.rs @@ -2,7 +2,7 @@ mod server; #[cfg(test)] mod tests; -pub use server::{Rpc, RpcMode}; +pub use server::{NetworkTxAuth, Rpc, RpcMode}; // CONSTANTS // ================================================================================================= diff --git a/crates/rpc/src/tests.rs b/crates/rpc/src/tests.rs index 3ec10eca2..91c90e0a4 100644 --- a/crates/rpc/src/tests.rs +++ b/crates/rpc/src/tests.rs @@ -10,7 +10,6 @@ use miden_node_proto::clients::{Builder, GrpcClient, Interceptor, RpcClient, Val use miden_node_proto::generated::rpc::api_client::ApiClient as ProtoClient; use miden_node_proto::generated::rpc::api_server::Api; use miden_node_proto::generated::{self as proto}; -use miden_node_store::Store; use miden_node_store::genesis::config::GenesisConfig; use miden_node_store::state::State; use miden_node_utils::clap::{GrpcOptionsExternal, StorageOptions}; @@ -84,7 +83,7 @@ impl TestStore { .expect("genesis block should be created"); let genesis_commitment = genesis_block.inner().header().commitment(); - Store::bootstrap(genesis_block, path).expect("store should bootstrap"); + State::bootstrap(genesis_block, path).expect("store should bootstrap"); genesis_commitment } @@ -92,7 +91,7 @@ impl TestStore { async fn load_state(path: &std::path::Path) -> Arc { let (termination_ask, _termination_signal) = 
tokio::sync::mpsc::channel(1); - let (state, _) = State::load(path, StorageOptions::default(), termination_ask) + let state = State::load(path, StorageOptions::default(), termination_ask) .await .expect("state should load"); Arc::new(state) diff --git a/crates/store/Cargo.toml b/crates/store/Cargo.toml index 4759aacc5..30111a1e3 100644 --- a/crates/store/Cargo.toml +++ b/crates/store/Cargo.toml @@ -19,7 +19,6 @@ doctest = false [dependencies] anyhow = { workspace = true } -async-trait = { workspace = true } deadpool = { features = ["managed", "rt_tokio_1"], workspace = true } deadpool-diesel = { features = ["sqlite"], workspace = true } diesel = { features = ["numeric", "sqlite"], workspace = true } @@ -27,13 +26,11 @@ fs-err = { workspace = true } hex = { workspace = true } indexmap = { workspace = true } libsqlite3-sys = { workspace = true } -miden-block-prover = { workspace = true } miden-crypto = { features = ["concurrent"], workspace = true } miden-large-smt-backend-rocksdb = { optional = true, workspace = true } miden-node-db = { workspace = true } miden-node-proto = { workspace = true } miden-node-utils = { workspace = true } -miden-remote-prover-client = { features = ["block-prover"], workspace = true } miden-standards = { workspace = true } # TODO remove `testing` from `miden-protocol`, required for `BlockProof::new_dummy` miden-protocol = { features = ["std", "testing"], workspace = true } @@ -45,9 +42,7 @@ thiserror = { workspace = true } tokio = { features = ["fs", "rt-multi-thread"], workspace = true } tokio-stream = { features = ["sync"], workspace = true } toml = { workspace = true } -tonic = { default-features = true, workspace = true } tracing = { workspace = true } -url = { workspace = true } [build-dependencies] build-rs = { workspace = true } diff --git a/crates/store/src/data_directory.rs b/crates/store/src/data_directory.rs new file mode 100644 index 000000000..44bf55798 --- /dev/null +++ b/crates/store/src/data_directory.rs @@ -0,0 +1,33 @@ 
+use std::ops::Not;
+use std::path::PathBuf;
+
+/// Represents the store's data-directory and its content paths.
+///
+/// Used to keep our filepath assumptions in one location.
+#[derive(Clone)]
+pub struct DataDirectory(PathBuf);
+
+impl DataDirectory {
+    /// Creates a new [`DataDirectory`], ensuring that the directory exists and is accessible
+    /// insofar as is possible. Fails with `NotADirectory` if `path` is not a directory.
+    pub fn load(path: PathBuf) -> std::io::Result<Self> {
+        let meta = fs_err::metadata(&path)?;
+        if meta.is_dir().not() {
+            return Err(std::io::ErrorKind::NotADirectory.into());
+        }
+
+        Ok(Self(path))
+    }
+
+    pub fn block_store_dir(&self) -> PathBuf {
+        self.0.join("blocks")
+    }
+
+    pub fn database_path(&self) -> PathBuf {
+        self.0.join("miden-store.sqlite3")
+    }
+
+    pub fn display(&self) -> std::path::Display<'_> {
+        self.0.display()
+    }
+}
diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs
index e8f98bfc7..1f26d579e 100644
--- a/crates/store/src/db/mod.rs
+++ b/crates/store/src/db/mod.rs
@@ -65,6 +65,21 @@ pub(crate) mod schema;
 
 pub type Result = std::result::Result;
 
+/// Database options used by the store state.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct DatabaseOptions {
+    /// Maximum number of SQLite connections in the connection pool.
+    pub connection_pool_size: NonZeroUsize,
+}
+
+impl Default for DatabaseOptions {
+    fn default() -> Self {
+        Self {
+            connection_pool_size: miden_node_db::default_connection_pool_size(),
+        }
+    }
+}
+
 /// The Store's database.
 ///
 /// Extends the underlying [`miden_node_db::Db`] type with functionality specific to the Store.
diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index 876d15c99..2de361521 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -27,19 +27,6 @@ use tokio::sync::oneshot::error::RecvError; use crate::account_state_forest::AccountStateForestError; use crate::db::models::conv::DatabaseTypeConversionError; -// PROOF SCHEDULER ERRORS -// ================================================================================================= - -#[derive(Debug, Error)] -pub enum ProofSchedulerError { - #[error("no proving inputs found for block {0}")] - MissingProvingInputs(BlockNumber), - #[error("failed to deserialize proving inputs for block")] - DeserializationFailed(#[source] DeserializationError), - #[error("invalid remote prover endpoint: {0}")] - InvalidProverEndpoint(String), -} - // DATABASE ERRORS // ================================================================================================= diff --git a/crates/store/src/lib.rs b/crates/store/src/lib.rs index 171731780..3b36b6858 100644 --- a/crates/store/src/lib.rs +++ b/crates/store/src/lib.rs @@ -1,20 +1,22 @@ mod account_state_forest; mod accounts; mod blocks; +mod data_directory; mod db; mod errors; pub mod genesis; mod proven_tip; -mod server; pub mod state; #[cfg(feature = "rocksdb")] pub use accounts::PersistentAccountTree; pub use accounts::{AccountTreeWithHistory, HistoricalError, InMemoryAccountTree}; +pub use data_directory::DataDirectory; pub use db::models::conv::SqlTypeConvert; pub use db::models::queries::StorageMapValuesPage; pub use db::{ AccountVaultValue, + DatabaseOptions, Db, NoteRecord, NoteSyncRecord, @@ -34,9 +36,7 @@ pub use errors::{ StateSyncError, }; pub use genesis::GenesisState; -pub use server::block_prover_client::BlockProver; -pub use server::proof_scheduler::DEFAULT_MAX_CONCURRENT_PROOFS; -pub use server::{DataDirectory, DatabaseOptions, Store, StoreMode}; +pub use state::State; /// Returns the store crate version. 
pub fn version() -> &'static str { diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs deleted file mode 100644 index da140b24d..000000000 --- a/crates/store/src/server/mod.rs +++ /dev/null @@ -1,347 +0,0 @@ -use std::num::NonZeroUsize; -use std::ops::Not; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::Context; -use miden_node_utils::clap::StorageOptions; -use miden_node_utils::spawn::spawn_blocking_in_span; -use miden_node_utils::tracing::OpenTelemetrySpanExt; -use tracing::{info, info_span, instrument}; -use url::Url; - -use crate::blocks::BlockStore; -use crate::db::Db; -use crate::errors::ApplyBlockError; -use crate::genesis::GenesisBlock; -use crate::proven_tip::ProvenTipWriter; -use crate::server::replica_sync::{BlockReplicaSync, ProofReplicaSync}; -use crate::state::{ProofCache, State}; -use crate::{BlockProver, COMPONENT}; - -pub mod block_prover_client; -mod replica_sync; - -use replica_sync::ReplicaSync as _; -pub mod proof_scheduler; - -/// Determines how the store receives new blocks. -/// -/// The two modes are mutually exclusive: a store either acts as the primary writer for locally -/// produced blocks, or it syncs blocks from an upstream store instance. -pub enum StoreMode { - /// Store mode for a sequencing node that produces local blocks. - /// - /// Runs the proof scheduler to generate block proofs. - Sequencer { - /// URL of the remote block prover. Uses a local prover if `None`. - block_prover_url: Option, - /// Maximum number of blocks proven concurrently by the proof scheduler. - max_concurrent_proofs: NonZeroUsize, - }, - - /// Store mode for a full node that syncs from an upstream RPC service. - Full { upstream_url: Url }, -} - -/// Database options used by the store. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct DatabaseOptions { - /// Maximum number of SQLite connections in the connection pool. 
- pub connection_pool_size: NonZeroUsize, -} - -impl Default for DatabaseOptions { - fn default() -> Self { - Self { - connection_pool_size: miden_node_db::default_connection_pool_size(), - } - } -} - -struct ModeSetup { - /// Keeps the loaded state alive for background tasks that subscribe to its watch channels. - _state: Arc, - /// Mode-specific background task: proof scheduler or replica sync. - mode_task: tokio::task::JoinHandle>, -} - -/// The store server. -pub struct Store { - pub mode: StoreMode, - pub data_directory: PathBuf, - pub database_options: DatabaseOptions, - pub storage_options: StorageOptions, -} - -impl Store { - /// Bootstraps the Store, creating the database state and inserting the genesis block data. - #[instrument( - target = COMPONENT, - name = "store.bootstrap", - skip_all, - err, - )] - pub fn bootstrap(genesis: GenesisBlock, data_directory: &Path) -> anyhow::Result<()> { - let data_directory = - DataDirectory::load(data_directory.to_path_buf()).with_context(|| { - format!("failed to load data directory at {}", data_directory.display()) - })?; - tracing::info!(target=COMPONENT, path=%data_directory.display(), "Data directory loaded"); - - let block_store_path = data_directory.block_store_dir(); - let block_store = - BlockStore::bootstrap(block_store_path.clone(), &genesis).with_context(|| { - format!("failed to bootstrap block store at {}", block_store_path.display()) - })?; - tracing::info!(target=COMPONENT, path=%block_store.display(), "Block store created"); - - // Create the genesis block and insert it into the database. - let database_filepath = data_directory.database_path(); - Db::bootstrap(database_filepath.clone(), genesis).with_context(|| { - format!("failed to bootstrap database at {}", database_filepath.display()) - })?; - tracing::info!(target=COMPONENT, path=%database_filepath.display(), "Database created"); - - Ok(()) - } - - /// Serves the store APIs and background tasks. 
- /// - /// Note: this blocks until the server dies. - pub async fn serve(self) -> anyhow::Result<()> { - info!(target: COMPONENT, - data_directory = ?self.data_directory, - sqlite_connection_pool_size = %self.database_options.connection_pool_size, - "Loading database"); - - let (termination_ask, mut termination_signal) = - tokio::sync::mpsc::channel::(1); - let (state, tx_proven_tip) = State::load_with_database_options( - &self.data_directory, - self.storage_options, - self.database_options, - termination_ask, - ) - .await - .context("failed to load state")?; - let _disk_monitor_task = Self::spawn_disk_monitor(self.data_directory.clone()); - - let ModeSetup { _state, mode_task } = match self.mode { - StoreMode::Sequencer { block_prover_url, max_concurrent_proofs } => { - Self::setup_sequencer_mode( - state, - block_prover_url, - max_concurrent_proofs, - tx_proven_tip, - ) - }, - StoreMode::Full { upstream_url } => Self::setup_full_mode(state, upstream_url), - }; - - tokio::select! { - // Termination signal from apply_block. - Some(err) = termination_signal.recv() => { - Err(anyhow::anyhow!("received termination signal").context(err)) - }, - // Proof scheduler or replica task, depending on mode the store is running. 
- result = mode_task => { - match result { - Ok(Ok(())) => Err(anyhow::anyhow!("task exited unexpectedly")), - Ok(Err(err)) => Err(err.context("task fatal error")), - Err(join_err) => Err(join_err).context("task panicked"), - } - } - } - } - - fn setup_sequencer_mode( - state: State, - block_prover_url: Option, - max_concurrent_proofs: NonZeroUsize, - tx_proven_tip: ProvenTipWriter, - ) -> ModeSetup { - info!(target: COMPONENT, "Starting in sequencer mode"); - - let state = Arc::new(state); - let proof_cache = state.proof_cache.clone(); - let proof_scheduler_task = Self::spawn_proof_scheduler( - &state, - block_prover_url, - max_concurrent_proofs, - tx_proven_tip, - proof_cache, - ); - - ModeSetup { - _state: state, - mode_task: proof_scheduler_task, - } - } - - fn setup_full_mode(state: State, upstream_url: Url) -> ModeSetup { - info!(target: COMPONENT, %upstream_url, "Starting in full mode"); - - let state = Arc::new(state); - let block_handle = BlockReplicaSync::new(Arc::clone(&state), upstream_url.clone()).spawn(); - let proof_handle = ProofReplicaSync::new(Arc::clone(&state), upstream_url).spawn(); - let replica_task = tokio::spawn(async move { - tokio::select! { - result = block_handle => result?, - result = proof_handle => result?, - } - }); - - ModeSetup { _state: state, mode_task: replica_task } - } - - /// Initializes the block prover client and spawns the proof scheduler as a background task. - /// - /// Returns the scheduler task handle. 
- fn spawn_proof_scheduler( - state: &State, - block_prover_url: Option, - max_concurrent_proofs: NonZeroUsize, - proven_tip: ProvenTipWriter, - proof_cache: ProofCache, - ) -> tokio::task::JoinHandle> { - let block_prover = if let Some(url) = block_prover_url { - Arc::new(BlockProver::remote(url)) - } else { - Arc::new(BlockProver::local()) - }; - - let chain_tip_rx = state.subscribe_committed_tip(); - - proof_scheduler::spawn( - block_prover, - state.block_store(), - chain_tip_rx, - proven_tip, - max_concurrent_proofs, - proof_cache, - ) - } - - /// Spawns a background task that periodically records the on-disk size of every store data path - /// as `OTel` span attributes. - fn spawn_disk_monitor(data_directory: PathBuf) -> tokio::task::JoinHandle<()> { - tokio::spawn(async move { - let mut interval = tokio::time::interval(Duration::from_mins(5)); - loop { - interval.tick().await; - let dir = data_directory.clone(); - let span = info_span!(target: COMPONENT, "measure_disk_space_usage"); - let result = - spawn_blocking_in_span(move || measure_disk_usage_bytes(&dir), span.clone()) - .await; - match result { - Ok(usage) => { - span.set_attribute("db.sqlite.size", usage.sqlite_db); - span.set_attribute("db.sqlite.wal.size", usage.sqlite_wal); - span.set_attribute("db.block_store.size", usage.block_store); - #[cfg(feature = "rocksdb")] - { - span.set_attribute("db.account_tree.size", usage.account_tree); - span.set_attribute("db.nullifier_tree.size", usage.nullifier_tree); - span.set_attribute( - "db.account_state_forest.size", - usage.account_state_forest, - ); - } - }, - Err(err) => span.set_error(&err), - } - } - }) - } -} - -// DISK USAGE HELPERS -// ================================================================================================ - -/// Byte counts for each on-disk storage component. 
-struct DiskUsage { - sqlite_db: u64, - sqlite_wal: u64, - block_store: u64, - #[cfg(feature = "rocksdb")] - account_tree: u64, - #[cfg(feature = "rocksdb")] - nullifier_tree: u64, - #[cfg(feature = "rocksdb")] - account_state_forest: u64, -} - -/// Collects on-disk byte sizes for every store data path under `data_dir`. -fn measure_disk_usage_bytes(data_dir: &Path) -> DiskUsage { - DiskUsage { - sqlite_db: path_size_bytes(&data_dir.join("miden-store.sqlite3")), - sqlite_wal: path_size_bytes(&data_dir.join("miden-store.sqlite3-wal")), - block_store: dir_size_bytes(&data_dir.join("blocks")), - #[cfg(feature = "rocksdb")] - account_tree: dir_size_bytes(&data_dir.join("accounttree")), - #[cfg(feature = "rocksdb")] - nullifier_tree: dir_size_bytes(&data_dir.join("nullifiertree")), - #[cfg(feature = "rocksdb")] - account_state_forest: dir_size_bytes(&data_dir.join("accountstateforest")), - } -} - -/// Returns the byte length of the file at `path`, or `0` if it does not exist. -fn path_size_bytes(path: &Path) -> u64 { - fs_err::metadata(path).map(|m| m.len()).unwrap_or(0) -} - -/// Returns the total byte length of all files in `path` iteratively, or `0` on any error. -fn dir_size_bytes(path: &Path) -> u64 { - let mut to_process = vec![path.to_path_buf()]; - let mut total = 0u64; - while let Some(dir) = to_process.pop() { - let Ok(entries) = fs_err::read_dir(&dir) else { - continue; - }; - for entry in entries.flatten() { - if let Ok(meta) = entry.metadata() { - if meta.is_dir() { - to_process.push(entry.path()); - } else { - total += meta.len(); - } - } - } - } - total -} - -/// Represents the store's data-directory and its content paths. -/// -/// Used to keep our filepath assumptions in one location. -#[derive(Clone)] -pub struct DataDirectory(PathBuf); - -impl DataDirectory { - /// Creates a new [`DataDirectory`], ensuring that the directory exists and is accessible - /// insofar as is possible. 
- pub fn load(path: PathBuf) -> std::io::Result { - let meta = fs_err::metadata(&path)?; - if meta.is_dir().not() { - return Err(std::io::ErrorKind::NotConnected.into()); - } - - Ok(Self(path)) - } - - pub fn block_store_dir(&self) -> PathBuf { - self.0.join("blocks") - } - - pub fn database_path(&self) -> PathBuf { - self.0.join("miden-store.sqlite3") - } - - pub fn display(&self) -> std::path::Display<'_> { - self.0.display() - } -} diff --git a/crates/store/src/server/replica_sync.rs b/crates/store/src/server/replica_sync.rs deleted file mode 100644 index 53ebed574..000000000 --- a/crates/store/src/server/replica_sync.rs +++ /dev/null @@ -1,161 +0,0 @@ -use std::sync::Arc; -use std::time::Duration; - -use anyhow::Context; -use async_trait::async_trait; -use miden_crypto::utils::Deserializable; -use miden_node_proto::generated::rpc::{ - BlockSubscriptionRequest, - ProofSubscriptionRequest, - api_client, -}; -use miden_protocol::block::{BlockNumber, SignedBlock}; -use tokio_stream::StreamExt; -use tracing::{info, warn}; -use url::Url; - -use crate::state::{Finality, State}; - -pub(crate) const RECONNECT_DELAY: Duration = Duration::from_secs(5); - -type RpcClient = api_client::ApiClient; - -// REPLICA SYNC -// ================================================================================================ - -/// Shared reconnect-loop scaffolding for replica client types. -/// -/// Implementors provide [`SYNC_KIND`](ReplicaSync::SYNC_KIND), -/// [`upstream_url`](ReplicaSync::upstream_url), and [`subscribe`](ReplicaSync::subscribe). The -/// default [`sync`](ReplicaSync::sync) opens the upstream connection and passes the client to -/// `subscribe`; [`run`](ReplicaSync::run) and [`spawn`](ReplicaSync::spawn) wrap `sync` in an -/// infinite reconnect loop. -#[async_trait] -pub(crate) trait ReplicaSync: Sized + Send + Sync + 'static { - /// Short label used in log messages, e.g. `"Block"` or `"Proof"`. 
- const SYNC_KIND: &'static str; - - /// Returns the upstream RPC URL to connect to. - fn upstream_url(&self) -> &Url; - - /// Subscribes to the upstream stream via `client` and processes events until the stream ends or - /// an error occurs. - async fn subscribe(&self, client: RpcClient) -> anyhow::Result<()>; - - /// Opens a connection to [`upstream_url`](Self::upstream_url) and calls - /// [`subscribe`](Self::subscribe) with the resulting client. - async fn sync(&self) -> anyhow::Result<()> { - let channel = tonic::transport::Channel::from_shared(self.upstream_url().to_string())? - .connect() - .await?; - self.subscribe(RpcClient::new(channel)).await - } - - /// Runs [`sync`](Self::sync) in an infinite loop, sleeping [`RECONNECT_DELAY`] on failure. - async fn run(self) -> anyhow::Result<()> { - loop { - let err = self - .sync() - .await - .and_then(|_| Err::<(), _>(anyhow::anyhow!("unexpected end of stream"))) - .unwrap_err(); - warn!( - err = %format!("{err:#}"), - retry.delay = %RECONNECT_DELAY.as_secs(), - "{} sync failed, retrying", - Self::SYNC_KIND - ); - tokio::time::sleep(RECONNECT_DELAY).await; - } - } - - /// Spawns [`run`](Self::run) as a Tokio task. - fn spawn(self) -> tokio::task::JoinHandle> { - tokio::spawn(self.run()) - } -} - -// BLOCK REPLICA SYNC -// ================================================================================================ - -/// Subscribes to blocks from an upstream RPC service and applies them locally. 
-pub struct BlockReplicaSync { - state: Arc, - upstream_url: Url, -} - -impl BlockReplicaSync { - pub fn new(state: Arc, upstream_url: Url) -> Self { - Self { state, upstream_url } - } -} - -#[async_trait] -impl ReplicaSync for BlockReplicaSync { - const SYNC_KIND: &'static str = "Block"; - - fn upstream_url(&self) -> &Url { - &self.upstream_url - } - - async fn subscribe(&self, mut client: RpcClient) -> anyhow::Result<()> { - let block_from = self.state.chain_tip(Finality::Committed).await.child().as_u32(); - info!(block_from, upstream_url = %self.upstream_url, "Connecting to upstream RPC for blocks"); - - let mut stream = client - .block_subscription(BlockSubscriptionRequest { block_from }) - .await? - .into_inner(); - - while let Some(result) = stream.next().await { - let event = result?; - let block = SignedBlock::read_from_bytes(&event.block) - .context("failed to deserialize block from upstream")?; - self.state.apply_block(block).await?; - } - - Ok(()) - } -} - -// PROOF REPLICA SYNC -// ================================================================================================ - -/// Subscribes to proofs from an upstream RPC service and applies them locally. -pub struct ProofReplicaSync { - state: Arc, - upstream_url: Url, -} - -impl ProofReplicaSync { - pub fn new(state: Arc, upstream_url: Url) -> Self { - Self { state, upstream_url } - } -} - -#[async_trait] -impl ReplicaSync for ProofReplicaSync { - const SYNC_KIND: &'static str = "Proof"; - - fn upstream_url(&self) -> &Url { - &self.upstream_url - } - - async fn subscribe(&self, mut client: RpcClient) -> anyhow::Result<()> { - let block_from = self.state.chain_tip(Finality::Proven).await.as_u32().saturating_add(1); - info!(block_from, upstream_url = %self.upstream_url, "Connecting to upstream RPC for proofs"); - - let mut stream = client - .proof_subscription(ProofSubscriptionRequest { block_from }) - .await? 
- .into_inner(); - - while let Some(result) = stream.next().await { - let event = result?; - let block_num = BlockNumber::from(event.block_num); - self.state.apply_proof(block_num, event.proof).await?; - } - - Ok(()) - } -} diff --git a/crates/store/src/state/bootstrap.rs b/crates/store/src/state/bootstrap.rs new file mode 100644 index 000000000..69d00a64c --- /dev/null +++ b/crates/store/src/state/bootstrap.rs @@ -0,0 +1,43 @@ +use std::path::Path; + +use anyhow::Context; +use tracing::instrument; + +use crate::blocks::BlockStore; +use crate::db::Db; +use crate::genesis::GenesisBlock; +use crate::state::State; +use crate::{COMPONENT, DataDirectory}; + +impl State { + /// Bootstraps the store state, creating the database state and inserting the genesis block + /// data. + #[instrument( + target = COMPONENT, + name = "store.bootstrap", + skip_all, + err, + )] + pub fn bootstrap(genesis: GenesisBlock, data_directory: &Path) -> anyhow::Result<()> { + let data_directory = + DataDirectory::load(data_directory.to_path_buf()).with_context(|| { + format!("failed to load data directory at {}", data_directory.display()) + })?; + tracing::info!(target=COMPONENT, path=%data_directory.display(), "Data directory loaded"); + + let block_store_path = data_directory.block_store_dir(); + let block_store = + BlockStore::bootstrap(block_store_path.clone(), &genesis).with_context(|| { + format!("failed to bootstrap block store at {}", block_store_path.display()) + })?; + tracing::info!(target=COMPONENT, path=%block_store.display(), "Block store created"); + + let database_filepath = data_directory.database_path(); + Db::bootstrap(database_filepath.clone(), genesis).with_context(|| { + format!("failed to bootstrap database at {}", database_filepath.display()) + })?; + tracing::info!(target=COMPONENT, path=%database_filepath.display(), "Database created"); + + Ok(()) + } +} diff --git a/crates/store/src/state/disk_monitor.rs b/crates/store/src/state/disk_monitor.rs new file mode 100644 
index 000000000..ec7eaca9c --- /dev/null +++ b/crates/store/src/state/disk_monitor.rs @@ -0,0 +1,100 @@ +use std::path::Path; +use std::time::Duration; + +use miden_node_utils::spawn::spawn_blocking_in_span; +use miden_node_utils::tracing::OpenTelemetrySpanExt; +use tracing::info_span; + +use crate::COMPONENT; +use crate::state::State; + +impl State { + /// Spawns a background task that periodically records the on-disk size of every store data path + /// as `OTel` span attributes. + pub fn spawn_disk_monitor(&self) -> tokio::task::JoinHandle<()> { + let data_directory = self.data_directory.clone(); + + tokio::spawn(async move { + let mut interval = tokio::time::interval(Duration::from_mins(5)); + loop { + interval.tick().await; + let dir = data_directory.clone(); + let span = info_span!(target: COMPONENT, "measure_disk_space_usage"); + let result = + spawn_blocking_in_span(move || measure_disk_usage_bytes(&dir), span.clone()) + .await; + match result { + Ok(usage) => { + span.set_attribute("db.sqlite.size", usage.sqlite_db); + span.set_attribute("db.sqlite.wal.size", usage.sqlite_wal); + span.set_attribute("db.block_store.size", usage.block_store); + #[cfg(feature = "rocksdb")] + { + span.set_attribute("db.account_tree.size", usage.account_tree); + span.set_attribute("db.nullifier_tree.size", usage.nullifier_tree); + span.set_attribute( + "db.account_state_forest.size", + usage.account_state_forest, + ); + } + }, + Err(err) => span.set_error(&err), + } + } + }) + } +} + +/// Byte counts for each on-disk storage component. +struct DiskUsage { + sqlite_db: u64, + sqlite_wal: u64, + block_store: u64, + #[cfg(feature = "rocksdb")] + account_tree: u64, + #[cfg(feature = "rocksdb")] + nullifier_tree: u64, + #[cfg(feature = "rocksdb")] + account_state_forest: u64, +} + +/// Collects on-disk byte sizes for every store data path under `data_dir`. 
+fn measure_disk_usage_bytes(data_dir: &Path) -> DiskUsage { + DiskUsage { + sqlite_db: path_size_bytes(&data_dir.join("miden-store.sqlite3")), + sqlite_wal: path_size_bytes(&data_dir.join("miden-store.sqlite3-wal")), + block_store: dir_size_bytes(&data_dir.join("blocks")), + #[cfg(feature = "rocksdb")] + account_tree: dir_size_bytes(&data_dir.join("accounttree")), + #[cfg(feature = "rocksdb")] + nullifier_tree: dir_size_bytes(&data_dir.join("nullifiertree")), + #[cfg(feature = "rocksdb")] + account_state_forest: dir_size_bytes(&data_dir.join("accountstateforest")), + } +} + +/// Returns the byte length of the file at `path`, or `0` if it does not exist. +fn path_size_bytes(path: &Path) -> u64 { + fs_err::metadata(path).map(|m| m.len()).unwrap_or(0) +} + +/// Returns the total byte length of all files in `path` iteratively, or `0` on any error. +fn dir_size_bytes(path: &Path) -> u64 { + let mut to_process = vec![path.to_path_buf()]; + let mut total = 0u64; + while let Some(dir) = to_process.pop() { + let Ok(entries) = fs_err::read_dir(&dir) else { + continue; + }; + for entry in entries.flatten() { + if let Ok(meta) = entry.metadata() { + if meta.is_dir() { + to_process.push(entry.path()); + } else { + total += meta.len(); + } + } + } + } + total +} diff --git a/crates/store/src/state/mod.rs b/crates/store/src/state/mod.rs index 77c26c90d..5a8d29740 100644 --- a/crates/store/src/state/mod.rs +++ b/crates/store/src/state/mod.rs @@ -6,7 +6,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::num::NonZeroUsize; use std::ops::RangeInclusive; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::Arc; use miden_node_proto::domain::account::{ @@ -93,6 +93,8 @@ pub use subscription::{ mod apply_block; mod apply_proof; +mod bootstrap; +mod disk_monitor; mod sync_state; // FINALITY @@ -149,6 +151,9 @@ impl InnerState { /// The rollup state. pub struct State { + /// Root directory containing the store's on-disk data. 
+ data_directory: PathBuf, + /// The database which stores block headers, nullifiers, notes, and the latest states of /// accounts. db: Arc, @@ -193,15 +198,14 @@ impl State { /// Loads the state from the data directory. /// - /// Returns `(Self, ProvenTipWriter)`. The `ProvenTipWriter` is used by the proof scheduler - /// (in sequencer mode) to advance the proven tip; callers can subscribe to tip changes - /// via the methods on `Self`. + /// The loaded state owns all store data structures and exposes subscription methods for + /// sequencer and replica tasks. #[instrument(target = COMPONENT, skip_all)] pub async fn load( data_path: &Path, storage_options: StorageOptions, termination_ask: tokio::sync::mpsc::Sender, - ) -> Result<(Self, ProvenTipWriter), StateInitializationError> { + ) -> Result { Self::load_with_database_options( data_path, storage_options, @@ -213,16 +217,15 @@ impl State { /// Loads the state from the data directory using explicit database options. /// - /// Returns `(Self, ProvenTipWriter)`. The `ProvenTipWriter` is used by the proof scheduler - /// (in sequencer mode) to advance the proven tip; callers can subscribe to tip changes - /// via the methods on `Self`. + /// The loaded state owns all store data structures and exposes subscription methods for + /// sequencer and replica tasks. #[instrument(target = COMPONENT, skip_all)] pub async fn load_with_database_options( data_path: &Path, storage_options: StorageOptions, database_options: DatabaseOptions, termination_ask: tokio::sync::mpsc::Sender, - ) -> Result<(Self, ProvenTipWriter), StateInitializationError> { + ) -> Result { let data_directory = DataDirectory::load(data_path.to_path_buf()) .map_err(StateInitializationError::DataDirectoryLoadError)?; @@ -291,33 +294,34 @@ impl State { // Committed-tip watch: fires after each successful apply_block. 
let (committed_tip_tx, _rx) = watch::channel(latest_block_num); - Ok(( - Self { - db, - block_store, - inner, - forest, - writer, - termination_ask, - proven_tip: proven_tip.clone(), - committed_tip_tx, - block_cache: BlockCache::new(BLOCK_CACHE_CAPACITY), - proof_cache: ProofCache::new(PROOF_CACHE_CAPACITY), - }, + Ok(Self { + data_directory: data_path.to_path_buf(), + db, + block_store, + inner, + forest, + writer, + termination_ask, proven_tip, - )) - } - - /// Returns the block store. - pub(crate) fn block_store(&self) -> Arc { - Arc::clone(&self.block_store) + committed_tip_tx, + block_cache: BlockCache::new(BLOCK_CACHE_CAPACITY), + proof_cache: ProofCache::new(PROOF_CACHE_CAPACITY), + }) } /// Returns a watch receiver that wakes every time a new block is committed. - pub(crate) fn subscribe_committed_tip(&self) -> watch::Receiver { + pub fn subscribe_committed_tip(&self) -> watch::Receiver { self.committed_tip_tx.subscribe() } + /// Loads serialized block proving inputs from the block store. + pub async fn load_proving_inputs( + &self, + block_num: BlockNumber, + ) -> std::io::Result>> { + self.block_store.load_proving_inputs(block_num).await + } + /// Returns a watch receiver that wakes every time the proven-in-sequence tip advances. 
pub(crate) fn subscribe_proven_tip(&self) -> watch::Receiver { self.proven_tip.subscribe() From 7983818cc4f56af247c8aa61951dacc4214729c5 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Thu, 28 May 2026 17:23:34 +0200 Subject: [PATCH 02/16] Task supervisor --- bin/network-monitor/src/monitor/tasks.rs | 31 ++++---- bin/node/src/commands/modes.rs | 36 +++++----- bin/ntx-builder/src/builder.rs | 20 ++++-- crates/block-producer/src/errors.rs | 9 +-- crates/block-producer/src/rpc_sync.rs | 24 +++---- crates/block-producer/src/server/mod.rs | 90 +++++++++--------------- crates/utils/src/lib.rs | 1 + crates/utils/src/tasks.rs | 73 +++++++++++++++++++ 8 files changed, 164 insertions(+), 120 deletions(-) create mode 100644 crates/utils/src/tasks.rs diff --git a/bin/network-monitor/src/monitor/tasks.rs b/bin/network-monitor/src/monitor/tasks.rs index 13d25413e..c1f453a6b 100644 --- a/bin/network-monitor/src/monitor/tasks.rs +++ b/bin/network-monitor/src/monitor/tasks.rs @@ -1,14 +1,13 @@ //! Task management for the network monitor. -use std::collections::HashMap; use std::sync::Arc; use std::sync::atomic::AtomicU64; use anyhow::Result; use miden_node_proto::clients::RemoteProverClient; +use miden_node_utils::tasks::Tasks as SupervisedTasks; use tokio::sync::watch::Receiver; use tokio::sync::{Mutex, watch}; -use tokio::task::{Id, JoinSet}; use tracing::debug; use crate::COMPONENT; @@ -24,20 +23,16 @@ use crate::service::{Service, build_tls_client}; use crate::status::{RpcService, ServiceStatus}; use crate::validator::ValidatorService; -/// Task management structure that encapsulates `JoinSet` and component names. +/// Task management structure that supervises named component tasks. #[derive(Default)] pub struct Tasks { - handles: JoinSet<()>, - names: HashMap, + handles: SupervisedTasks<()>, } impl Tasks { /// Create a new Tasks instance. 
pub fn new() -> Self { - Self { - handles: JoinSet::new(), - names: HashMap::new(), - } + Self { handles: SupervisedTasks::new() } } /// Spawn the RPC status checker task. @@ -178,17 +173,15 @@ impl Tasks { pub fn spawn_service(&mut self, svc: S) -> Receiver { let (tx, rx) = watch::channel(svc.initial_status()); let service_name = svc.name().to_string(); - let id = self.handles.spawn(async move { svc.run(tx).await }).id(); + self.handles.spawn(service_name.clone(), async move { svc.run(tx).await }); debug!(target: COMPONENT, service = %service_name, "spawned service"); - self.names.insert(id, service_name); rx } /// Spawn the HTTP frontend server. pub fn spawn_http_server(&mut self, server_state: ServerState, config: &MonitorConfig) { let config = config.clone(); - let id = self.handles.spawn(async move { serve(server_state, config).await }).id(); - self.names.insert(id, "frontend".to_string()); + self.handles.spawn("frontend", async move { serve(server_state, config).await }); } /// Handles the failure of a task. @@ -196,14 +189,14 @@ impl Tasks { /// Waits for any task to complete or fail and returns an error. Since components are /// expected to run indefinitely, any task completion is treated as fatal. 
pub async fn handle_failure(&mut self) -> Result<()> { - let component_result = - self.handles.join_next_with_id().await.expect("join set is not empty"); + let component_result = self.handles.join_next().await.expect("join set is not empty"); + self.handles.abort_all(); - let (id, err) = match component_result { - Ok((id, ())) => (id, anyhow::anyhow!("component completed unexpectedly")), - Err(join_err) => (join_err.id(), anyhow::Error::from(join_err)), + let component_name = component_result.name; + let err = match component_result.result { + Ok(()) => anyhow::anyhow!("component completed unexpectedly"), + Err(join_err) => anyhow::Error::from(join_err), }; - let component_name = self.names.get(&id).map_or("unknown", String::as_str); Err(err.context(format!("component {component_name} failed"))) } diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 12b55edc2..9a28ca6f0 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -5,8 +5,8 @@ use miden_node_block_producer::{RpcSync, Sequencer}; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; use miden_node_rpc::{NetworkTxAuth, Rpc, RpcMode}; use miden_node_store::{ApplyBlockError, State}; +use miden_node_utils::tasks::{TaskResult, Tasks}; use tokio::net::TcpListener; -use tokio::task::JoinError; use tonic::metadata::AsciiMetadataValue; use url::Url; @@ -66,17 +66,18 @@ impl SequencerCommand { grpc_options: runtime.external_grpc_options, network_tx_auth, }; - let rpc_task = tokio::spawn(rpc.serve()); + let mut tasks = Tasks::new(); + tasks.spawn("sequencer", sequencer.wait()); + tasks.spawn("RPC server", rpc.serve()); tokio::select! 
{ Some(err) = termination_signal.recv() => { + tasks.abort_all(); Err(anyhow::anyhow!("received termination signal").context(err)) }, - result = sequencer.wait() => { - result.context("sequencer task stopped") - }, - result = rpc_task => { - task_result("RPC server", result) + result = tasks.join_next() => { + tasks.abort_all(); + task_result(result.expect("node tasks should be running")) }, } } @@ -148,17 +149,18 @@ impl FullNodeCommand { grpc_options: runtime.external_grpc_options, network_tx_auth, }; - let rpc_task = tokio::spawn(rpc.serve()); + let mut tasks = Tasks::new(); + tasks.spawn("RPC sync", async move { sync_task.await? }); + tasks.spawn("RPC server", rpc.serve()); tokio::select! { Some(err) = termination_signal.recv() => { + tasks.abort_all(); Err(anyhow::anyhow!("received termination signal").context(err)) }, - result = sync_task => { - task_result("RPC sync", result) - }, - result = rpc_task => { - task_result("RPC server", result) + result = tasks.join_next() => { + tasks.abort_all(); + task_result(result.expect("node tasks should be running")) }, } } @@ -212,11 +214,9 @@ async fn bind_rpc(listen: std::net::SocketAddr) -> anyhow::Result { .with_context(|| format!("failed to bind RPC listener to {listen}")) } -fn task_result( - task: &'static str, - result: Result, JoinError>, -) -> anyhow::Result<()> { - match result { +fn task_result(task_result: TaskResult>) -> anyhow::Result<()> { + let task = task_result.name; + match task_result.result { Ok(Ok(())) => Err(anyhow::anyhow!("{task} exited unexpectedly")), Ok(Err(err)) => Err(err).with_context(|| format!("{task} fatal error")), Err(err) => Err(err).with_context(|| format!("{task} panicked")), diff --git a/bin/ntx-builder/src/builder.rs b/bin/ntx-builder/src/builder.rs index 1d024176f..460a45275 100644 --- a/bin/ntx-builder/src/builder.rs +++ b/bin/ntx-builder/src/builder.rs @@ -3,10 +3,10 @@ use std::sync::Arc; use anyhow::Context; use futures::Stream; +use miden_node_utils::tasks::Tasks; use 
miden_protocol::block::{BlockNumber, SignedBlock}; use tokio::net::TcpListener; use tokio::sync::mpsc; -use tokio::task::JoinSet; use tokio_stream::StreamExt; use crate::NtxBuilderConfig; @@ -101,20 +101,28 @@ impl NetworkTransactionBuilder { /// Runs the network transaction builder event loop until a fatal error occurs. pub async fn run(self, listener: TcpListener) -> anyhow::Result<()> { - let mut join_set = JoinSet::new(); + let mut tasks = Tasks::new(); // Start the gRPC server. let server = NtxBuilderRpcServer::new(self.db.clone(), self.config.max_note_attempts); - join_set.spawn(async move { + tasks.spawn("grpc-server", async move { server.serve(listener).await.context("ntx-builder gRPC server failed") }); - join_set.spawn(self.run_event_loop()); + tasks.spawn("event-loop", self.run_event_loop()); // Wait for either the event loop or the gRPC server to complete. Any completion is treated // as fatal. - if let Some(result) = join_set.join_next().await { - result.context("ntx-builder task panicked")??; + if let Some(task_result) = tasks.join_next().await { + tasks.abort_all(); + let task = task_result.name; + match task_result.result { + Ok(Ok(())) => anyhow::bail!("ntx-builder task {task} completed unexpectedly"), + Ok(Err(err)) => { + Err(err).with_context(|| format!("ntx-builder task {task} failed"))?; + }, + Err(err) => Err(err).context("ntx-builder task panicked")?, + } } Ok(()) diff --git a/crates/block-producer/src/errors.rs b/crates/block-producer/src/errors.rs index 596f4329f..c01dd8635 100644 --- a/crates/block-producer/src/errors.rs +++ b/crates/block-producer/src/errors.rs @@ -27,18 +27,15 @@ use crate::validator::ValidatorError; pub enum BlockProducerError { /// A block-producer task completed although it should have ran indefinitely. #[error("task {task} completed unexpectedly")] - UnexpectedTaskCompletion { task: &'static str }, + UnexpectedTaskCompletion { task: String }, /// A block-producer task panic'd. 
#[error("task {task} panic'd")] - JoinError { task: &'static str, source: JoinError }, + JoinError { task: String, source: JoinError }, /// A block-producer task reported a transport error. #[error("task {task} failed")] - TaskError { - task: &'static str, - source: anyhow::Error, - }, + TaskError { task: String, source: anyhow::Error }, } // Proof scheduler errors diff --git a/crates/block-producer/src/rpc_sync.rs b/crates/block-producer/src/rpc_sync.rs index 757fefcd2..949efcf76 100644 --- a/crates/block-producer/src/rpc_sync.rs +++ b/crates/block-producer/src/rpc_sync.rs @@ -5,6 +5,7 @@ use anyhow::Context; use miden_node_proto::clients::RpcClient; use miden_node_proto::generated::rpc::{BlockSubscriptionRequest, ProofSubscriptionRequest}; use miden_node_store::state::{Finality, State}; +use miden_node_utils::tasks::Tasks; use miden_protocol::block::{BlockNumber, SignedBlock}; use miden_protocol::utils::serde::Deserializable; use tokio_stream::StreamExt; @@ -25,6 +26,7 @@ impl RpcSync { /// Spawns the block and proof synchronization loops as a supervised Tokio task. pub fn spawn(self) -> tokio::task::JoinHandle> { tokio::spawn(async move { + let mut tasks = Tasks::new(); let block_sync = BlockSync { state: Arc::clone(&self.state), source_rpc: self.source_rpc.clone(), @@ -34,12 +36,16 @@ impl RpcSync { source_rpc: self.source_rpc, }; - let block_handle = block_sync.spawn(); - let proof_handle = proof_sync.spawn(); + tasks.spawn("block-sync", block_sync.run()); + tasks.spawn("proof-sync", proof_sync.run()); - tokio::select! 
{ - result = block_handle => result?, - result = proof_handle => result?, + let task_result = tasks.join_next().await.expect("sync tasks should be running"); + tasks.abort_all(); + let task = task_result.name; + match task_result.result { + Ok(Ok(())) => anyhow::bail!("{task} exited unexpectedly"), + Ok(Err(err)) => Err(err).with_context(|| format!("{task} fatal error")), + Err(err) => Err(err).with_context(|| format!("{task} panicked")), } }) } @@ -59,10 +65,6 @@ struct ProofSync { } impl BlockSync { - fn spawn(self) -> tokio::task::JoinHandle> { - tokio::spawn(self.run()) - } - async fn run(self) -> anyhow::Result<()> { loop { let err = self @@ -101,10 +103,6 @@ impl BlockSync { } impl ProofSync { - fn spawn(self) -> tokio::task::JoinHandle> { - tokio::spawn(self.run()) - } - async fn run(self) -> anyhow::Result<()> { loop { let err = self diff --git a/crates/block-producer/src/server/mod.rs b/crates/block-producer/src/server/mod.rs index fa723143a..138baf618 100644 --- a/crates/block-producer/src/server/mod.rs +++ b/crates/block-producer/src/server/mod.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; @@ -6,11 +5,12 @@ use std::time::Duration; use anyhow::Result; use miden_node_store::state::{Finality, State}; use miden_node_utils::formatting::{format_input_notes, format_output_notes}; +use miden_node_utils::tasks::{TaskResult, Tasks}; use miden_protocol::batch::ProposedBatch; use miden_protocol::block::BlockNumber; use miden_protocol::transaction::ProvenTransaction; use tokio::sync::{Mutex, RwLock}; -use tokio::task::{Id, JoinHandle, JoinSet}; +use tokio::task::JoinHandle; use tracing::{debug, info, instrument}; use url::Url; @@ -132,41 +132,24 @@ impl Sequencer { // // These should run forever, so we combine them into a joinset so that if // any complete or fail, we can shutdown the rest (somewhat) gracefully. 
- let mut tasks = JoinSet::new(); - - let batch_builder_id = tasks - .spawn({ - let mempool = mempool.clone(); - async { batch_builder.run(mempool).await } - }) - .id(); - let block_builder_id = tasks - .spawn({ - let mempool = mempool.clone(); - async { block_builder.run(mempool).await } - }) - .id(); - let proof_scheduler_id = tasks - .spawn({ - let store = Arc::clone(&api.store); - async move { - proof_scheduler::run( - block_prover, - store, - chain_tip_rx, - self.max_concurrent_proofs, - ) - .await - } - }) - .id(); + let mut tasks = Tasks::new(); - let task_ids = HashMap::from([ - (batch_builder_id, "batch-builder"), - (block_builder_id, "block-builder"), - (proof_scheduler_id, "proof-scheduler"), - ]); - let task = tokio::spawn(wait_for_tasks(tasks, task_ids)); + tasks.spawn("batch-builder", { + let mempool = mempool.clone(); + async { batch_builder.run(mempool).await } + }); + tasks.spawn("block-builder", { + let mempool = mempool.clone(); + async { block_builder.run(mempool).await } + }); + tasks.spawn("proof-scheduler", { + let store = Arc::clone(&api.store); + async move { + proof_scheduler::run(block_prover, store, chain_tip_rx, self.max_concurrent_proofs) + .await + } + }); + let task = tokio::spawn(wait_for_tasks(tasks)); Ok(SequencerHandle { api, task }) } @@ -198,29 +181,20 @@ impl SequencerHandle { } } -async fn wait_for_tasks( - mut tasks: JoinSet>, - task_ids: HashMap, -) -> anyhow::Result<()> { +async fn wait_for_tasks(mut tasks: Tasks>) -> anyhow::Result<()> { // Wait for any task to end. They should run indefinitely, so this is an unexpected result. - // - // SAFETY: The JoinSet is definitely not empty. - let task_result = tasks.join_next_with_id().await.unwrap(); - - let task_id = match &task_result { - Ok((id, _)) => *id, - Err(err) => err.id(), - }; - let task = task_ids.get(&task_id).copied().unwrap_or("unknown"); - - // We could abort the other tasks here, but not much point as we're probably crashing the node. 
- task_result - .map_err(|source| BlockProducerError::JoinError { task, source }) - .map(|(_, result)| match result { - Ok(_) => Err(BlockProducerError::UnexpectedTaskCompletion { task }), - Err(source) => Err(BlockProducerError::TaskError { task, source }), - }) - .and_then(|x| x)? + let task_result = tasks.join_next().await.expect("join set is not empty"); + tasks.abort_all(); + map_task_result(task_result) +} + +fn map_task_result(task_result: TaskResult>) -> anyhow::Result<()> { + let task = task_result.name; + match task_result.result { + Ok(Ok(())) => Err(BlockProducerError::UnexpectedTaskCompletion { task })?, + Ok(Err(source)) => Err(BlockProducerError::TaskError { task, source })?, + Err(source) => Err(BlockProducerError::JoinError { task, source })?, + } } // BLOCK PRODUCER API diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index 8732c72bf..41e4f9ae6 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -12,6 +12,7 @@ pub mod logging; pub mod lru_cache; pub mod panic; pub mod spawn; +pub mod tasks; pub mod tracing; pub trait ErrorReport: std::error::Error { diff --git a/crates/utils/src/tasks.rs b/crates/utils/src/tasks.rs new file mode 100644 index 000000000..28b3a09ca --- /dev/null +++ b/crates/utils/src/tasks.rs @@ -0,0 +1,73 @@ +use std::collections::HashMap; +use std::future::Future; + +use tokio::task::{Id, JoinError, JoinSet}; + +/// Result of a completed named task. +pub struct TaskResult { + /// Human-readable task name supplied when the task was spawned. + pub name: String, + /// Task output, or a join error if the task panicked or was cancelled. + pub result: Result, +} + +/// A named task set for supervising concurrently-running Tokio tasks. +pub struct Tasks { + handles: JoinSet, + names: HashMap, +} + +impl Default for Tasks { + fn default() -> Self { + Self { + handles: JoinSet::new(), + names: HashMap::new(), + } + } +} + +impl Tasks { + /// Creates an empty task set. 
+ pub fn new() -> Self { + Self::default() + } + + /// Spawns a named task into the set. + pub fn spawn( + &mut self, + name: impl Into, + task: impl Future + Send + 'static, + ) -> Id { + let id = self.handles.spawn(task).id(); + self.names.insert(id, name.into()); + id + } + + /// Waits for the next task to complete. + pub async fn join_next(&mut self) -> Option> { + let result = self.handles.join_next_with_id().await?; + let id = match &result { + Ok((id, _)) => *id, + Err(err) => err.id(), + }; + let name = self.names.remove(&id).unwrap_or_else(|| "unknown".to_string()); + let result = result.map(|(_, output)| output); + + Some(TaskResult { name, result }) + } + + /// Aborts all tasks still running in the set. + pub fn abort_all(&mut self) { + self.handles.abort_all(); + } + + /// Returns `true` if no tasks are currently in the set. + pub fn is_empty(&self) -> bool { + self.handles.is_empty() + } + + /// Returns the number of tasks currently in the set. + pub fn len(&self) -> usize { + self.handles.len() + } +} From 361dddfca047d4a5dff108755dd81a91056f5886 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Thu, 28 May 2026 18:51:37 +0200 Subject: [PATCH 03/16] fixup --- bin/node/src/commands/modes.rs | 40 +++++++---------------- bin/stress-test/src/seeding/mod.rs | 3 +- bin/stress-test/src/store/mod.rs | 5 +-- crates/block-producer/src/server/tests.rs | 5 +-- crates/rpc/src/tests.rs | 5 +-- crates/store/src/state/apply_block.rs | 8 ----- crates/store/src/state/mod.rs | 16 ++------- 7 files changed, 17 insertions(+), 65 deletions(-) diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 9a28ca6f0..977f73abf 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -4,7 +4,7 @@ use anyhow::Context; use miden_node_block_producer::{RpcSync, Sequencer}; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; use 
miden_node_rpc::{NetworkTxAuth, Rpc, RpcMode}; -use miden_node_store::{ApplyBlockError, State}; +use miden_node_store::State; use miden_node_utils::tasks::{TaskResult, Tasks}; use tokio::net::TcpListener; use tonic::metadata::AsciiMetadataValue; @@ -38,7 +38,7 @@ impl SequencerCommand { let runtime = self.runtime.runtime_config(&self.store); self.block_producer.validate()?; let network_tx_auth = self.runtime.rpc.network_tx_auth()?; - let (state, mut termination_signal) = load_state(&runtime).await?; + let state = load_state(&runtime).await?; let _disk_monitor = state.spawn_disk_monitor(); let sequencer = Sequencer { @@ -70,16 +70,9 @@ impl SequencerCommand { tasks.spawn("sequencer", sequencer.wait()); tasks.spawn("RPC server", rpc.serve()); - tokio::select! { - Some(err) = termination_signal.recv() => { - tasks.abort_all(); - Err(anyhow::anyhow!("received termination signal").context(err)) - }, - result = tasks.join_next() => { - tasks.abort_all(); - task_result(result.expect("node tasks should be running")) - }, - } + let result = tasks.join_next().await; + tasks.abort_all(); + task_result(result.expect("node tasks should be running")) } } @@ -133,7 +126,7 @@ impl FullNodeCommand { let runtime = self.runtime.runtime_config(&self.store); let source_rpc = self.sync.source_rpc_client(); let network_tx_auth = self.runtime.rpc.network_tx_auth()?; - let (state, mut termination_signal) = load_state(&runtime).await?; + let state = load_state(&runtime).await?; let _disk_monitor = state.spawn_disk_monitor(); let sync_task = RpcSync { @@ -153,16 +146,9 @@ impl FullNodeCommand { tasks.spawn("RPC sync", async move { sync_task.await? }); tasks.spawn("RPC server", rpc.serve()); - tokio::select! 
{ - Some(err) = termination_signal.recv() => { - tasks.abort_all(); - Err(anyhow::anyhow!("received termination signal").context(err)) - }, - result = tasks.join_next() => { - tasks.abort_all(); - task_result(result.expect("node tasks should be running")) - }, - } + let result = tasks.join_next().await; + tasks.abort_all(); + task_result(result.expect("node tasks should be running")) } } @@ -192,20 +178,16 @@ impl super::rpc::RpcOptions { } } -async fn load_state( - runtime: &RuntimeConfig, -) -> anyhow::Result<(Arc, tokio::sync::mpsc::Receiver)> { - let (termination_ask, termination_signal) = tokio::sync::mpsc::channel::(1); +async fn load_state(runtime: &RuntimeConfig) -> anyhow::Result> { let state = State::load_with_database_options( &runtime.data_directory, runtime.storage_options.clone(), runtime.database_options, - termination_ask, ) .await .context("failed to load state")?; - Ok((Arc::new(state), termination_signal)) + Ok(Arc::new(state)) } async fn bind_rpc(listen: std::net::SocketAddr) -> anyhow::Result { diff --git a/bin/stress-test/src/seeding/mod.rs b/bin/stress-test/src/seeding/mod.rs index 1c0fe2e4a..cfe56b3bf 100644 --- a/bin/stress-test/src/seeding/mod.rs +++ b/bin/stress-test/src/seeding/mod.rs @@ -837,8 +837,7 @@ pub async fn start_store(data_directory: PathBuf) -> Arc { } async fn load_state(data_directory: PathBuf) -> Arc { - let (termination_ask, _termination_signal) = tokio::sync::mpsc::channel(1); - let state = State::load(&data_directory, StorageOptions::bench(), termination_ask) + let state = State::load(&data_directory, StorageOptions::bench()) .await .expect("store state should load"); Arc::new(state) diff --git a/bin/stress-test/src/store/mod.rs b/bin/stress-test/src/store/mod.rs index 2605b56a2..4e2cf8d5c 100644 --- a/bin/stress-test/src/store/mod.rs +++ b/bin/stress-test/src/store/mod.rs @@ -675,10 +675,7 @@ fn transaction_record_to_proto( pub async fn load_state(data_directory: &Path) { let start = Instant::now(); - let 
(termination_ask, _) = tokio::sync::mpsc::channel(1); - let _state = State::load(data_directory, StorageOptions::default(), termination_ask) - .await - .unwrap(); + let _state = State::load(data_directory, StorageOptions::default()).await.unwrap(); let elapsed = start.elapsed(); // Get database path and run SQL commands to count records diff --git a/crates/block-producer/src/server/tests.rs b/crates/block-producer/src/server/tests.rs index 6fb9b0c82..da9a50db1 100644 --- a/crates/block-producer/src/server/tests.rs +++ b/crates/block-producer/src/server/tests.rs @@ -53,9 +53,6 @@ fn bootstrap_store(path: &std::path::Path) { } async fn load_state(path: &std::path::Path) -> Arc { - let (termination_ask, _termination_signal) = tokio::sync::mpsc::channel(1); - let state = State::load(path, StorageOptions::default(), termination_ask) - .await - .expect("state should load"); + let state = State::load(path, StorageOptions::default()).await.expect("state should load"); Arc::new(state) } diff --git a/crates/rpc/src/tests.rs b/crates/rpc/src/tests.rs index 91c90e0a4..7bebc604b 100644 --- a/crates/rpc/src/tests.rs +++ b/crates/rpc/src/tests.rs @@ -90,10 +90,7 @@ impl TestStore { } async fn load_state(path: &std::path::Path) -> Arc { - let (termination_ask, _termination_signal) = tokio::sync::mpsc::channel(1); - let state = State::load(path, StorageOptions::default(), termination_ask) - .await - .expect("state should load"); + let state = State::load(path, StorageOptions::default()).await.expect("state should load"); Arc::new(state) } diff --git a/crates/store/src/state/apply_block.rs b/crates/store/src/state/apply_block.rs index aa473d1bd..08ec80f13 100644 --- a/crates/store/src/state/apply_block.rs +++ b/crates/store/src/state/apply_block.rs @@ -276,11 +276,6 @@ impl State { .map_err(InvalidBlockError::NewBlockNullifierAlreadySpent)?; if nullifier_tree_update.as_mutation_set().root() != header.nullifier_root() { - // We do our best here to notify the serve routine, if it 
doesn't care (dropped the - // receiver) we can't do much. - let _ = self.termination_ask.try_send(ApplyBlockError::InvalidBlockError( - InvalidBlockError::NewBlockInvalidNullifierRoot, - )); return Err(InvalidBlockError::NewBlockInvalidNullifierRoot.into()); } @@ -302,9 +297,6 @@ impl State { })?; if account_tree_update.as_mutation_set().root() != header.account_root() { - let _ = self.termination_ask.try_send(ApplyBlockError::InvalidBlockError( - InvalidBlockError::NewBlockInvalidAccountRoot, - )); return Err(InvalidBlockError::NewBlockInvalidAccountRoot.into()); } diff --git a/crates/store/src/state/mod.rs b/crates/store/src/state/mod.rs index 5a8d29740..0ff3a987f 100644 --- a/crates/store/src/state/mod.rs +++ b/crates/store/src/state/mod.rs @@ -48,7 +48,6 @@ use crate::blocks::BlockStore; use crate::db::models::Page; use crate::db::{Db, NoteRecord, NullifierInfo}; use crate::errors::{ - ApplyBlockError, DatabaseError, GetAccountError, GetBatchInputsError, @@ -173,9 +172,6 @@ pub struct State { /// TOCTOU issues, there must be no concurrent writers. This locks to serialize the writers. writer: Mutex<()>, - /// Request termination of the process due to a fatal internal state error. - termination_ask: tokio::sync::mpsc::Sender, - /// The latest proven-in-sequence block number, updated by the proof scheduler or `apply_proof`. proven_tip: ProvenTipWriter, @@ -204,15 +200,9 @@ impl State { pub async fn load( data_path: &Path, storage_options: StorageOptions, - termination_ask: tokio::sync::mpsc::Sender, ) -> Result { - Self::load_with_database_options( - data_path, - storage_options, - DatabaseOptions::default(), - termination_ask, - ) - .await + Self::load_with_database_options(data_path, storage_options, DatabaseOptions::default()) + .await } /// Loads the state from the data directory using explicit database options. 
@@ -224,7 +214,6 @@ impl State { data_path: &Path, storage_options: StorageOptions, database_options: DatabaseOptions, - termination_ask: tokio::sync::mpsc::Sender, ) -> Result { let data_directory = DataDirectory::load(data_path.to_path_buf()) .map_err(StateInitializationError::DataDirectoryLoadError)?; @@ -301,7 +290,6 @@ impl State { inner, forest, writer, - termination_ask, proven_tip, committed_tip_tx, block_cache: BlockCache::new(BLOCK_CACHE_CAPACITY), From 4a30be40a3d499caf70ed0579c80b3f3d91a25ae Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 07:29:50 +0200 Subject: [PATCH 04/16] Move supervisor error to join set --- bin/network-monitor/src/monitor/tasks.rs | 22 ++++++++-------- bin/node/src/commands/modes.rs | 19 +++----------- bin/ntx-builder/src/builder.rs | 14 +--------- crates/block-producer/src/errors.rs | 19 -------------- crates/block-producer/src/rpc_sync.rs | 9 +------ crates/block-producer/src/server/mod.rs | 17 +++--------- crates/utils/src/tasks.rs | 33 +++++++++++++++++------- 7 files changed, 42 insertions(+), 91 deletions(-) diff --git a/bin/network-monitor/src/monitor/tasks.rs b/bin/network-monitor/src/monitor/tasks.rs index c1f453a6b..56de8d624 100644 --- a/bin/network-monitor/src/monitor/tasks.rs +++ b/bin/network-monitor/src/monitor/tasks.rs @@ -26,7 +26,7 @@ use crate::validator::ValidatorService; /// Task management structure that supervises named component tasks. 
#[derive(Default)] pub struct Tasks { - handles: SupervisedTasks<()>, + handles: SupervisedTasks>, } impl Tasks { @@ -173,7 +173,10 @@ impl Tasks { pub fn spawn_service(&mut self, svc: S) -> Receiver { let (tx, rx) = watch::channel(svc.initial_status()); let service_name = svc.name().to_string(); - self.handles.spawn(service_name.clone(), async move { svc.run(tx).await }); + self.handles.spawn(service_name.clone(), async move { + svc.run(tx).await; + Ok(()) + }); debug!(target: COMPONENT, service = %service_name, "spawned service"); rx } @@ -181,7 +184,10 @@ impl Tasks { /// Spawn the HTTP frontend server. pub fn spawn_http_server(&mut self, server_state: ServerState, config: &MonitorConfig) { let config = config.clone(); - self.handles.spawn("frontend", async move { serve(server_state, config).await }); + self.handles.spawn("frontend", async move { + serve(server_state, config).await; + Ok(()) + }); } /// Handles the failure of a task. @@ -189,15 +195,9 @@ impl Tasks { /// Waits for any task to complete or fail and returns an error. Since components are /// expected to run indefinitely, any task completion is treated as fatal. 
pub async fn handle_failure(&mut self) -> Result<()> { - let component_result = self.handles.join_next().await.expect("join set is not empty"); + let result = self.handles.join_next_as_error().await; self.handles.abort_all(); - let component_name = component_result.name; - let err = match component_result.result { - Ok(()) => anyhow::anyhow!("component completed unexpectedly"), - Err(join_err) => anyhow::Error::from(join_err), - }; - - Err(err.context(format!("component {component_name} failed"))) + result } } diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 977f73abf..afd7b60ea 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -5,7 +5,7 @@ use miden_node_block_producer::{RpcSync, Sequencer}; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; use miden_node_rpc::{NetworkTxAuth, Rpc, RpcMode}; use miden_node_store::State; -use miden_node_utils::tasks::{TaskResult, Tasks}; +use miden_node_utils::tasks::Tasks; use tokio::net::TcpListener; use tonic::metadata::AsciiMetadataValue; use url::Url; @@ -70,9 +70,7 @@ impl SequencerCommand { tasks.spawn("sequencer", sequencer.wait()); tasks.spawn("RPC server", rpc.serve()); - let result = tasks.join_next().await; - tasks.abort_all(); - task_result(result.expect("node tasks should be running")) + tasks.join_next_as_error().await } } @@ -146,9 +144,7 @@ impl FullNodeCommand { tasks.spawn("RPC sync", async move { sync_task.await? 
}); tasks.spawn("RPC server", rpc.serve()); - let result = tasks.join_next().await; - tasks.abort_all(); - task_result(result.expect("node tasks should be running")) + tasks.join_next_as_error().await } } @@ -195,12 +191,3 @@ async fn bind_rpc(listen: std::net::SocketAddr) -> anyhow::Result { .await .with_context(|| format!("failed to bind RPC listener to {listen}")) } - -fn task_result(task_result: TaskResult>) -> anyhow::Result<()> { - let task = task_result.name; - match task_result.result { - Ok(Ok(())) => Err(anyhow::anyhow!("{task} exited unexpectedly")), - Ok(Err(err)) => Err(err).with_context(|| format!("{task} fatal error")), - Err(err) => Err(err).with_context(|| format!("{task} panicked")), - } -} diff --git a/bin/ntx-builder/src/builder.rs b/bin/ntx-builder/src/builder.rs index 460a45275..192ce8454 100644 --- a/bin/ntx-builder/src/builder.rs +++ b/bin/ntx-builder/src/builder.rs @@ -113,19 +113,7 @@ impl NetworkTransactionBuilder { // Wait for either the event loop or the gRPC server to complete. Any completion is treated // as fatal. 
- if let Some(task_result) = tasks.join_next().await { - tasks.abort_all(); - let task = task_result.name; - match task_result.result { - Ok(Ok(())) => anyhow::bail!("ntx-builder task {task} completed unexpectedly"), - Ok(Err(err)) => { - Err(err).with_context(|| format!("ntx-builder task {task} failed"))?; - }, - Err(err) => Err(err).context("ntx-builder task panicked")?, - } - } - - Ok(()) + tasks.join_next_as_error().await.context("ntx-builder task failed") } async fn run_event_loop(mut self) -> anyhow::Result<()> { diff --git a/crates/block-producer/src/errors.rs b/crates/block-producer/src/errors.rs index c01dd8635..bd3e5f3c6 100644 --- a/crates/block-producer/src/errors.rs +++ b/crates/block-producer/src/errors.rs @@ -15,29 +15,10 @@ use miden_protocol::errors::{ProposedBatchError, ProposedBlockError, ProvenBatch use miden_protocol::note::Nullifier; use miden_remote_prover_client::RemoteProverClientError; use thiserror::Error; -use tokio::task::JoinError; use crate::mempool::MempoolPoisonError; use crate::validator::ValidatorError; -// Block-producer errors -// ================================================================================================= - -#[derive(Debug, Error)] -pub enum BlockProducerError { - /// A block-producer task completed although it should have ran indefinitely. - #[error("task {task} completed unexpectedly")] - UnexpectedTaskCompletion { task: String }, - - /// A block-producer task panic'd. - #[error("task {task} panic'd")] - JoinError { task: String, source: JoinError }, - - /// A block-producer task reported a transport error. 
- #[error("task {task} failed")] - TaskError { task: String, source: anyhow::Error }, -} - // Proof scheduler errors // ================================================================================================= diff --git a/crates/block-producer/src/rpc_sync.rs b/crates/block-producer/src/rpc_sync.rs index 949efcf76..afcb189a6 100644 --- a/crates/block-producer/src/rpc_sync.rs +++ b/crates/block-producer/src/rpc_sync.rs @@ -39,14 +39,7 @@ impl RpcSync { tasks.spawn("block-sync", block_sync.run()); tasks.spawn("proof-sync", proof_sync.run()); - let task_result = tasks.join_next().await.expect("sync tasks should be running"); - tasks.abort_all(); - let task = task_result.name; - match task_result.result { - Ok(Ok(())) => anyhow::bail!("{task} exited unexpectedly"), - Ok(Err(err)) => Err(err).with_context(|| format!("{task} fatal error")), - Err(err) => Err(err).with_context(|| format!("{task} panicked")), - } + tasks.join_next_as_error().await }) } } diff --git a/crates/block-producer/src/server/mod.rs b/crates/block-producer/src/server/mod.rs index 138baf618..6578d79f9 100644 --- a/crates/block-producer/src/server/mod.rs +++ b/crates/block-producer/src/server/mod.rs @@ -5,7 +5,7 @@ use std::time::Duration; use anyhow::Result; use miden_node_store::state::{Finality, State}; use miden_node_utils::formatting::{format_input_notes, format_output_notes}; -use miden_node_utils::tasks::{TaskResult, Tasks}; +use miden_node_utils::tasks::Tasks; use miden_protocol::batch::ProposedBatch; use miden_protocol::block::BlockNumber; use miden_protocol::transaction::ProvenTransaction; @@ -18,7 +18,7 @@ use crate::batch_builder::BatchBuilder; use crate::block_builder::BlockBuilder; use crate::block_prover::BlockProver; use crate::domain::transaction::AuthenticatedTransaction; -use crate::errors::{BlockProducerError, MempoolSubmissionError}; +use crate::errors::MempoolSubmissionError; use crate::mempool::{BatchBudget, BlockBudget, Mempool, MempoolConfig, SharedMempool}; use 
crate::validator::BlockProducerValidatorClient; use crate::{ @@ -183,18 +183,7 @@ impl SequencerHandle { async fn wait_for_tasks(mut tasks: Tasks>) -> anyhow::Result<()> { // Wait for any task to end. They should run indefinitely, so this is an unexpected result. - let task_result = tasks.join_next().await.expect("join set is not empty"); - tasks.abort_all(); - map_task_result(task_result) -} - -fn map_task_result(task_result: TaskResult>) -> anyhow::Result<()> { - let task = task_result.name; - match task_result.result { - Ok(Ok(())) => Err(BlockProducerError::UnexpectedTaskCompletion { task })?, - Ok(Err(source)) => Err(BlockProducerError::TaskError { task, source })?, - Err(source) => Err(BlockProducerError::JoinError { task, source })?, - } + tasks.join_next_as_error().await } // BLOCK PRODUCER API diff --git a/crates/utils/src/tasks.rs b/crates/utils/src/tasks.rs index 28b3a09ca..8e8d6e072 100644 --- a/crates/utils/src/tasks.rs +++ b/crates/utils/src/tasks.rs @@ -1,17 +1,13 @@ use std::collections::HashMap; use std::future::Future; +use anyhow::Context; use tokio::task::{Id, JoinError, JoinSet}; -/// Result of a completed named task. -pub struct TaskResult { - /// Human-readable task name supplied when the task was spawned. - pub name: String, - /// Task output, or a join error if the task panicked or was cancelled. - pub result: Result, -} - /// A named task set for supervising concurrently-running Tokio tasks. +/// +/// Dropping a task set aborts all tasks that are still running. Use [`Self::abort_all`] when the +/// tasks should be cancelled before the task set itself is dropped. pub struct Tasks { handles: JoinSet, names: HashMap, @@ -44,7 +40,7 @@ impl Tasks { } /// Waits for the next task to complete. 
- pub async fn join_next(&mut self) -> Option> { + pub async fn join_next(&mut self) -> Option<(String, Result)> { let result = self.handles.join_next_with_id().await?; let id = match &result { Ok((id, _)) => *id, @@ -53,7 +49,7 @@ impl Tasks { let name = self.names.remove(&id).unwrap_or_else(|| "unknown".to_string()); let result = result.map(|(_, output)| output); - Some(TaskResult { name, result }) + Some((name, result)) } /// Aborts all tasks still running in the set. @@ -71,3 +67,20 @@ impl Tasks { self.handles.len() } } + +impl Tasks> { + /// Waits for the next task to complete, treating that completion as an error. + /// + /// This is intended for supervised task sets where every task is expected to run indefinitely. + pub async fn join_next_as_error(&mut self) -> anyhow::Result<()> { + let Some((task, result)) = self.join_next().await else { + anyhow::bail!("task set is empty"); + }; + + match result { + Ok(Ok(())) => anyhow::bail!("task {task} completed unexpectedly"), + Ok(Err(err)) => Err(err).with_context(|| format!("task {task} failed")), + Err(err) => Err(err).with_context(|| format!("task {task} failed to join")), + } + } +} From 677ebc32c54d134fad0082dd3bdf54f8531ef5cb Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 07:49:34 +0200 Subject: [PATCH 05/16] Simplify supervised taskset --- bin/network-monitor/src/monitor/tasks.rs | 21 +++++--------- bin/node/src/commands/modes.rs | 9 +++--- crates/block-producer/src/rpc_sync.rs | 35 +++++++++++++----------- crates/block-producer/src/server/mod.rs | 11 ++------ crates/utils/src/tasks.rs | 34 +++++++++++++---------- 5 files changed, 52 insertions(+), 58 deletions(-) diff --git a/bin/network-monitor/src/monitor/tasks.rs b/bin/network-monitor/src/monitor/tasks.rs index 56de8d624..e0ba469dc 100644 --- a/bin/network-monitor/src/monitor/tasks.rs +++ b/bin/network-monitor/src/monitor/tasks.rs @@ -26,7 +26,7 @@ use 
crate::validator::ValidatorService; /// Task management structure that supervises named component tasks. #[derive(Default)] pub struct Tasks { - handles: SupervisedTasks>, + handles: SupervisedTasks, } impl Tasks { @@ -173,10 +173,8 @@ impl Tasks { pub fn spawn_service(&mut self, svc: S) -> Receiver { let (tx, rx) = watch::channel(svc.initial_status()); let service_name = svc.name().to_string(); - self.handles.spawn(service_name.clone(), async move { - svc.run(tx).await; - Ok(()) - }); + self.handles + .spawn_infallible(service_name.clone(), async move { svc.run(tx).await }); debug!(target: COMPONENT, service = %service_name, "spawned service"); rx } @@ -184,20 +182,15 @@ impl Tasks { /// Spawn the HTTP frontend server. pub fn spawn_http_server(&mut self, server_state: ServerState, config: &MonitorConfig) { let config = config.clone(); - self.handles.spawn("frontend", async move { - serve(server_state, config).await; - Ok(()) - }); + self.handles + .spawn_infallible("frontend", async move { serve(server_state, config).await }); } /// Handles the failure of a task. /// /// Waits for any task to complete or fail and returns an error. Since components are /// expected to run indefinitely, any task completion is treated as fatal. 
- pub async fn handle_failure(&mut self) -> Result<()> { - let result = self.handles.join_next_as_error().await; - self.handles.abort_all(); - - result + pub async fn handle_failure(mut self) -> Result<()> { + self.handles.join_next_as_error().await } } diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index afd7b60ea..55cd9fa10 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -126,12 +126,11 @@ impl FullNodeCommand { let network_tx_auth = self.runtime.rpc.network_tx_auth()?; let state = load_state(&runtime).await?; let _disk_monitor = state.spawn_disk_monitor(); - - let sync_task = RpcSync { + let sync = RpcSync { state: Arc::clone(&state), source_rpc: source_rpc.clone(), - } - .spawn(); + }; + let rpc = Rpc { listener: bind_rpc(runtime.rpc_listen).await?, store: state, @@ -141,7 +140,7 @@ impl FullNodeCommand { network_tx_auth, }; let mut tasks = Tasks::new(); - tasks.spawn("RPC sync", async move { sync_task.await? }); + tasks.spawn("RPC sync", sync.run()); tasks.spawn("RPC server", rpc.serve()); tasks.join_next_as_error().await diff --git a/crates/block-producer/src/rpc_sync.rs b/crates/block-producer/src/rpc_sync.rs index afcb189a6..3705c7cb6 100644 --- a/crates/block-producer/src/rpc_sync.rs +++ b/crates/block-producer/src/rpc_sync.rs @@ -23,24 +23,27 @@ pub struct RpcSync { } impl RpcSync { + /// Runs the block and proof synchronization loops until one exits unexpectedly. + pub async fn run(self) -> anyhow::Result<()> { + let mut tasks = Tasks::new(); + let block_sync = BlockSync { + state: Arc::clone(&self.state), + source_rpc: self.source_rpc.clone(), + }; + let proof_sync = ProofSync { + state: self.state, + source_rpc: self.source_rpc, + }; + + tasks.spawn("block-sync", block_sync.run()); + tasks.spawn("proof-sync", proof_sync.run()); + + tasks.join_next_as_error().await + } + /// Spawns the block and proof synchronization loops as a supervised Tokio task. 
pub fn spawn(self) -> tokio::task::JoinHandle> { - tokio::spawn(async move { - let mut tasks = Tasks::new(); - let block_sync = BlockSync { - state: Arc::clone(&self.state), - source_rpc: self.source_rpc.clone(), - }; - let proof_sync = ProofSync { - state: self.state, - source_rpc: self.source_rpc, - }; - - tasks.spawn("block-sync", block_sync.run()); - tasks.spawn("proof-sync", proof_sync.run()); - - tasks.join_next_as_error().await - }) + tokio::spawn(self.run()) } } diff --git a/crates/block-producer/src/server/mod.rs b/crates/block-producer/src/server/mod.rs index 6578d79f9..7859fd4f6 100644 --- a/crates/block-producer/src/server/mod.rs +++ b/crates/block-producer/src/server/mod.rs @@ -130,8 +130,8 @@ impl Sequencer { // Spawn batch builder, block builder, and proof scheduler. The builders communicate // indirectly via a shared mempool. // - // These should run forever, so we combine them into a joinset so that if - // any complete or fail, we can shutdown the rest (somewhat) gracefully. + // These should run forever, so if any complete or fail, the sequencer reports the failure + // and aborts the rest when the task set is dropped. let mut tasks = Tasks::new(); tasks.spawn("batch-builder", { @@ -149,7 +149,7 @@ impl Sequencer { .await } }); - let task = tokio::spawn(wait_for_tasks(tasks)); + let task = tokio::spawn(async move { tasks.join_next_as_error().await }); Ok(SequencerHandle { api, task }) } @@ -181,11 +181,6 @@ impl SequencerHandle { } } -async fn wait_for_tasks(mut tasks: Tasks>) -> anyhow::Result<()> { - // Wait for any task to end. They should run indefinitely, so this is an unexpected result. 
- tasks.join_next_as_error().await -} - // BLOCK PRODUCER API // ================================================================================================ diff --git a/crates/utils/src/tasks.rs b/crates/utils/src/tasks.rs index 8e8d6e072..c92393f23 100644 --- a/crates/utils/src/tasks.rs +++ b/crates/utils/src/tasks.rs @@ -6,14 +6,13 @@ use tokio::task::{Id, JoinError, JoinSet}; /// A named task set for supervising concurrently-running Tokio tasks. /// -/// Dropping a task set aborts all tasks that are still running. Use [`Self::abort_all`] when the -/// tasks should be cancelled before the task set itself is dropped. -pub struct Tasks { - handles: JoinSet, +/// Dropping a task set aborts all tasks that are still running. +pub struct Tasks { + handles: JoinSet>, names: HashMap, } -impl Default for Tasks { +impl Default for Tasks { fn default() -> Self { Self { handles: JoinSet::new(), @@ -22,7 +21,7 @@ impl Default for Tasks { } } -impl Tasks { +impl Tasks { /// Creates an empty task set. pub fn new() -> Self { Self::default() @@ -32,15 +31,27 @@ impl Tasks { pub fn spawn( &mut self, name: impl Into, - task: impl Future + Send + 'static, + task: impl Future> + Send + 'static, ) -> Id { let id = self.handles.spawn(task).id(); self.names.insert(id, name.into()); id } + /// Spawns a named task that does not return an error. + pub fn spawn_infallible( + &mut self, + name: impl Into, + task: impl Future + Send + 'static, + ) -> Id { + self.spawn(name, async move { + task.await; + Ok(()) + }) + } + /// Waits for the next task to complete. - pub async fn join_next(&mut self) -> Option<(String, Result)> { + pub async fn join_next(&mut self) -> Option<(String, Result, JoinError>)> { let result = self.handles.join_next_with_id().await?; let id = match &result { Ok((id, _)) => *id, @@ -52,11 +63,6 @@ impl Tasks { Some((name, result)) } - /// Aborts all tasks still running in the set. 
- pub fn abort_all(&mut self) { - self.handles.abort_all(); - } - /// Returns `true` if no tasks are currently in the set. pub fn is_empty(&self) -> bool { self.handles.is_empty() @@ -66,9 +72,7 @@ impl Tasks { pub fn len(&self) -> usize { self.handles.len() } -} -impl Tasks> { /// Waits for the next task to complete, treating that completion as an error. /// /// This is intended for supervised task sets where every task is expected to run indefinitely. From a8dc3af01a9bb175becde9da6ffee95e876a930e Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 07:59:13 +0200 Subject: [PATCH 06/16] Review comment --- bin/node/src/commands/modes.rs | 17 +---------------- bin/node/src/commands/rpc.rs | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 55cd9fa10..27c2273ec 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -3,11 +3,10 @@ use std::sync::Arc; use anyhow::Context; use miden_node_block_producer::{RpcSync, Sequencer}; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; -use miden_node_rpc::{NetworkTxAuth, Rpc, RpcMode}; +use miden_node_rpc::{Rpc, RpcMode}; use miden_node_store::State; use miden_node_utils::tasks::Tasks; use tokio::net::TcpListener; -use tonic::metadata::AsciiMetadataValue; use url::Url; use super::block_producer::BlockProducerOptions; @@ -159,20 +158,6 @@ impl SyncOptions { } } -impl super::rpc::RpcOptions { - fn network_tx_auth(&self) -> anyhow::Result> { - self.network_tx_auth_header_value - .as_deref() - .map(|value| { - value - .parse::() - .map(NetworkTxAuth) - .context("invalid rpc.network-tx-auth-header-value") - }) - .transpose() - } -} - async fn load_state(runtime: &RuntimeConfig) -> anyhow::Result> { let state = State::load_with_database_options( &runtime.data_directory, diff --git 
a/bin/node/src/commands/rpc.rs b/bin/node/src/commands/rpc.rs index e9c06618d..24ef8d05c 100644 --- a/bin/node/src/commands/rpc.rs +++ b/bin/node/src/commands/rpc.rs @@ -2,7 +2,10 @@ use std::net::SocketAddr; use std::num::{NonZeroU32, NonZeroU64}; use std::time::Duration; +use anyhow::Context; +use miden_node_rpc::NetworkTxAuth; use miden_node_utils::clap::{GrpcOptionsExternal, duration_to_human_readable_string}; +use tonic::metadata::AsciiMetadataValue; use url::Url; // RPC OPTIONS @@ -40,6 +43,18 @@ impl RpcOptions { max_concurrent_connections: self.rate_limit.max_concurrent_connections, } } + + pub(super) fn network_tx_auth(&self) -> anyhow::Result> { + self.network_tx_auth_header_value + .as_deref() + .map(|value| { + value + .parse::() + .map(NetworkTxAuth) + .context("invalid rpc.network-tx-auth-header-value") + }) + .transpose() + } } #[derive(clap::Args, Clone, Debug)] From 2ccfe2583de0213f387313abb2d4a31c7424e03b Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 11:10:02 +0200 Subject: [PATCH 07/16] Simplify bootstrapping using sentinels --- Makefile | 7 ----- docker-compose.yml | 66 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index fb3d3a91e..141cb682a 100644 --- a/Makefile +++ b/Makefile @@ -137,13 +137,6 @@ install-network-monitor: ## Installs network monitor binary # --- docker -------------------------------------------------------------------------------------- -.PHONY: compose-genesis -compose-genesis: ## Wipes node volumes and creates a fresh genesis block - docker compose $(COMPOSE_FILES) down --volumes --remove-orphans - docker volume rm -f miden-node_node-data - docker compose $(COMPOSE_FILES) --profile genesis run --rm genesis-store - docker compose $(COMPOSE_FILES) --profile genesis rm -f - .PHONY: compose-up compose-up: ## Starts all node components, telemetry, and monitor via 
docker compose docker compose $(COMPOSE_FILES) up -d diff --git a/docker-compose.yml b/docker-compose.yml index 60a76d13e..d305ce8d3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,41 +1,83 @@ services: - genesis-validator: + bootstrap-validator: image: miden-validator pull_policy: if_not_present - profiles: - - genesis volumes: - node-data:/data entrypoint: ["/bin/sh", "-c"] command: - | set -e - mkdir -p /data/genesis /data/store /data/validator /data/accounts /data/ntx-builder - echo "Bootstrapping validator (creating genesis block)..." + if [ -f /data/validator/.bootstrapped ]; then + echo "Validator already bootstrapped." + exit 0 + fi + + echo "Cleaning incomplete validator bootstrap state..." + rm -rf /data/genesis /data/validator /data/accounts + mkdir -p /data/genesis /data/validator /data/accounts + + echo "Bootstrapping validator..." miden-validator bootstrap \ --data-directory /data/validator \ --genesis-block-directory /data/genesis \ --accounts-directory /data/accounts - genesis-store: + touch /data/validator/.bootstrapped + + bootstrap-node: image: miden-node pull_policy: if_not_present - profiles: - - genesis volumes: - node-data:/data entrypoint: ["/bin/sh", "-c"] depends_on: - genesis-validator: + bootstrap-validator: condition: service_completed_successfully command: - | set -e - echo "Bootstrapping store..." - miden-node store bootstrap \ - --data-directory /data/store \ + if [ -f /data/node/.bootstrapped ]; then + echo "Node already bootstrapped." + exit 0 + fi + + echo "Cleaning incomplete node bootstrap state..." + rm -rf /data/node + mkdir -p /data/node + + echo "Bootstrapping node..." 
+ miden-node bootstrap \ + --data-directory /data/node \ --genesis-block /data/genesis/genesis.dat + touch /data/node/.bootstrapped + + bootstrap-ntx-builder: + image: miden-ntx-builder + pull_policy: if_not_present + volumes: + - node-data:/data + entrypoint: ["/bin/sh", "-c"] + depends_on: + bootstrap-node: + condition: service_completed_successfully + command: + - | + set -e + if [ -f /data/ntx-builder/.bootstrapped ]; then + echo "Network transaction builder already bootstrapped." + exit 0 + fi + + echo "Cleaning incomplete network transaction builder bootstrap state..." + rm -rf /data/ntx-builder + mkdir -p /data/ntx-builder + + # TODO: replace this no-op once miden-ntx-builder supports offline bootstrap. + # Do not write /data/ntx-builder/.bootstrapped until real bootstrap work runs. + echo "TODO: bootstrap network transaction builder offline." + store: image: miden-node pull_policy: if_not_present From 1a6deb5de10030e16c450e8ba4287e88e356accf Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 11:36:17 +0200 Subject: [PATCH 08/16] Docker compose: use sequencer mode --- docker-compose.yml | 73 ++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 51 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index d305ce8d3..1bd86dde6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -78,35 +78,38 @@ services: # Do not write /data/ntx-builder/.bootstrapped until real bootstrap work runs. echo "TODO: bootstrap network transaction builder offline." 
- store: + sequencer: image: miden-node pull_policy: if_not_present volumes: - node-data:/data + depends_on: + bootstrap-node: + condition: service_completed_successfully + validator: + condition: service_started environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=store + - OTEL_SERVICE_NAME=sequencer command: - miden-node - - store - - start - - --rpc.listen=0.0.0.0:50001 - - --block-producer.listen=0.0.0.0:50003 - - --data-directory=/data/store - - --account_tree.rocksdb.max_cache_size=4294967296 - - --account_tree.rocksdb.max_open_fds=512 - - --nullifier_tree.rocksdb.max_cache_size=4294967296 - - --nullifier_tree.rocksdb.max_open_fds=512 + - sequencer + - --rpc.listen=0.0.0.0:57291 + - --data-directory=/data/node + - --validator.url=http://validator:50101 + - --ntx-builder.url=http://ntx-builder:50301 ports: - - "50001:50001" - - "50003:50003" + - "57291:57291" validator: image: miden-validator pull_policy: if_not_present volumes: - node-data:/data + depends_on: + bootstrap-validator: + condition: service_completed_successfully environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 @@ -119,46 +122,16 @@ services: ports: - "50101:50101" - block-producer: - image: miden-node - pull_policy: if_not_present - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=block-producer - command: - - miden-node - - block-producer - - start - - --listen=0.0.0.0:50201 - - --store.url=http://store:50003 - - --validator.url=http://validator:50101 - ports: - - "50201:50201" - - rpc: - image: miden-node - pull_policy: if_not_present - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=rpc - command: - - miden-node - - rpc - - start - - --listen=0.0.0.0:57291 - - --store.url=http://store:50001 - - --block-producer.url=http://block-producer:50201 - - 
--validator.url=http://validator:50101 - ports: - - "57291:57291" - ntx-builder: image: miden-ntx-builder pull_policy: if_not_present volumes: - node-data:/data + depends_on: + bootstrap-ntx-builder: + condition: service_completed_successfully + sequencer: + condition: service_started environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 @@ -167,9 +140,7 @@ services: - miden-ntx-builder - start - --listen=0.0.0.0:50301 - - --store.url=http://store:50001 - - --block-producer.url=http://block-producer:50201 - - --validator.url=http://validator:50101 + - --rpc.url=http://sequencer:57291 - --data-directory=/data/ntx-builder From 8307da628de889f548e3e0a5e528b07df0333ea6 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 11:41:30 +0200 Subject: [PATCH 09/16] Simplify the otel setup --- compose/telemetry.yml | 16 ++-------------- docker-compose.yml | 12 ------------ 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/compose/telemetry.yml b/compose/telemetry.yml index b8302fff8..d582f4f37 100644 --- a/compose/telemetry.yml +++ b/compose/telemetry.yml @@ -23,11 +23,11 @@ services: depends_on: - tempo - store: + sequencer: environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=store + - OTEL_SERVICE_NAME=sequencer validator: environment: @@ -35,18 +35,6 @@ services: - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - OTEL_SERVICE_NAME=validator - block-producer: - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=block-producer - - rpc: - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=rpc - ntx-builder: environment: - MIDEN_NODE_ENABLE_OTEL=true diff --git a/docker-compose.yml b/docker-compose.yml index 1bd86dde6..790b2f0d3 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -88,10 +88,6 @@ services: condition: service_completed_successfully validator: condition: service_started - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=sequencer command: - miden-node - sequencer @@ -110,10 +106,6 @@ services: depends_on: bootstrap-validator: condition: service_completed_successfully - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=validator command: - miden-validator - start @@ -132,10 +124,6 @@ services: condition: service_completed_successfully sequencer: condition: service_started - environment: - - MIDEN_NODE_ENABLE_OTEL=true - - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=ntx-builder command: - miden-ntx-builder - start From 997a7d5b7d1dec90fc8b00c9ec491d73a43d9bed Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 11:49:11 +0200 Subject: [PATCH 10/16] Small improvements --- Makefile | 2 +- compose/monitor.yml | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 141cb682a..54a73645c 100644 --- a/Makefile +++ b/Makefile @@ -143,7 +143,7 @@ compose-up: ## Starts all node components, telemetry, and monitor via docker com .PHONY: compose-down compose-down: ## Stops and removes all containers via docker compose - docker compose $(COMPOSE_FILES) down + docker compose $(COMPOSE_FILES) down --remove-orphans .PHONY: compose-logs compose-logs: ## Follows logs for all components via docker compose diff --git a/compose/monitor.yml b/compose/monitor.yml index 2d8636aec..c96eca172 100644 --- a/compose/monitor.yml +++ b/compose/monitor.yml @@ -6,14 +6,12 @@ services: - miden-network-monitor - start environment: - - MIDEN_MONITOR_RPC_URL=http://localhost:57291 + - MIDEN_MONITOR_RPC_URL=http://sequencer:57291 - MIDEN_MONITOR_PORT=3001 - 
MIDEN_MONITOR_NETWORK_NAME=Localhost - MIDEN_MONITOR_DISABLE_NTX_SERVICE=true - MIDEN_MONITOR_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - OTEL_SERVICE_NAME=monitor - extra_hosts: - - "localhost:host-gateway" ports: - "3001:3001" From d90e095cfca6090da65313d36113a0e595fae4cb Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 11:56:17 +0200 Subject: [PATCH 11/16] Hide validator port --- docker-compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 790b2f0d3..e4a72104e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -111,8 +111,6 @@ services: - start - --listen=0.0.0.0:50101 - --data-directory=/data/validator - ports: - - "50101:50101" ntx-builder: image: miden-ntx-builder @@ -131,6 +129,5 @@ services: - --rpc.url=http://sequencer:57291 - --data-directory=/data/ntx-builder - volumes: node-data: From 8637589610db0acccf6cea5e6626fd3937da5a5b Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 12:18:37 +0200 Subject: [PATCH 12/16] Update `run-node.sh` --- compose/monitor.yml | 1 + compose/telemetry.yml | 5 +- docker-compose.yml | 9 +- scripts/run-node.sh | 212 ++++++++++++++++++++---------------------- 4 files changed, 114 insertions(+), 113 deletions(-) diff --git a/compose/monitor.yml b/compose/monitor.yml index c96eca172..db639b1ad 100644 --- a/compose/monitor.yml +++ b/compose/monitor.yml @@ -13,5 +13,6 @@ services: - MIDEN_MONITOR_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - OTEL_SERVICE_NAME=monitor + - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden ports: - "3001:3001" diff --git a/compose/telemetry.yml b/compose/telemetry.yml index d582f4f37..10a9c5437 100644 --- a/compose/telemetry.yml +++ b/compose/telemetry.yml @@ -27,16 +27,19 @@ services: environment: - MIDEN_NODE_ENABLE_OTEL=true - 
OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=sequencer + - OTEL_SERVICE_NAME=node + - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=sequencer,miden.node.role=sequencer validator: environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - OTEL_SERVICE_NAME=validator + - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden ntx-builder: environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - OTEL_SERVICE_NAME=ntx-builder + - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden diff --git a/docker-compose.yml b/docker-compose.yml index e4a72104e..567343e81 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,9 +74,12 @@ services: rm -rf /data/ntx-builder mkdir -p /data/ntx-builder - # TODO: replace this no-op once miden-ntx-builder supports offline bootstrap. - # Do not write /data/ntx-builder/.bootstrapped until real bootstrap work runs. - echo "TODO: bootstrap network transaction builder offline." + echo "Bootstrapping network transaction builder..." 
+ miden-ntx-builder bootstrap \ + --data-directory /data/ntx-builder \ + --genesis-block /data/genesis/genesis.dat + + touch /data/ntx-builder/.bootstrapped sequencer: image: miden-node diff --git a/scripts/run-node.sh b/scripts/run-node.sh index 0dd92bd44..a3148d95c 100755 --- a/scripts/run-node.sh +++ b/scripts/run-node.sh @@ -3,8 +3,9 @@ set -euo pipefail # Configuration SKIP_BOOTSTRAP="${SKIP_BOOTSTRAP:-false}" +ENABLE_FULL_NODES="${ENABLE_FULL_NODES:-true}" EXTRA_ARGS="${EXTRA_ARGS:-}" -BINARY="${MIDEN_NODE_BIN:-./target/debug/miden-node}" +NODE_BINARY="${MIDEN_NODE_BIN:-./target/debug/miden-node}" VALIDATOR_BINARY="${MIDEN_VALIDATOR_BIN:-./target/debug/miden-validator}" NTX_BUILDER_BINARY="${MIDEN_NTX_BUILDER_BIN:-./target/debug/miden-ntx-builder}" KMS_KEY_ID="${KMS_KEY_ID:-}" @@ -14,27 +15,18 @@ if [[ -n "$KMS_KEY_ID" ]]; then fi GENESIS_CONFIG="crates/store/src/genesis/config/samples/01-simple.toml" -STORE_DIR="/tmp/store" -STORE_REPLICA_1_DIR="/tmp/store-replica-1" -STORE_REPLICA_2_DIR="/tmp/store-replica-2" +NODE_DIR="/tmp/node" +FULL_NODE_1_DIR="/tmp/full-node-1" +FULL_NODE_2_DIR="/tmp/full-node-2" VALIDATOR_DIR="/tmp/validator" NTX_BUILDER_DIR="/tmp/ntx-builder" ACCOUNTS_DIR="/tmp/accounts" -# Sequencer store. -STORE_RPC_PORT=50001 -STORE_BLOCK_PRODUCER_PORT=50003 - -# Replica stores expose only the RPC API (no block-producer endpoint). -STORE_REPLICA_1_RPC_PORT=50011 -STORE_REPLICA_2_RPC_PORT=50021 - VALIDATOR_PORT=50101 -BLOCK_PRODUCER_PORT=50201 NTX_BUILDER_PORT=50301 RPC_PORT=57291 -RPC_REPLICA_1_PORT=57292 -RPC_REPLICA_2_PORT=57293 +FULL_NODE_1_RPC_PORT=57292 +FULL_NODE_2_RPC_PORT=57293 PIDS=() @@ -43,34 +35,59 @@ cleanup() { for pid in "${PIDS[@]}"; do kill "$pid" 2>/dev/null || true done - wait + wait "${PIDS[@]}" 2>/dev/null || true echo "All components stopped." 
} trap cleanup EXIT INT TERM -# --- Kill processes on required ports --- +kill_ports() { + local ports=("$VALIDATOR_PORT" "$NTX_BUILDER_PORT" "$RPC_PORT") -PORTS=(50001 50002 50003 50011 50021 50101 50201 50301 57291 57292 57293) -echo "=== Killing processes on required ports ===" -for port in "${PORTS[@]}"; do - pids=$(lsof -ti :"$port" 2>/dev/null || true) - if [[ -n "$pids" ]]; then - for pid in $pids; do - echo "Killing PID $pid on port $port" - kill -9 "$pid" 2>/dev/null || true - done + if [[ "$ENABLE_FULL_NODES" == "true" ]]; then + ports+=("$FULL_NODE_1_RPC_PORT" "$FULL_NODE_2_RPC_PORT") fi -done -sleep 1 + + echo "=== Killing processes on required ports ===" + for port in "${ports[@]}"; do + pids=$(lsof -ti :"$port" 2>/dev/null || true) + if [[ -n "$pids" ]]; then + for pid in $pids; do + echo "Killing PID $pid on port $port" + kill -9 "$pid" 2>/dev/null || true + done + fi + done + sleep 1 +} + +bootstrap_node_data_dir() { + local label="$1" + local data_dir="$2" + + echo "Bootstrapping $label..." + "$NODE_BINARY" bootstrap \ + --data-directory "$data_dir" \ + --genesis-block "$VALIDATOR_DIR/genesis.dat" +} + +bootstrap_ntx_builder() { + echo "Bootstrapping network transaction builder..." + + "$NTX_BUILDER_BINARY" bootstrap \ + --data-directory "$NTX_BUILDER_DIR" \ + --genesis-block "$VALIDATOR_DIR/genesis.dat" +} + +# --- Kill processes on required ports --- + +kill_ports # --- Bootstrap --- if [[ "$SKIP_BOOTSTRAP" != "true" ]]; then echo "=== Bootstrapping ===" - rm -rf "$VALIDATOR_DIR" "$ACCOUNTS_DIR" "$STORE_DIR" \ - "$STORE_REPLICA_1_DIR" "$STORE_REPLICA_2_DIR" "$NTX_BUILDER_DIR" - mkdir -p "$NTX_BUILDER_DIR" + rm -rf "$VALIDATOR_DIR" "$ACCOUNTS_DIR" "$NODE_DIR" "$FULL_NODE_1_DIR" "$FULL_NODE_2_DIR" "$NTX_BUILDER_DIR" echo "Bootstrapping validator..." 
KMS_BOOTSTRAP_ARGS=() @@ -78,27 +95,20 @@ if [[ "$SKIP_BOOTSTRAP" != "true" ]]; then KMS_BOOTSTRAP_ARGS+=(--validator.key.kms-id "$KMS_KEY_ID") fi - $VALIDATOR_BINARY bootstrap \ + "$VALIDATOR_BINARY" bootstrap \ --data-directory "$VALIDATOR_DIR" \ --genesis-block-directory "$VALIDATOR_DIR" \ --accounts-directory "$ACCOUNTS_DIR" \ --genesis-config-file "$GENESIS_CONFIG" \ "${KMS_BOOTSTRAP_ARGS[@]+"${KMS_BOOTSTRAP_ARGS[@]}"}" - echo "Bootstrapping store..." - $BINARY store bootstrap \ - --data-directory "$STORE_DIR" \ - --genesis-block "$VALIDATOR_DIR/genesis.dat" - - echo "Bootstrapping store replica 1..." - $BINARY store bootstrap \ - --data-directory "$STORE_REPLICA_1_DIR" \ - --genesis-block "$VALIDATOR_DIR/genesis.dat" + bootstrap_node_data_dir "sequencer node" "$NODE_DIR" + bootstrap_ntx_builder - echo "Bootstrapping store replica 2..." - $BINARY store bootstrap \ - --data-directory "$STORE_REPLICA_2_DIR" \ - --genesis-block "$VALIDATOR_DIR/genesis.dat" + if [[ "$ENABLE_FULL_NODES" == "true" ]]; then + bootstrap_node_data_dir "full node 1" "$FULL_NODE_1_DIR" + bootstrap_node_data_dir "full node 2" "$FULL_NODE_2_DIR" + fi else echo "=== Skipping bootstrap (SKIP_BOOTSTRAP=true) ===" fi @@ -107,92 +117,76 @@ fi echo "=== Starting components ===" -echo "Starting sequencer store..." -OTEL_SERVICE_NAME=miden-store-primary $BINARY store start \ - --rpc.listen "0.0.0.0:$STORE_RPC_PORT" \ - --block-producer.listen "0.0.0.0:$STORE_BLOCK_PRODUCER_PORT" \ - --data-directory "$STORE_DIR" \ - $EXTRA_ARGS & -PIDS+=($!) - KMS_START_ARGS=() if [[ -n "$KMS_KEY_ID" ]]; then KMS_START_ARGS+=(--key.kms-id "$KMS_KEY_ID") fi echo "Starting validator..." 
-OTEL_SERVICE_NAME=miden-validator $VALIDATOR_BINARY start --listen "0.0.0.0:$VALIDATOR_PORT" \ +OTEL_SERVICE_NAME=validator \ +OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden \ +"$VALIDATOR_BINARY" start --listen "0.0.0.0:$VALIDATOR_PORT" \ --data-directory "$VALIDATOR_DIR" \ $EXTRA_ARGS \ "${KMS_START_ARGS[@]+"${KMS_START_ARGS[@]}"}" & PIDS+=($!) -# Give store and validator a moment to bind their ports. +# Give the validator a moment to bind before the sequencer starts producing blocks. sleep 2 -# Replica 1 syncs from the primary store. -echo "Starting store replica 1 (upstream: primary store at 127.0.0.1:$STORE_RPC_PORT)..." -OTEL_SERVICE_NAME=miden-store-replica-1 $BINARY store start-replica \ - --rpc.listen "0.0.0.0:$STORE_REPLICA_1_RPC_PORT" \ - --upstream-store.url "http://127.0.0.1:$STORE_RPC_PORT" \ - --data-directory "$STORE_REPLICA_1_DIR" \ - $EXTRA_ARGS & -PIDS+=($!) - -# Replica 2 syncs from replica 1, proving replicas can act as upstreams. -echo "Starting store replica 2 (upstream: replica 1 at 127.0.0.1:$STORE_REPLICA_1_RPC_PORT)..." -OTEL_SERVICE_NAME=miden-store-replica-2 $BINARY store start-replica \ - --rpc.listen "0.0.0.0:$STORE_REPLICA_2_RPC_PORT" \ - --upstream-store.url "http://127.0.0.1:$STORE_REPLICA_1_RPC_PORT" \ - --data-directory "$STORE_REPLICA_2_DIR" \ - $EXTRA_ARGS & -PIDS+=($!) - -echo "Starting block producer..." -OTEL_SERVICE_NAME=miden-block-producer $BINARY block-producer start --listen "0.0.0.0:$BLOCK_PRODUCER_PORT" \ - --store.url "http://127.0.0.1:$STORE_BLOCK_PRODUCER_PORT" \ - --validator.url "http://127.0.0.1:$VALIDATOR_PORT" \ - $EXTRA_ARGS & -PIDS+=($!) - -echo "Starting RPC server (primary store)..." -OTEL_SERVICE_NAME=miden-rpc-primary $BINARY rpc start \ - --listen "0.0.0.0:$RPC_PORT" \ - --store.url "http://127.0.0.1:$STORE_RPC_PORT" \ - --block-producer.url "http://127.0.0.1:$BLOCK_PRODUCER_PORT" \ - --validator.url "http://127.0.0.1:$VALIDATOR_PORT" \ - $EXTRA_ARGS & -PIDS+=($!) 
- -echo "Starting RPC server (replica 1)..." -OTEL_SERVICE_NAME=miden-rpc-replica-1 $BINARY rpc start \ - --listen "0.0.0.0:$RPC_REPLICA_1_PORT" \ - --store.url "http://127.0.0.1:$STORE_REPLICA_1_RPC_PORT" \ - --block-producer.url "http://127.0.0.1:$BLOCK_PRODUCER_PORT" \ - --validator.url "http://127.0.0.1:$VALIDATOR_PORT" \ - $EXTRA_ARGS & -PIDS+=($!) - -echo "Starting RPC server (replica 2)..." -OTEL_SERVICE_NAME=miden-rpc-replica-2 $BINARY rpc start \ - --listen "0.0.0.0:$RPC_REPLICA_2_PORT" \ - --store.url "http://127.0.0.1:$STORE_REPLICA_2_RPC_PORT" \ - --block-producer.url "http://127.0.0.1:$BLOCK_PRODUCER_PORT" \ +echo "Starting sequencer..." +OTEL_SERVICE_NAME=node \ +OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=sequencer,miden.node.role=sequencer \ +"$NODE_BINARY" sequencer \ + --rpc.listen "0.0.0.0:$RPC_PORT" \ + --data-directory "$NODE_DIR" \ --validator.url "http://127.0.0.1:$VALIDATOR_PORT" \ + --ntx-builder.url "http://127.0.0.1:$NTX_BUILDER_PORT" \ $EXTRA_ARGS & PIDS+=($!) echo "Starting network transaction builder..." -OTEL_SERVICE_NAME=miden-ntx-builder $NTX_BUILDER_BINARY start \ +OTEL_SERVICE_NAME=ntx-builder \ +OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden \ +"$NTX_BUILDER_BINARY" start \ --listen "0.0.0.0:$NTX_BUILDER_PORT" \ - --store.url "http://127.0.0.1:$STORE_RPC_PORT" \ - --block-producer.url "http://127.0.0.1:$BLOCK_PRODUCER_PORT" \ - --validator.url "http://127.0.0.1:$VALIDATOR_PORT" \ + --rpc.url "http://127.0.0.1:$RPC_PORT" \ --data-directory "$NTX_BUILDER_DIR" \ $EXTRA_ARGS & PIDS+=($!) +if [[ "$ENABLE_FULL_NODES" == "true" ]]; then + echo "Starting full node 1 (upstream: sequencer at 127.0.0.1:$RPC_PORT)..." 
+ OTEL_SERVICE_NAME=node \ + OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=full-node-1,miden.node.role=full \ + "$NODE_BINARY" full \ + --rpc.listen "0.0.0.0:$FULL_NODE_1_RPC_PORT" \ + --sync.block-source.url "http://127.0.0.1:$RPC_PORT" \ + --data-directory "$FULL_NODE_1_DIR" \ + $EXTRA_ARGS & + PIDS+=($!) + + # Give full node 1 a moment to bind before full node 2 uses it as an upstream. + sleep 2 + + echo "Starting full node 2 (upstream: full node 1 at 127.0.0.1:$FULL_NODE_1_RPC_PORT)..." + OTEL_SERVICE_NAME=node \ + OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=full-node-2,miden.node.role=full \ + "$NODE_BINARY" full \ + --rpc.listen "0.0.0.0:$FULL_NODE_2_RPC_PORT" \ + --sync.block-source.url "http://127.0.0.1:$FULL_NODE_1_RPC_PORT" \ + --data-directory "$FULL_NODE_2_DIR" \ + $EXTRA_ARGS & + PIDS+=($!) +else + echo "=== Full nodes disabled (ENABLE_FULL_NODES=false) ===" +fi + echo "=== All components running. Ctrl+C to stop. ===" -echo "=== Block propagation chain: :$STORE_RPC_PORT -> :$STORE_REPLICA_1_RPC_PORT -> :$STORE_REPLICA_2_RPC_PORT ===" -echo "=== RPC endpoints: :$RPC_PORT, :$RPC_REPLICA_1_PORT, :$RPC_REPLICA_2_PORT ===" +if [[ "$ENABLE_FULL_NODES" == "true" ]]; then + echo "=== Block propagation chain: :$RPC_PORT -> :$FULL_NODE_1_RPC_PORT -> :$FULL_NODE_2_RPC_PORT ===" + echo "=== RPC endpoints: :$RPC_PORT, :$FULL_NODE_1_RPC_PORT, :$FULL_NODE_2_RPC_PORT ===" +else + echo "=== RPC endpoint: :$RPC_PORT ===" +fi wait From b3696071640d119dfe71a1c2780e31603b4a2f24 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 14:04:33 +0200 Subject: [PATCH 13/16] OTel attributes are now handled in the code --- bin/network-monitor/src/commands/start.rs | 2 +- bin/node/src/commands/mod.rs | 12 +- bin/node/src/commands/runtime.rs | 2 +- bin/ntx-builder/src/main.rs | 2 +- bin/remote-prover/src/main.rs | 2 +- bin/validator/src/main.rs | 2 +- 
compose/monitor.yml | 2 - compose/telemetry.yml | 7 +- crates/utils/src/logging.rs | 197 +++++++++++++++++++++- scripts/run-node.sh | 23 +-- 10 files changed, 220 insertions(+), 31 deletions(-) diff --git a/bin/network-monitor/src/commands/start.rs b/bin/network-monitor/src/commands/start.rs index 9b9499daa..ea7730341 100644 --- a/bin/network-monitor/src/commands/start.rs +++ b/bin/network-monitor/src/commands/start.rs @@ -29,7 +29,7 @@ pub async fn start_monitor(config: MonitorConfig) -> Result<()> { info!("Loaded configuration: {:?}", config); let _otel_guard = if config.enable_otel { - miden_node_utils::logging::setup_tracing(OpenTelemetry::Enabled)? + miden_node_utils::logging::setup_tracing(OpenTelemetry::enabled().with_name("monitor"))? } else { miden_node_utils::logging::setup_tracing(OpenTelemetry::Disabled)? }; diff --git a/bin/node/src/commands/mod.rs b/bin/node/src/commands/mod.rs index fab6d0b0e..f7620b263 100644 --- a/bin/node/src/commands/mod.rs +++ b/bin/node/src/commands/mod.rs @@ -53,8 +53,16 @@ pub enum Command { impl Command { pub(crate) fn open_telemetry(&self) -> miden_node_utils::logging::OpenTelemetry { match self { - Command::Sequencer(command) => command.runtime.open_telemetry(), - Command::Full(command) => command.runtime.open_telemetry(), + Command::Sequencer(command) => command + .runtime + .open_telemetry() + .with_name("node") + .with_attribute("miden.node.role", "sequencer"), + Command::Full(command) => command + .runtime + .open_telemetry() + .with_name("node") + .with_attribute("miden.node.role", "full"), Command::Bootstrap(_) | Command::Migrate(_) => { miden_node_utils::logging::OpenTelemetry::Disabled }, diff --git a/bin/node/src/commands/runtime.rs b/bin/node/src/commands/runtime.rs index 8931761fe..831dbbe0e 100644 --- a/bin/node/src/commands/runtime.rs +++ b/bin/node/src/commands/runtime.rs @@ -37,7 +37,7 @@ pub struct RuntimeOptions { impl RuntimeOptions { pub fn open_telemetry(&self) -> OpenTelemetry { if self.enable_otel { - 
OpenTelemetry::Enabled + OpenTelemetry::enabled() } else { OpenTelemetry::Disabled } diff --git a/bin/ntx-builder/src/main.rs b/bin/ntx-builder/src/main.rs index 6c71a1060..c5b0df422 100644 --- a/bin/ntx-builder/src/main.rs +++ b/bin/ntx-builder/src/main.rs @@ -8,7 +8,7 @@ async fn main() -> anyhow::Result<()> { let command = commands::NtxBuilderCommand::parse(); let otel = if command.is_open_telemetry_enabled() { - OpenTelemetry::Enabled + OpenTelemetry::enabled().with_name("ntx-builder") } else { OpenTelemetry::Disabled }; diff --git a/bin/remote-prover/src/main.rs b/bin/remote-prover/src/main.rs index 9de86e089..619cf314c 100644 --- a/bin/remote-prover/src/main.rs +++ b/bin/remote-prover/src/main.rs @@ -9,7 +9,7 @@ const COMPONENT: &str = "miden-prover"; #[tokio::main] async fn main() -> anyhow::Result<()> { - let _otel_guard = setup_tracing(OpenTelemetry::Enabled)?; + let _otel_guard = setup_tracing(OpenTelemetry::enabled().with_name("remote-prover"))?; info!(target: COMPONENT, "Tracing initialized"); let (handle, _port) = diff --git a/bin/validator/src/main.rs b/bin/validator/src/main.rs index 6f27a578c..c19cabca3 100644 --- a/bin/validator/src/main.rs +++ b/bin/validator/src/main.rs @@ -11,7 +11,7 @@ async fn main() -> anyhow::Result<()> { let command = commands::ValidatorCommand::parse(); let otel = if command.is_open_telemetry_enabled() { - OpenTelemetry::Enabled + OpenTelemetry::enabled().with_name("validator") } else { OpenTelemetry::Disabled }; diff --git a/compose/monitor.yml b/compose/monitor.yml index db639b1ad..60efac733 100644 --- a/compose/monitor.yml +++ b/compose/monitor.yml @@ -12,7 +12,5 @@ services: - MIDEN_MONITOR_DISABLE_NTX_SERVICE=true - MIDEN_MONITOR_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=monitor - - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden ports: - "3001:3001" diff --git a/compose/telemetry.yml b/compose/telemetry.yml index 10a9c5437..4c7f3b7a5 100644 --- a/compose/telemetry.yml +++ 
b/compose/telemetry.yml @@ -27,19 +27,14 @@ services: environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=node - - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=sequencer,miden.node.role=sequencer + - OTEL_RESOURCE_ATTRIBUTES=service.instance.id=sequencer validator: environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=validator - - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden ntx-builder: environment: - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - - OTEL_SERVICE_NAME=ntx-builder - - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden diff --git a/crates/utils/src/logging.rs b/crates/utils/src/logging.rs index 8e38ac792..83c646c39 100644 --- a/crates/utils/src/logging.rs +++ b/crates/utils/src/logging.rs @@ -2,7 +2,10 @@ use std::str::FromStr; use std::sync::OnceLock; use opentelemetry::trace::TracerProvider as _; +use opentelemetry::{KeyValue, Value}; +use opentelemetry_sdk::Resource; use opentelemetry_sdk::propagation::TraceContextPropagator; +use opentelemetry_sdk::resource::{EnvResourceDetector, TelemetryResourceDetector}; use opentelemetry_sdk::trace::SdkTracerProvider; use tracing::subscriber::Subscriber; use tracing_opentelemetry::OpenTelemetryLayer; @@ -17,16 +20,68 @@ use crate::tracing::OpenTelemetrySpanExt; /// pending spans before the program terminates. static TRACER_PROVIDER: OnceLock = OnceLock::new(); +/// Default OpenTelemetry resource attributes for this process. 
+#[derive(Clone, Default)] +pub struct ResourceConfig { + service_name: Option<&'static str>, + attributes: Vec<(&'static str, &'static str)>, +} + +impl ResourceConfig { + #[must_use] + pub fn with_name(mut self, service_name: &'static str) -> Self { + self.service_name = Some(service_name); + self + } + + #[must_use] + pub fn with_attribute(mut self, key: &'static str, value: &'static str) -> Self { + self.attributes.push((key, value)); + self + } +} + /// Configures [`setup_tracing`] to enable or disable the open-telemetry exporter. -#[derive(Clone, Copy)] +#[derive(Clone)] pub enum OpenTelemetry { - Enabled, + Enabled(ResourceConfig), Disabled, } impl OpenTelemetry { - fn is_enabled(self) -> bool { - matches!(self, OpenTelemetry::Enabled) + pub fn enabled() -> Self { + OpenTelemetry::Enabled(ResourceConfig::default()) + } + + #[must_use] + pub fn with_name(self, service_name: &'static str) -> Self { + match self { + OpenTelemetry::Enabled(config) => { + OpenTelemetry::Enabled(config.with_name(service_name)) + }, + OpenTelemetry::Disabled => OpenTelemetry::Disabled, + } + } + + #[must_use] + pub fn with_attribute(self, key: &'static str, value: &'static str) -> Self { + match self { + OpenTelemetry::Enabled(config) => { + OpenTelemetry::Enabled(config.with_attribute(key, value)) + }, + OpenTelemetry::Disabled => OpenTelemetry::Disabled, + } + } + + fn is_enabled(&self) -> bool { + matches!(self, OpenTelemetry::Enabled(_)) + } + + fn resource_config(self) -> Option { + match self { + OpenTelemetry::Enabled(config) => Some(config), + OpenTelemetry::Disabled => None, + } } } @@ -65,7 +120,10 @@ pub fn setup_tracing(otel: OpenTelemetry) -> anyhow::Result> { // `then_some`) to avoid crashing sync callers (with OpenTelemetry::Disabled set). Examples of // such callers are tests with logging enabled. 
let tracer_provider = if otel.is_enabled() { - let provider = init_tracer_provider()?; + let provider = init_tracer_provider( + otel.resource_config() + .expect("resource config is set when OpenTelemetry is enabled"), + )?; // Store the provider globally so the panic hook can flush it. SdkTracerProvider is // internally reference-counted, so cloning is cheap. @@ -112,16 +170,68 @@ pub fn setup_tracing(otel: OpenTelemetry) -> anyhow::Result> { Ok(tracer_provider.map(|tracer_provider| OtelGuard { tracer_provider })) } -fn init_tracer_provider() -> anyhow::Result { +fn init_tracer_provider(resource_config: ResourceConfig) -> anyhow::Result { let builder = opentelemetry_otlp::SpanExporter::builder().with_tonic(); let exporter = builder.build()?; + let resource = resource(resource_config); Ok(opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_resource(resource) .with_batch_exporter(exporter) .build()) } +fn resource(config: ResourceConfig) -> Resource { + let detected_resource = Resource::builder_empty() + .with_detector(Box::new(TelemetryResourceDetector)) + .with_detector(Box::new(EnvResourceDetector::new())) + .build(); + + resource_from_detected(config, &detected_resource, otel_service_name_override()) +} + +fn resource_from_detected( + config: ResourceConfig, + detected_resource: &Resource, + service_name_override: Option, +) -> Resource { + const SERVICE_NAME: &str = "service.name"; + const SERVICE_NAMESPACE: &str = "service.namespace"; + + let mut attributes = + std::collections::BTreeMap::from([(SERVICE_NAMESPACE.to_string(), Value::from("miden"))]); + + if let Some(service_name) = config.service_name { + attributes.insert(SERVICE_NAME.to_string(), Value::from(service_name)); + } + + for (key, value) in config.attributes { + attributes.insert(key.to_string(), Value::from(value)); + } + + // Environment resource attributes override defaults above, and OTEL_SERVICE_NAME overrides + // both. 
+ for (key, value) in detected_resource { + attributes.insert(key.as_str().to_string(), value.clone()); + } + + if let Some(service_name) = service_name_override { + attributes.insert(SERVICE_NAME.to_string(), service_name); + } + + Resource::builder_empty() + .with_attributes(attributes.into_iter().map(|(key, value)| KeyValue::new(key, value))) + .build() +} + +fn otel_service_name_override() -> Option { + std::env::var("OTEL_SERVICE_NAME") + .ok() + .filter(|value| !value.is_empty()) + .map(Value::from) +} + /// Initializes tracing to a test exporter. /// /// Allows trace content to be inspected via the returned receiver. @@ -206,3 +316,78 @@ fn env_or_default_filter() -> Box + Send + Sync + 'static> { }, } } + +#[cfg(test)] +mod tests { + use opentelemetry::Key; + + use super::*; + + #[test] + fn resource_uses_configured_defaults() { + let detected_resource = Resource::builder_empty() + .with_attributes([KeyValue::new("telemetry.sdk.language", "rust")]) + .build(); + + let resource = resource_from_detected( + ResourceConfig::default() + .with_name("node") + .with_attribute("miden.node.role", "sequencer"), + &detected_resource, + None, + ); + + assert_eq!(resource_value(&resource, "service.name"), Some(Value::from("node")),); + assert_eq!(resource_value(&resource, "service.namespace"), Some(Value::from("miden")),); + assert_eq!(resource_value(&resource, "miden.node.role"), Some(Value::from("sequencer")),); + assert_eq!(resource_value(&resource, "telemetry.sdk.language"), Some(Value::from("rust")),); + } + + #[test] + fn resource_prefers_detected_attributes_over_configured_defaults() { + let detected_resource = Resource::builder_empty() + .with_attributes([ + KeyValue::new("service.name", "custom-node"), + KeyValue::new("service.namespace", "custom-namespace"), + KeyValue::new("miden.node.role", "custom-role"), + ]) + .build(); + + let resource = resource_from_detected( + ResourceConfig::default() + .with_name("node") + .with_attribute("miden.node.role", 
"sequencer"), + &detected_resource, + None, + ); + + assert_eq!(resource_value(&resource, "service.name"), Some(Value::from("custom-node")),); + assert_eq!( + resource_value(&resource, "service.namespace"), + Some(Value::from("custom-namespace")), + ); + assert_eq!(resource_value(&resource, "miden.node.role"), Some(Value::from("custom-role")),); + } + + #[test] + fn resource_prefers_explicit_service_name_override() { + let detected_resource = Resource::builder_empty() + .with_attributes([KeyValue::new("service.name", "resource-attribute-node")]) + .build(); + + let resource = resource_from_detected( + ResourceConfig::default().with_name("node"), + &detected_resource, + Some(Value::from("service-env-node")), + ); + + assert_eq!( + resource_value(&resource, "service.name"), + Some(Value::from("service-env-node")), + ); + } + + fn resource_value(resource: &Resource, key: &'static str) -> Option { + resource.get(&Key::from_static_str(key)) + } +} diff --git a/scripts/run-node.sh b/scripts/run-node.sh index a3148d95c..930debdb3 100755 --- a/scripts/run-node.sh +++ b/scripts/run-node.sh @@ -78,6 +78,16 @@ bootstrap_ntx_builder() { --genesis-block "$VALIDATOR_DIR/genesis.dat" } +node_resource_attributes() { + local instance_id="$1" + + if [[ -n "${OTEL_RESOURCE_ATTRIBUTES:-}" ]]; then + printf "service.instance.id=%s,%s" "$instance_id" "$OTEL_RESOURCE_ATTRIBUTES" + else + printf "service.instance.id=%s" "$instance_id" + fi +} + # --- Kill processes on required ports --- kill_ports @@ -123,8 +133,6 @@ if [[ -n "$KMS_KEY_ID" ]]; then fi echo "Starting validator..." -OTEL_SERVICE_NAME=validator \ -OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden \ "$VALIDATOR_BINARY" start --listen "0.0.0.0:$VALIDATOR_PORT" \ --data-directory "$VALIDATOR_DIR" \ $EXTRA_ARGS \ @@ -135,8 +143,7 @@ PIDS+=($!) sleep 2 echo "Starting sequencer..." 
-OTEL_SERVICE_NAME=node \ -OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=sequencer,miden.node.role=sequencer \ +OTEL_RESOURCE_ATTRIBUTES="$(node_resource_attributes sequencer)" \ "$NODE_BINARY" sequencer \ --rpc.listen "0.0.0.0:$RPC_PORT" \ --data-directory "$NODE_DIR" \ @@ -146,8 +153,6 @@ OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=sequencer,m PIDS+=($!) echo "Starting network transaction builder..." -OTEL_SERVICE_NAME=ntx-builder \ -OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden \ "$NTX_BUILDER_BINARY" start \ --listen "0.0.0.0:$NTX_BUILDER_PORT" \ --rpc.url "http://127.0.0.1:$RPC_PORT" \ @@ -157,8 +162,7 @@ PIDS+=($!) if [[ "$ENABLE_FULL_NODES" == "true" ]]; then echo "Starting full node 1 (upstream: sequencer at 127.0.0.1:$RPC_PORT)..." - OTEL_SERVICE_NAME=node \ - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=full-node-1,miden.node.role=full \ + OTEL_RESOURCE_ATTRIBUTES="$(node_resource_attributes full-node-1)" \ "$NODE_BINARY" full \ --rpc.listen "0.0.0.0:$FULL_NODE_1_RPC_PORT" \ --sync.block-source.url "http://127.0.0.1:$RPC_PORT" \ @@ -170,8 +174,7 @@ if [[ "$ENABLE_FULL_NODES" == "true" ]]; then sleep 2 echo "Starting full node 2 (upstream: full node 1 at 127.0.0.1:$FULL_NODE_1_RPC_PORT)..." 
- OTEL_SERVICE_NAME=node \ - OTEL_RESOURCE_ATTRIBUTES=service.namespace=miden,service.instance.id=full-node-2,miden.node.role=full \ + OTEL_RESOURCE_ATTRIBUTES="$(node_resource_attributes full-node-2)" \ "$NODE_BINARY" full \ --rpc.listen "0.0.0.0:$FULL_NODE_2_RPC_PORT" \ --sync.block-source.url "http://127.0.0.1:$FULL_NODE_1_RPC_PORT" \ From f541a6e0a27501b1b08d91c86190c5ca5fc61574 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 14:27:17 +0200 Subject: [PATCH 14/16] Simplify OTel CLI --- bin/network-monitor/README.md | 10 +++++----- bin/network-monitor/src/commands/start.rs | 7 ++----- bin/network-monitor/src/config.rs | 10 ---------- bin/node/src/commands/mod.rs | 15 +++++---------- bin/node/src/commands/runtime.rs | 21 --------------------- bin/ntx-builder/src/commands/mod.rs | 16 ++++------------ bin/ntx-builder/src/main.rs | 10 +--------- bin/remote-prover/src/main.rs | 2 +- bin/validator/src/commands/mod.rs | 15 ++++----------- bin/validator/src/main.rs | 10 +--------- compose/monitor.yml | 1 - compose/telemetry.yml | 3 --- crates/utils/src/logging.rs | 14 ++++++++++++++ 13 files changed, 37 insertions(+), 97 deletions(-) diff --git a/bin/network-monitor/README.md b/bin/network-monitor/README.md index 51c87ce68..0e2554140 100644 --- a/bin/network-monitor/README.md +++ b/bin/network-monitor/README.md @@ -23,7 +23,7 @@ miden-network-monitor --help # Common usage examples miden-network-monitor start --port 8080 --rpc.listen http://localhost:50051 miden-network-monitor start --remote-prover-urls http://prover1.com:50052,http://prover2.com:50053 -miden-network-monitor start --faucet-url http://localhost:8080 --enable-otel +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 miden-network-monitor start --faucet-url http://localhost:8080 ``` **Available Options:** @@ -41,7 +41,6 @@ miden-network-monitor start --faucet-url http://localhost:8080 --enable-otel - `--request-timeout`: Timeout for 
outgoing requests (default: `10s`) - `--stale-chain-tip-threshold`: Maximum time without a chain tip update before marking RPC as unhealthy (default: `1m`) - `--port, -p`: Web server port (default: `3000`) -- `--enable-otel`: Enable OpenTelemetry tracing - `--counter-increment-interval`: Interval at which to send the increment counter transaction (default: `30s`) - `--counter-pending-unhealthy-threshold`: Mark the Network Transactions card unhealthy when the gap between expected and observed counter values stays above this for several consecutive polls (default: `5`) - `--counter-latency-timeout`: Maximum time to wait for a counter update after submitting a transaction (default: `2m`) @@ -66,7 +65,6 @@ If command-line arguments are not provided, the application falls back to enviro - `MIDEN_MONITOR_REQUEST_TIMEOUT`: Timeout for outgoing requests - `MIDEN_MONITOR_STALE_CHAIN_TIP_THRESHOLD`: Maximum time without a chain tip update before marking RPC as unhealthy - `MIDEN_MONITOR_PORT`: Web server port -- `MIDEN_MONITOR_ENABLE_OTEL`: Enable OpenTelemetry tracing - `MIDEN_MONITOR_COUNTER_INCREMENT_INTERVAL`: Interval at which to send the increment counter transaction - `MIDEN_MONITOR_COUNTER_PENDING_UNHEALTHY_THRESHOLD`: Mark the Network Transactions card unhealthy when the gap between expected and observed counter values stays above this for several consecutive polls - `MIDEN_MONITOR_COUNTER_LATENCY_TIMEOUT`: Maximum time to wait for a counter update after submitting a transaction @@ -122,8 +120,7 @@ miden-network-monitor start \ --remote-prover-test-interval 2m \ --faucet-test-interval 2m \ --status-check-interval 3s \ - --port 8080 \ - --enable-otel + --port 8080 # Get help miden-network-monitor --help @@ -144,6 +141,9 @@ miden-network-monitor start Once running, the monitor will be available at `http://localhost:3000` (or the configured port). 
+OpenTelemetry tracing is enabled automatically when `OTEL_EXPORTER_OTLP_ENDPOINT` or +`OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` is set. + ## Currently Supported Monitor The monitor application provides real-time status monitoring for the following Miden network components: diff --git a/bin/network-monitor/src/commands/start.rs b/bin/network-monitor/src/commands/start.rs index ea7730341..e5fbb0b0c 100644 --- a/bin/network-monitor/src/commands/start.rs +++ b/bin/network-monitor/src/commands/start.rs @@ -28,11 +28,8 @@ use crate::monitor::tasks::Tasks; pub async fn start_monitor(config: MonitorConfig) -> Result<()> { info!("Loaded configuration: {:?}", config); - let _otel_guard = if config.enable_otel { - miden_node_utils::logging::setup_tracing(OpenTelemetry::enabled().with_name("monitor"))? - } else { - miden_node_utils::logging::setup_tracing(OpenTelemetry::Disabled)? - }; + let _otel_guard = + miden_node_utils::logging::setup_tracing(OpenTelemetry::from_env().with_name("monitor"))?; let mut tasks = Tasks::new(); diff --git a/bin/network-monitor/src/config.rs b/bin/network-monitor/src/config.rs index d569247b3..91b7722a4 100644 --- a/bin/network-monitor/src/config.rs +++ b/bin/network-monitor/src/config.rs @@ -94,16 +94,6 @@ pub struct MonitorConfig { )] pub port: u16, - /// Whether to enable OpenTelemetry. - #[arg( - long = "enable-otel", - env = "MIDEN_MONITOR_ENABLE_OTEL", - action = clap::ArgAction::SetTrue, - default_value_t = false, - help = "Whether to enable OpenTelemetry" - )] - pub enable_otel: bool, - /// Whether to disable the network transaction service checks (enabled by default). The network /// transaction service is a network account with a counter deployed at startup and incremented /// by sending a transaction to it. 
diff --git a/bin/node/src/commands/mod.rs b/bin/node/src/commands/mod.rs index f7620b263..e04db6375 100644 --- a/bin/node/src/commands/mod.rs +++ b/bin/node/src/commands/mod.rs @@ -8,6 +8,7 @@ mod store; use clap::Subcommand; pub use lifecycle::{BootstrapCommand, MigrateCommand}; +use miden_node_utils::logging::OpenTelemetry; pub use modes::{FullNodeCommand, SequencerCommand}; const ENV_DATA_DIRECTORY: &str = "MIDEN_NODE_DATA_DIRECTORY"; @@ -51,21 +52,15 @@ pub enum Command { } impl Command { - pub(crate) fn open_telemetry(&self) -> miden_node_utils::logging::OpenTelemetry { + pub(crate) fn open_telemetry(&self) -> OpenTelemetry { match self { - Command::Sequencer(command) => command - .runtime - .open_telemetry() + Command::Sequencer(_) => OpenTelemetry::from_env() .with_name("node") .with_attribute("miden.node.role", "sequencer"), - Command::Full(command) => command - .runtime - .open_telemetry() + Command::Full(_) => OpenTelemetry::from_env() .with_name("node") .with_attribute("miden.node.role", "full"), - Command::Bootstrap(_) | Command::Migrate(_) => { - miden_node_utils::logging::OpenTelemetry::Disabled - }, + Command::Bootstrap(_) | Command::Migrate(_) => OpenTelemetry::Disabled, } } diff --git a/bin/node/src/commands/runtime.rs b/bin/node/src/commands/runtime.rs index 831dbbe0e..4d9d28ec9 100644 --- a/bin/node/src/commands/runtime.rs +++ b/bin/node/src/commands/runtime.rs @@ -3,7 +3,6 @@ use std::path::PathBuf; use miden_node_store::DatabaseOptions; use miden_node_utils::clap::{GrpcOptionsExternal, StorageOptions}; -use miden_node_utils::logging::OpenTelemetry; use super::ENV_DATA_DIRECTORY; use super::rpc::RpcOptions; @@ -18,31 +17,11 @@ pub struct RuntimeOptions { #[arg(long, env = ENV_DATA_DIRECTORY, value_name = "DIR")] pub data_directory: PathBuf, - /// Enables the exporting of traces for OpenTelemetry. - /// - /// This can be further configured using environment variables as defined in the official - /// OpenTelemetry documentation. 
See our operator manual for further details. - #[arg( - long = "enable-otel", - default_value_t = false, - env = "MIDEN_NODE_ENABLE_OTEL", - value_name = "BOOL" - )] - pub enable_otel: bool, - #[command(flatten)] pub rpc: RpcOptions, } impl RuntimeOptions { - pub fn open_telemetry(&self) -> OpenTelemetry { - if self.enable_otel { - OpenTelemetry::enabled() - } else { - OpenTelemetry::Disabled - } - } - pub(super) fn runtime_config(&self, store: &StoreOptions) -> RuntimeConfig { RuntimeConfig { data_directory: self.data_directory.clone(), diff --git a/bin/ntx-builder/src/commands/mod.rs b/bin/ntx-builder/src/commands/mod.rs index 3fb3f6fa0..3b9a7a0a6 100644 --- a/bin/ntx-builder/src/commands/mod.rs +++ b/bin/ntx-builder/src/commands/mod.rs @@ -6,11 +6,11 @@ use std::time::Duration; use anyhow::Context; use clap::Parser; use miden_node_utils::clap::duration_to_human_readable_string; +use miden_node_utils::logging::OpenTelemetry; use tokio::net::TcpListener; use tonic::metadata::AsciiMetadataValue; use url::Url; -const ENV_ENABLE_OTEL: &str = "MIDEN_NODE_ENABLE_OTEL"; const ENV_DATA_DIRECTORY: &str = "MIDEN_NODE_DATA_DIRECTORY"; const ENV_LISTEN: &str = "MIDEN_NODE_NTX_BUILDER_LISTEN"; const ENV_RPC_URL: &str = "MIDEN_NODE_NTX_BUILDER_RPC_URL"; @@ -103,13 +103,6 @@ pub enum NtxBuilderCommand { /// Directory for the ntx-builder's persistent database. #[arg(long = "data-directory", env = ENV_DATA_DIRECTORY, value_name = "DIR")] data_directory: PathBuf, - - /// Enables the exporting of traces for OpenTelemetry. - /// - /// This can be further configured using environment variables as defined in the official - /// OpenTelemetry documentation. See our operator manual for further details. - #[arg(long = "enable-otel", default_value_t = false, env = ENV_ENABLE_OTEL, value_name = "BOOL")] - enable_otel: bool, }, /// Bootstraps the ntx-builder database with the genesis block fetched from the node RPC. 
@@ -164,7 +157,6 @@ impl NtxBuilderCommand { max_tx_cycles, sqlite_connection_pool_size, data_directory, - enable_otel: _, } = self else { unreachable!("start is only called for the Start variant") @@ -197,11 +189,11 @@ impl NtxBuilderCommand { .context("failed while running ntx builder component") } - pub fn is_open_telemetry_enabled(&self) -> bool { + pub fn open_telemetry(&self) -> OpenTelemetry { match self { - Self::Start { enable_otel, .. } => *enable_otel, + Self::Start { .. } => OpenTelemetry::from_env().with_name("ntx-builder"), // Bootstrap is a one-shot command and does not set up a tracing pipeline. - Self::Bootstrap { .. } => false, + Self::Bootstrap { .. } => OpenTelemetry::Disabled, } } } diff --git a/bin/ntx-builder/src/main.rs b/bin/ntx-builder/src/main.rs index c5b0df422..d4136460a 100644 --- a/bin/ntx-builder/src/main.rs +++ b/bin/ntx-builder/src/main.rs @@ -1,19 +1,11 @@ use clap::Parser; -use miden_node_utils::logging::OpenTelemetry; - mod commands; #[tokio::main] async fn main() -> anyhow::Result<()> { let command = commands::NtxBuilderCommand::parse(); - let otel = if command.is_open_telemetry_enabled() { - OpenTelemetry::enabled().with_name("ntx-builder") - } else { - OpenTelemetry::Disabled - }; - - let _otel_guard = miden_node_utils::logging::setup_tracing(otel)?; + let _otel_guard = miden_node_utils::logging::setup_tracing(command.open_telemetry())?; command.handle().await } diff --git a/bin/remote-prover/src/main.rs b/bin/remote-prover/src/main.rs index 619cf314c..a040c8f65 100644 --- a/bin/remote-prover/src/main.rs +++ b/bin/remote-prover/src/main.rs @@ -9,7 +9,7 @@ const COMPONENT: &str = "miden-prover"; #[tokio::main] async fn main() -> anyhow::Result<()> { - let _otel_guard = setup_tracing(OpenTelemetry::enabled().with_name("remote-prover"))?; + let _otel_guard = setup_tracing(OpenTelemetry::from_env().with_name("remote-prover"))?; info!(target: COMPONENT, "Tracing initialized"); let (handle, _port) = diff --git 
a/bin/validator/src/commands/mod.rs b/bin/validator/src/commands/mod.rs index 96fda9abf..506403860 100644 --- a/bin/validator/src/commands/mod.rs +++ b/bin/validator/src/commands/mod.rs @@ -6,6 +6,7 @@ use std::path::PathBuf; use clap::Parser; use miden_node_utils::clap::GrpcOptionsInternal; +use miden_node_utils::logging::OpenTelemetry; use miden_protocol::crypto::dsa::ecdsa_k256_keccak::SigningKey; use miden_protocol::utils::serde::Deserializable; use miden_validator::ValidatorSigner; @@ -14,7 +15,6 @@ const ENV_DATA_DIRECTORY: &str = "MIDEN_NODE_DATA_DIRECTORY"; const ENV_LISTEN: &str = "MIDEN_NODE_VALIDATOR_LISTEN"; const ENV_KEY: &str = "MIDEN_NODE_VALIDATOR_KEY"; const ENV_KMS_KEY_ID: &str = "MIDEN_NODE_VALIDATOR_KMS_KEY_ID"; -const ENV_ENABLE_OTEL: &str = "MIDEN_NODE_ENABLE_OTEL"; const ENV_GENESIS_CONFIG_FILE: &str = "MIDEN_NODE_VALIDATOR_GENESIS_CONFIG_FILE"; const ENV_SQLITE_CONNECTION_POOL_SIZE: &str = "MIDEN_NODE_VALIDATOR_SQLITE_CONNECTION_POOL_SIZE"; @@ -65,13 +65,6 @@ pub enum ValidatorCommand { #[arg(long = "listen", env = ENV_LISTEN, value_name = "LISTEN")] listen: std::net::SocketAddr, - /// Enables the exporting of traces for OpenTelemetry. - /// - /// This can be further configured using environment variables as defined in the official - /// OpenTelemetry documentation. See our operator manual for further details. - #[arg(long = "enable-otel", default_value_t = false, env = ENV_ENABLE_OTEL, value_name = "BOOL")] - enable_otel: bool, - #[command(flatten)] grpc_options: GrpcOptionsInternal, @@ -173,10 +166,10 @@ impl ValidatorCommand { } } - pub fn is_open_telemetry_enabled(&self) -> bool { + pub fn open_telemetry(&self) -> OpenTelemetry { match self { - Self::Start { enable_otel, .. } => *enable_otel, - Self::Bootstrap { .. } => false, + Self::Start { .. } => OpenTelemetry::from_env().with_name("validator"), + Self::Bootstrap { .. 
} => OpenTelemetry::Disabled, } } } diff --git a/bin/validator/src/main.rs b/bin/validator/src/main.rs index c19cabca3..b6a04d5a3 100644 --- a/bin/validator/src/main.rs +++ b/bin/validator/src/main.rs @@ -1,6 +1,4 @@ use clap::Parser; -use miden_node_utils::logging::OpenTelemetry; - mod commands; // MAIN @@ -10,13 +8,7 @@ mod commands; async fn main() -> anyhow::Result<()> { let command = commands::ValidatorCommand::parse(); - let otel = if command.is_open_telemetry_enabled() { - OpenTelemetry::enabled().with_name("validator") - } else { - OpenTelemetry::Disabled - }; - - let _otel_guard = miden_node_utils::logging::setup_tracing(otel)?; + let _otel_guard = miden_node_utils::logging::setup_tracing(command.open_telemetry())?; command.handle().await } diff --git a/compose/monitor.yml b/compose/monitor.yml index 60efac733..70849a83d 100644 --- a/compose/monitor.yml +++ b/compose/monitor.yml @@ -10,7 +10,6 @@ services: - MIDEN_MONITOR_PORT=3001 - MIDEN_MONITOR_NETWORK_NAME=Localhost - MIDEN_MONITOR_DISABLE_NTX_SERVICE=true - - MIDEN_MONITOR_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 ports: - "3001:3001" diff --git a/compose/telemetry.yml b/compose/telemetry.yml index 4c7f3b7a5..4e6f9a24e 100644 --- a/compose/telemetry.yml +++ b/compose/telemetry.yml @@ -25,16 +25,13 @@ services: sequencer: environment: - - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 - OTEL_RESOURCE_ATTRIBUTES=service.instance.id=sequencer validator: environment: - - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 ntx-builder: environment: - - MIDEN_NODE_ENABLE_OTEL=true - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4317 diff --git a/crates/utils/src/logging.rs b/crates/utils/src/logging.rs index 83c646c39..6cf288327 100644 --- a/crates/utils/src/logging.rs +++ b/crates/utils/src/logging.rs @@ -53,6 +53,14 @@ impl OpenTelemetry { OpenTelemetry::Enabled(ResourceConfig::default()) } + pub fn from_env() -> Self { + if 
otlp_endpoint_configured() { + OpenTelemetry::enabled() + } else { + OpenTelemetry::Disabled + } + } + #[must_use] pub fn with_name(self, service_name: &'static str) -> Self { match self { @@ -232,6 +240,12 @@ fn otel_service_name_override() -> Option<Value> { .map(Value::from) } +fn otlp_endpoint_configured() -> bool { + ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", "OTEL_EXPORTER_OTLP_ENDPOINT"] + .into_iter() + .any(|key| std::env::var(key).is_ok_and(|value| !value.trim().is_empty())) +} + /// Initializes tracing to a test exporter. /// /// Allows trace content to be inspected via the returned receiver. From f9291e8dbd9488e56f37b67d849b7f8dd05ee9d5 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 May 2026 14:39:01 +0200 Subject: [PATCH 15/16] Also do prover --- bin/remote-prover/src/main.rs | 8 ++++---- bin/remote-prover/src/server/mod.rs | 7 +++++++ bin/remote-prover/src/server/proof_kind.rs | 16 +++++++++++----- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/bin/remote-prover/src/main.rs b/bin/remote-prover/src/main.rs index a040c8f65..b58ff2dd5 100644 --- a/bin/remote-prover/src/main.rs +++ b/bin/remote-prover/src/main.rs @@ -1,6 +1,5 @@ use anyhow::Context; use clap::Parser; -use miden_node_utils::logging::{OpenTelemetry, setup_tracing}; use tracing::info; mod server; @@ -9,11 +8,12 @@ const COMPONENT: &str = "miden-prover"; #[tokio::main] async fn main() -> anyhow::Result<()> { - let _otel_guard = setup_tracing(OpenTelemetry::from_env().with_name("remote-prover"))?; + let server = server::Server::parse(); + + let _otel_guard = miden_node_utils::logging::setup_tracing(server.open_telemetry())?; info!(target: COMPONENT, "Tracing initialized"); - let (handle, _port) = - server::Server::parse().spawn().await.context("failed to spawn server")?; + let (handle, _port) = server.spawn().await.context("failed to spawn server")?; handle.await.context("proof server panicked").flatten() } diff --git
a/bin/remote-prover/src/server/mod.rs b/bin/remote-prover/src/server/mod.rs index 087820a19..8650e0c78 100644 --- a/bin/remote-prover/src/server/mod.rs +++ b/bin/remote-prover/src/server/mod.rs @@ -4,6 +4,7 @@ use anyhow::Context; use miden_node_proto::generated::remote_prover::api_server::ApiServer; use miden_node_proto::generated::remote_prover::worker_status_api_server::WorkerStatusApiServer; use miden_node_utils::cors::cors_for_grpc_web_layer; +use miden_node_utils::logging::OpenTelemetry; use miden_node_utils::panic::catch_panic_layer_fn; use miden_node_utils::tracing::grpc::grpc_trace_fn; use proof_kind::ProofKind; @@ -47,6 +48,12 @@ pub struct Server { } impl Server { + pub fn open_telemetry(&self) -> OpenTelemetry { + OpenTelemetry::from_env() + .with_name("remote-prover") + .with_attribute("miden.prover.kind", self.kind.as_str()) + } + /// Spawns the prover server, returning its handle and the port it is listening on. pub async fn spawn(&self) -> anyhow::Result<(JoinHandle<anyhow::Result<()>>, u16)> { let listener = TcpListener::bind(format!("0.0.0.0:{}", self.port)) diff --git a/bin/remote-prover/src/server/proof_kind.rs b/bin/remote-prover/src/server/proof_kind.rs index 9971dda15..596783542 100644 --- a/bin/remote-prover/src/server/proof_kind.rs +++ b/bin/remote-prover/src/server/proof_kind.rs @@ -8,6 +8,16 @@ pub enum ProofKind { Block, } +impl ProofKind { + pub const fn as_str(self) -> &'static str { + match self { + ProofKind::Transaction => "transaction", + ProofKind::Batch => "batch", + ProofKind::Block => "block", + } + } +} + impl From<proto::ProofType> for ProofKind { fn from(value: proto::ProofType) -> Self { match value { @@ -20,11 +30,7 @@ impl From<proto::ProofType> for ProofKind { impl std::fmt::Display for ProofKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ProofKind::Transaction => write!(f, "transaction"), - ProofKind::Batch => write!(f, "batch"), - ProofKind::Block => write!(f, "block"), - } + f.write_str(self.as_str()) } } From
97b742d69ae36ce8bf13b47d857ee00686bd9178 Mon Sep 17 00:00:00 2001 From: Mirko von Leipzig <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Sat, 30 May 2026 09:38:16 +0200 Subject: [PATCH 16/16] Ensure ntx data directory is created. --- bin/ntx-builder/src/commands/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/ntx-builder/src/commands/mod.rs b/bin/ntx-builder/src/commands/mod.rs index 9261a4b43..fc32efeeb 100644 --- a/bin/ntx-builder/src/commands/mod.rs +++ b/bin/ntx-builder/src/commands/mod.rs @@ -6,6 +6,7 @@ use std::time::Duration; use anyhow::Context; use clap::Parser; use miden_node_utils::clap::duration_to_human_readable_string; +use miden_node_utils::fs::ensure_empty_directory; use miden_node_utils::logging::OpenTelemetry; use miden_protocol::block::SignedBlock; use miden_protocol::utils::serde::Deserializable; @@ -143,6 +144,7 @@ impl NtxBuilderCommand { match self { Self::Start { .. } => self.start().await, Self::Bootstrap { data_directory, genesis_block } => { + ensure_empty_directory(&data_directory)?; let database_filepath = data_directory.join("ntx-builder.sqlite3"); let genesis = read_genesis_block(&genesis_block)?; miden_ntx_builder::bootstrap(database_filepath, &genesis)