diff --git a/benches/bimodal_ke.rs b/benches/bimodal_ke.rs index b6f12d869..97d805c91 100644 --- a/benches/bimodal_ke.rs +++ b/benches/bimodal_ke.rs @@ -111,4 +111,3 @@ criterion_group! { targets = benchmark_bimodal_ke_npag, benchmark_bimodal_ke_npod, benchmark_bimodal_ke_postprob } criterion_main!(benches); - diff --git a/examples/bestdose_auc.rs b/examples/bestdose_auc.rs index 0bcacc0e4..36e6ddf4e 100644 --- a/examples/bestdose_auc.rs +++ b/examples/bestdose_auc.rs @@ -43,10 +43,8 @@ fn main() -> Result<()> { // Load realistic prior from previous NPAG run (47 support points) println!("Loading prior from bimodal_ke example..."); - let (theta, prior) = Theta::from_file( - "examples/bimodal_ke/output/theta.csv", - ¶meter_space, - )?; + let (theta, prior) = + Theta::from_file("examples/bimodal_ke/output/theta.csv", ¶meter_space)?; let weights = prior.as_ref().unwrap(); println!("Prior: {} support points\n", theta.matrix().nrows()); diff --git a/examples/bestdose_bounds.rs b/examples/bestdose_bounds.rs index 286251618..396451257 100644 --- a/examples/bestdose_bounds.rs +++ b/examples/bestdose_bounds.rs @@ -40,10 +40,8 @@ fn main() -> Result<()> { // Load realistic prior from previous NPAG run println!("Loading prior from bimodal_ke example..."); - let (theta, prior) = Theta::from_file( - "examples/bimodal_ke/output/theta.csv", - ¶meter_space, - )?; + let (theta, prior) = + Theta::from_file("examples/bimodal_ke/output/theta.csv", ¶meter_space)?; let weights = prior.as_ref().unwrap(); println!("Prior: {} support points\n", theta.matrix().nrows()); diff --git a/examples/bestdose_cov.rs b/examples/bestdose_cov.rs index 1cb6ac3e2..41997ae8e 100644 --- a/examples/bestdose_cov.rs +++ b/examples/bestdose_cov.rs @@ -90,10 +90,8 @@ fn main() -> Result<()> { .observation(18.0, conc(6.0, 75.0) + conc(18.0, 150.0), 0) .build(); - let (mut theta, prior) = Theta::from_file( - "examples/bimodal_ke/output/theta.csv", - ¶meter_space, - )?; + let (mut theta, prior) = + Theta::from_file("examples/bimodal_ke/output/theta.csv", ¶meter_space)?; let m_t = theta.matrix_mut(); for i in 0..m_t.nrows() { diff --git a/examples/bimodal_ke_backend_compare.rs b/examples/bimodal_ke_backend_compare.rs index 5aa08b8bc..81cf95807 100644 --- a/examples/bimodal_ke_backend_compare.rs +++ b/examples/bimodal_ke_backend_compare.rs @@ -313,4 +313,3 @@ fn print_summary(results: &[ComparisonResult]) -> Result<()> { Ok(()) } - diff --git a/examples/bootstrap_model.rs b/examples/bootstrap_model.rs new file mode 100644 index 000000000..bdd58b957 --- /dev/null +++ b/examples/bootstrap_model.rs @@ -0,0 +1,104 @@ +//! Model complexity bootstrapping: expand a 1-compartment posterior into a +//! prior for a 2-compartment model using `bootstrap_theta()`. +//! +//! Stage 1 fits (ke, v) with NPAG. Stage 2 expands the posterior into a +//! prior over (ke, v, kcp, kpc) and fits the 2-compartment model. +//! +//! Uses inline synthetic data to keep the example self-contained. + +use pharmsol::SubjectBuilderExt; +use pmcore::prelude::*; + +fn main() -> Result<()> { + // ── Synthetic data ── + let subject = Subject::builder("1") + .bolus(0.0, 100.0, 1) + .observation(1.0, 8.0, 1) + .observation(2.0, 5.0, 1) + .observation(4.0, 2.0, 1) + .build(); + let data = Data::new(vec![subject]); + + // ── Stage 1: 1-compartment ODE ── + + let ode_1cmt = ode! { + name: "one_cmt_bootstrap", + params: [ke, v], + states: [central], + outputs: [outeq_1], + routes: [bolus(input_1) -> central], + diffeq: |x, p, _t, dx, _cov| { + fetch_params!(p, ke); + dx[0] = -ke * x[0]; + }, + out: |x, p, _t, _cov, y| { + fetch_params!(p, v); + y[0] = x[0] / v; + }, + }; + + let params_1cmt = ParameterSpace::bounded() + .add("ke", 0.001, 3.0) + .add("v", 5.0, 50.0); + let prior = Theta::sobol(¶ms_1cmt, 100)?; + let error_models = AssayErrorModels::new().add( + "outeq_1", + AssayErrorModel::additive(ErrorPoly::new(0.0, 0.5, 0.0, 0.0), 0.0), + )?; + + println!("Stage 1: Fitting 1-cmt model..."); + let r1 = EstimationProblem::nonparametric(ode_1cmt, data.clone(), prior, error_models.clone())? + .fit_with(NonParametricAlgorithm::npag())?; + + println!( + " {} support points, OBJF = {:.2}", + r1.get_theta().nspp(), + r1.objf(), + ); + + // ── Stage 2: bootstrap into 2-compartment prior ── + + let ode_2cmt = ode! { + name: "two_cmt_bootstrap", + params: [ke, v, kcp, kpc], + states: [central, peripheral], + outputs: [outeq_1], + routes: [bolus(input_1) -> central], + diffeq: |x, p, _t, dx, _cov| { + fetch_params!(p, ke, _v, kcp, kpc); + dx[0] = -ke * x[0] - kcp * x[0] + kpc * x[1]; + dx[1] = -kpc * x[1] + kcp * x[0]; + }, + out: |x, p, _t, _cov, y| { + fetch_params!(p, _ke, v); + y[0] = x[0] / v; + }, + }; + + let params_2cmt = ParameterSpace::bounded() + .add("ke", 0.001, 3.0) + .add("v", 5.0, 50.0) + .add("kcp", 0.01, 5.0) + .add("kpc", 0.01, 5.0); + + // Expand posterior: common params keep their values, new ones are Sobol-sampled + let prior_2cmt = bootstrap_theta(&r1, &ode_2cmt, ¶ms_2cmt, 500)?; + + println!( + "Prior expanded: {} support points over {:?}", + prior_2cmt.nspp(), + prior_2cmt.parameters().names(), + ); + + println!("Stage 2: Fitting 2-cmt model..."); + let r2 = EstimationProblem::nonparametric(ode_2cmt, data, prior_2cmt, error_models)? + .fit_with(NonParametricAlgorithm::npag())?; + + println!( + " {} support points, OBJF = {:.2}", + r2.get_theta().nspp(), + r2.objf(), + ); + + Ok(()) +} diff --git a/examples/chain_algorithms.rs b/examples/chain_algorithms.rs new file mode 100644 index 000000000..84665e529 --- /dev/null +++ b/examples/chain_algorithms.rs @@ -0,0 +1,65 @@ +//! Algorithm chaining: NPAG(10) → NPOD(5) on a simple 1-compartment IV bolus model. +//! +//! Demonstrates `NonParametricResult::chain()`. +//! Uses inline synthetic data to keep the example self-contained. + +use pharmsol::SubjectBuilderExt; +use pmcore::prelude::*; + +fn main() -> Result<()> { + let eq = ode! { + name: "chain_example", + params: [ke, v], + states: [central], + outputs: [outeq_1], + routes: [bolus(input_1) -> central], + diffeq: |x, p, _t, dx, _cov| { + fetch_params!(p, ke); + dx[0] = -ke * x[0]; + }, + out: |x, p, _t, _cov, y| { + fetch_params!(p, v); + y[0] = x[0] / v; + }, + }; + + // Small synthetic dataset: 2 subjects, 3 obs each + let subject1 = Subject::builder("1") + .bolus(0.0, 100.0, 1) + .observation(1.0, 8.0, 1) + .observation(2.0, 5.0, 1) + .observation(4.0, 2.0, 1) + .build(); + + let subject2 = Subject::builder("2") + .bolus(0.0, 80.0, 1) + .observation(1.0, 6.0, 1) + .observation(2.0, 4.0, 1) + .observation(4.0, 1.5, 1) + .build(); + + let data = Data::new(vec![subject1, subject2]); + + let parameters = ParameterSpace::bounded() + .add("ke", 0.001, 3.0) + .add("v", 5.0, 50.0); + let prior = Theta::sobol(¶meters, 100)?; + let error_models = AssayErrorModels::new().add( + "outeq_1", + AssayErrorModel::additive(ErrorPoly::new(0.0, 0.5, 0.0, 0.0), 0.0), + )?; + + // Chain: NPAG runs first, then NPOD continues from its result + let result = EstimationProblem::nonparametric(eq, data, prior, error_models)? + .fit_with(NpagConfig::new().max_cycles(10))? + .chain(NpodConfig::new().max_cycles(5))?; + + println!( + "Chained NPAG→NPOD: OBJF = {:.2}, {} support points, {} total cycles", + result.objf(), + result.get_theta().nspp(), + result.cycles(), + ); + + Ok(()) +} diff --git a/examples/drusano/main.rs b/examples/drusano/main.rs index 240b69656..fa601d0a9 100644 --- a/examples/drusano/main.rs +++ b/examples/drusano/main.rs @@ -111,4 +111,3 @@ fn main() -> Result<()> { .fit_with(NonParametricAlgorithm::npag())?; Ok(()) } - diff --git a/examples/iov/main.rs b/examples/iov/main.rs index 7e0f5ba4c..5412c3fa6 100644 --- a/examples/iov/main.rs +++ b/examples/iov/main.rs @@ -39,4 +39,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/examples/iov_analysis.rs b/examples/iov_analysis.rs new file mode 100644 index 000000000..353f168ea --- /dev/null +++ b/examples/iov_analysis.rs @@ -0,0 +1,116 @@ +//! SDE Inter-Occasion Variability (IOV) analysis. +//! +//! Stage 1: NPAG fit with ODE to find support points. +//! Stage 2: Optimize SDE diffusion parameter (ske) per support point. +//! +//! Uses small synthetic data and low particle count (50) to keep the example fast. + +use pharmsol::SubjectBuilderExt; +use pmcore::iov::DiffusionConfig; +use pmcore::prelude::*; + +fn main() -> Result<()> { + // ── Synthetic data ── + let subject = Subject::builder("1") + .bolus(0.0, 100.0, 1) + .observation(1.0, 10.0, 1) + .observation(2.0, 6.0, 1) + .observation(4.0, 2.5, 1) + .build(); + let data = Data::new(vec![subject]); + + // ── Stage 1: ODE fit ── + + let ode = ode! { + name: "iov_stage1_ode", + params: [ke, v], + states: [central], + outputs: [outeq_1], + routes: [bolus(input_1) -> central], + diffeq: |x, p, _t, dx, _cov| { + fetch_params!(p, ke); + dx[0] = -ke * x[0]; + }, + out: |x, p, _t, _cov, y| { + fetch_params!(p, v); + y[0] = x[0] / v; + }, + }; + + let parameters = ParameterSpace::bounded() + .add("ke", 0.001, 2.0) + .add("v", 5.0, 50.0); + let prior = Theta::sobol(¶meters, 50)?; + let error_models = AssayErrorModels::new().add( + "outeq_1", + AssayErrorModel::additive(ErrorPoly::new(0.0, 0.5, 0.0, 0.0), 0.0), + )?; + + println!("Stage 1: Fitting ODE with NPAG..."); + let r_ode = EstimationProblem::nonparametric(ode, data.clone(), prior, error_models.clone())? + .fit_with(NonParametricAlgorithm::npag())?; + + let n_spp = r_ode.get_theta().nspp(); + println!(" {} support points, OBJF = {:.2}", n_spp, r_ode.objf()); + + // ── Stage 2: SDE IOV ── + + let sde = sde! { + name: "iov_stage2_sde", + params: [ke, v, ske], + states: [central, ke0], + outputs: [outeq_1], + particles: 50, + routes: [bolus(input_1) -> central], + drift: |x, p, _t, dx, _cov| { + fetch_params!(p, ke); + dx[ke0] = -x[ke0] + ke; + dx[central] = -x[ke0] * x[central]; + }, + diffusion: |p, sigma| { + fetch_params!(p, _ke, _v, ske); + sigma[ke0] = ske; + }, + init: |p, _t, _cov, x| { + fetch_params!(p, ke); + x[ke0] = ke; + }, + out: |x, p, _t, _cov, y| { + fetch_params!(p, _ke, v); + y[0] = x[central] / v; + }, + }; + + let mut joint = r_ode + .get_theta() + .with_added_parameter("ske", 1e-6, 1.0, 0.01)?; + + println!( + "Stage 2: Optimizing sigma for {} support points...", + joint.nspp() + ); + + let diff = sde.optimize_diffusion( + r_ode.data(), + &mut joint, + &["ske".to_string()], + r_ode.error_models(), + DiffusionConfig { + max_iter: 10, + ..DiffusionConfig::default() + }, + )?; + + let n_converged = diff.per_point_converged.iter().filter(|&&c| c).count(); + let mean_ll: f64 = + diff.per_point_likelihood.iter().sum::() / diff.per_point_likelihood.len() as f64; + + println!( + " Converged: {}/{} points, mean log-likelihood: {:.4}", + n_converged, + diff.per_point_converged.len(), + mean_ll, + ); + + Ok(()) +} diff --git a/examples/meta/main.rs b/examples/meta/main.rs index ea89bdfd4..01d7edb64 100644 --- a/examples/meta/main.rs +++ b/examples/meta/main.rs @@ -55,4 +55,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/examples/neely/main.rs b/examples/neely/main.rs index 922547721..a2293828d 100644 --- a/examples/neely/main.rs +++ b/examples/neely/main.rs @@ -77,4 +77,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/examples/new_iov/main.rs b/examples/new_iov/main.rs index 9d1ea6eb6..22ee5413c 100644 --- a/examples/new_iov/main.rs +++ b/examples/new_iov/main.rs @@ -39,4 +39,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/examples/theophylline/main.rs b/examples/theophylline/main.rs index 1d48aaa97..e2ef20ce9 100644 --- a/examples/theophylline/main.rs +++ b/examples/theophylline/main.rs @@ -30,4 +30,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/examples/two_eq_lag/main.rs b/examples/two_eq_lag/main.rs index c6185517d..0afa96c9d 100644 --- a/examples/two_eq_lag/main.rs +++ b/examples/two_eq_lag/main.rs @@ -37,4 +37,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/examples/vanco_sde/main.rs b/examples/vanco_sde/main.rs index 36e77449a..febc0b326 100644 --- a/examples/vanco_sde/main.rs +++ b/examples/vanco_sde/main.rs @@ -65,4 +65,3 @@ fn main() -> Result<()> { Ok(()) } - diff --git a/src/algorithms/nonparametric/npod.rs b/src/algorithms/nonparametric/npod.rs index 634c673f8..f65993563 100644 --- a/src/algorithms/nonparametric/npod.rs +++ b/src/algorithms/nonparametric/npod.rs @@ -85,7 +85,7 @@ impl NPOD { equation, psi: Psi::new(), prior: theta.clone(), - theta: theta, + theta, lambda: Weights::default(), w: Weights::default(), last_objf: -1e30, diff --git a/src/bestdose/types.rs b/src/bestdose/types.rs index 5c08de9c5..5b614ced5 100644 --- a/src/bestdose/types.rs +++ b/src/bestdose/types.rs @@ -194,7 +194,10 @@ pub struct BestDoseConfig { } impl BestDoseConfig { - pub fn new(parameter_space: ParameterSpace, error_models: AssayErrorModels) -> Self { + pub fn new( + parameter_space: ParameterSpace, + error_models: AssayErrorModels, + ) -> Self { Self { parameter_space, error_models, diff --git a/src/estimation/error_models.rs b/src/estimation/error_models.rs index 4dcc27b5b..a48d06e57 100644 --- a/src/estimation/error_models.rs +++ b/src/estimation/error_models.rs @@ -19,8 +19,8 @@ impl ErrorModels { } } -impl Into for ErrorModels { - fn into(self) -> AssayErrorModels { - self.models().clone() +impl From for AssayErrorModels { + fn from(val: ErrorModels) -> Self { + val.models().clone() } } diff --git a/src/estimation/mod.rs b/src/estimation/mod.rs index 2bf9f0104..296c55e7d 100644 --- a/src/estimation/mod.rs +++ b/src/estimation/mod.rs @@ -8,7 +8,5 @@ pub use crate::algorithms::nonparametric::{ }; pub use crate::algorithms::parametric::{ParametricAlgorithm, SaemConfig}; pub use error_models::ErrorModels; -pub use problem::{ - EstimationProblem, Framework, NonParametric, Parametric, ParametricBuilder, -}; +pub use problem::{EstimationProblem, Framework, NonParametric, Parametric, ParametricBuilder}; pub use progress::{FitProgress, NonparametricCycleProgress}; diff --git a/src/estimation/nonparametric/bootstrap.rs b/src/estimation/nonparametric/bootstrap.rs new file mode 100644 index 000000000..0f725cbd6 --- /dev/null +++ b/src/estimation/nonparametric/bootstrap.rs @@ -0,0 +1,596 @@ +//! Construct a prior [`Theta`] by expanding the posterior of a previous fit. +//! +//! This module provides [`bootstrap_theta`], which takes a non-parametric +//! result's posterior distribution and expands it into a prior for a target +//! model — typically a model with more parameters (e.g. 1-compartment → +//! 2-compartment). +//! +//! # How it works +//! +//! 1. Validates the target parameter space against the target equation's metadata. +//! 2. Matches parameters between source and target by name. +//! 3. Allocates Sobol samples per source support point, weighted by population +//! probability. +//! 4. Builds a product distribution: source values for common parameters, +//! Sobol-sampled values for new parameters. + +use std::collections::HashSet; + +use anyhow::bail; +use faer::Mat; + +use super::sampling::DEFAULT_SEED; +use super::{NonParametricResult, Theta, Weights}; +use crate::model::{BoundedParameter, EquationMetadataSource, ParameterSpace}; +use pharmsol::Equation; + +/// Internal: validated parameter mapping between source and target. +struct ParamMapping { + /// For each target column index, the source column index (if common) or None (if new). + source_col: Vec>, + /// Names of parameters in source but not in target (informational). + dropped: Vec, +} + +/// Expand a posterior into a prior for a target model. +/// +/// This is a plain prior constructor — alongside [`Theta::sobol`], +/// [`Theta::latin`], and [`Theta::from_file`]. +/// +/// See the [module-level documentation](self) for details. +pub fn bootstrap_theta( + source: &NonParametricResult, + target_equation: &ETarget, + target_params: &ParameterSpace, + target_points: usize, +) -> anyhow::Result { + // ── Step 0: Validate target_params against the target equation ── + validate_against_equation(target_params, target_equation)?; + + // ── Step 1: Extract source data ── + let source_theta = source.get_theta(); + let source_weights = source.weights(); + let n_source = source_theta.nspp(); + + if n_source == 0 { + bail!("cannot bootstrap from an empty theta (0 support points)"); + } + if target_points == 0 { + bail!("target_points must be > 0"); + } + + // Validate source weights + if source_weights.len() != n_source { + bail!( + "weight count ({}) does not match support point count ({})", + source_weights.len(), + n_source + ); + } + for (i, w) in source_weights.iter().enumerate() { + if !w.is_finite() { + bail!("non-finite weight {} at support point {}", w, i); + } + } + + // ── Step 2: Match parameters by name ── + let mapping = build_param_mapping(source_theta.parameters(), target_params); + + let n_target = target_params.len(); + + // ── Step 3: Allocate samples per source point (weighted) ── + let allocations = allocate_weighted(source_weights, n_source, target_points); + + // ── Step 4: Build target theta matrix ── + let total_allocated: usize = allocations.iter().sum(); + let mut target_matrix = Mat::zeros(total_allocated, n_target); + + let mut row = 0usize; + for i in 0..n_source { + let k_i = allocations[i]; + if k_i == 0 { + continue; + } + + // Seed: mix source point index with the DEFAULT_SEED for reproducibility + let point_seed = (DEFAULT_SEED as u32) + .wrapping_mul(31) + .wrapping_add(i as u32); + + for _j in 0..k_i { + for (tcol, scol_opt) in mapping.source_col.iter().enumerate() { + if let Some(scol) = scol_opt { + // Common parameter: copy value from source + target_matrix[(row, tcol)] = source_theta.matrix()[(i, *scol)]; + } else { + // New parameter: Sobol sample + let bp = &target_params.items[tcol]; + let unscaled = sobol_burley::sample(row as u32, tcol as u32, point_seed) as f64; + target_matrix[(row, tcol)] = bp.lower + unscaled * (bp.upper - bp.lower); + } + } + row += 1; + } + } + + // Warn about dropped params + if !mapping.dropped.is_empty() { + tracing::warn!( + "Dropped parameter(s) present in source but absent from target: {:?}", + mapping.dropped + ); + } + + // Warn if any source values fall outside target bounds + for i in 0..n_source { + if allocations[i] == 0 { + continue; + } + for tcol in 0..n_target { + if let Some(scol) = mapping.source_col[tcol] { + let val = source_theta.matrix()[(i, scol)]; + let bp = &target_params.items[tcol]; + if val < bp.lower || val > bp.upper { + tracing::warn!( + "Source value {} for '{}' at point {} falls outside target bounds [{}, {}]", + val, + bp.name, + i, + bp.lower, + bp.upper + ); + } + } + } + } + + // ── Step 5: Return Theta ── + Theta::from_parts(target_matrix, target_params.clone()) +} + +// ── Internal helpers ── + +/// Validate `params` against `equation`'s metadata. +fn validate_against_equation( + params: &ParameterSpace, + equation: &E, +) -> anyhow::Result<()> { + if params.is_empty() { + bail!("at least one parameter is required"); + } + + // Check bounds + for p in params.iter() { + if !p.lower.is_finite() || !p.upper.is_finite() { + bail!( + "invalid bounds for parameter '{}': bounds must be finite numbers", + p.name + ); + } + if p.lower >= p.upper { + bail!( + "invalid bounds for parameter '{}': lower bound ({}) must be strictly less than upper bound ({})", + p.name, p.lower, p.upper + ); + } + } + + // Check duplicates + let mut seen: HashSet<&str> = HashSet::new(); + for p in params.iter() { + if !seen.insert(p.name.as_str()) { + bail!("duplicate parameter declarations found: {}", p.name); + } + } + + // Check against equation metadata + let metadata = equation + .equation_metadata() + .ok_or_else(|| anyhow::anyhow!("target equation has no metadata"))?; + + let declared_names: Vec<&str> = metadata.parameters().iter().map(|p| p.name()).collect(); + + // Unknown names in params + let unknown: Vec<&str> = params + .iter() + .filter(|p| !declared_names.contains(&p.name.as_str())) + .map(|p| p.name.as_str()) + .collect(); + if !unknown.is_empty() { + bail!( + "unknown parameter name(s): {}. Valid parameters are: {}", + unknown.join(", "), + declared_names.join(", ") + ); + } + + // Declared names missing from params + let param_names: HashSet<&str> = params.iter().map(|p| p.name.as_str()).collect(); + let missing: Vec<&str> = declared_names + .iter() + .filter(|name| !param_names.contains(*name)) + .copied() + .collect(); + if !missing.is_empty() { + bail!("missing parameter declaration(s): {}", missing.join(", ")); + } + + Ok(()) +} + +/// Build column mapping: for each target column, which source column (if any). +fn build_param_mapping( + source_params: &ParameterSpace, + target_params: &ParameterSpace, +) -> ParamMapping { + let mut source_col = Vec::with_capacity(target_params.len()); + let mut dropped = Vec::new(); + + for tp in target_params.iter() { + if let Some(scol) = source_params + .iter() + .position(|sp| sp.name.as_str() == tp.name.as_str()) + { + source_col.push(Some(scol)); + } else { + source_col.push(None); + } + } + + // Find source params not in target + for sp in source_params.iter() { + if !target_params + .iter() + .any(|tp| tp.name.as_str() == sp.name.as_str()) + { + dropped.push(sp.name.clone()); + } + } + + ParamMapping { + source_col, + dropped, + } +} + +/// Weighted allocation: `k_i = round(target_points × w[i] / sum_w)`, +/// with integer adjustment so `sum(k_i) == target_points`. +/// +/// Points with zero or negative weight get `k_i = 0`. +fn allocate_weighted(weights: &Weights, n_source: usize, target_points: usize) -> Vec { + let sum_w: f64 = weights.iter().sum(); + + if sum_w <= 0.0 { + // All weights are zero or negative — fall back to uniform + let base = target_points / n_source; + let rem = target_points % n_source; + let mut alloc = vec![base; n_source]; + for item in alloc.iter_mut().take(rem) { + *item += 1; + } + return alloc; + } + + // First pass: compute raw allocations + let mut alloc: Vec = (0..n_source) + .map(|i| { + let w = weights[i]; + if w <= 0.0 { + return 0; + } + let k = (target_points as f64 * w / sum_w).round() as usize; + k.max(1) // ensure at least 1 per non-zero-weight point + }) + .collect(); + + let total: usize = alloc.iter().sum(); + + // Adjust to hit target_points exactly + if total == target_points { + return alloc; + } + + // If we over-allocated, reduce from highest-weighted points + if total > target_points { + let mut excess = total - target_points; + let mut indices: Vec = (0..n_source).collect(); + indices.sort_by(|&a, &b| { + weights[b] + .partial_cmp(&weights[a]) + .unwrap_or(std::cmp::Ordering::Equal) + }); + for &i in &indices { + if alloc[i] > 0 { + let reduce = excess.min(alloc[i]); + alloc[i] -= reduce; + excess -= reduce; + if excess == 0 { + break; + } + } + } + // If still excess, reduce any point (down to 0) + for item in alloc.iter_mut() { + if excess == 0 { + break; + } + if *item > 0 { + let reduce = excess.min(*item); + *item -= reduce; + excess -= reduce; + } + } + } else { + // Under-allocated: add to highest-weighted points + let mut deficit = target_points - total; + let mut indices: Vec = (0..n_source).collect(); + indices.sort_by(|&a, &b| { + weights[b] + .partial_cmp(&weights[a]) + .unwrap_or(std::cmp::Ordering::Equal) + }); + for &i in &indices { + if deficit == 0 { + break; + } + alloc[i] += 1; + deficit -= 1; + } + } + + alloc +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::model::ParameterSpace; + use pharmsol::prelude::data::AssayErrorModels; + use pharmsol::SubjectBuilderExt; + + // ── Helpers ── + + /// Build a trivial ODE that declares the given parameter names. + /// Used as a target equation for metadata validation in bootstrap tests. + fn target_ode_for(param_names: &[&str]) -> pharmsol::ODE { + pharmsol::equation::ODE::new( + |_x, _p, _t, _dx, _b, _rateiv, _cov| {}, + |_p, _t, _cov| pharmsol::lag! {}, + |_p, _t, _cov| pharmsol::fa! {}, + |_p, _t, _cov, _x| {}, + |_x, _p, _t, _cov, y| { + y[0] = 1.0; + }, + ) + .with_nstates(1) + .with_ndrugs(0) + .with_nout(1) + .with_metadata( + pharmsol::equation::metadata::new("bootstrap_test") + .parameters(param_names.iter().copied()) + .states(["central"]) + .outputs(["cp"]), + ) + .expect("metadata should validate") + } + + /// Build a minimal NonParametricResult for testing. + /// The equation's metadata is derived from theta's parameter names. + fn make_test_result(theta: Theta, weights: Weights) -> NonParametricResult { + use crate::algorithms::{Status, StopReason}; + use crate::estimation::nonparametric::Psi; + + let param_names_list = theta.parameters().names(); + let param_names: Vec<&str> = param_names_list.iter().map(|s| s.as_str()).collect(); + let ode = target_ode_for(¶m_names); + + let data = pharmsol::Data::from( + pharmsol::Subject::builder("test") + .observation(0.0, 1.0, "cp") + .build(), + ); + + let err = AssayErrorModels::new() + .add( + "cp", + pharmsol::AssayErrorModel::additive( + pharmsol::ErrorPoly::new(0.0, 1.0, 0.0, 0.0), + 0.0, + ), + ) + .unwrap(); + + let cycle_log = crate::estimation::nonparametric::CycleLog::new(); + let psi = Psi::from(faer::Mat::from_fn(1, weights.len(), |_r, c| weights[c])); + + NonParametricResult::new( + ode, + data, + err, + theta.clone(), + theta, + psi, + weights, + -10.0, + 1, + Status::Stop(StopReason::Converged), + cycle_log, + ) + .unwrap() + } + + // ── Tests ── + + #[test] + fn identity_bootstrap_same_params_same_count() { + let params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0); + let theta = Theta::sobol_with_seed(¶ms, 10, 42).unwrap(); + let weights = Weights::uniform(10); + let result = make_test_result(theta.clone(), weights); + + let boot = bootstrap_theta(&result, result.equation(), ¶ms, 10).unwrap(); + + assert_eq!(boot.nspp(), 10); + assert_eq!(boot.parameters().names(), vec!["ke", "v"]); + for r in 0..10 { + for c in 0..2 { + assert_eq!(boot.matrix()[(r, c)], theta.matrix()[(r, c)]); + } + } + } + + #[test] + fn upsample_with_new_params() { + let source_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0); + let source_theta = Theta::sobol_with_seed(&source_params, 4, 42).unwrap(); + let weights = Weights::uniform(4); + let result = make_test_result(source_theta, weights); + + let target_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0) + .add("kcp", 0.01, 5.0); + + let target_eq = target_ode_for(&["ke", "v", "kcp"]); + let boot = bootstrap_theta(&result, &target_eq, &target_params, 20).unwrap(); + + assert_eq!(boot.nspp(), 20); + assert_eq!(boot.matrix().ncols(), 3); + assert_eq!(boot.parameters().names(), vec!["ke", "v", "kcp"]); + + for r in 0..20 { + let val = boot.matrix()[(r, 2)]; + assert!(val >= 0.01, "kcp value {} below lower bound", val); + assert!(val <= 5.0, "kcp value {} above upper bound", val); + } + } + + #[test] + fn reordering_handled_by_name() { + let source_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0); + let source_theta = Theta::sobol_with_seed(&source_params, 3, 42).unwrap(); + let weights = Weights::uniform(3); + let result = make_test_result(source_theta.clone(), weights); + + let target_params = ParameterSpace::bounded() + .add("v", 5.0, 50.0) + .add("ke", 0.1, 1.0); + + let boot = bootstrap_theta(&result, result.equation(), &target_params, 3).unwrap(); + + for r in 0..3 { + for c in 0..2 { + let original_val = source_theta.matrix()[(r, c)]; + let source_name = source_params.items[c].name.as_str(); + let target_col = target_params + .iter() + .position(|p| p.name.as_str() == source_name) + .unwrap(); + assert_eq!(boot.matrix()[(r, target_col)], original_val); + } + } + } + + #[test] + fn downsample_with_weighted_selection() { + let params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let theta = Theta::sobol_with_seed(¶ms, 10, 42).unwrap(); + let mut w = vec![0.01; 10]; + w[0] = 10.0; + w[1] = 5.0; + let weights = Weights::from_vec(w); + let result = make_test_result(theta, weights); + + let boot = bootstrap_theta(&result, result.equation(), ¶ms, 5).unwrap(); + assert_eq!(boot.nspp(), 5); + } + + #[test] + fn dropped_params_warns() { + let source_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0) + .add("cl", 1.0, 100.0); + let source_theta = Theta::sobol_with_seed(&source_params, 3, 42).unwrap(); + let weights = Weights::uniform(3); + let result = make_test_result(source_theta, weights); + + let target_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0); + let target_eq = target_ode_for(&["ke", "v"]); + + let boot = bootstrap_theta(&result, &target_eq, &target_params, 3).unwrap(); + assert_eq!(boot.matrix().ncols(), 2); // cl dropped + } + + #[test] + fn empty_source_errors() { + let source_params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let source_theta = Theta::from_parts(Mat::zeros(0, 1), source_params).unwrap(); + let weights = Weights::uniform(0); + let result = make_test_result(source_theta, weights); + + let target_params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let err = bootstrap_theta(&result, result.equation(), &target_params, 10).unwrap_err(); + assert!(err.to_string().contains("empty theta")); + } + + #[test] + fn metadata_validation_rejects_unknown_param() { + let source_params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let theta = Theta::sobol_with_seed(&source_params, 3, 42).unwrap(); + let weights = Weights::uniform(3); + let result = make_test_result(theta, weights); + + let bad_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("nonexistent", 0.0, 1.0); + + let err = bootstrap_theta(&result, result.equation(), &bad_params, 10).unwrap_err(); + assert!(err.to_string().contains("unknown parameter")); + assert!(err.to_string().contains("nonexistent")); + } + + #[test] + fn metadata_validation_rejects_missing_param() { + let source_params = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 5.0, 50.0); + let theta = Theta::sobol_with_seed(&source_params, 3, 42).unwrap(); + let weights = Weights::uniform(3); + let result = make_test_result(theta, weights); + + let bad_params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let err = bootstrap_theta(&result, result.equation(), &bad_params, 10).unwrap_err(); + assert!(err.to_string().contains("missing parameter")); + } + + #[test] + fn zero_target_points_errors() { + let params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let theta = Theta::sobol_with_seed(¶ms, 3, 42).unwrap(); + let weights = Weights::uniform(3); + let result = make_test_result(theta, weights); + + let err = bootstrap_theta(&result, result.equation(), ¶ms, 0).unwrap_err(); + assert!(err.to_string().contains("target_points must be > 0")); + } + + #[test] + fn nan_weights_errors() { + let params = ParameterSpace::bounded().add("ke", 0.1, 1.0); + let theta = Theta::sobol_with_seed(¶ms, 3, 42).unwrap(); + let weights = Weights::from_vec(vec![0.5, f64::NAN, 0.5]); + let result = make_test_result(theta, weights); + + let err = bootstrap_theta(&result, result.equation(), ¶ms, 3).unwrap_err(); + assert!(err.to_string().contains("non-finite weight")); + } +} diff --git a/src/estimation/nonparametric/mod.rs b/src/estimation/nonparametric/mod.rs index c3cdb91cb..7cfbc2f6a 100644 --- a/src/estimation/nonparametric/mod.rs +++ b/src/estimation/nonparametric/mod.rs @@ -1,19 +1,21 @@ mod cycles; +mod bootstrap; mod expansion; pub(crate) mod ipm; mod posterior; mod predictions; -pub mod sampling; mod psi; pub(crate) mod qr; mod result; +pub mod sampling; mod statistics; mod summaries; mod theta; mod weights; +pub use bootstrap::bootstrap_theta; pub use cycles::{CycleLog, NPCycle}; pub(crate) use expansion::adaptative_grid; pub use ipm::burke; diff --git a/src/estimation/nonparametric/result.rs b/src/estimation/nonparametric/result.rs index 96cc75bd3..64caa177d 100644 --- a/src/estimation/nonparametric/result.rs +++ b/src/estimation/nonparametric/result.rs @@ -89,6 +89,36 @@ impl NonParametricResult { &self.cyclelog } + /// Chain this result into a new fit with a different algorithm. + /// + /// Uses the optimized theta as the prior for the new run. Keeps the same + /// equation, data, and error models. Consumes `self`. + /// + /// # Example + /// + /// ```ignore + /// let result = problem + /// .fit_with(NpagConfig::new().max_cycles(100))? + /// .chain(NpodConfig::new().max_cycles(50))?; + /// ``` + pub fn chain(self, algorithm: A) -> anyhow::Result> + where + A: crate::algorithms::Algorithm< + E, + crate::estimation::NonParametric, + Output = NonParametricResult, + >, + E: crate::model::EquationMetadataSource, + { + crate::estimation::EstimationProblem::nonparametric( + self.equation, + self.data, + self.theta, + self.error_models, + )? + .fit_with(algorithm) + } + pub fn psi(&self) -> &Psi { &self.psi } @@ -262,3 +292,144 @@ impl NonParametricResult { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::algorithms::nonparametric::{NpagConfig, NpodConfig}; + use crate::estimation::EstimationProblem; + use crate::model::ParameterSpace; + use pharmsol::equation::metadata; + use pharmsol::prelude::data::{AssayErrorModel, AssayErrorModels}; + use pharmsol::{ErrorPoly, SubjectBuilderExt}; + + fn minimal_ode() -> pharmsol::ODE { + pharmsol::equation::ODE::new( + |x, p, _t, dx, b, _rateiv, _cov| { + let ke = p[0]; + dx[0] = -ke * x[0] + b[0]; + }, + |_p, _t, _cov| pharmsol::lag! {}, + |_p, _t, _cov| pharmsol::fa! {}, + |_p, _t, _cov, _x| {}, + |x, p, _t, _cov, y| { + let v = p[1]; + y[0] = x[0] / v; + }, + ) + .with_nstates(1) + .with_ndrugs(1) + .with_nout(1) + .with_metadata( + metadata::new("chain_test") + .parameters(["ke", "v"]) + .states(["central"]) + .outputs(["0"]) + .route(pharmsol::equation::Route::bolus("0").to_state("central")), + ) + .expect("metadata should validate") + } + + fn minimal_data() -> pharmsol::Data { + let subject = pharmsol::Subject::builder("1") + .bolus(0.0, 100.0, 0) + .observation(1.0, 10.0, 0) + .observation(2.0, 8.0, 0) + .build(); + pharmsol::Data::new(vec![subject]) + } + + #[test] + fn chain_npag_to_npod_preserves_support_points() { + let ode = minimal_ode(); + let data = minimal_data(); + let params = ParameterSpace::bounded() + .add("ke", 0.001, 3.0) + .add("v", 25.0, 250.0); + let prior = Theta::sobol_default(¶ms).unwrap(); + let err = AssayErrorModels::new() + .add( + "0", + AssayErrorModel::additive(ErrorPoly::new(0.0, 0.5, 0.0, 0.0), 0.0), + ) + .unwrap(); + + let r1 = EstimationProblem::nonparametric(ode, data, prior, err) + .unwrap() + .fit_with(NpagConfig::new().max_cycles(2)) + .unwrap(); + + let n_spp = r1.get_theta().nspp(); + assert!(n_spp > 0, "NPAG should produce support points"); + + // Chain into NPOD — should complete without error + let r2 = r1.chain(NpodConfig::new().max_cycles(1)).unwrap(); + + assert!(r2.get_theta().nspp() > 0); + assert_eq!(r2.data().subjects().len(), 1); + assert_eq!(r2.cycles(), 1); + } + + #[test] + fn chain_npag_to_npag_maintains_or_improves_objf() { + let ode = minimal_ode(); + let data = minimal_data(); + let params = ParameterSpace::bounded() + .add("ke", 0.001, 3.0) + .add("v", 25.0, 250.0); + let prior = Theta::sobol_with_seed(¶ms, 5, 42).unwrap(); + let err = AssayErrorModels::new() + .add( + "0", + AssayErrorModel::additive(ErrorPoly::new(0.0, 0.5, 0.0, 0.0), 0.0), + ) + .unwrap(); + + let r1 = EstimationProblem::nonparametric(ode, data, prior, err) + .unwrap() + .fit_with(NpagConfig::new().max_cycles(5)) + .unwrap(); + + let objf1 = r1.objf(); + + // Chain into another NPAG run + let r2 = r1.chain(NpagConfig::new().max_cycles(3)).unwrap(); + + // Second run should not regress significantly + assert!( + r2.objf() <= objf1 + 0.5, + "OBJF regressed: {} -> {}", + objf1, + r2.objf() + ); + } + + #[test] + fn chain_npag_to_npod_with_unconverged_result() { + let ode = minimal_ode(); + let data = minimal_data(); + let params = ParameterSpace::bounded() + .add("ke", 0.001, 3.0) + .add("v", 25.0, 250.0); + let prior = Theta::sobol_with_seed(¶ms, 5, 42).unwrap(); + let err = AssayErrorModels::new() + .add( + "0", + AssayErrorModel::additive(ErrorPoly::new(0.0, 0.5, 0.0, 0.0), 0.0), + ) + .unwrap(); + + // Run only 1 cycle — will be unconverged + let r1 = EstimationProblem::nonparametric(ode, data, prior, err) + .unwrap() + .fit_with(NpagConfig::new().max_cycles(1)) + .unwrap(); + + // Chaining from unconverged should still work + let r2 = r1.chain(NpodConfig::new().max_cycles(1)); + assert!( + r2.is_ok(), + "Chaining from unconverged result should succeed" + ); + } +} diff --git a/src/estimation/nonparametric/sampling/latin.rs b/src/estimation/nonparametric/sampling/latin.rs index b6f7f4f32..b7051c73d 100644 --- a/src/estimation/nonparametric/sampling/latin.rs +++ b/src/estimation/nonparametric/sampling/latin.rs @@ -6,7 +6,11 @@ use rand::rngs::StdRng; use crate::estimation::nonparametric::Theta; use crate::model::{BoundedParameter, ParameterSpace}; -pub fn generate(parameters: &ParameterSpace, points: usize, seed: usize) -> Result { +pub fn generate( + parameters: &ParameterSpace, + points: usize, + seed: usize, +) -> Result { let ranges = parameters.finite_ranges(); let mut rng = StdRng::seed_from_u64(seed as u64); diff --git a/src/estimation/nonparametric/theta.rs b/src/estimation/nonparametric/theta.rs index abe0c0a70..f0504c677 100644 --- a/src/estimation/nonparametric/theta.rs +++ b/src/estimation/nonparametric/theta.rs @@ -144,6 +144,61 @@ impl Theta { true } + /// Create a new Theta with an additional parameter column. + /// + /// All existing rows keep their values. The new column is filled with + /// `initial_value` for every row. Returns a new `Theta` — does not + /// mutate in place. + /// + /// # Errors + /// + /// - `name` already exists in the parameter space + /// - `lower` or `upper` are non-finite + /// - `lower >= upper` + pub fn with_added_parameter( + &self, + name: &str, + lower: f64, + upper: f64, + initial_value: f64, + ) -> Result { + // Validate uniqueness + if self.parameters().iter().any(|p| p.name.as_str() == name) { + bail!("parameter '{}' already exists in theta", name); + } + + // Validate bounds + if !lower.is_finite() || !upper.is_finite() { + bail!( + "bounds must be finite for parameter '{}': [{}, {}]", + name, + lower, + upper + ); + } + if lower >= upper { + bail!( + "lower bound ({}) must be strictly less than upper bound ({}) for parameter '{}'", + lower, + upper, + name + ); + } + + let (nrows, ncols) = (self.matrix().nrows(), self.matrix().ncols()); + let new_matrix = faer::Mat::from_fn(nrows, ncols + 1, |r, c| { + if c < ncols { + self.matrix()[(r, c)] + } else { + initial_value + } + }); + + let new_params = self.parameters().clone().add(name, lower, upper); + + Theta::from_parts(new_matrix, new_params) + } + /// Write the matrix to a CSV file pub fn write(&self, path: &str) { let mut writer = csv::Writer::from_path(path).unwrap(); diff --git a/src/iov/mod.rs b/src/iov/mod.rs new file mode 100644 index 000000000..7f4322b1b --- /dev/null +++ b/src/iov/mod.rs @@ -0,0 +1,243 @@ +//! SDE-based Inter-Occasion Variability (IOV) analysis. +//! +//! This module provides [`optimize_diffusion`], which optimizes SDE diffusion +//! (sigma) parameters for each support point independently, using the NelderMead +//! algorithm. The optimization runs in parallel over support points via rayon. +//! +//! # Workflow +//! +//! 1. Fit an ODE model with NPAG/NPOD to obtain support points (Stage 1). +//! 2. Add sigma parameter columns to the theta using [`Theta::with_added_parameter`]. +//! 3. Construct an SDE model (user-provided) that maps sigma parameters to +//! diffusion terms. +//! 4. Call [`optimize_diffusion`] to optimize sigma per support point. +//! +//! # Example +//! +//! ```ignore +//! use pmcore::prelude::*; +//! use pmcore::iov::DiffusionOptimize; +//! use pmcore::iov::DiffusionConfig; +//! +//! let r_ode = problem.fit_with(NPAG::default())?; +//! let mut joint = r_ode.get_theta() +//! .with_added_parameter("ske", 1e-6, 1.0, 0.01)?; +//! +//! let diff = sde.optimize_diffusion( +//! &r_ode.data(), &mut joint, +//! &["ske"], &r_ode.error_models(), +//! DiffusionConfig::default(), +//! )?; +//! ``` + +mod optimizer; + +use anyhow::bail; +use rayon::prelude::*; + +use pharmsol::prelude::data::AssayErrorModels; +use pharmsol::{Data, SDE}; + +use crate::estimation::nonparametric::Theta; + +/// Configuration for SDE diffusion parameter optimization. +#[derive(Debug, Clone)] +pub struct DiffusionConfig { + /// Maximum NelderMead iterations per support point. + /// + /// For 1D sigma optimization, convergence typically occurs in 10-20 + /// iterations. Higher values are a safety net for difficult cases. + /// Default: 25. + pub max_iter: usize, + + /// Convergence tolerance on simplex standard deviation. + /// + /// NelderMead stops when the standard deviation of function values + /// across the simplex vertices falls below this threshold. + /// Default: 1e-3. + pub sd_tolerance: f64, + + /// Relative perturbation for initial simplex construction. + /// + /// For a 1D parameter value `x`, the simplex is `[x, x·(1 + perturbation)]`. + /// For N-D, N+1 vertices are constructed, each perturbed along one axis. + /// Default: 0.1. + pub initial_perturbation: f64, +} + +impl Default for DiffusionConfig { + fn default() -> Self { + Self { + max_iter: 25, + sd_tolerance: 1e-3, + initial_perturbation: 0.1, + } + } +} + +/// Results of SDE diffusion parameter optimization. +#[derive(Debug, Clone)] +pub struct DiffusionResult { + /// Final log-likelihood for each support point after optimization. + /// Length equals `theta.nspp()`. + pub per_point_likelihood: Vec, + + /// Number of NelderMead iterations used for each point. + pub per_point_iterations: Vec, + + /// Whether NelderMead converged within `max_iter` for each point. + pub per_point_converged: Vec, +} + +/// Trait for SDEs that support diffusion parameter optimization. +/// +/// This enables method-style calls: `sde.optimize_diffusion(...)`. +pub trait DiffusionOptimize { + /// Optimize SDE diffusion parameters for each support point independently. + /// + /// Modifies `theta` **in-place**: for each support point, the sigma parameter + /// columns are replaced with values that maximize the log-likelihood of all + /// subjects under this SDE. Primary (non-sigma) parameter values are held fixed. + /// + /// # Panics + /// + /// Panics if any name in `sigma_params` is not found in `theta.parameters()`. + fn optimize_diffusion( + &self, + data: &Data, + theta: &mut Theta, + sigma_params: &[String], + error_models: &AssayErrorModels, + config: DiffusionConfig, + ) -> anyhow::Result; +} + +impl DiffusionOptimize for SDE { + fn optimize_diffusion( + &self, + data: &Data, + theta: &mut Theta, + sigma_params: &[String], + error_models: &AssayErrorModels, + config: DiffusionConfig, + ) -> anyhow::Result { + optimize_diffusion(self, data, theta, sigma_params, error_models, config) + } +} + +/// Optimize SDE diffusion parameters for each support point independently. +/// +/// Free-function form of [`DiffusionOptimize::optimize_diffusion`]. +/// Prefer `sde.optimize_diffusion(...)` for readability. +pub fn optimize_diffusion( + sde: &SDE, + data: &Data, + theta: &mut Theta, + sigma_params: &[String], + error_models: &AssayErrorModels, + config: DiffusionConfig, +) -> anyhow::Result { + let n_spp = theta.nspp(); + if n_spp == 0 { + bail!("theta has no support points"); + } + + // Resolve sigma parameter indices in theta + let sigma_indices: Vec = sigma_params + .iter() + .map(|name| { + theta + .parameters() + .iter() + .position(|p| p.name.as_str() == name.as_str()) + .unwrap_or_else(|| { + panic!( + "sigma parameter '{}' not found in theta parameters: {:?}", + name, + theta.parameters().names() + ) + }) + }) + .collect(); + + // Identify primary parameter indices (all others) + let n_total = theta.matrix().ncols(); + let sigma_set: std::collections::HashSet = sigma_indices.iter().copied().collect(); + let primary_indices: Vec = (0..n_total).filter(|i| !sigma_set.contains(i)).collect(); + + // Check for sigma initialized to zero + for &si in &sigma_indices { + for r in 0..n_spp { + if theta.matrix()[(r, si)] == 0.0 { + tracing::warn!( + "sigma parameter at column {} (support point {}) initialized to 0.0; \ + the SDE degenerates to an ODE at sigma=0. Consider using a small \ + non-zero initial value (e.g., 0.01)", + si, + r + ); + } + } + } + + // Parallel optimization over support points + let results: Vec = (0..n_spp) + .into_par_iter() + .map(|i| { + // Extract fixed primary parameters + let primary: Vec = primary_indices + .iter() + .map(|&pi| theta.matrix()[(i, pi)]) + .collect(); + + // Extract initial sigma values + let sigma_init: Vec = sigma_indices + .iter() + .map(|&si| theta.matrix()[(i, si)]) + .collect(); + + // Build the cost function + let cost = optimizer::SigmaCost::new( + sde, + data, + &primary, + &primary_indices, + &sigma_indices, + error_models, + ); + + // Run NelderMead + optimizer::optimize_sigma(cost, &sigma_init, &config) + }) + .collect(); + + // Update theta with optimized sigma values + let mut per_point_likelihood = Vec::with_capacity(n_spp); + let mut per_point_iterations = Vec::with_capacity(n_spp); + let mut per_point_converged = Vec::with_capacity(n_spp); + + for (i, outcome) in results.iter().enumerate() { + for (j, &si) in sigma_indices.iter().enumerate() { + theta.matrix_mut()[(i, si)] = outcome.optimized_params[j]; + } + per_point_likelihood.push(-outcome.final_cost); + per_point_iterations.push(outcome.iterations); + per_point_converged.push(outcome.converged); + } + + let n_converged = per_point_converged.iter().filter(|&&c| c).count(); + tracing::info!( + "SDE IOV optimization: {}/{} support points converged, \ + mean iterations: {:.1}, mean log-likelihood: {:.2}", + n_converged, + n_spp, + per_point_iterations.iter().sum::() as f64 / n_spp.max(1) as f64, + per_point_likelihood.iter().sum::() / n_spp.max(1) as f64, + ); + + Ok(DiffusionResult { + per_point_likelihood, + per_point_iterations, + per_point_converged, + }) +} diff --git a/src/iov/optimizer.rs b/src/iov/optimizer.rs new file mode 100644 index 000000000..9c281588b --- /dev/null +++ b/src/iov/optimizer.rs @@ -0,0 +1,196 @@ +//! NelderMead-based sigma optimization for SDE IOV. +//! +//! Internal module — not exposed publicly. + +use argmin::core::{CostFunction, Error, Executor}; +use argmin::solver::neldermead::NelderMead; +use pharmsol::prelude::data::AssayErrorModels; +use pharmsol::prelude::simulator::Equation; +use pharmsol::{Data, SDE}; + +use super::DiffusionConfig; + +/// Outcome of a single support point's sigma optimization. +pub(crate) struct OptimizationOutcome { + /// Optimized sigma parameter values (in order matching input). + pub optimized_params: Vec, + /// Final cost (-log_likelihood) at the optimum. + pub final_cost: f64, + /// Number of NelderMead iterations executed. + pub iterations: usize, + /// Whether the solver converged within max_iter. + pub converged: bool, +} + +/// Cost function: -Σ log P(data_i | primary_fixed, sigma_params, SDE) +/// +/// Minimizing this maximizes the total log-likelihood across all subjects. +pub(crate) struct SigmaCost<'a> { + sde: &'a SDE, + data: &'a Data, + /// Fixed primary parameter values. + primary: Vec, + /// Column indices in theta where primary parameters are located. + primary_indices: Vec, + /// Column indices in theta where sigma parameters are located. + sigma_indices: Vec, + error_models: &'a AssayErrorModels, + /// Total number of columns in the full parameter vector. + n_total: usize, +} + +impl<'a> SigmaCost<'a> { + pub(crate) fn new( + sde: &'a SDE, + data: &'a Data, + primary: &[f64], + primary_indices: &[usize], + sigma_indices: &[usize], + error_models: &'a AssayErrorModels, + ) -> Self { + let n_total = primary.len() + sigma_indices.len(); + Self { + sde, + data, + primary: primary.to_vec(), + primary_indices: primary_indices.to_vec(), + sigma_indices: sigma_indices.to_vec(), + error_models, + n_total, + } + } + + /// Build the full parameter vector with values at their correct column positions. + fn build_params(&self, sigma: &[f64]) -> Vec { + let mut full = vec![0.0; self.n_total]; + + for (&pi, &val) in self.primary_indices.iter().zip(self.primary.iter()) { + full[pi] = val; + } + for (&si, &val) in self.sigma_indices.iter().zip(sigma) { + full[si] = val; + } + + full + } +} + +impl CostFunction for SigmaCost<'_> { + type Param = Vec; + type Output = f64; + + fn cost(&self, sigma: &Self::Param) -> Result { + // Bounds check: sigma must stay in [0, ∞) to keep the SDE well-defined. + if sigma.iter().any(|&s| s < 0.0) { + return Ok(1e10); + } + + let full_params = self.build_params(sigma); + + let total_ll: f64 = self + .data + .subjects() + .iter() + .map(|subject| -> Result { + self.sde + .estimate_log_likelihood_dense(subject, &full_params, self.error_models) + .map_err(|e| Error::msg(e.to_string())) + }) + .collect::, _>>()? + .iter() + .sum(); + + if !total_ll.is_finite() { + return Ok(1e10); + } + + Ok(-total_ll) + } +} + +/// Run NelderMead optimization on sigma parameters. +pub(crate) fn optimize_sigma( + cost: SigmaCost<'_>, + sigma_init: &[f64], + config: &DiffusionConfig, +) -> OptimizationOutcome { + let simplex = build_simplex(sigma_init, config.initial_perturbation); + + let solver: NelderMead, f64> = NelderMead::new(simplex) + .with_sd_tolerance(config.sd_tolerance) + .expect("NelderMead construction should succeed with valid parameters"); + + let result = Executor::new(cost, solver) + .configure(|state| state.max_iters(config.max_iter as u64)) + .run(); + + match result { + Ok(res) => { + let best = res.state.best_param.unwrap_or_else(|| sigma_init.to_vec()); + let cost_val = res.state.best_cost; + let iterations = res.state.iter as usize; + + OptimizationOutcome { + optimized_params: best, + final_cost: cost_val, + iterations, + converged: iterations < config.max_iter, + } + } + Err(e) => { + tracing::warn!( + "NelderMead optimization failed for a support point: {}. \ + Returning initial values.", + e + ); + OptimizationOutcome { + optimized_params: sigma_init.to_vec(), + final_cost: f64::INFINITY, + iterations: 0, + converged: false, + } + } + } +} + +/// Build initial simplex for NelderMead. +/// +/// For N-dimensional optimization, the simplex has N+1 vertices. +/// Vertex 0 is the initial point. Vertices 1..N perturb along each axis. +fn build_simplex(initial: &[f64], perturbation: f64) -> Vec> { + let n = initial.len(); + let mut vertices = Vec::with_capacity(n + 1); + vertices.push(initial.to_vec()); + + for i in 0..n { + let mut vertex = initial.to_vec(); + if initial[i] == 0.0 { + vertex[i] = 1e-3; + } else { + vertex[i] *= 1.0 + perturbation; + } + vertices.push(vertex); + } + + vertices +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simplex_has_correct_shape() { + let simplex = build_simplex(&[0.5, 0.3], 0.1); + assert_eq!(simplex.len(), 3); + assert_eq!(simplex[0], vec![0.5, 0.3]); + assert_eq!(simplex[1], vec![0.55, 0.3]); + assert_eq!(simplex[2], vec![0.5, 0.33]); + } + + #[test] + fn simplex_handles_zero_init() { + let simplex = build_simplex(&[0.0, 0.5], 0.1); + assert_eq!(simplex[1], vec![1e-3, 0.5]); + } +} diff --git a/src/lib.rs b/src/lib.rs index 1ef54e274..3c7d91044 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,9 @@ pub use std::collections::HashMap; // BestDose pub mod bestdose; +/// SDE-based Inter-Occasion Variability optimization. +pub mod iov; + /// A collection of commonly used items to simplify imports. pub mod prelude { pub use super::logs::Logger; @@ -68,8 +71,10 @@ pub mod prelude { }; pub use crate::estimation::nonparametric::{ - CycleLog, NPCycle, NPPredictions, NonParametricResult, Posterior, Psi, Theta, Weights, + bootstrap_theta, CycleLog, NPCycle, NPPredictions, NonParametricResult, Posterior, Psi, + Theta, Weights, }; + pub use crate::iov::{optimize_diffusion, DiffusionConfig, DiffusionOptimize, DiffusionResult}; pub use crate::model::{EquationMetadataSource, ModelMetadata}; pub use crate::results::{ FitResult, FitSummary, IndividualSummary, ParameterSummary, PopulationSummary, diff --git a/src/model/parameter_space.rs b/src/model/parameter_space.rs index b92785bea..ec1d25a63 100644 --- a/src/model/parameter_space.rs +++ b/src/model/parameter_space.rs @@ -192,9 +192,10 @@ impl ParameterMeta for UnboundedParameter { } /// Scale transform for parametric parameters. -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Default)] pub enum ParameterScale { /// Identity transform. + #[default] Identity, /// Log transform. Log, @@ -204,12 +205,6 @@ pub enum ParameterScale { Probit { lower: f64, upper: f64 }, } -impl Default for ParameterScale { - fn default() -> Self { - ParameterScale::Identity - } -} - /// Entry point for building parameter declarations. /// /// ```ignore diff --git a/tests/onecomp.rs b/tests/onecomp.rs index 85c6a27be..c63509b52 100644 --- a/tests/onecomp.rs +++ b/tests/onecomp.rs @@ -202,4 +202,3 @@ fn test_one_compartment_postprob() -> Result<()> { Ok(()) } - diff --git a/tests/results_summary_tests.rs b/tests/results_summary_tests.rs index 6edcc8a89..88363e4b9 100644 --- a/tests/results_summary_tests.rs +++ b/tests/results_summary_tests.rs @@ -42,11 +42,14 @@ fn simple_data() -> Data { #[test] fn test_nonparametric_fit_result_summary_surface() -> Result<()> { let assay_error = AssayErrorModel::additive(ErrorPoly::new(0.0, 0.10, 0.0, 0.0), 2.0); - let parameters = ParameterSpace::bounded().add("ke", 0.1, 1.0).add("v", 1.0, 20.0); + let parameters = ParameterSpace::bounded() + .add("ke", 0.1, 1.0) + .add("v", 1.0, 20.0); let prior = Theta::sobol_default(¶meters)?; let error_models = AssayErrorModels::new().add("0", assay_error)?; - let result = EstimationProblem::nonparametric(simple_equation(), simple_data(), prior, error_models)? - .fit_with(NonParametricAlgorithm::npag())?; + let result = + EstimationProblem::nonparametric(simple_equation(), simple_data(), prior, error_models)? + .fit_with(NonParametricAlgorithm::npag())?; let summary = result.summary(); @@ -58,4 +61,3 @@ fn test_nonparametric_fit_result_summary_surface() -> Result<()> { Ok(()) } -