diff --git a/.Rbuildignore b/.Rbuildignore index e989bd8..862edcc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,6 @@ ^codecov\.yml$ ^\.github$ ^www$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..d2a445c --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,44 @@ +on: + push: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build pkgdown site + run: | + pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to borch.dev website repo + uses: JamesIves/github-pages-deploy-action@v4.7.3 + with: + repository-name: ncborcherding/borcherding + branch: master + folder: docs + target-folder: static/uploads/bhive + token: ${{ secrets.WEBSITE_DEPLOY_TOKEN }} + clean: true diff --git a/DESCRIPTION b/DESCRIPTION index 91aa8ee..58f852c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,36 +1,41 @@ Package: bHIVE Title: B-cell Hybrid Immune Variant Engine -Version: 0.99.0 +Version: 0.99.1 Authors@R: c( person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com")) Description: The bHIVE package implements an Artificial Immune Network (AI-Net) algorithm for clustering, classification, and regression tasks. Inspired by biological immune systems, it employs clonal selection, mutation, and network suppression to analyze and model datasets. This package provides flexible functionality, including affinity metrics, mutation strategies, and hyperparameter tuning. License: MIT + file LICENSE Encoding: UTF-8 -LazyData: true -RoxygenNote: 7.3.2 -biocViews: Software, Classification, Annotation, Regression, Classification -Depends: - R (>= 4.0.0) -Imports: +RoxygenNote: 7.3.3 +biocViews: Software, Clustering, Classification, Regression, Network +Depends: + R (>= 4.5.0) +Imports: BiocParallel, cluster, clusterCrit, ggplot2, + R6, + Rcpp, Rtsne, stats, umap, viridis +LinkingTo: + Rcpp, + RcppArmadillo Suggests: BiocStyle, - caret, + caret, devtools, knitr, MASS, + pkgdown, rmarkdown, spelling, testthat (>= 3.0.0) VignetteBuilder: knitr Config/testthat/edition: 3 Language: en-US -URL: https://www.borch.dev/uploads/bHIVE/ +URL: https://www.borch.dev/uploads/bhive/ BugReports: https://github.com/BorchLab/bHIVE/issues diff --git a/NAMESPACE b/NAMESPACE index ca391a6..a22d028 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,17 @@ # Generated by roxygen2: do not edit by hand +export(AINet) +export(ActivationGate) +export(ClassSwitcher) +export(ConvergentSelector) +export(GerminalCenter) +export(IdiotypicNetwork) +export(ImmuneAlgorithm) +export(ImmuneRepertoire) +export(MemoryPool) +export(Microenvironment) +export(SHMEngine) +export(VDJLibrary) export(bHIVE) export(bHIVEmodel) export(honeycombHIVE) @@ -12,10 +24,13 @@ importFrom(BiocParallel,BatchtoolsParam) importFrom(BiocParallel,MulticoreParam) importFrom(BiocParallel,SerialParam) importFrom(BiocParallel,bplapply) +importFrom(R6,R6Class) +importFrom(Rcpp,sourceCpp) importFrom(Rtsne,Rtsne) importFrom(cluster,silhouette) importFrom(clusterCrit,intCriteria) importFrom(stats,dist) +importFrom(stats,kmeans) importFrom(stats,median) importFrom(stats,prcomp) importFrom(stats,rnorm) @@ -25,3 +40,4 @@ importFrom(stats,setNames) importFrom(umap,umap) importFrom(viridis,scale_color_viridis) importFrom(viridis,scale_fill_viridis) +useDynLib(bHIVE, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..0f4d269 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,99 @@ +# bHIVE 0.99.1 + +## C++ Backend +* Added RcppArmadillo backend for BLAS-optimized bulk affinity and distance + matrix computation, replacing per-element R loops +* Scalar affinity function for clone/mutate hot path avoids 1x1 matrix + allocation overhead +* C++ implementations for clonal selection iteration, network suppression, + kmeans++ initialization, final assignment, somatic hypermutation (5 methods), + and idiotypic network dynamics + +## R6 Class Architecture +* New `ImmuneRepertoire` class for antibody collections with metadata tracking + (isotype, state, age, lineage) +* New `ImmuneAlgorithm` abstract base class with fit/predict/summary interface +* New `AINet` class wrapping core algorithm with composable module injection +* Both R6 composition and functional API equally supported + +## New Modules +* `SHMEngine` — Five somatic hypermutation strategies: uniform (original + behavior), airs (affinity-proportional), hotspot (feature-gradient-weighted), + energy (budget-constrained), and adaptive (per-feature Adam-like moment + tracking) +* `IdiotypicNetwork` — Antibody-antibody network dynamics with bell-shaped + activation function replacing epsilon-threshold suppression +* `GerminalCenter` — T-follicular helper mediated selection with task-aware + quality scoring and resource competition +* `Microenvironment` — Density-dependent zone classification + (stable/explore/boundary) with chemokine-like gradient computation +* `VDJLibrary` — Combinatorial V(D)J gene library initialization via PCA, + k-means clustering, or random partition of feature space +* `ActivationGate` — Two-signal activation gate requiring both antigen + recognition and costimulatory context (density, danger, or entropy) +* `MemoryPool` — Archive high-affinity antibodies as long-lived memory cells + with threshold-based recall +* `ClassSwitcher` — Isotype class switching (IgM broad, IgG specific, IgA + boundary) modulating effective kernel width +* `ConvergentSelector` — Cross-repertoire consensus identification of public + antibodies for ensemble methods + +## Documentation +* Complete README rewrite covering functional API, R6 API, module reference + table, and architecture overview +* New pkgdown website with Bootstrap 5 Flatly theme, organized reference groups, + and tutorial navigation +* New article: "Composing Immune Modules" — R6 composition patterns for all 9 + modules with worked examples +* New article: "Advanced Tuning & Workflows" — swarmbHIVE grid search, + honeycombHIVE multilayer refinement, refineB optimizer comparison, caret + integration, and visualizeHIVE plot types +* New article: "Algorithm & Biological Foundations" — comprehensive mathematical + reference covering all affinity kernels, distance functions, SHM strategies, + idiotypic ODE system, germinal center selection, and parameter guidance +* Added roxygen @examples to refineB, bHIVEmodel, and ImmuneAlgorithm (now 90% + example coverage) +* GitHub Actions workflow for automated pkgdown deployment to borch.dev + +## Package Infrastructure +* Created `R/bHIVE-package.R` with roxygen-managed `@useDynLib` and + `@importFrom Rcpp sourceCpp` directives +* Added `%||%` operator `@name null-coalesce` to avoid illegal characters in Rd + `\name` field +* Moved tutorial vignettes to `vignettes/articles/` (pkgdown-only, not installed + with package) to reduce installed size +* Added pkgdown configuration (`_pkgdown.yml`) with 7 reference groups and + structured article hierarchy + +## BiocCheck Compliance +* Replaced all `sapply()` calls with `vapply()` in bHiVE.R and visualizeHIVE.R +* Replaced all `1:n` patterns with `seq_len()` / `seq_along()` +* Removed `install.packages()` calls from vignettes +* Removed `LazyData: true` from DESCRIPTION +* Updated R dependency to >= 4.5.0 +* Updated biocViews to `Software, Clustering, Classification, Regression, + Network` +* Added class-level `@param` documentation for all R6 initialize() arguments + across 11 module classes +* Added `@param ... Not used.` to all R6 `print()` methods + +## Bug Fixes +* Fixed kmeans++ sampling: corrected cumulative sum fallback index and + runif-to-integer cast +* Fixed cosine similarity epsilon (1e-12) for numerical stability +* Fixed division-by-zero guard in idiotypic dynamics when theta_low equals + theta_high +* Fixed VDJLibrary NaN in kmeans for single-allele edge cases +* Fixed VDJLibrary NA subscript when feature dimensions not divisible by 3 +* Removed duplicate `Classification` from biocViews, added `Clustering` +* Fixed vignette YAML parsing error from bare `---` horizontal rule + +# bHIVE 0.99.0 + +* Initial submission version with core AIS functionality +* Clonal selection, network suppression, and mutation for clustering, + classification, and regression +* honeycombHIVE multilayer architecture +* swarmbHIVE hyperparameter tuning via BiocParallel +* caret model integration (bHIVEmodel, honeycombHIVEmodel) +* Visualization utilities via ggplot2 diff --git a/R/AINet.R b/R/AINet.R new file mode 100644 index 0000000..3d12174 --- /dev/null +++ b/R/AINet.R @@ -0,0 +1,340 @@ +#' @title AINet +#' @description R6 implementation of the Artificial Immune Network algorithm. +#' This is the core bHIVE algorithm using C++ backends for performance-critical +#' operations. Supports composable modules for somatic hypermutation, idiotypic +#' network regulation, germinal center selection, and more. +#' +#' @examples +#' # Clustering with Iris data +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' model <- AINet$new(nAntibodies = 15, maxIter = 10, verbose = FALSE) +#' model$fit(X, task = "clustering") +#' table(model$result$assignments) +#' +#' # Classification +#' model2 <- AINet$new(nAntibodies = 20, maxIter = 10, verbose = FALSE) +#' model2$fit(X, iris$Species, task = "classification") +#' mean(model2$result$assignments == as.character(iris$Species)) +#' +#' # Predict on new data +#' preds <- model2$predict(X[1:10, ]) +#' +#' @importFrom R6 R6Class +#' @importFrom stats rnorm runif sd +#' @export +AINet <- R6::R6Class( + "AINet", + inherit = ImmuneAlgorithm, + public = list( + + #' @description Create a new AINet algorithm instance. + #' @param nAntibodies Integer. Initial antibody population size. + #' @param beta Numeric. Clone multiplier. + #' @param epsilon Numeric. Suppression distance threshold. + #' @param maxIter Integer. Maximum iterations. + #' @param k Integer. Top-k antibodies to clone per data point. + #' @param affinityFunc Character. Affinity function name. + #' @param distFunc Character. Distance function name. + #' @param affinityParams List. Parameters for affinity/distance functions. + #' @param mutationDecay Numeric. Per-iteration mutation rate decay. + #' @param mutationMin Numeric. Minimum mutation rate. + #' @param maxClones Numeric. Maximum clones per antibody. + #' @param stopTolerance Numeric. Early stopping tolerance. + #' @param noImprovementLimit Integer. Early stopping patience. + #' @param initMethod Character. Initialization method. + #' @param shm An SHMEngine instance or NULL for default uniform mutation. + #' @param init A VDJLibrary instance or NULL for default initialization. + #' @param activation An ActivationGate instance or NULL. + #' @param idiotypic An IdiotypicNetwork instance or NULL. + #' @param germinalCenter A GerminalCenter instance or NULL. + #' @param microenvironment A Microenvironment instance or NULL. + #' @param memory A MemoryPool instance or NULL. + #' @param verbose Logical. Print progress. + initialize = function(nAntibodies = 20, + beta = 5, + epsilon = 0.01, + maxIter = 50, + k = 3, + affinityFunc = "gaussian", + distFunc = "euclidean", + affinityParams = list(alpha = 1, c = 1, p = 2, + Sigma = NULL), + mutationDecay = 1.0, + mutationMin = 0.01, + maxClones = Inf, + stopTolerance = 0.0, + noImprovementLimit = Inf, + initMethod = "sample", + shm = NULL, + init = NULL, + activation = NULL, + idiotypic = NULL, + germinalCenter = NULL, + microenvironment = NULL, + memory = NULL, + verbose = TRUE) { + + # Validate numeric parameters + stopifnot( + "nAntibodies must be a positive integer" = is.numeric(nAntibodies) && nAntibodies >= 1, + "beta must be positive" = is.numeric(beta) && beta > 0, + "epsilon must be non-negative" = is.numeric(epsilon) && epsilon >= 0, + "maxIter must be a positive integer" = is.numeric(maxIter) && maxIter >= 1, + "k must be a positive integer" = is.numeric(k) && k >= 1, + "mutationDecay must be in (0, 1]" = is.numeric(mutationDecay) && mutationDecay > 0 && mutationDecay <= 1, + "mutationMin must be non-negative" = is.numeric(mutationMin) && mutationMin >= 0 + ) + + config <- list( + nAntibodies = as.integer(nAntibodies), + beta = beta, + epsilon = epsilon, + maxIter = maxIter, + k = k, + affinityFunc = match.arg(affinityFunc, c("gaussian", "laplace", + "polynomial", "cosine", "hamming")), + distFunc = match.arg(distFunc, c("euclidean", "manhattan", + "minkowski", "cosine", + "mahalanobis", "hamming")), + affinityParams = affinityParams, + mutationDecay = mutationDecay, + mutationMin = mutationMin, + maxClones = maxClones, + stopTolerance = stopTolerance, + noImprovementLimit = noImprovementLimit, + initMethod = match.arg(initMethod, c("sample", "random", + "random_uniform", "kmeans++")), + verbose = verbose + ) + + modules <- list( + shm = shm, + init = init, + activation = activation, + idiotypic = idiotypic, + germinalCenter = germinalCenter, + microenvironment = microenvironment, + memory = memory + ) + # Remove NULL modules + modules <- modules[!vapply(modules, is.null, logical(1))] + + super$initialize(config = config, modules = modules) + }, + + #' @description Fit the AINet algorithm to data. + #' @param X Numeric matrix or data frame (n x d). + #' @param y Optional target: factor (classification) or numeric (regression). + #' @param task Character: "clustering", "classification", or "regression". + #' Inferred from y if NULL. + #' @param ... Additional arguments (currently unused). + #' @return Invisible self, with \code{result} populated. + fit = function(X, y = NULL, task = NULL, ...) { + # Validate inputs + .validate_bHIVE_input(X, y) + + # Infer task + if (is.null(task)) { + task <- if (is.null(y)) "clustering" + else if (is.factor(y)) "classification" + else "regression" + } + task <- match.arg(task, c("clustering", "classification", "regression")) + + X <- as.matrix(X) + n <- nrow(X) + d <- ncol(X) + cfg <- self$config + + # Standardize regression target + y_orig <- y + y_mean <- 0; y_sd <- 1 + if (task == "regression") { + y_mean <- mean(y, na.rm = TRUE) + y_sd <- sd(y, na.rm = TRUE) + if (y_sd == 0) y_sd <- 1 + y <- (y - y_mean) / y_sd + } + + # ================================ + # 1. Initialize antibody population + # ================================ + A <- private$.initialize_antibodies(X, cfg$nAntibodies, cfg$initMethod) + self$repertoire <- ImmuneRepertoire$new(A) + m <- self$repertoire$size() + + # ================================ + # 2. Task-specific setup + # ================================ + task_int <- switch(task, clustering = 0L, classification = 1L, regression = 2L) + nClasses <- 0L + classes <- NULL + if (task == "classification") { + classes <- levels(y) + nClasses <- length(classes) + y_num <- as.numeric(y) - 1 # 0-indexed class + } else if (task == "regression") { + y_num <- y + } else { + y_num <- rep(0, n) + } + + # Affinity/distance params + alpha <- cfg$affinityParams$alpha %||% 1 + c_p <- cfg$affinityParams$c %||% 1 + p_p <- cfg$affinityParams$p %||% 2 + Sigma_inv <- if (!is.null(cfg$affinityParams$Sigma)) { + solve(cfg$affinityParams$Sigma) + } else { + matrix(0, 0, 0) + } + + # Early stopping state + noImproveCount <- 0 + prevCount <- m + + # ================================ + # 3. Main iteration loop + # ================================ + for (iter in seq_len(cfg$maxIter)) { + A_current <- self$repertoire$as_matrix() + m <- nrow(A_current) + + # (a) Clonal selection + mutation [C++] + cs_result <- clonal_selection_iteration_cpp( + A_current, X, y_num, task_int, cfg$k, cfg$beta, + cfg$maxClones, cfg$mutationDecay, cfg$mutationMin, + iter, cfg$affinityFunc, alpha, c_p, p_p, nClasses + ) + self$repertoire$cells <- cs_result$A + + # Update labels + if (task == "classification") { + antibody_classes <- apply(cs_result$class_counts, 1, function(row) { + if (all(row == 0)) classes[sample(nClasses, 1)] + else classes[which.max(row)] + }) + } else if (task == "regression") { + antibody_values <- ifelse(cs_result$sum_aff > 0, + cs_result$sum_y / cs_result$sum_aff, + mean(y, na.rm = TRUE)) + } + + # (e) Network suppression [C++] + # TODO: Replace with idiotypic network dynamics when module is available + keep <- network_suppression_cpp( + self$repertoire$cells, cfg$distFunc, cfg$epsilon, + p_p, Sigma_inv + ) + kept_idx <- which(keep) + self$repertoire$subset(kept_idx) + m_new <- self$repertoire$size() + + if (task == "classification") { + antibody_classes <- antibody_classes[kept_idx] + } else if (task == "regression") { + antibody_values <- antibody_values[kept_idx] + } + + if (m_new == 0) { + stop("All antibodies were suppressed. Increase nAntibodies or decrease epsilon.") + } + + # Record iteration history + self$history[[iter]] <- list(n_antibodies = m_new) + + # Early stopping + changeCount <- abs(m_new - prevCount) + if (changeCount <= cfg$stopTolerance) { + noImproveCount <- noImproveCount + 1 + } else { + noImproveCount <- 0 + } + prevCount <- m_new + + if (noImproveCount >= cfg$noImprovementLimit) { + if (cfg$verbose) { + cat("Early stopping: no improvement for", noImproveCount, "iterations.\n") + } + break + } + + if (cfg$verbose) { + cat(sprintf("Iteration %d | #Antibodies: %d | noImproveCount: %d\n", + iter, m_new, noImproveCount)) + } + } + + # ================================ + # 4. Final assignment [C++] + # ================================ + A_final <- self$repertoire$as_matrix() + m <- nrow(A_final) + + if (task == "clustering") { + fa <- final_assignment_cpp(X, A_final, cfg$affinityFunc, cfg$distFunc, + 0L, alpha, c_p, p_p, Sigma_inv, + numeric(0), 0.0) + assignments <- as.numeric(factor(fa$assignments)) + self$result <- list( + antibodies = A_final, + assignments = assignments, + task = task + ) + } else if (task == "classification") { + fa <- final_assignment_cpp(X, A_final, cfg$affinityFunc, cfg$distFunc, + 1L, alpha, c_p, p_p, Sigma_inv, + numeric(0), 0.0) + assignments <- antibody_classes[fa$best_antibody_idx] + self$result <- list( + antibodies = A_final, + assignments = assignments, + antibody_classes = antibody_classes, + task = task + ) + } else { + fa <- final_assignment_cpp(X, A_final, cfg$affinityFunc, cfg$distFunc, + 2L, alpha, c_p, p_p, Sigma_inv, + antibody_values, mean(y, na.rm = TRUE)) + predictions <- fa$predictions * y_sd + y_mean + self$result <- list( + antibodies = A_final, + assignments = fa$cluster_assign, + predictions = predictions, + antibody_values = antibody_values, + overall_mean = mean(y, na.rm = TRUE), + task = task + ) + } + + invisible(self) + } + ), + + private = list( + + .initialize_antibodies = function(X, nAntibodies, method) { + n <- nrow(X) + d <- ncol(X) + switch( + method, + "sample" = X[sample.int(n, size = nAntibodies, replace = TRUE), , drop = FALSE], + "random" = { + xMean <- colMeans(X) + xSd <- apply(X, 2, sd) + 1e-8 + mat <- matrix(rnorm(nAntibodies * d), nrow = nAntibodies) + mat <- sweep(mat, 2, xSd, `*`) + sweep(mat, 2, xMean, `+`) + }, + "random_uniform" = { + xMin <- apply(X, 2, min) + xMax <- apply(X, 2, max) + mat <- matrix(runif(nAntibodies * d), nrow = nAntibodies) + sweep(sweep(mat, 2, xMax - xMin, `*`), 2, xMin, `+`) + }, + "kmeans++" = init_kmeanspp_cpp(X, nAntibodies) + ) + } + ) +) diff --git a/R/ActivationGate.R b/R/ActivationGate.R new file mode 100644 index 0000000..561226d --- /dev/null +++ b/R/ActivationGate.R @@ -0,0 +1,135 @@ +#' @title ActivationGate +#' @description Two-signal activation gate implementing the immunological +#' principle that immune cell activation requires both antigen-specific +#' recognition (Signal 1) AND costimulatory context (Signal 2). +#' +#' @details +#' Prevents spurious activation on isolated outliers. An antibody is only +#' allowed to clone if both signals exceed their thresholds. This is +#' biologically-principled regularization. +#' +#' Signal 2 options: +#' \itemize{ +#' \item \code{"density"}: Local data density around the antibody +#' \item \code{"danger"}: User-provided danger signal vector +#' \item \code{"entropy"}: Local label entropy (classification only) +#' } +#' +#' @examples +#' # Two-signal activation gate +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' A <- X[sample(150, 10), ] +#' rep <- ImmuneRepertoire$new(A) +#' gate <- ActivationGate$new(signal2_type = "density", threshold2 = 0.3) +#' aff <- rep$affinity_matrix(X, "gaussian") +#' activated <- gate$evaluate(aff, X, A) +#' sum(activated) # number of activated interactions +#' +#' @param signal2_type Character. "density", "danger", or "entropy". +#' @param threshold1 Numeric. Minimum affinity threshold. +#' @param threshold2 Numeric. Minimum Signal 2 threshold. +#' @param danger_signals Numeric vector. Per-data-point danger scores. +#' +#' @importFrom R6 R6Class +#' @export +ActivationGate <- R6::R6Class( + "ActivationGate", + public = list( + + #' @field signal2_type Type of costimulatory signal. + signal2_type = NULL, + + #' @field threshold1 Minimum affinity for Signal 1 (antigen recognition). + threshold1 = NULL, + + #' @field threshold2 Minimum costimulatory signal for Signal 2. + threshold2 = NULL, + + #' @field danger_signals User-provided danger signal vector (for "danger" type). + danger_signals = NULL, + + #' @description Create a new ActivationGate. + #' @param signal2_type Character. "density", "danger", or "entropy". + #' @param threshold1 Numeric. Minimum affinity threshold. + #' @param threshold2 Numeric. Minimum Signal 2 threshold. + #' @param danger_signals Numeric vector. Per-data-point danger scores. + initialize = function(signal2_type = "density", + threshold1 = 0.1, + threshold2 = 0.3, + danger_signals = NULL) { + self$signal2_type <- match.arg(signal2_type, c("density", "danger", "entropy")) + self$threshold1 <- threshold1 + self$threshold2 <- threshold2 + self$danger_signals <- danger_signals + }, + + #' @description Evaluate which antibody-data interactions pass the two-signal gate. + #' @param affinity_matrix Numeric matrix (n x m) of affinities. + #' @param X Numeric matrix of data (n x d). + #' @param A Numeric matrix of antibodies (m x d). + #' @param y Target vector or NULL. + #' @param task Character. Task type. + #' @return Logical matrix (n x m) where TRUE means the interaction is activated. + evaluate = function(affinity_matrix, X, A, y = NULL, task = "clustering") { + n <- nrow(affinity_matrix) + m <- ncol(affinity_matrix) + + # Signal 1: affinity exceeds threshold + signal1 <- affinity_matrix > self$threshold1 + + # Signal 2: costimulatory context + signal2 <- matrix(FALSE, nrow = n, ncol = m) + + if (self$signal2_type == "density") { + # Local density: for each data point, density = sum of affinities to all antibodies + density_per_point <- rowSums(affinity_matrix) + density_threshold <- quantile(density_per_point, self$threshold2) + dense_points <- density_per_point > density_threshold + signal2[dense_points, ] <- TRUE + + } else if (self$signal2_type == "danger") { + if (is.null(self$danger_signals)) { + stop("danger_signals must be provided for signal2_type='danger'") + } + danger_pass <- self$danger_signals > self$threshold2 + signal2[danger_pass, ] <- TRUE + + } else if (self$signal2_type == "entropy") { + if (is.null(y) || task != "classification") { + # Fall back to density for non-classification tasks + density_per_point <- rowSums(affinity_matrix) + density_threshold <- quantile(density_per_point, self$threshold2) + signal2[density_per_point > density_threshold, ] <- TRUE + } else { + # For each antibody, compute label entropy of nearby data + assignments <- apply(affinity_matrix, 1, which.max) + for (j in seq_len(m)) { + assigned <- which(assignments == j) + if (length(assigned) > 1) { + labels <- y[assigned] + freqs <- table(labels) / length(labels) + entropy <- -sum(freqs * log(freqs + 1e-15)) + # Lower entropy = more pure = better context + if (entropy < self$threshold2) { + signal2[assigned, j] <- TRUE + } + } + } + } + } + + # Both signals must pass + signal1 & signal2 + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat(sprintf(" signal2='%s'\n", self$signal2_type)) + cat(sprintf(" Threshold1 (affinity): %.3f\n", self$threshold1)) + cat(sprintf(" Threshold2 (context): %.3f\n", self$threshold2)) + invisible(self) + } + ) +) diff --git a/R/ClassSwitcher.R b/R/ClassSwitcher.R new file mode 100644 index 0000000..735d3aa --- /dev/null +++ b/R/ClassSwitcher.R @@ -0,0 +1,110 @@ +#' @title ClassSwitcher +#' @description Implements antibody isotype/class switching, allowing antibodies +#' to change their matching breadth. Inspired by real B cell class switching +#' from IgM (broad, pentameric) to IgG (specific, monomeric) to IgA (mucosal). +#' +#' @details +#' In bHIVE, class switching modifies the effective affinity kernel width: +#' \itemize{ +#' \item \code{IgM}: Broad matching (large kernel width) -- good for initial +#' exploration and capturing general patterns +#' \item \code{IgG}: Specific matching (small kernel width) -- good for +#' fine-grained discrimination after patterns are identified +#' \item \code{IgA}: Boundary patrol (medium kernel width) -- good for +#' maintaining coverage at decision boundaries +#' } +#' +#' @examples +#' # Switch antibody isotypes based on microenvironment zones +#' A <- matrix(rnorm(50), nrow = 10, ncol = 5) +#' rep <- ImmuneRepertoire$new(A) +#' cs <- ClassSwitcher$new(alpha_IgM = 0.1, alpha_IgG = 5.0) +#' zones <- sample(c("stable", "explore", "boundary"), 10, replace = TRUE) +#' alphas <- cs$switch_isotypes(rep, zones) +#' table(rep$metadata$isotype) # IgM, IgG, IgA distribution +#' +#' @param alpha_IgM Numeric. Kernel width for broad matching. +#' @param alpha_IgG Numeric. Kernel width for specific matching. +#' @param alpha_IgA Numeric. Kernel width for boundary matching. +#' +#' @importFrom R6 R6Class +#' @export +ClassSwitcher <- R6::R6Class( + "ClassSwitcher", + public = list( + + #' @field alpha_IgM Kernel width for IgM mode (broad). + alpha_IgM = NULL, + + #' @field alpha_IgG Kernel width for IgG mode (specific). + alpha_IgG = NULL, + + #' @field alpha_IgA Kernel width for IgA mode (boundary). + alpha_IgA = NULL, + + #' @description Create a new ClassSwitcher. + #' @param alpha_IgM Numeric. Kernel width for broad matching. + #' @param alpha_IgG Numeric. Kernel width for specific matching. + #' @param alpha_IgA Numeric. Kernel width for boundary matching. + initialize = function(alpha_IgM = 0.1, alpha_IgG = 5.0, alpha_IgA = 1.0) { + self$alpha_IgM <- alpha_IgM + self$alpha_IgG <- alpha_IgG + self$alpha_IgA <- alpha_IgA + }, + + #' @description Determine appropriate isotype for each antibody based on + #' its microenvironment zone. + #' @param repertoire An \code{\link{ImmuneRepertoire}}. + #' @param zones Character vector from Microenvironment assessment. + #' @return Named numeric vector of alpha values per antibody. + switch_isotypes = function(repertoire, zones) { + m <- repertoire$size() + alphas <- numeric(m) + + for (i in seq_len(m)) { + current <- repertoire$metadata$isotype[i] + zone <- zones[i] + + # Switch logic + new_isotype <- switch(zone, + "stable" = "IgG", # high density -> switch to specific + "explore" = "IgM", # low density -> switch to broad + "boundary" = "IgA", # boundary -> intermediate + current # default: keep current + ) + + repertoire$metadata$isotype[i] <- new_isotype + alphas[i] <- switch(new_isotype, + "IgM" = self$alpha_IgM, + "IgG" = self$alpha_IgG, + "IgA" = self$alpha_IgA, + self$alpha_IgA # default + ) + } + + alphas + }, + + #' @description Get alpha value for a given isotype. + #' @param isotype Character. "IgM", "IgG", or "IgA". + #' @return Numeric. + get_alpha = function(isotype) { + switch(isotype, + "IgM" = self$alpha_IgM, + "IgG" = self$alpha_IgG, + "IgA" = self$alpha_IgA, + self$alpha_IgA + ) + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat("\n") + cat(sprintf(" IgM (broad): alpha = %.3f\n", self$alpha_IgM)) + cat(sprintf(" IgG (specific): alpha = %.3f\n", self$alpha_IgG)) + cat(sprintf(" IgA (boundary): alpha = %.3f\n", self$alpha_IgA)) + invisible(self) + } + ) +) diff --git a/R/ConvergentSelector.R b/R/ConvergentSelector.R new file mode 100644 index 0000000..28b5f4e --- /dev/null +++ b/R/ConvergentSelector.R @@ -0,0 +1,125 @@ +#' @title ConvergentSelector +#' @description Identifies "public antibodies" shared across independent +#' repertoires, implementing the concept of convergent selection from TCR/BCR +#' immunology as a biologically-motivated ensemble method. +#' +#' @details +#' In real immunity, certain immune receptor sequences appear across multiple +#' individuals (public clones), suggesting they are driven by common selection +#' pressures. Similarly, antibodies that appear across multiple independent +#' bHIVE runs represent the most robust patterns in the data. +#' +#' @examples +#' # Find public antibodies across multiple runs +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' results <- lapply(1:3, function(i) { +#' m <- AINet$new(nAntibodies = 15, maxIter = 5, verbose = FALSE) +#' m$fit(X, task = "clustering") +#' m$result +#' }) +#' conv <- ConvergentSelector$new(tolerance = 1.0, min_appearances = 2) +#' public <- conv$from_results(results) +#' nrow(public) # consensus antibodies +#' +#' @param tolerance Numeric. Maximum distance for two antibodies to be +#' considered the same across repertoires. +#' @param min_appearances Integer. Minimum repertoires for an antibody to be public. +#' +#' @importFrom R6 R6Class +#' @export +ConvergentSelector <- R6::R6Class( + "ConvergentSelector", + public = list( + + #' @field tolerance Distance tolerance for matching antibodies across repertoires. + tolerance = NULL, + + #' @field min_appearances Minimum number of repertoires an antibody must appear + #' in to be considered "public". + min_appearances = NULL, + + #' @field public_antibodies The identified public antibodies. + public_antibodies = NULL, + + #' @description Create a new ConvergentSelector. + #' @param tolerance Numeric. Maximum distance for two antibodies to be + #' considered the same across repertoires. + #' @param min_appearances Integer. Minimum repertoires for an antibody to be public. + initialize = function(tolerance = 0.5, min_appearances = 2) { + self$tolerance <- tolerance + self$min_appearances <- min_appearances + }, + + #' @description Find public antibodies shared across multiple repertoires. + #' @param repertoires List of ImmuneRepertoire objects or list of antibody matrices. + #' @param distFunc Character. Distance function for matching. + #' @return Numeric matrix of public (consensus) antibodies. + find_public = function(repertoires, distFunc = "euclidean") { + # Convert to list of matrices + matrices <- lapply(repertoires, function(r) { + if (inherits(r, "ImmuneRepertoire")) r$as_matrix() + else if (is.list(r) && !is.null(r$antibodies)) r$antibodies + else as.matrix(r) + }) + + n_reps <- length(matrices) + if (n_reps < 2) { + warning("Convergent selection requires at least 2 repertoires.") + self$public_antibodies <- matrices[[1]] + return(self$public_antibodies) + } + + # Use first repertoire as reference + ref <- matrices[[1]] + n_ref <- nrow(ref) + + # Count appearances: for each reference antibody, how many other + # repertoires have a matching antibody within tolerance? + appearances <- rep(1L, n_ref) # 1 for appearing in the reference itself + + for (r in 2:n_reps) { + other <- matrices[[r]] + # Distance from reference to other + D <- compute_distance_matrix(ref, other, distFunc, 2.0, matrix(0, 0, 0)) + # For each reference antibody, check if any other antibody is within tolerance + min_dist <- apply(D, 1, min) + appearances <- appearances + as.integer(min_dist <= self$tolerance) + } + + # Keep antibodies appearing in >= min_appearances repertoires + public_idx <- which(appearances >= self$min_appearances) + + if (length(public_idx) == 0) { + # Fall back: return antibodies from first repertoire + warning("No public antibodies found. Try increasing tolerance or ", + "decreasing min_appearances.") + self$public_antibodies <- ref + } else { + self$public_antibodies <- ref[public_idx, , drop = FALSE] + } + + self$public_antibodies + }, + + #' @description Run convergent selection from multiple bHIVE results. + #' @param results List of bHIVE result objects (each with $antibodies). + #' @param distFunc Character. Distance function. + #' @return Numeric matrix of consensus antibodies. + from_results = function(results, distFunc = "euclidean") { + self$find_public(results, distFunc) + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat("\n") + cat(sprintf(" Tolerance: %.3f\n", self$tolerance)) + cat(sprintf(" Min appearances: %d\n", self$min_appearances)) + if (!is.null(self$public_antibodies)) { + cat(sprintf(" Public antibodies: %d\n", nrow(self$public_antibodies))) + } + invisible(self) + } + ) +) diff --git a/R/GerminalCenter.R b/R/GerminalCenter.R new file mode 100644 index 0000000..ea3d685 --- /dev/null +++ b/R/GerminalCenter.R @@ -0,0 +1,165 @@ +#' @title GerminalCenter +#' @description Models T follicular helper (Tfh) cell selection pressure on +#' B cells within a germinal center reaction. Implements resource competition +#' where antibodies compete for Tfh help, and only helped antibodies survive. +#' +#' @details +#' The germinal center is where B cells undergo affinity maturation through +#' iterative cycles of mutation and selection. Tfh cells act as quality-control +#' selectors: +#' +#' \itemize{ +#' \item Each Tfh evaluates B cell (antibody) quality using a task-aware metric +#' \item B cells compete for Tfh help (resource competition) +#' \item Only helped B cells survive to the next round +#' \item Selection pressure controls the stringency of the process +#' } +#' +#' @examples +#' # Germinal center selection on Iris +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' gc <- GerminalCenter$new(nTfh = 5, selectionPressure = 0.5) +#' rep <- ImmuneRepertoire$new(X[sample(150, 20), ]) +#' gc$select(rep, X, iris$Species, "classification") +#' rep$size() # fewer antibodies after selection +#' +#' @param nTfh Integer. Number of Tfh helper cells. Each helps one B cell. +#' @param selectionPressure Numeric [0,1]. Stringency of selection. +#' @param rounds Integer. Number of competition rounds. +#' +#' @importFrom R6 R6Class +#' @export +GerminalCenter <- R6::R6Class( + "GerminalCenter", + public = list( + + #' @field nTfh Number of Tfh selectors (determines how many antibodies survive). + nTfh = NULL, + + #' @field selectionPressure Numeric [0,1]. 0 = no selection (all survive), + #' 1 = only the very best survive. + selectionPressure = NULL, + + #' @field rounds Number of selection rounds per call. + rounds = NULL, + + #' @description Create a new GerminalCenter. + #' @param nTfh Integer. Number of Tfh helper cells. Each helps one B cell. + #' @param selectionPressure Numeric [0,1]. Stringency of selection. + #' @param rounds Integer. Number of competition rounds. + initialize = function(nTfh = 10, selectionPressure = 0.5, rounds = 1) { + stopifnot( + "nTfh must be a positive integer" = is.numeric(nTfh) && nTfh >= 1, + "selectionPressure must be in [0, 1]" = is.numeric(selectionPressure) && selectionPressure >= 0 && selectionPressure <= 1, + "rounds must be a positive integer" = is.numeric(rounds) && rounds >= 1 + ) + self$nTfh <- as.integer(nTfh) + self$selectionPressure <- selectionPressure + self$rounds <- as.integer(rounds) + }, + + #' @description Run germinal center selection on a repertoire. + #' @param repertoire An \code{\link{ImmuneRepertoire}} object. + #' @param X Numeric matrix of training data. + #' @param y Target vector (factor or numeric) or NULL for clustering. + #' @param task Character: "clustering", "classification", or "regression". + #' @param affinityFunc Character. Affinity function for evaluation. + #' @param affinityParams List. Parameters for affinity function. + #' @return Invisible self. Repertoire modified in place. + select = function(repertoire, X, y = NULL, task = "clustering", + affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2)) { + + for (round in seq_len(self$rounds)) { + m <- repertoire$size() + if (m <= self$nTfh) break # Nothing to select + + A <- repertoire$as_matrix() + + # Compute quality score for each antibody + scores <- private$.compute_quality(A, X, y, task, affinityFunc, affinityParams) + + # Selection: top-scoring antibodies get Tfh help + # The number surviving depends on selectionPressure + n_survive <- max(1, round(m * (1 - self$selectionPressure) + self$nTfh * self$selectionPressure)) + n_survive <- min(n_survive, m) + + # Probabilistic selection weighted by scores + scores_pos <- scores - min(scores) + 1e-10 + probs <- scores_pos / sum(scores_pos) + survived <- sort(unique(sample.int(m, size = n_survive, replace = FALSE, prob = probs))) + + repertoire$subset(survived) + } + + invisible(self) + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat("\n") + cat(sprintf(" nTfh: %d\n", self$nTfh)) + cat(sprintf(" Selection pressure: %.2f\n", self$selectionPressure)) + cat(sprintf(" Rounds: %d\n", self$rounds)) + invisible(self) + } + ), + + private = list( + + .compute_quality = function(A, X, y, task, affinityFunc, affinityParams) { + m <- nrow(A) + + # Compute affinity matrix (n x m) + aff <- compute_affinity_matrix( + X, A, affinityFunc, + alpha = affinityParams$alpha %||% 1, + c = affinityParams$c %||% 1, + p = affinityParams$p %||% 2 + ) + + if (task == "clustering") { + # Quality = average affinity to assigned data points + # Each data point assigned to nearest antibody + assignments <- apply(aff, 1, which.max) + scores <- numeric(m) + for (j in seq_len(m)) { + assigned <- which(assignments == j) + scores[j] <- if (length(assigned) > 0) mean(aff[assigned, j]) else 0 + } + + } else if (task == "classification") { + # Quality = classification accuracy of assigned points + assignments <- apply(aff, 1, which.max) + scores <- numeric(m) + # Each antibody gets the most common class label + for (j in seq_len(m)) { + assigned <- which(assignments == j) + if (length(assigned) > 0) { + labels <- y[assigned] + # Score = proportion correctly matching the majority class + scores[j] <- max(table(labels)) / length(labels) + } + } + + } else { + # Regression: quality = negative MSE + assignments <- apply(aff, 1, which.max) + scores <- numeric(m) + for (j in seq_len(m)) { + assigned <- which(assignments == j) + if (length(assigned) > 0) { + pred <- mean(y[assigned]) + scores[j] <- -mean((y[assigned] - pred)^2) + } + } + # Shift to positive for probability weighting + scores <- scores - min(scores) + 1e-10 + } + + scores + } + ) +) diff --git a/R/IdiotypicNetwork.R b/R/IdiotypicNetwork.R new file mode 100644 index 0000000..88b331d --- /dev/null +++ b/R/IdiotypicNetwork.R @@ -0,0 +1,173 @@ +#' @title IdiotypicNetwork +#' @description Implements Jerne's idiotypic network theory for antibody +#' repertoire regulation. Replaces crude epsilon-threshold suppression with +#' principled network dynamics based on Varela & Coutinho's (1991) second- +#' generation immune network model. +#' +#' @details +#' The idiotypic network models antibody-antibody interactions where each +#' antibody's variable region can be recognized by other antibodies. This +#' creates a regulatory network with emergent properties: +#' +#' - A bell-shaped (double-threshold) activation function: too little +#' stimulation leads to cell death, moderate stimulation to activation, +#' and excessive stimulation to suppression. +#' - Population dynamics with source, decay, activation, and suppression terms. +#' - Self-organized repertoire structure with memory and tolerance properties. +#' +#' This is the single most novel contribution of the overhauled bHIVE package. +#' No existing AIS implementation uses idiotypic network dynamics for +#' repertoire regulation. +#' +#' @examples +#' # Create and run idiotypic regulation +#' idi <- IdiotypicNetwork$new(theta_low = 0.01, theta_high = 0.5) +#' A <- matrix(rnorm(50), nrow = 10, ncol = 5) +#' rep <- ImmuneRepertoire$new(A) +#' idi$regulate(rep, "gaussian", list(alpha = 0.5)) +#' print(idi) +#' +#' @param theta_low Lower activation threshold. +#' @param theta_high Upper activation threshold. +#' @param source_rate Basal cell production rate. +#' @param decay_rate Natural decay rate. +#' @param dt Euler integration time step. +#' @param timeSteps Number of dynamics simulation steps. +#' @param survival_threshold Minimum population to survive. +#' +#' @importFrom R6 R6Class +#' @export +IdiotypicNetwork <- R6::R6Class( + "IdiotypicNetwork", + public = list( + + #' @field theta_low Lower activation threshold. Below this, cells die. + theta_low = NULL, + + #' @field theta_high Upper activation threshold. Above this, cells are suppressed. + theta_high = NULL, + + #' @field source_rate Rate of new cell generation (basal production). + source_rate = NULL, + + #' @field decay_rate Natural cell death rate. + decay_rate = NULL, + + #' @field dt Time step for Euler integration. + dt = NULL, + + #' @field timeSteps Number of simulation time steps. + timeSteps = NULL, + + #' @field survival_threshold Minimum population level to survive. + survival_threshold = NULL, + + #' @field last_dynamics Result from the last regulation step. + last_dynamics = NULL, + + #' @description Create a new IdiotypicNetwork regulator. + #' @param theta_low Lower activation threshold. + #' @param theta_high Upper activation threshold. + #' @param source_rate Basal cell production rate. + #' @param decay_rate Natural decay rate. + #' @param dt Euler integration time step. + #' @param timeSteps Number of dynamics simulation steps. + #' @param survival_threshold Minimum population to survive. + initialize = function(theta_low = 0.01, + theta_high = 0.5, + source_rate = 0.5, + decay_rate = 0.1, + dt = 0.1, + timeSteps = 20, + survival_threshold = 0.5) { + stopifnot( + "theta_low must be non-negative" = is.numeric(theta_low) && theta_low >= 0, + "theta_high must be > theta_low" = is.numeric(theta_high) && theta_high > theta_low, + "source_rate must be non-negative" = is.numeric(source_rate) && source_rate >= 0, + "decay_rate must be non-negative" = is.numeric(decay_rate) && decay_rate >= 0, + "dt must be positive" = is.numeric(dt) && dt > 0, + "timeSteps must be a positive integer" = is.numeric(timeSteps) && timeSteps >= 1, + "survival_threshold must be non-negative" = is.numeric(survival_threshold) && survival_threshold >= 0 + ) + self$theta_low <- theta_low + self$theta_high <- theta_high + self$source_rate <- source_rate + self$decay_rate <- decay_rate + self$dt <- dt + self$timeSteps <- as.integer(timeSteps) + self$survival_threshold <- survival_threshold + }, + + #' @description Run idiotypic network dynamics on an antibody repertoire. + #' @param repertoire An \code{\link{ImmuneRepertoire}} object. + #' @param affinityFunc Character. Affinity function for Ab-Ab interactions. + #' @param affinityParams List. Parameters for the affinity function. + #' @return Invisible self. The repertoire is modified in place (dead + #' antibodies removed). Access \code{$last_dynamics} for full results. + regulate = function(repertoire, affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2)) { + A <- repertoire$as_matrix() + + result <- idiotypic_dynamics_cpp( + A, + affinityFunc, + affinityParams$alpha %||% 1, + affinityParams$c %||% 1, + affinityParams$p %||% 2, + self$theta_low, + self$theta_high, + self$source_rate, + self$decay_rate, + self$dt, + self$timeSteps, + self$survival_threshold + ) + + self$last_dynamics <- result + + # Apply regulation: remove dead antibodies + kept_idx <- which(result$keep) + if (length(kept_idx) == 0) { + warning("Idiotypic regulation removed all antibodies. ", + "Consider adjusting thresholds (theta_low, theta_high).") + } else { + repertoire$subset(kept_idx) + } + + invisible(self) + }, + + #' @description Get the Ab-Ab affinity matrix from the last regulation. + #' @return Numeric matrix (m x m) or NULL if not yet run. + get_network = function() { + if (is.null(self$last_dynamics)) return(NULL) + self$last_dynamics$Ab_Ab_affinity + }, + + #' @description Get population levels from the last regulation. + #' @return Numeric vector or NULL if not yet run. + get_population = function() { + if (is.null(self$last_dynamics)) return(NULL) + self$last_dynamics$population + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat("\n") + cat(sprintf(" Activation window: [%.3f, %.3f]\n", + self$theta_low, self$theta_high)) + cat(sprintf(" Dynamics: %d steps (dt=%.3f)\n", + self$timeSteps, self$dt)) + cat(sprintf(" Source: %.3f Decay: %.3f Survival: %.3f\n", + self$source_rate, self$decay_rate, self$survival_threshold)) + if (!is.null(self$last_dynamics)) { + pop <- self$last_dynamics$population + kept <- sum(self$last_dynamics$keep) + cat(sprintf(" Last run: %d/%d antibodies survived\n", + kept, length(pop))) + } + invisible(self) + } + ) +) diff --git a/R/ImmuneAlgorithm.R b/R/ImmuneAlgorithm.R new file mode 100644 index 0000000..bcb554e --- /dev/null +++ b/R/ImmuneAlgorithm.R @@ -0,0 +1,124 @@ +#' @title ImmuneAlgorithm +#' @description Abstract R6 base class for all immune-inspired algorithms. +#' Subclasses must implement the \code{fit} method. +#' +#' @param config Named list of hyperparameters. +#' @param modules Named list of module instances. +#' +#' @examples +#' # ImmuneAlgorithm is abstract; use AINet for concrete instances +#' algo <- ImmuneAlgorithm$new() +#' print(algo) +#' +#' @importFrom R6 R6Class +#' @export +ImmuneAlgorithm <- R6::R6Class( + "ImmuneAlgorithm", + public = list( + + #' @field repertoire An \code{\link{ImmuneRepertoire}} object. + repertoire = NULL, + + #' @field config Named list of algorithm hyperparameters. + config = NULL, + + #' @field modules Named list of injected module instances + #' (SHMEngine, IdiotypicNetwork, GerminalCenter, etc.). + modules = NULL, + + #' @field history List of per-iteration metrics. + history = NULL, + + #' @field result The result from the last call to \code{fit()}. + result = NULL, + + #' @description Create a new ImmuneAlgorithm. + #' @param config Named list of hyperparameters. + #' @param modules Named list of module instances. + initialize = function(config = list(), modules = list()) { + self$config <- config + self$modules <- modules + self$history <- list() + }, + + #' @description Fit the algorithm to data. Must be overridden by subclasses. + #' @param X Numeric matrix (n x d). + #' @param y Optional target vector (factor or numeric). + #' @param task Character: "clustering", "classification", or "regression". + #' @param ... Additional arguments. + #' @return The algorithm object (invisibly), with \code{result} populated. + fit = function(X, y = NULL, task = NULL, ...) { + stop("ImmuneAlgorithm$fit() is abstract and must be overridden by subclasses.") + }, + + #' @description Predict on new data using the trained repertoire. + #' @param newdata Numeric matrix (n_new x d). + #' @return Predictions (class labels, numeric values, or cluster IDs). + predict = function(newdata) { + if (is.null(self$result)) { + stop("Model has not been fitted yet. Call $fit() first.") + } + newdata <- as.matrix(newdata) + + task <- self$result$task + A <- self$repertoire$as_matrix() + cfg <- self$config + + alpha <- cfg$affinityParams$alpha %||% 1 + c_p <- cfg$affinityParams$c %||% 1 + p_p <- cfg$affinityParams$p %||% 2 + Sigma_inv <- if (!is.null(cfg$affinityParams$Sigma)) { + solve(cfg$affinityParams$Sigma) + } else { + matrix(0, 0, 0) + } + + fa <- final_assignment_cpp( + newdata, A, + cfg$affinityFunc %||% "gaussian", + cfg$distFunc %||% "euclidean", + switch(task, clustering = 0L, classification = 1L, regression = 2L), + alpha, c_p, p_p, Sigma_inv, + self$result$antibody_values %||% numeric(0), + self$result$overall_mean %||% 0.0 + ) + + if (task == "clustering") { + return(fa$assignments) + } else if (task == "classification") { + classes <- self$result$antibody_classes + return(classes[fa$best_antibody_idx]) + } else { + return(fa$predictions) + } + }, + + #' @description Print summary of the algorithm. + #' @param ... Not used. + print = function(...) { + cat(sprintf("<%s>\n", class(self)[1])) + if (!is.null(self$repertoire)) { + cat(sprintf(" Repertoire: %d antibodies x %d features\n", + self$repertoire$size(), self$repertoire$n_features())) + } + if (!is.null(self$result)) { + cat(sprintf(" Task: %s\n", self$result$task)) + cat(sprintf(" Iterations: %d\n", length(self$history))) + } else { + cat(" (not yet fitted)\n") + } + invisible(self) + }, + + #' @description Get a summary of the fitting history. + #' @return Data frame of per-iteration metrics. + summary = function() { + if (length(self$history) == 0) return(NULL) + do.call(rbind, lapply(seq_along(self$history), function(i) { + h <- self$history[[i]] + data.frame(iteration = i, n_antibodies = h$n_antibodies, + stringsAsFactors = FALSE) + })) + } + ) +) diff --git a/R/ImmuneRepertoire.R b/R/ImmuneRepertoire.R new file mode 100644 index 0000000..53c013e --- /dev/null +++ b/R/ImmuneRepertoire.R @@ -0,0 +1,181 @@ +#' @title ImmuneRepertoire +#' @description R6 class representing a collection of antibodies (immune cells) +#' with associated metadata. Core data structure for bHIVE algorithms. +#' +#' @details +#' An ImmuneRepertoire holds a matrix of antibody vectors (each row is one +#' antibody in feature space) plus per-antibody metadata (isotype, state, age, +#' lineage). All heavy computation is dispatched to C++ via RcppArmadillo. +#' +#' @examples +#' # Create a repertoire from random antibodies +#' A <- matrix(rnorm(50), nrow = 10, ncol = 5) +#' rep <- ImmuneRepertoire$new(A) +#' print(rep) +#' +#' # Compute affinity to data +#' X <- matrix(rnorm(100), nrow = 20, ncol = 5) +#' aff <- rep$affinity_matrix(X, "gaussian", list(alpha = 1)) +#' dim(aff) # 20 x 10 +#' +#' # Network suppression +#' rep$suppress(epsilon = 1.5, method = "euclidean") +#' rep$size() # fewer antibodies after suppression +#' +#' @param cells Numeric matrix (nAntibodies x nFeatures). +#' @param metadata Optional data frame with columns: isotype, state, age, lineage. +#' +#' @importFrom R6 R6Class +#' @export +ImmuneRepertoire <- R6::R6Class( + "ImmuneRepertoire", + public = list( + + #' @field cells Numeric matrix (nAntibodies x nFeatures). + cells = NULL, + + #' @field metadata Data frame with per-antibody attributes. + metadata = NULL, + + #' @description Create a new ImmuneRepertoire. + #' @param cells Numeric matrix (nAntibodies x nFeatures). + #' @param metadata Optional data frame with columns: isotype, state, age, lineage. + initialize = function(cells, metadata = NULL) { + cells <- as.matrix(cells) + stopifnot(is.numeric(cells), length(dim(cells)) == 2) + self$cells <- cells + + m <- nrow(cells) + if (is.null(metadata)) { + self$metadata <- data.frame( + isotype = rep("IgM", m), + state = rep("naive", m), + age = rep(0L, m), + lineage = rep(NA_character_, m), + stringsAsFactors = FALSE + ) + } else { + stopifnot(is.data.frame(metadata), nrow(metadata) == m) + self$metadata <- metadata + } + }, + + #' @description Compute affinity matrix between data X and antibodies. + #' @param X Numeric matrix (n x d) of data points. + #' @param method Affinity function: "gaussian", "laplace", "polynomial", + #' "cosine", "hamming". + #' @param params List with alpha, c, p parameters. + #' @return Numeric matrix (n x m) of affinity values. + affinity_matrix = function(X, method = "gaussian", + params = list(alpha = 1, c = 1, p = 2)) { + X <- as.matrix(X) + compute_affinity_matrix( + X, self$cells, method, + alpha = params$alpha %||% 1, + c = params$c %||% 1, + p = params$p %||% 2 + ) + }, + + #' @description Compute distance matrix between data X and antibodies. + #' @param X Numeric matrix (n x d). + #' @param method Distance function: "euclidean", "manhattan", "minkowski", + #' "cosine", "mahalanobis", "hamming". + #' @param params List with p, Sigma parameters. + #' @return Numeric matrix (n x m) of distances. + distance_matrix = function(X, method = "euclidean", + params = list(p = 2, Sigma = NULL)) { + X <- as.matrix(X) + Sigma_inv <- if (!is.null(params$Sigma)) solve(params$Sigma) else matrix(0, 0, 0) + compute_distance_matrix( + X, self$cells, method, + p = params$p %||% 2, + Sigma_inv = Sigma_inv + ) + }, + + #' @description Network suppression: remove redundant antibodies. + #' @param epsilon Distance threshold for suppression. + #' @param method Distance function for suppression. + #' @param params List with p, Sigma parameters. + #' @return Invisible self (modified in place). + suppress = function(epsilon, method = "euclidean", + params = list(p = 2, Sigma = NULL)) { + Sigma_inv <- if (!is.null(params$Sigma)) solve(params$Sigma) else matrix(0, 0, 0) + keep <- network_suppression_cpp( + self$cells, method, epsilon, + p = params$p %||% 2, + Sigma_inv = Sigma_inv + ) + kept_idx <- which(keep) + self$cells <- self$cells[kept_idx, , drop = FALSE] + self$metadata <- self$metadata[kept_idx, , drop = FALSE] + invisible(self) + }, + + #' @description Get number of antibodies. + #' @return Integer. + size = function() nrow(self$cells), + + #' @description Get number of features. + #' @return Integer. + n_features = function() ncol(self$cells), + + #' @description Subset the repertoire. + #' @param idx Integer vector of row indices to keep. + #' @return Invisible self (modified in place). + subset = function(idx) { + self$cells <- self$cells[idx, , drop = FALSE] + self$metadata <- self$metadata[idx, , drop = FALSE] + invisible(self) + }, + + #' @description Add antibodies to the repertoire. + #' @param new_cells Numeric matrix (k x d) of new antibodies. + #' @param new_metadata Optional data frame of metadata for new antibodies. + #' @return Invisible self (modified in place). + add = function(new_cells, new_metadata = NULL) { + new_cells <- as.matrix(new_cells) + stopifnot(ncol(new_cells) == self$n_features()) + k <- nrow(new_cells) + if (is.null(new_metadata)) { + new_metadata <- data.frame( + isotype = rep("IgM", k), + state = rep("naive", k), + age = rep(0L, k), + lineage = rep(NA_character_, k), + stringsAsFactors = FALSE + ) + } + self$cells <- rbind(self$cells, new_cells) + self$metadata <- rbind(self$metadata, new_metadata) + invisible(self) + }, + + #' @description Increment age of all antibodies. + #' @return Invisible self (modified in place). + age_all = function() { + self$metadata$age <- self$metadata$age + 1L + invisible(self) + }, + + #' @description Convert to plain matrix. + #' @return Numeric matrix (nAntibodies x nFeatures). + as_matrix = function() self$cells, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat(sprintf(" %d antibodies x %d features\n", + self$size(), self$n_features())) + if (self$size() > 0) { + cat(" Isotypes:", paste(table(self$metadata$isotype), collapse=", "), "\n") + cat(" States: ", paste(table(self$metadata$state), collapse=", "), "\n") + } + invisible(self) + } + ), + + private = list( + ) +) diff --git a/R/MemoryPool.R b/R/MemoryPool.R new file mode 100644 index 0000000..0ec0e33 --- /dev/null +++ b/R/MemoryPool.R @@ -0,0 +1,147 @@ +#' @title MemoryPool +#' @description Manages long-lived memory cells that can be recalled when +#' distribution shifts are detected. Implements the immunological distinction +#' between short-lived effector cells and long-lived memory cells. +#' +#' @examples +#' # Archive and recall memory cells +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' A <- X[sample(150, 10), ] +#' mp <- MemoryPool$new(archive_threshold = 0.01) +#' rep <- ImmuneRepertoire$new(A) +#' mp$archive(rep, X) +#' mp$size() # number of archived memories +#' recalled <- mp$recall(X[1:5, ]) +#' nrow(recalled) # memories relevant to query +#' +#' @param archive_threshold Numeric. Minimum average affinity to archive. +#' @param max_memory Integer. Maximum memory cells. +#' @param recall_threshold Numeric. Minimum affinity to recall a memory. +#' +#' @importFrom R6 R6Class +#' @export +MemoryPool <- R6::R6Class( + "MemoryPool", + public = list( + + #' @field memory_cells Numeric matrix of archived memory antibodies. + memory_cells = NULL, + + #' @field memory_metadata Data frame of metadata for memory cells. + memory_metadata = NULL, + + #' @field archive_threshold Affinity threshold for archiving (only high-quality + #' antibodies become memory). + archive_threshold = NULL, + + #' @field max_memory Maximum number of memory cells to store. + max_memory = NULL, + + #' @field recall_threshold Threshold for triggering memory recall. + recall_threshold = NULL, + + #' @description Create a new MemoryPool. + #' @param archive_threshold Numeric. Minimum average affinity to archive. + #' @param max_memory Integer. Maximum memory cells. + #' @param recall_threshold Numeric. Minimum affinity to recall a memory. + initialize = function(archive_threshold = 0.5, + max_memory = 100, + recall_threshold = 0.3) { + self$archive_threshold <- archive_threshold + self$max_memory <- max_memory + self$recall_threshold <- recall_threshold + self$memory_cells <- NULL + self$memory_metadata <- NULL + }, + + #' @description Archive high-performing antibodies as memory cells. + #' @param repertoire An \code{\link{ImmuneRepertoire}}. + #' @param X Training data matrix. + #' @param affinityFunc Character. Affinity function. + #' @param affinityParams List. Affinity parameters. + #' @return Integer. Number of new memory cells archived. + archive = function(repertoire, X, affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2)) { + A <- repertoire$as_matrix() + aff <- compute_affinity_matrix( + X, A, affinityFunc, + alpha = affinityParams$alpha %||% 1, + c = affinityParams$c %||% 1, + p = affinityParams$p %||% 2 + ) + + # Average affinity per antibody + avg_aff <- colMeans(aff) + candidates <- which(avg_aff >= self$archive_threshold) + + if (length(candidates) == 0) return(0L) + + new_memory <- A[candidates, , drop = FALSE] + new_meta <- repertoire$metadata[candidates, , drop = FALSE] + new_meta$state <- "memory" + + if (is.null(self$memory_cells)) { + self$memory_cells <- new_memory + self$memory_metadata <- new_meta + } else { + self$memory_cells <- rbind(self$memory_cells, new_memory) + self$memory_metadata <- rbind(self$memory_metadata, new_meta) + } + + # Trim to max_memory (keep most recent) + n_mem <- nrow(self$memory_cells) + if (n_mem > self$max_memory) { + keep <- seq(n_mem - self$max_memory + 1, n_mem) + self$memory_cells <- self$memory_cells[keep, , drop = FALSE] + self$memory_metadata <- self$memory_metadata[keep, , drop = FALSE] + } + + length(candidates) + }, + + #' @description Recall memory cells relevant to current data. + #' @param X Data matrix to match against memory. + #' @param affinityFunc Character. Affinity function. + #' @param affinityParams List. Affinity parameters. + #' @return Numeric matrix of recalled memory cells (may be empty). + recall = function(X, affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2)) { + if (is.null(self$memory_cells) || nrow(self$memory_cells) == 0) { + return(matrix(0, nrow = 0, ncol = ncol(as.matrix(X)))) + } + + aff <- compute_affinity_matrix( + X, self$memory_cells, affinityFunc, + alpha = affinityParams$alpha %||% 1, + c = affinityParams$c %||% 1, + p = affinityParams$p %||% 2 + ) + + # Recall memories that are relevant (high average affinity to current data) + avg_aff <- colMeans(aff) + relevant <- which(avg_aff >= self$recall_threshold) + + if (length(relevant) == 0) { + return(matrix(0, nrow = 0, ncol = ncol(self$memory_cells))) + } + + self$memory_cells[relevant, , drop = FALSE] + }, + + #' @description Get current memory pool size. + #' @return Integer. + size = function() { + if (is.null(self$memory_cells)) 0L else nrow(self$memory_cells) + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat(sprintf(" %d cells (max %d)\n", self$size(), self$max_memory)) + cat(sprintf(" Archive threshold: %.3f\n", self$archive_threshold)) + cat(sprintf(" Recall threshold: %.3f\n", self$recall_threshold)) + invisible(self) + } + ) +) diff --git a/R/Microenvironment.R b/R/Microenvironment.R new file mode 100644 index 0000000..92582a8 --- /dev/null +++ b/R/Microenvironment.R @@ -0,0 +1,161 @@ +#' @title Microenvironment +#' @description Models local microenvironment cues that influence antibody +#' behavior based on the density and structure of nearby data points. +#' +#' @details +#' In real immunity, B cell fate is strongly influenced by local signals: +#' chemokines, cytokines, and interactions with stromal cells in specific +#' tissue microenvironments. This module translates that concept into +#' density-dependent adaptation: +#' +#' \itemize{ +#' \item \strong{High density zones}: Promote memory formation (stabilize +#' antibodies, reduce mutation rate) +#' \item \strong{Low density zones}: Promote exploration (increase mutation +#' rate for broader search) +#' \item \strong{Boundary zones}: Trigger class switching (change matching +#' breadth between IgM-like broad and IgG-like specific modes) +#' \item \strong{Chemokine-like gradients}: Bias mutation direction toward +#' higher-density regions +#' } +#' +#' @examples +#' # Assess microenvironment around antibodies +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' me <- Microenvironment$new() +#' rep <- ImmuneRepertoire$new(X[sample(150, 15), ]) +#' env <- me$assess(rep, X) +#' table(env$zones) # stable, explore, boundary +#' env$mutation_modifiers # per-antibody rate scaling +#' +#' @param density_bandwidth Numeric. KDE bandwidth (NULL for auto). +#' @param high_density_threshold Numeric [0,1]. Percentile threshold for stabilization. +#' @param low_density_threshold Numeric [0,1]. Percentile threshold for exploration. +#' @param stabilization_factor Numeric. Mutation rate multiplier for stable zones. +#' @param exploration_factor Numeric. Mutation rate multiplier for exploration zones. +#' +#' @importFrom R6 R6Class +#' @export +Microenvironment <- R6::R6Class( + "Microenvironment", + public = list( + + #' @field density_bandwidth Bandwidth for kernel density estimation. + density_bandwidth = NULL, + + #' @field high_density_threshold Density percentile above which antibodies stabilize. + high_density_threshold = NULL, + + #' @field low_density_threshold Density percentile below which antibodies explore. + low_density_threshold = NULL, + + #' @field stabilization_factor Mutation rate multiplier for high-density zones. + stabilization_factor = NULL, + + #' @field exploration_factor Mutation rate multiplier for low-density zones. + exploration_factor = NULL, + + #' @field last_densities Per-antibody local density from last assessment. + last_densities = NULL, + + #' @field last_zones Per-antibody zone classification from last assessment. + last_zones = NULL, + + #' @description Create a new Microenvironment module. + #' @param density_bandwidth Numeric. KDE bandwidth (NULL for auto). + #' @param high_density_threshold Numeric [0,1]. Percentile threshold for stabilization. + #' @param low_density_threshold Numeric [0,1]. Percentile threshold for exploration. + #' @param stabilization_factor Numeric. Mutation rate multiplier for stable zones. + #' @param exploration_factor Numeric. Mutation rate multiplier for exploration zones. + initialize = function(density_bandwidth = NULL, + high_density_threshold = 0.75, + low_density_threshold = 0.25, + stabilization_factor = 0.3, + exploration_factor = 2.0) { + self$density_bandwidth <- density_bandwidth + self$high_density_threshold <- high_density_threshold + self$low_density_threshold <- low_density_threshold + self$stabilization_factor <- stabilization_factor + self$exploration_factor <- exploration_factor + }, + + #' @description Assess the microenvironment around each antibody. + #' @param repertoire An \code{\link{ImmuneRepertoire}} object. + #' @param X Numeric matrix of training data (n x d). + #' @param affinityFunc Character. Affinity function. + #' @param affinityParams List. Affinity parameters. + #' @return Named list with densities, zones, and mutation_modifiers per antibody. + assess = function(repertoire, X, affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2)) { + A <- repertoire$as_matrix() + m <- nrow(A) + n <- nrow(X) + + # Compute affinity to data points + aff <- compute_affinity_matrix( + X, A, affinityFunc, + alpha = affinityParams$alpha %||% 1, + c = affinityParams$c %||% 1, + p = affinityParams$p %||% 2 + ) + + # Local density: sum of affinities to all data points (KDE-like) + densities <- colSums(aff) # m-length vector + + # Compute percentile thresholds + low_thresh <- quantile(densities, self$low_density_threshold) + high_thresh <- quantile(densities, self$high_density_threshold) + + # Classify zones + zones <- ifelse(densities >= high_thresh, "stable", + ifelse(densities <= low_thresh, "explore", "boundary")) + + # Compute mutation rate modifiers + mutation_modifiers <- rep(1.0, m) + mutation_modifiers[zones == "stable"] <- self$stabilization_factor + mutation_modifiers[zones == "explore"] <- self$exploration_factor + mutation_modifiers[zones == "boundary"] <- 1.0 # neutral + + # Compute gradient direction (mean direction toward data from each antibody) + # Weighted by affinity -- chemokine-like attraction + gradients <- matrix(0, nrow = m, ncol = ncol(A)) + for (j in seq_len(m)) { + weights <- aff[, j] + total_w <- sum(weights) + if (total_w > 0) { + gradients[j, ] <- colSums(sweep(X, 2, A[j, ], `-`) * weights) / total_w + } + } + + # Update metadata states + repertoire$metadata$state[zones == "stable"] <- "memory" + repertoire$metadata$state[zones == "explore"] <- "activated" + repertoire$metadata$state[zones == "boundary"] <- "activated" + + self$last_densities <- densities + self$last_zones <- zones + + list( + densities = densities, + zones = zones, + mutation_modifiers = mutation_modifiers, + gradients = gradients + ) + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat("\n") + cat(sprintf(" Density thresholds: [%.2f, %.2f] percentile\n", + self$low_density_threshold, self$high_density_threshold)) + cat(sprintf(" Stabilization: %.2fx Exploration: %.2fx\n", + self$stabilization_factor, self$exploration_factor)) + if (!is.null(self$last_zones)) { + cat(" Last assessment:", paste(table(self$last_zones), collapse=", "), "\n") + } + invisible(self) + } + ) +) diff --git a/R/RcppExports.R b/R/RcppExports.R new file mode 100644 index 0000000..9a8c45b --- /dev/null +++ b/R/RcppExports.R @@ -0,0 +1,39 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +compute_affinity_matrix <- function(X, A, affinity_type, alpha = 1.0, c = 1.0, p = 2.0) { + .Call(`_bHIVE_compute_affinity_matrix`, X, A, affinity_type, alpha, c, p) +} + +compute_distance_matrix <- function(X, A, dist_type, p, Sigma_inv) { + .Call(`_bHIVE_compute_distance_matrix`, X, A, dist_type, p, Sigma_inv) +} + +compute_pairwise_distance <- function(A, dist_type, p, Sigma_inv) { + .Call(`_bHIVE_compute_pairwise_distance`, A, dist_type, p, Sigma_inv) +} + +clonal_selection_iteration_cpp <- function(A, X, y_num, task_int, k, beta, maxClones, mutationDecay, mutationMin, iter, affinity_type, alpha, c_param, p_param, nClasses) { + .Call(`_bHIVE_clonal_selection_iteration_cpp`, A, X, y_num, task_int, k, beta, maxClones, mutationDecay, mutationMin, iter, affinity_type, alpha, c_param, p_param, nClasses) +} + +final_assignment_cpp <- function(X, A, affinity_type, dist_type, task_int, alpha, c_param, p_param, Sigma_inv, antibody_values, overall_mean) { + .Call(`_bHIVE_final_assignment_cpp`, X, A, affinity_type, dist_type, task_int, alpha, c_param, p_param, Sigma_inv, antibody_values, overall_mean) +} + +init_kmeanspp_cpp <- function(X, nCenters) { + .Call(`_bHIVE_init_kmeanspp_cpp`, X, nCenters) +} + +idiotypic_dynamics_cpp <- function(A, affinity_type, aff_alpha, aff_c, aff_p, theta_low, theta_high, source_rate, decay_rate, dt, timeSteps, survival_threshold) { + .Call(`_bHIVE_idiotypic_dynamics_cpp`, A, affinity_type, aff_alpha, aff_c, aff_p, theta_low, theta_high, source_rate, decay_rate, dt, timeSteps, survival_threshold) +} + +network_suppression_cpp <- function(A, dist_type, epsilon, p, Sigma_inv) { + .Call(`_bHIVE_network_suppression_cpp`, A, dist_type, epsilon, p, Sigma_inv) +} + +shm_mutate_cpp <- function(A, X, affinities, top_k_indices, data_indices, method, iter, decay, mutationMin, c_rate, temperature, E_0, base_rate, beta1, beta2, adam_epsilon, m1_state, m2_state, affinity_type, aff_alpha, aff_c, aff_p) { + .Call(`_bHIVE_shm_mutate_cpp`, A, X, affinities, top_k_indices, data_indices, method, iter, decay, mutationMin, c_rate, temperature, E_0, base_rate, beta1, beta2, adam_epsilon, m1_state, m2_state, affinity_type, aff_alpha, aff_c, aff_p) +} + diff --git a/R/SHMEngine.R b/R/SHMEngine.R new file mode 100644 index 0000000..dcdf31c --- /dev/null +++ b/R/SHMEngine.R @@ -0,0 +1,145 @@ +#' @title SHMEngine +#' @description Somatic Hypermutation Engine implementing five biologically- +#' inspired mutation strategies for the bHIVE artificial immune system. +#' +#' @details +#' The five strategies are: +#' \describe{ +#' \item{uniform}{Classic random Gaussian noise. Mutation rate = (1-affinity) * +#' decay^(iter-1). This is the original bHIVE behavior.} +#' \item{airs}{AIRS-style affinity-proportional mutation. Rate = c * exp(-affinity / T). +#' From Watkins & Timmis (AIRS2), achieving 50\% better data reduction than uniform.} +#' \item{hotspot}{Feature-importance-weighted mutation. Features with higher gradient +#' magnitude mutate more, analogous to AID targeting WRCY motifs in real SHM.} +#' \item{energy}{Energy-budget-constrained mutation. Total mutation magnitude bounded +#' by E = E_0 * (1-affinity)^2, inspired by Kleinstein's E_SHM ~ N_Mut^2 model.} +#' \item{adaptive}{Per-feature adaptive mutation rate with moment tracking, directly +#' implementing SHM as the Adam optimizer.} +#' } +#' +#' @examples +#' # Create different SHM engines +#' shm_uniform <- SHMEngine$new(method = "uniform") +#' shm_adaptive <- SHMEngine$new(method = "adaptive", base_rate = 0.1) +#' shm_airs <- SHMEngine$new(method = "airs", temperature = 0.3) +#' print(shm_adaptive) +#' +#' @param method Character. Mutation strategy. +#' @param decay Numeric. Per-iteration mutation rate decay (uniform method). +#' @param mutationMin Numeric. Minimum mutation rate floor. +#' @param c_rate Numeric. Scaling constant (airs method). +#' @param temperature Numeric. Temperature parameter (airs method). +#' @param E_0 Numeric. Energy budget base (energy method). +#' @param base_rate Numeric. Base mutation rate (hotspot, adaptive methods). +#' @param beta1 Numeric. First moment decay (adaptive method, like Adam). +#' @param beta2 Numeric. Second moment decay (adaptive method, like Adam). +#' @param adam_epsilon Numeric. Numerical stability (adaptive method). +#' +#' @importFrom R6 R6Class +#' @export +SHMEngine <- R6::R6Class( + "SHMEngine", + public = list( + + #' @field method Character. One of "uniform", "airs", "hotspot", "energy", "adaptive". + method = NULL, + + #' @field params Named list of method-specific parameters. + params = NULL, + + #' @field m1_state First moment state matrix (for adaptive method). + m1_state = NULL, + + #' @field m2_state Second moment state matrix (for adaptive method). + m2_state = NULL, + + #' @description Create a new SHMEngine. + #' @param method Character. Mutation strategy. + #' @param decay Numeric. Per-iteration mutation rate decay (uniform method). + #' @param mutationMin Numeric. Minimum mutation rate floor. + #' @param c_rate Numeric. Scaling constant (airs method). + #' @param temperature Numeric. Temperature parameter (airs method). + #' @param E_0 Numeric. Energy budget base (energy method). + #' @param base_rate Numeric. Base mutation rate (hotspot, adaptive methods). + #' @param beta1 Numeric. First moment decay (adaptive method, like Adam). + #' @param beta2 Numeric. Second moment decay (adaptive method, like Adam). + #' @param adam_epsilon Numeric. Numerical stability (adaptive method). + initialize = function(method = "uniform", + decay = 1.0, + mutationMin = 0.01, + c_rate = 1.0, + temperature = 0.5, + E_0 = 1.0, + base_rate = 0.1, + beta1 = 0.9, + beta2 = 0.999, + adam_epsilon = 1e-8) { + self$method <- match.arg(method, c("uniform", "airs", "hotspot", + "energy", "adaptive")) + stopifnot( + "decay must be in (0, 1]" = is.numeric(decay) && decay > 0 && decay <= 1, + "mutationMin must be non-negative" = is.numeric(mutationMin) && mutationMin >= 0, + "temperature must be positive" = is.numeric(temperature) && temperature > 0, + "E_0 must be positive" = is.numeric(E_0) && E_0 > 0, + "base_rate must be positive" = is.numeric(base_rate) && base_rate > 0, + "beta1 must be in [0, 1)" = is.numeric(beta1) && beta1 >= 0 && beta1 < 1, + "beta2 must be in [0, 1)" = is.numeric(beta2) && beta2 >= 0 && beta2 < 1, + "adam_epsilon must be positive" = is.numeric(adam_epsilon) && adam_epsilon > 0 + ) + self$params <- list( + decay = decay, + mutationMin = mutationMin, + c_rate = c_rate, + temperature = temperature, + E_0 = E_0, + base_rate = base_rate, + beta1 = beta1, + beta2 = beta2, + adam_epsilon = adam_epsilon + ) + }, + + #' @description Initialize internal state for adaptive method. + #' @param nAntibodies Integer. Number of antibodies. + #' @param nFeatures Integer. Number of features. + init_state = function(nAntibodies, nFeatures) { + if (self$method == "adaptive") { + self$m1_state <- matrix(0, nrow = nAntibodies, ncol = nFeatures) + self$m2_state <- matrix(0, nrow = nAntibodies, ncol = nFeatures) + } + }, + + #' @description Reset moment states (e.g., after suppression changes antibody count). + #' @param nAntibodies Integer. New number of antibodies. + #' @param nFeatures Integer. Number of features. + #' @param kept_idx Integer vector. Indices of antibodies that were kept. + reset_state = function(nAntibodies, nFeatures, kept_idx = NULL) { + if (self$method == "adaptive") { + if (!is.null(kept_idx) && !is.null(self$m1_state)) { + self$m1_state <- self$m1_state[kept_idx, , drop = FALSE] + self$m2_state <- self$m2_state[kept_idx, , drop = FALSE] + } else { + self$m1_state <- matrix(0, nrow = nAntibodies, ncol = nFeatures) + self$m2_state <- matrix(0, nrow = nAntibodies, ncol = nFeatures) + } + } + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat(sprintf(" method='%s'\n", self$method)) + key_params <- switch(self$method, + "uniform" = c("decay", "mutationMin"), + "airs" = c("c_rate", "temperature"), + "hotspot" = c("base_rate"), + "energy" = c("E_0"), + "adaptive" = c("base_rate", "beta1", "beta2") + ) + for (p in key_params) { + cat(sprintf(" %s: %s\n", p, self$params[[p]])) + } + invisible(self) + } + ) +) diff --git a/R/VDJLibrary.R b/R/VDJLibrary.R new file mode 100644 index 0000000..4f5103f --- /dev/null +++ b/R/VDJLibrary.R @@ -0,0 +1,200 @@ +#' @title VDJLibrary +#' @description V(D)J gene library initialization for antibody populations. +#' Instead of random sampling, antibodies are generated by combinatorial +#' assembly of gene segments, mimicking the V(D)J recombination that generates +#' antibody diversity in real immune systems. +#' +#' @details +#' The feature space is decomposed into V, D, and J segments (subsets of +#' dimensions). Each segment is discretized into "alleles" by clustering. +#' New antibodies are generated by combinatorial sampling of one allele from +#' each segment, producing structured coverage of the data manifold. +#' +#' Methods: +#' \itemize{ +#' \item \code{"pca"}: PCA components define gene segments +#' \item \code{"cluster"}: k-means within dimension groups creates alleles +#' \item \code{"random_partition"}: Random feature grouping +#' } +#' +#' @examples +#' # Generate antibodies via V(D)J combinatorial assembly +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' vdj <- VDJLibrary$new(nV = 5, nD = 3, nJ = 3, method = "pca") +#' A <- vdj$generate(20, X) +#' dim(A) # 20 x 4 +#' print(vdj) +#' +#' @param nV Integer. Number of V gene alleles. +#' @param nD Integer. Number of D gene alleles. +#' @param nJ Integer. Number of J gene alleles. +#' @param method Character. "pca", "cluster", or "random_partition". +#' +#' @importFrom R6 R6Class +#' @importFrom stats prcomp kmeans +#' @export +VDJLibrary <- R6::R6Class( + "VDJLibrary", + public = list( + + #' @field nV Number of V gene segments. + nV = NULL, + + #' @field nD Number of D gene segments. + nD = NULL, + + #' @field nJ Number of J gene segments. + nJ = NULL, + + #' @field method Decomposition method. + method = NULL, + + #' @field library The computed gene library (list of allele matrices). + library = NULL, + + #' @description Create a new VDJLibrary. + #' @param nV Integer. Number of V gene alleles. + #' @param nD Integer. Number of D gene alleles. + #' @param nJ Integer. Number of J gene alleles. + #' @param method Character. "pca", "cluster", or "random_partition". + initialize = function(nV = 10, nD = 5, nJ = 4, method = "pca") { + self$nV <- nV + self$nD <- nD + self$nJ <- nJ + self$method <- match.arg(method, c("pca", "cluster", "random_partition")) + }, + + #' @description Build the gene library from training data. + #' @param X Numeric matrix (n x d). + #' @return Invisible self. + build = function(X) { + X <- as.matrix(X) + d <- ncol(X) + + if (self$method == "pca") { + self$library <- private$.build_pca(X) + } else if (self$method == "cluster") { + self$library <- private$.build_cluster(X) + } else { + self$library <- private$.build_random(X) + } + invisible(self) + }, + + #' @description Generate antibodies by combinatorial V(D)J assembly. + #' @param nAntibodies Integer. Number of antibodies to generate. + #' @param X Numeric matrix. Training data (used to build library if needed). + #' @return Numeric matrix (nAntibodies x d). + generate = function(nAntibodies, X) { + if (is.null(self$library)) self$build(X) + + lib <- self$library + d <- ncol(as.matrix(X)) + A <- matrix(0, nrow = nAntibodies, ncol = d) + + for (i in seq_len(nAntibodies)) { + v_idx <- sample.int(nrow(lib$V), 1) + d_idx <- sample.int(nrow(lib$D), 1) + j_idx <- sample.int(nrow(lib$J), 1) + + # Concatenate gene segments + A[i, lib$V_dims] <- lib$V[v_idx, ] + A[i, lib$D_dims] <- lib$D[d_idx, ] + A[i, lib$J_dims] <- lib$J[j_idx, ] + } + + A + }, + + #' @description Print summary. + #' @param ... Not used. + print = function(...) { + cat(sprintf(" method='%s' V=%d D=%d J=%d\n", + self$method, self$nV, self$nD, self$nJ)) + if (!is.null(self$library)) { + cat(sprintf(" Combinatorial space: %d antibodies\n", + nrow(self$library$V) * nrow(self$library$D) * nrow(self$library$J))) + } + invisible(self) + } + ), + + private = list( + + .split_dims = function(d) { + # Split dimensions into 3 non-empty groups + dims <- seq_len(d) + if (d == 1) { + return(list(V = 1L, D = 1L, J = 1L)) + } else if (d == 2) { + return(list(V = 1L, D = 2L, J = c(1L, 2L))) + } + # Split into roughly equal thirds + breaks <- round(seq(0, d, length.out = 4)) + list( + V = dims[(breaks[1] + 1):breaks[2]], + D = dims[(breaks[2] + 1):breaks[3]], + J = dims[(breaks[3] + 1):breaks[4]] + ) + }, + + .build_pca = function(X) { + d <- ncol(X) + splits <- private$.split_dims(d) + V_dims <- splits$V + D_dims <- splits$D + J_dims <- splits$J + + # Create alleles by clustering within each dimension group + V <- private$.make_alleles(X[, V_dims, drop = FALSE], self$nV) + D <- private$.make_alleles(X[, D_dims, drop = FALSE], self$nD) + J <- private$.make_alleles(X[, J_dims, drop = FALSE], self$nJ) + + list(V = V, D = D, J = J, V_dims = V_dims, D_dims = D_dims, J_dims = J_dims) + }, + + .build_cluster = function(X) { + d <- ncol(X) + splits <- private$.split_dims(d) + V_dims <- splits$V + D_dims <- splits$D + J_dims <- splits$J + + V <- private$.make_alleles(X[, V_dims, drop = FALSE], self$nV) + D <- private$.make_alleles(X[, D_dims, drop = FALSE], self$nD) + J <- private$.make_alleles(X[, J_dims, drop = FALSE], self$nJ) + + list(V = V, D = D, J = J, V_dims = V_dims, D_dims = D_dims, J_dims = J_dims) + }, + + .build_random = function(X) { + d <- ncol(X) + perm <- sample.int(d) + splits <- private$.split_dims(d) + V_dims <- perm[splits$V] + D_dims <- perm[splits$D] + J_dims <- perm[splits$J] + + V <- private$.make_alleles(X[, V_dims, drop = FALSE], self$nV) + D <- private$.make_alleles(X[, D_dims, drop = FALSE], self$nD) + J <- private$.make_alleles(X[, J_dims, drop = FALSE], self$nJ) + + list(V = V, D = D, J = J, V_dims = V_dims, D_dims = D_dims, J_dims = J_dims) + }, + + .make_alleles = function(X_sub, k) { + X_sub <- as.matrix(X_sub) + # Ensure no NA/NaN/Inf + if (any(!is.finite(X_sub))) { + X_sub[!is.finite(X_sub)] <- 0 + } + # Create k alleles by k-means clustering + k <- min(k, nrow(unique(X_sub))) + if (k < 1) k <- 1 + if (k == 1) return(matrix(colMeans(X_sub), nrow = 1)) + km <- kmeans(X_sub, centers = k, nstart = 3) + km$centers + } + ) +) diff --git a/R/bHIVE-package.R b/R/bHIVE-package.R new file mode 100644 index 0000000..98e5ddd --- /dev/null +++ b/R/bHIVE-package.R @@ -0,0 +1,6 @@ +#' @keywords internal +"_PACKAGE" + +#' @useDynLib bHIVE, .registration = TRUE +#' @importFrom Rcpp sourceCpp +NULL diff --git a/R/bHiVE.R b/R/bHiVE.R index 931d1ee..4e00dc1 100644 --- a/R/bHiVE.R +++ b/R/bHiVE.R @@ -266,7 +266,7 @@ bHIVE <- function(X, # Identify top k antibodies k2 <- min(k, m) - top_idx <- sort.int(aff_values, decreasing=TRUE, index.return=TRUE)$ix[1:k2] + top_idx <- sort.int(aff_values, decreasing=TRUE, index.return=TRUE)$ix[seq_len(k2)] # classification/regression counters if (task == "classification") { @@ -498,9 +498,9 @@ bHIVE <- function(X, # 3) Choose a new data point at random weighted by D(x)^2 if (nCenters > 1) { for (cId in 2:nCenters) { - dists <- sapply(1:n, function(i) { - min(rowSums((centers[1:(cId-1), , drop = FALSE] - X[i, ])^2)) - }) + dists <- vapply(seq_len(n), function(i) { + min(rowSums((centers[seq_len(cId-1), , drop = FALSE] - X[i, ])^2)) + }, numeric(1)) probs <- dists / sum(dists) idx <- sample(n, 1, prob = probs) centers[cId, ] <- X[idx, ] diff --git a/R/bHiVEModel.R b/R/bHiVEModel.R index 6b5c203..a77f48c 100644 --- a/R/bHiVEModel.R +++ b/R/bHiVEModel.R @@ -102,6 +102,11 @@ #' probabilities <- predict(tunedModel, newdata = X, type = "prob") #' } #' +#' @examples +#' # View model structure +#' bHIVEmodel$label +#' bHIVEmodel$parameters +#' #' @seealso \code{\link[caret]{train}}, \code{\link[caret]{trainControl}} #' #' @export diff --git a/R/refineB.R b/R/refineB.R index 7f302e7..524c0b2 100644 --- a/R/refineB.R +++ b/R/refineB.R @@ -71,6 +71,21 @@ #' Depending on the chosen \code{optimizer}, it may use plain SGD, momentum-based updates, Adagrad, #' Adam, or RMSProp. #' +#' @examples +#' data(iris) +#' X <- as.matrix(iris[, 1:4]) +#' y <- iris$Species +#' res <- bHIVE(X, y, task = "classification", nAntibodies = 10, +#' maxIter = 5, verbose = FALSE) +#' assignments <- as.integer(factor(res$assignments, +#' levels = unique(res$assignments))) +#' A_refined <- refineB(res$antibodies, X, y = y, +#' assignments = assignments, +#' task = "classification", +#' loss = "mse", optimizer = "adam", +#' steps = 3, lr = 0.01, verbose = FALSE) +#' dim(A_refined) +#' #' @export refineB <- function(A, X, diff --git a/R/utils.R b/R/utils.R index 172314a..9b41c21 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,5 +1,17 @@ utils::globalVariables(c("PC1", "PC2", "Group", "Feature", "Prototype", "Layer")) +#' Null-coalesce operator +#' +#' Returns \code{x} if it is not \code{NULL}, otherwise returns \code{y}. +#' Used internally by bHIVE R6 classes for parameter defaults. +#' +#' @param x Value to test. +#' @param y Default value if \code{x} is NULL. +#' @return \code{x} if not NULL, otherwise \code{y}. +#' @name null-coalesce +#' @keywords internal +`%||%` <- function(x, y) if (is.null(x)) y else x + .validate_bHIVE_input <- function(X, y = NULL) { if (!is.matrix(X) && !is.data.frame(X)) { diff --git a/R/visualizeHIVE.R b/R/visualizeHIVE.R index 6165991..db040c3 100644 --- a/R/visualizeHIVE.R +++ b/R/visualizeHIVE.R @@ -124,7 +124,7 @@ visualizeHIVE <- function(result, # Single-layer result: wrap it in a list for uniform processing. selectedLayers <- list(result) layer_labels <- "Layer 1" - } else if (is.list(result) && all(sapply(result, function(x) !is.null(x$antibodies)))) { + } else if (is.list(result) && all(vapply(result, function(x) !is.null(x$antibodies), logical(1)))) { # Multi-layer result. selectedLayers <- result[layer] layer_labels <- paste("Layer", layer) @@ -140,16 +140,16 @@ visualizeHIVE <- function(result, protos <- as.data.frame(layer_i$antibodies) # If X has column names and the dimensions match, assign them to protos: if (!is.null(X) && ncol(protos) == ncol(X) && !is.null(colnames(X))) { - colnames(protos)[1:ncol(X)] <- colnames(X) + colnames(protos)[seq_len(ncol(X))] <- colnames(X) } protos$Layer <- layer_labels[i] - proto_grp <- sapply(seq_len(nrow(protos)), function(j) { + proto_grp <- vapply(seq_len(nrow(protos)), function(j) { dists <- apply(as.matrix(X), 1, function(x) { sqrt(sum((x - as.numeric(protos[j, seq_len(ncol(X))]))^2)) }) nearest_index <- which.min(dists) - return(layer_i$assignments[nearest_index]) - }) + return(as.character(layer_i$assignments[nearest_index])) + }, character(1)) protos$Group <- factor(proto_grp, levels = unique(proto_grp)) protos }) diff --git a/README.md b/README.md index 4c6275f..ae7e70f 100644 --- a/README.md +++ b/README.md @@ -8,127 +8,161 @@ [![R-CMD-check](https://github.com/BorchLab/bHive/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/BorchLab/bHive/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/BorchLab/bHive/graph/badge.svg)](https://app.codecov.io/gh/BorchLab/bHive) - -### Introduction -**bHIVE** is an R package implementing an Artificial Immune Network ([AI-Net](https://www.dca.fee.unicamp.br/~vonzuben/research/lnunes_dout/artigos/DMHA.PDF)) algorithm. -Inspired by principles of immunology, the bHIVE algorithm evolves a population of -"antibodies" to model and analyze datasets. It supports three tasks: +### Overview -- **Clustering**: Groups data points based on similarity. -- **Classification**: Assigns labels to data points based on learned patterns. -- **Regression**: Predicts continuous outcomes based on input features. +**bHIVE** is an R package implementing a modular Artificial Immune System (AIS) framework for clustering, classification, and regression. Built on [AI-Net](https://www.dca.fee.unicamp.br/~vonzuben/research/lnunes_dout/artigos/DMHA.PDF) (de Castro & Von Zuben 2001), bHIVE extends the classical algorithm with biologically-grounded modules drawn from modern immunology: somatic hypermutation, idiotypic network regulation, germinal center selection, and microenvironment-driven adaptation. -The AI-Net algorithm uses clonal selection and mutation, network suppression, -and affinity/distance metrics to iteratively refine the antibody population. +Performance-critical operations (affinity/distance matrices, clonal selection, network suppression, mutation) are implemented in C++ via [RcppArmadillo](https://cran.r-project.org/package=RcppArmadillo), with parallelization support through [BiocParallel](https://bioconductor.org/packages/BiocParallel/). -### Algorithm Basis +### Key Features -The algorithm operates in the following steps: +- **Three tasks** -- clustering, classification, and regression on numeric matrices +- **C++ backend** -- BLAS-optimized bulk affinity/distance computation +- **Two APIs** -- functional (`bHIVE()`) for quick use and R6 (`AINet$new()`) for full module composition +- **Multilayer architecture** -- `honeycombHIVE()` for hierarchical prototype refinement across layers +- **Hyperparameter tuning** -- `swarmbHIVE()` with grid search and BiocParallel +- **Gradient refinement** -- `refineB()` post-processing with 5 optimizers and 8 loss functions +- **Composable immune modules** -- mix and match biological mechanisms via dependency injection +- **caret compatible** -- `bHIVEmodel` and `honeycombHIVEmodel` for cross-validation workflows -1. **Affinity Calculation**: For a data point `x` and an antibody `a`, the affinity is defined as: +## Installation - Affinity(x, a) = f(x, a; params) +```r +devtools::install_github("BorchLab/bHIVE") +``` - where `f` is a similarity kernel such as Gaussian (RBF), Laplace, or cosine similarity. +## Quick Start -2. **Clonal Expansion and Mutation**: Top-matching antibodies are cloned and mutated: +### Functional API - a' = a + ε +The simplest way to use bHIVE. Works like any R modeling function: - where `ε` is sampled from a distribution scaled by affinity and iteration parameters. +```r +library(bHIVE) +data(iris) +X <- as.matrix(iris[, 1:4]) -3. **Network Suppression**: Antibodies that are too similar (within a threshold `ε`) are suppressed to maintain diversity: +# Clustering +res <- bHIVE(X, task = "clustering", nAntibodies = 30, maxIter = 20) +table(res$assignments) - Distance(a_i, a_j) < ε ⇒ Suppress a_j +# Classification +res <- bHIVE(X, y = iris$Species, task = "classification", + nAntibodies = 30, maxIter = 20) +table(Predicted = res$assignments, Actual = iris$Species) -4. **Assignment**: Data points are assigned to antibodies using affinity (for classification/regression) or distance (for clustering). +# Regression +res <- bHIVE(X[, 2:4], y = iris$Sepal.Length, task = "regression", + nAntibodies = 30, maxIter = 20) +cor(res$predictions, iris$Sepal.Length) +``` - +### R6 API with Modules -## Installation +For full control, compose an `AINet` with any combination of immune modules: -To install the **bHIVE** from GitHub, use: +```r +# Adaptive mutation + idiotypic network regulation +model <- AINet$new( + nAntibodies = 20, + maxIter = 30, + shm = SHMEngine$new(method = "adaptive", base_rate = 0.1), + idiotypic = IdiotypicNetwork$new(theta_low = 0.01, theta_high = 0.5), + verbose = FALSE +) +model$fit(X, iris$Species, task = "classification") +table(model$result$assignments) -```R -# Install bHIVE from GitHub -devtools::install_github("BorchLab/bHIVE") +# Predict on new data +preds <- model$predict(X[1:10, ]) ``` -## Examples +## Architecture -### Clustering +### Algorithm -``` -# Load the Iris dataset -data(iris) -X <- as.matrix(iris[, 1:4]) # Use numeric features only - -# Run bHIVE for clustering -res <- bHIVE(X = X, - task = "clustering", - nAntibodies = 30, - beta = 5, - epsilon = 0.01, - maxIter = 20, - k = 3) - -# View clustering results -table(res$assignments) -``` +bHIVE evolves a population of antibody vectors to represent structure in data: -### Classification +1. **Initialization** -- sample from data, random generation, kmeans++, or V(D)J combinatorial assembly +2. **Affinity computation** -- bulk n x m matrix via BLAS (Gaussian, Laplace, polynomial, cosine, Hamming) +3. **Clonal selection + mutation** -- top-k antibodies cloned; mutants generated via configurable SHM strategy +4. **Network regulation** -- suppress redundant antibodies via distance threshold or idiotypic network dynamics +5. **Final assignment** -- data points assigned to nearest antibody by affinity or distance -``` -# Load the Iris dataset -data(iris) -X <- as.matrix(iris[, 1:4]) # Use numeric features -y <- iris$Species # Classification labels - -# Run bHIVE for classification -res <- bHIVE(X = X, - y = y, - task = "classification", - nAntibodies = 30, beta = 5, - epsilon = 0.01, - maxIter = 20, - k = 3) - -# Compare predicted labels with actual labels -table(Predicted = res$assignments, Actual = y) + + +### Immune Modules + +Each module is an R6 class that can be injected into `AINet` via its constructor. All modules are optional -- use only what you need. + +| Module | Biological Basis | What It Does | +|:-------|:-----------------|:-------------| +| `SHMEngine` | Somatic hypermutation | 5 mutation strategies: uniform, airs, hotspot, energy, adaptive | +| `IdiotypicNetwork` | Ab-Ab network regulation | Bell-shaped activation dynamics replacing epsilon-threshold suppression | +| `GerminalCenter` | Tfh-B cell interaction | Task-aware quality selection with resource competition | +| `Microenvironment` | Tissue microenvironment cues | Density-dependent zone classification and mutation rate modulation | +| `VDJLibrary` | V(D)J recombination | Combinatorial gene library initialization (PCA, cluster, random partition) | +| `ActivationGate` | Two-signal activation | Costimulatory filtering (density, danger signal, or label entropy) | +| `MemoryPool` | Immunological memory | Archive high-affinity antibodies and recall on distribution shift | +| `ClassSwitcher` | Isotype class switching | IgM (broad) / IgG (specific) / IgA (boundary) kernel width modulation | +| `ConvergentSelector` | Public clonotypes | Cross-repertoire consensus for ensemble methods | + +### Multilayer & Tuning + +```r +# honeycombHIVE: hierarchical prototype refinement +res <- honeycombHIVE(X, y = iris$Species, task = "classification", + layers = 3, nAntibodies = 30, + refine = TRUE, refineOptimizer = "adam") + +# swarmbHIVE: hyperparameter grid search (parallelizable) +grid <- expand.grid(nAntibodies = c(15, 30), beta = c(3, 5), epsilon = c(0.01, 0.1)) +best <- swarmbHIVE(X, y = iris$Species, task = "classification", + grid = grid, metric = "accuracy", maxIter = 20) +best$best_params ``` -### Regression +### Gradient Refinement +Fine-tune antibody positions after training with `refineB()`: + +```r +res <- bHIVE(X, y = iris$Species, task = "classification", + nAntibodies = 20, maxIter = 20) + +# Adam-based refinement with cross-entropy loss +A_refined <- refineB(res$antibodies, X, y = iris$Species, + assignments = res$assignments, + task = "classification", + loss = "categorical_crossentropy", + optimizer = "adam", steps = 10, lr = 0.01) ``` -# Load the Iris dataset -data(iris) -X <- as.matrix(iris[, 2:4]) # Use other features as predictors -y <- iris$Sepal.Length # Regression target - -# Run bHIVE for regression -res <- bHIVE(X = X, - y = y, - task = "regression", - nAntibodies = 30, - beta = 5, - epsilon = 0.01, - maxIter = 20, - k = 3) - -# Compare predicted values with actual values -cor(res$assignments, y) -``` -## Bug Reports/New Features +## Affinity & Distance Functions + +| Affinity | Formula | Use Case | +|:---------|:--------|:---------| +| `gaussian` | exp(-alpha \|\|x - a\|\|^2) | General purpose (default) | +| `laplace` | exp(-alpha \|\|x - a\|\|) | Heavier tails than Gaussian | +| `polynomial` | (x . a + c)^p | Non-Euclidean similarity | +| `cosine` | (x . a) / (\|\|x\|\| \|\|a\|\|) | Direction-based similarity | +| `hamming` | 1 - (mismatches / d) | Categorical/binary features | -#### If you run into any issues or bugs please submit a [GitHub issue](https://github.com/BorchLab/bHIVE/issues) with details of the issue. +| Distance | Notes | +|:---------|:------| +| `euclidean` | Default; L2 norm | +| `manhattan` | L1 norm | +| `minkowski` | Generalized Lp (parameter p) | +| `cosine` | 1 - cosine similarity | +| `mahalanobis` | Accounts for feature covariance (requires Sigma) | +| `hamming` | Count of differing features | -If possible please include a [reproducible example](https://reprex.tidyverse.org/). +## Bug Reports / Feature Requests -#### Any requests for new features or enhancements can also be submitted as [GitHub issues](https://github.com/BorchLab/bHIVE/issues). +If you run into any issues or bugs please submit a [GitHub issue](https://github.com/BorchLab/bHIVE/issues) with details of the issue. If possible please include a [reproducible example](https://reprex.tidyverse.org/). Any requests for new features or enhancements can also be submitted as [GitHub issues](https://github.com/BorchLab/bHIVE/issues). -### Contributing +## Contributing We welcome contributions to the bHIVE project! To contribute: diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..6b5da71 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,126 @@ +url: https://www.borch.dev/uploads/bhive/ + +template: + bootstrap: 5 + bootswatch: flatly + bslib: + primary: "#F5A623" + +home: + title: bHIVE - B-cell Hybrid Immune Variant Engine + description: > + Modular Artificial Immune System framework for clustering, classification, + and regression in R. C++ backend via RcppArmadillo with composable + biologically-inspired modules. + +navbar: + structure: + left: [intro, articles, reference, news] + right: [search, github] + components: + intro: + text: Get Started + href: articles/bHIVE.html + articles: + text: Tutorials + menu: + - text: "Getting Started" + href: articles/bHIVE.html + - text: "---" + - text: "Immune Modules" + href: articles/articles/immune-modules.html + - text: "Advanced Tuning & Workflows" + href: articles/articles/advanced-workflows.html + - text: "---" + - text: "Algorithm & Biological Foundations" + href: articles/articles/foundations.html + reference: + text: Reference + href: reference/index.html + news: + text: News + href: news/index.html + github: + icon: fa-github + href: https://github.com/BorchLab/bHIVE + aria-label: GitHub + +reference: + - title: "Core Functions" + desc: > + The primary interface for running bHIVE algorithms. Use `bHIVE()` for + single-pass analysis, `honeycombHIVE()` for multilayer hierarchical + refinement, and `refineB()` for gradient-based post-processing. + contents: + - bHIVE + - honeycombHIVE + - refineB + - visualizeHIVE + + - title: "Hyperparameter Tuning" + desc: > + Grid search over bHIVE hyperparameters with BiocParallel support. + contents: + - swarmbHIVE + + - title: "R6 Algorithm Classes" + desc: > + Object-oriented interface for composing immune algorithms with + injectable modules. `AINet` is the primary class; `ImmuneAlgorithm` + is the abstract base. + contents: + - AINet + - ImmuneAlgorithm + - ImmuneRepertoire + + - title: "Mutation & Selection Modules" + desc: > + Modules controlling how antibodies mutate, compete, and are selected. + Inject these into `AINet$new()` to customize algorithm behavior. + contents: + - SHMEngine + - GerminalCenter + - VDJLibrary + + - title: "Network Regulation Modules" + desc: > + Modules for repertoire regulation and activation control. + contents: + - IdiotypicNetwork + - ActivationGate + + - title: "Adaptation & Memory Modules" + desc: > + Modules for environment-driven adaptation, memory, and ensemble methods. + contents: + - Microenvironment + - MemoryPool + - ClassSwitcher + - ConvergentSelector + + - title: "caret Integration" + desc: > + Model objects for use with the caret package's `train()` function. + contents: + - bHIVEmodel + - honeycombHIVEmodel + +articles: + - title: "Getting Started" + navbar: ~ + contents: + - bHIVE + - title: "Tutorials" + contents: + - articles/immune-modules + - articles/advanced-workflows + - title: "Reference" + contents: + - articles/foundations + +figures: + dev: ragg::agg_png + dpi: 150 + fig.ext: png + fig.width: 7 + fig.height: 5 diff --git a/inst/WORDLIST b/inst/WORDLIST index 518cb28..0cc6aa1 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,10 +1,188 @@ -Artifical +AINet +ActivationGate +Adagrad +Annales +BCR +BiocParallel +Bretscher +Burnet +CMD +Chemokine +ClassSwitcher +Codecov +Cohn +Combinatorial +Composable +ConvergentSelector +Costimulatory +Coutinho +Coutinho's +DAMPs +Dimeric +Evolvable +Fo +Freitas +GC +GerminalCenter +Hotspot +Hypermutation +ICLR +Idiotypic +IdiotypicNetwork +IgA +IgG +IgM +ImmuneAlgorithm +ImmuneRepertoire +Immunoinformatics +Institut +Isotype +Jerne +Jerne's +KDE +Kingma +Kleinstein's +Lp +MemoryPool +Microenvironment Minkowski +Monomeric +Mulilayered +Multilayer +Mut +Oster +PAMPs +Parallelization +Pentameric +Perelson RBF +RGYW +RMSProp +RcppArmadillo +SGD +SHM +SHMEngine +SNE +Springer +TCR +Tfh +Timmis +UMAP +Underfitting +VDJLibrary +Varela +Vassilvitskii +Von +WRCY +Zuben +adagrad +adam +aiNet +anergized +bHIVES +bHIVEmodel +bigl +bigr +biocViews +boldsymbol +bouldin +bpparam +calinski +cdot +chemokine +chemokines +clonotypes +coef +combinatorial +composable +compositional +costimulation +costimulatory +crossentropy customPackage -iteratively -params -reproducibility +cytidine +d'Immunologie +dA +davies +de +deaminase +diag +dt +effector +epitope +follicular +frac +gauss +geq +germline +ggplot +harabasz +honeycombHIVE +honeycombHIVEmodel +hotspot +hotspots +huber +hypermutation +idiotype +idiotypic +ij +ik +isotype +iter +kmeans +kullback +laplace +ldots +leftarrow +leibler +leq +lfloor +lr +mae +mahalanobis +mathbb +mathbf +mathcal +maxClones +medoid +medv +microenvironment +microenvironments +microstructure +minkowski +monomeric +mse +mucosal +multilayer +mut +nAb +nAntibodies +nFeatures +nSamples +neq +nmd +nonself +parallelization +param +paratope +pca +pentameric +poisson +pred +propto +quadratically +refineB +responder +rfloor +rmse +rmsprop +runif sds +sgd +stromal +swarmbHIVE +tSNE +texttt +th tunable -ε +visualizeHIVE diff --git a/man/AINet.Rd b/man/AINet.Rd new file mode 100644 index 0000000..8d357b7 --- /dev/null +++ b/man/AINet.Rd @@ -0,0 +1,175 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AINet.R +\name{AINet} +\alias{AINet} +\title{AINet} +\description{ +R6 implementation of the Artificial Immune Network algorithm. +This is the core bHIVE algorithm using C++ backends for performance-critical +operations. Supports composable modules for somatic hypermutation, idiotypic +network regulation, germinal center selection, and more. +} +\examples{ +# Clustering with Iris data +data(iris) +X <- as.matrix(iris[, 1:4]) +model <- AINet$new(nAntibodies = 15, maxIter = 10, verbose = FALSE) +model$fit(X, task = "clustering") +table(model$result$assignments) + +# Classification +model2 <- AINet$new(nAntibodies = 20, maxIter = 10, verbose = FALSE) +model2$fit(X, iris$Species, task = "classification") +mean(model2$result$assignments == as.character(iris$Species)) + +# Predict on new data +preds <- model2$predict(X[1:10, ]) + +} +\section{Super class}{ +\code{\link[bHIVE:ImmuneAlgorithm]{bHIVE::ImmuneAlgorithm}} -> \code{AINet} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-AINet-new}{\code{AINet$new()}} +\item \href{#method-AINet-fit}{\code{AINet$fit()}} +\item \href{#method-AINet-clone}{\code{AINet$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-AINet-new}{}}} +\subsection{Method \code{new()}}{ +Create a new AINet algorithm instance. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{AINet$new( + nAntibodies = 20, + beta = 5, + epsilon = 0.01, + maxIter = 50, + k = 3, + affinityFunc = "gaussian", + distFunc = "euclidean", + affinityParams = list(alpha = 1, c = 1, p = 2, Sigma = NULL), + mutationDecay = 1, + mutationMin = 0.01, + maxClones = Inf, + stopTolerance = 0, + noImprovementLimit = Inf, + initMethod = "sample", + shm = NULL, + init = NULL, + activation = NULL, + idiotypic = NULL, + germinalCenter = NULL, + microenvironment = NULL, + memory = NULL, + verbose = TRUE +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nAntibodies}}{Integer. Initial antibody population size.} + +\item{\code{beta}}{Numeric. Clone multiplier.} + +\item{\code{epsilon}}{Numeric. Suppression distance threshold.} + +\item{\code{maxIter}}{Integer. Maximum iterations.} + +\item{\code{k}}{Integer. Top-k antibodies to clone per data point.} + +\item{\code{affinityFunc}}{Character. Affinity function name.} + +\item{\code{distFunc}}{Character. Distance function name.} + +\item{\code{affinityParams}}{List. Parameters for affinity/distance functions.} + +\item{\code{mutationDecay}}{Numeric. Per-iteration mutation rate decay.} + +\item{\code{mutationMin}}{Numeric. Minimum mutation rate.} + +\item{\code{maxClones}}{Numeric. Maximum clones per antibody.} + +\item{\code{stopTolerance}}{Numeric. Early stopping tolerance.} + +\item{\code{noImprovementLimit}}{Integer. Early stopping patience.} + +\item{\code{initMethod}}{Character. Initialization method.} + +\item{\code{shm}}{An SHMEngine instance or NULL for default uniform mutation.} + +\item{\code{init}}{A VDJLibrary instance or NULL for default initialization.} + +\item{\code{activation}}{An ActivationGate instance or NULL.} + +\item{\code{idiotypic}}{An IdiotypicNetwork instance or NULL.} + +\item{\code{germinalCenter}}{A GerminalCenter instance or NULL.} + +\item{\code{microenvironment}}{A Microenvironment instance or NULL.} + +\item{\code{memory}}{A MemoryPool instance or NULL.} + +\item{\code{verbose}}{Logical. Print progress.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-AINet-fit}{}}} +\subsection{Method \code{fit()}}{ +Fit the AINet algorithm to data. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{AINet$fit(X, y = NULL, task = NULL, ...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Numeric matrix or data frame (n x d).} + +\item{\code{y}}{Optional target: factor (classification) or numeric (regression).} + +\item{\code{task}}{Character: "clustering", "classification", or "regression". +Inferred from y if NULL.} + +\item{\code{...}}{Additional arguments (currently unused).} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self, with \code{result} populated. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-AINet-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{AINet$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/ActivationGate.Rd b/man/ActivationGate.Rd new file mode 100644 index 0000000..80eaea6 --- /dev/null +++ b/man/ActivationGate.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ActivationGate.R +\name{ActivationGate} +\alias{ActivationGate} +\title{ActivationGate} +\description{ +Two-signal activation gate implementing the immunological +principle that immune cell activation requires both antigen-specific +recognition (Signal 1) AND costimulatory context (Signal 2). +} +\details{ +Prevents spurious activation on isolated outliers. An antibody is only +allowed to clone if both signals exceed their thresholds. This is +biologically-principled regularization. + +Signal 2 options: +\itemize{ + \item \code{"density"}: Local data density around the antibody + \item \code{"danger"}: User-provided danger signal vector + \item \code{"entropy"}: Local label entropy (classification only) +} +} +\examples{ +# Two-signal activation gate +data(iris) +X <- as.matrix(iris[, 1:4]) +A <- X[sample(150, 10), ] +rep <- ImmuneRepertoire$new(A) +gate <- ActivationGate$new(signal2_type = "density", threshold2 = 0.3) +aff <- rep$affinity_matrix(X, "gaussian") +activated <- gate$evaluate(aff, X, A) +sum(activated) # number of activated interactions + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{signal2_type}}{Type of costimulatory signal.} + +\item{\code{threshold1}}{Minimum affinity for Signal 1 (antigen recognition).} + +\item{\code{threshold2}}{Minimum costimulatory signal for Signal 2.} + +\item{\code{danger_signals}}{User-provided danger signal vector (for "danger" type).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ActivationGate-new}{\code{ActivationGate$new()}} +\item \href{#method-ActivationGate-evaluate}{\code{ActivationGate$evaluate()}} +\item \href{#method-ActivationGate-print}{\code{ActivationGate$print()}} +\item \href{#method-ActivationGate-clone}{\code{ActivationGate$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ActivationGate-new}{}}} +\subsection{Method \code{new()}}{ +Create a new ActivationGate. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ActivationGate$new( + signal2_type = "density", + threshold1 = 0.1, + threshold2 = 0.3, + danger_signals = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{signal2_type}}{Character. "density", "danger", or "entropy".} + +\item{\code{threshold1}}{Numeric. Minimum affinity threshold.} + +\item{\code{threshold2}}{Numeric. Minimum Signal 2 threshold.} + +\item{\code{danger_signals}}{Numeric vector. Per-data-point danger scores.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ActivationGate-evaluate}{}}} +\subsection{Method \code{evaluate()}}{ +Evaluate which antibody-data interactions pass the two-signal gate. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ActivationGate$evaluate(affinity_matrix, X, A, y = NULL, task = "clustering")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{affinity_matrix}}{Numeric matrix (n x m) of affinities.} + +\item{\code{X}}{Numeric matrix of data (n x d).} + +\item{\code{A}}{Numeric matrix of antibodies (m x d).} + +\item{\code{y}}{Target vector or NULL.} + +\item{\code{task}}{Character. Task type.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Logical matrix (n x m) where TRUE means the interaction is activated. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ActivationGate-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ActivationGate$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ActivationGate-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ActivationGate$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/ClassSwitcher.Rd b/man/ClassSwitcher.Rd new file mode 100644 index 0000000..eaf76c6 --- /dev/null +++ b/man/ClassSwitcher.Rd @@ -0,0 +1,151 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ClassSwitcher.R +\name{ClassSwitcher} +\alias{ClassSwitcher} +\title{ClassSwitcher} +\description{ +Implements antibody isotype/class switching, allowing antibodies +to change their matching breadth. Inspired by real B cell class switching +from IgM (broad, pentameric) to IgG (specific, monomeric) to IgA (mucosal). +} +\details{ +In bHIVE, class switching modifies the effective affinity kernel width: +\itemize{ + \item \code{IgM}: Broad matching (large kernel width) -- good for initial + exploration and capturing general patterns + \item \code{IgG}: Specific matching (small kernel width) -- good for + fine-grained discrimination after patterns are identified + \item \code{IgA}: Boundary patrol (medium kernel width) -- good for + maintaining coverage at decision boundaries +} +} +\examples{ +# Switch antibody isotypes based on microenvironment zones +A <- matrix(rnorm(50), nrow = 10, ncol = 5) +rep <- ImmuneRepertoire$new(A) +cs <- ClassSwitcher$new(alpha_IgM = 0.1, alpha_IgG = 5.0) +zones <- sample(c("stable", "explore", "boundary"), 10, replace = TRUE) +alphas <- cs$switch_isotypes(rep, zones) +table(rep$metadata$isotype) # IgM, IgG, IgA distribution + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{alpha_IgM}}{Kernel width for IgM mode (broad).} + +\item{\code{alpha_IgG}}{Kernel width for IgG mode (specific).} + +\item{\code{alpha_IgA}}{Kernel width for IgA mode (boundary).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ClassSwitcher-new}{\code{ClassSwitcher$new()}} +\item \href{#method-ClassSwitcher-switch_isotypes}{\code{ClassSwitcher$switch_isotypes()}} +\item \href{#method-ClassSwitcher-get_alpha}{\code{ClassSwitcher$get_alpha()}} +\item \href{#method-ClassSwitcher-print}{\code{ClassSwitcher$print()}} +\item \href{#method-ClassSwitcher-clone}{\code{ClassSwitcher$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ClassSwitcher-new}{}}} +\subsection{Method \code{new()}}{ +Create a new ClassSwitcher. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ClassSwitcher$new(alpha_IgM = 0.1, alpha_IgG = 5, alpha_IgA = 1)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{alpha_IgM}}{Numeric. Kernel width for broad matching.} + +\item{\code{alpha_IgG}}{Numeric. Kernel width for specific matching.} + +\item{\code{alpha_IgA}}{Numeric. Kernel width for boundary matching.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ClassSwitcher-switch_isotypes}{}}} +\subsection{Method \code{switch_isotypes()}}{ +Determine appropriate isotype for each antibody based on + its microenvironment zone. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ClassSwitcher$switch_isotypes(repertoire, zones)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoire}}{An \code{\link{ImmuneRepertoire}}.} + +\item{\code{zones}}{Character vector from Microenvironment assessment.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Named numeric vector of alpha values per antibody. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ClassSwitcher-get_alpha}{}}} +\subsection{Method \code{get_alpha()}}{ +Get alpha value for a given isotype. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ClassSwitcher$get_alpha(isotype)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{isotype}}{Character. "IgM", "IgG", or "IgA".} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ClassSwitcher-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ClassSwitcher$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ClassSwitcher-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ClassSwitcher$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/ConvergentSelector.Rd b/man/ConvergentSelector.Rd new file mode 100644 index 0000000..de88d01 --- /dev/null +++ b/man/ConvergentSelector.Rd @@ -0,0 +1,151 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ConvergentSelector.R +\name{ConvergentSelector} +\alias{ConvergentSelector} +\title{ConvergentSelector} +\description{ +Identifies "public antibodies" shared across independent +repertoires, implementing the concept of convergent selection from TCR/BCR +immunology as a biologically-motivated ensemble method. +} +\details{ +In real immunity, certain immune receptor sequences appear across multiple +individuals (public clones), suggesting they are driven by common selection +pressures. Similarly, antibodies that appear across multiple independent +bHIVE runs represent the most robust patterns in the data. +} +\examples{ +# Find public antibodies across multiple runs +data(iris) +X <- as.matrix(iris[, 1:4]) +results <- lapply(1:3, function(i) { + m <- AINet$new(nAntibodies = 15, maxIter = 5, verbose = FALSE) + m$fit(X, task = "clustering") + m$result +}) +conv <- ConvergentSelector$new(tolerance = 1.0, min_appearances = 2) +public <- conv$from_results(results) +nrow(public) # consensus antibodies + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{tolerance}}{Distance tolerance for matching antibodies across repertoires.} + +\item{\code{min_appearances}}{Minimum number of repertoires an antibody must appear +in to be considered "public".} + +\item{\code{public_antibodies}}{The identified public antibodies.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ConvergentSelector-new}{\code{ConvergentSelector$new()}} +\item \href{#method-ConvergentSelector-find_public}{\code{ConvergentSelector$find_public()}} +\item \href{#method-ConvergentSelector-from_results}{\code{ConvergentSelector$from_results()}} +\item \href{#method-ConvergentSelector-print}{\code{ConvergentSelector$print()}} +\item \href{#method-ConvergentSelector-clone}{\code{ConvergentSelector$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ConvergentSelector-new}{}}} +\subsection{Method \code{new()}}{ +Create a new ConvergentSelector. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ConvergentSelector$new(tolerance = 0.5, min_appearances = 2)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{tolerance}}{Numeric. Maximum distance for two antibodies to be +considered the same across repertoires.} + +\item{\code{min_appearances}}{Integer. Minimum repertoires for an antibody to be public.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ConvergentSelector-find_public}{}}} +\subsection{Method \code{find_public()}}{ +Find public antibodies shared across multiple repertoires. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ConvergentSelector$find_public(repertoires, distFunc = "euclidean")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoires}}{List of ImmuneRepertoire objects or list of antibody matrices.} + +\item{\code{distFunc}}{Character. Distance function for matching.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric matrix of public (consensus) antibodies. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ConvergentSelector-from_results}{}}} +\subsection{Method \code{from_results()}}{ +Run convergent selection from multiple bHIVE results. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ConvergentSelector$from_results(results, distFunc = "euclidean")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{results}}{List of bHIVE result objects (each with $antibodies).} + +\item{\code{distFunc}}{Character. Distance function.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric matrix of consensus antibodies. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ConvergentSelector-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ConvergentSelector$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ConvergentSelector-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ConvergentSelector$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/GerminalCenter.Rd b/man/GerminalCenter.Rd new file mode 100644 index 0000000..1cee4ef --- /dev/null +++ b/man/GerminalCenter.Rd @@ -0,0 +1,146 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/GerminalCenter.R +\name{GerminalCenter} +\alias{GerminalCenter} +\title{GerminalCenter} +\description{ +Models T follicular helper (Tfh) cell selection pressure on +B cells within a germinal center reaction. Implements resource competition +where antibodies compete for Tfh help, and only helped antibodies survive. +} +\details{ +The germinal center is where B cells undergo affinity maturation through +iterative cycles of mutation and selection. Tfh cells act as quality-control +selectors: + +\itemize{ + \item Each Tfh evaluates B cell (antibody) quality using a task-aware metric + \item B cells compete for Tfh help (resource competition) + \item Only helped B cells survive to the next round + \item Selection pressure controls the stringency of the process +} +} +\examples{ +# Germinal center selection on Iris +data(iris) +X <- as.matrix(iris[, 1:4]) +gc <- GerminalCenter$new(nTfh = 5, selectionPressure = 0.5) +rep <- ImmuneRepertoire$new(X[sample(150, 20), ]) +gc$select(rep, X, iris$Species, "classification") +rep$size() # fewer antibodies after selection + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nTfh}}{Number of Tfh selectors (determines how many antibodies survive).} + +\item{\code{selectionPressure}}{Numeric [0,1]. 0 = no selection (all survive), +1 = only the very best survive.} + +\item{\code{rounds}}{Number of selection rounds per call.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-GerminalCenter-new}{\code{GerminalCenter$new()}} +\item \href{#method-GerminalCenter-select}{\code{GerminalCenter$select()}} +\item \href{#method-GerminalCenter-print}{\code{GerminalCenter$print()}} +\item \href{#method-GerminalCenter-clone}{\code{GerminalCenter$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-GerminalCenter-new}{}}} +\subsection{Method \code{new()}}{ +Create a new GerminalCenter. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{GerminalCenter$new(nTfh = 10, selectionPressure = 0.5, rounds = 1)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nTfh}}{Integer. Number of Tfh helper cells. Each helps one B cell.} + +\item{\code{selectionPressure}}{Numeric [0,1]. Stringency of selection.} + +\item{\code{rounds}}{Integer. Number of competition rounds.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-GerminalCenter-select}{}}} +\subsection{Method \code{select()}}{ +Run germinal center selection on a repertoire. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{GerminalCenter$select( + repertoire, + X, + y = NULL, + task = "clustering", + affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoire}}{An \code{\link{ImmuneRepertoire}} object.} + +\item{\code{X}}{Numeric matrix of training data.} + +\item{\code{y}}{Target vector (factor or numeric) or NULL for clustering.} + +\item{\code{task}}{Character: "clustering", "classification", or "regression".} + +\item{\code{affinityFunc}}{Character. Affinity function for evaluation.} + +\item{\code{affinityParams}}{List. Parameters for affinity function.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self. Repertoire modified in place. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-GerminalCenter-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{GerminalCenter$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-GerminalCenter-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{GerminalCenter$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/IdiotypicNetwork.Rd b/man/IdiotypicNetwork.Rd new file mode 100644 index 0000000..7d28521 --- /dev/null +++ b/man/IdiotypicNetwork.Rd @@ -0,0 +1,194 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/IdiotypicNetwork.R +\name{IdiotypicNetwork} +\alias{IdiotypicNetwork} +\title{IdiotypicNetwork} +\description{ +Implements Jerne's idiotypic network theory for antibody +repertoire regulation. Replaces crude epsilon-threshold suppression with +principled network dynamics based on Varela & Coutinho's (1991) second- +generation immune network model. +} +\details{ +The idiotypic network models antibody-antibody interactions where each +antibody's variable region can be recognized by other antibodies. This +creates a regulatory network with emergent properties: + +- A bell-shaped (double-threshold) activation function: too little + stimulation leads to cell death, moderate stimulation to activation, + and excessive stimulation to suppression. +- Population dynamics with source, decay, activation, and suppression terms. +- Self-organized repertoire structure with memory and tolerance properties. + +This is the single most novel contribution of the overhauled bHIVE package. +No existing AIS implementation uses idiotypic network dynamics for +repertoire regulation. +} +\examples{ +# Create and run idiotypic regulation +idi <- IdiotypicNetwork$new(theta_low = 0.01, theta_high = 0.5) +A <- matrix(rnorm(50), nrow = 10, ncol = 5) +rep <- ImmuneRepertoire$new(A) +idi$regulate(rep, "gaussian", list(alpha = 0.5)) +print(idi) + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{theta_low}}{Lower activation threshold. Below this, cells die.} + +\item{\code{theta_high}}{Upper activation threshold. Above this, cells are suppressed.} + +\item{\code{source_rate}}{Rate of new cell generation (basal production).} + +\item{\code{decay_rate}}{Natural cell death rate.} + +\item{\code{dt}}{Time step for Euler integration.} + +\item{\code{timeSteps}}{Number of simulation time steps.} + +\item{\code{survival_threshold}}{Minimum population level to survive.} + +\item{\code{last_dynamics}}{Result from the last regulation step.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-IdiotypicNetwork-new}{\code{IdiotypicNetwork$new()}} +\item \href{#method-IdiotypicNetwork-regulate}{\code{IdiotypicNetwork$regulate()}} +\item \href{#method-IdiotypicNetwork-get_network}{\code{IdiotypicNetwork$get_network()}} +\item \href{#method-IdiotypicNetwork-get_population}{\code{IdiotypicNetwork$get_population()}} +\item \href{#method-IdiotypicNetwork-print}{\code{IdiotypicNetwork$print()}} +\item \href{#method-IdiotypicNetwork-clone}{\code{IdiotypicNetwork$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-IdiotypicNetwork-new}{}}} +\subsection{Method \code{new()}}{ +Create a new IdiotypicNetwork regulator. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{IdiotypicNetwork$new( + theta_low = 0.01, + theta_high = 0.5, + source_rate = 0.5, + decay_rate = 0.1, + dt = 0.1, + timeSteps = 20, + survival_threshold = 0.5 +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{theta_low}}{Lower activation threshold.} + +\item{\code{theta_high}}{Upper activation threshold.} + +\item{\code{source_rate}}{Basal cell production rate.} + +\item{\code{decay_rate}}{Natural decay rate.} + +\item{\code{dt}}{Euler integration time step.} + +\item{\code{timeSteps}}{Number of dynamics simulation steps.} + +\item{\code{survival_threshold}}{Minimum population to survive.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-IdiotypicNetwork-regulate}{}}} +\subsection{Method \code{regulate()}}{ +Run idiotypic network dynamics on an antibody repertoire. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{IdiotypicNetwork$regulate( + repertoire, + affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoire}}{An \code{\link{ImmuneRepertoire}} object.} + +\item{\code{affinityFunc}}{Character. Affinity function for Ab-Ab interactions.} + +\item{\code{affinityParams}}{List. Parameters for the affinity function.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self. The repertoire is modified in place (dead + antibodies removed). Access \code{$last_dynamics} for full results. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-IdiotypicNetwork-get_network}{}}} +\subsection{Method \code{get_network()}}{ +Get the Ab-Ab affinity matrix from the last regulation. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{IdiotypicNetwork$get_network()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Numeric matrix (m x m) or NULL if not yet run. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-IdiotypicNetwork-get_population}{}}} +\subsection{Method \code{get_population()}}{ +Get population levels from the last regulation. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{IdiotypicNetwork$get_population()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Numeric vector or NULL if not yet run. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-IdiotypicNetwork-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{IdiotypicNetwork$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-IdiotypicNetwork-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{IdiotypicNetwork$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/ImmuneAlgorithm.Rd b/man/ImmuneAlgorithm.Rd new file mode 100644 index 0000000..75dac28 --- /dev/null +++ b/man/ImmuneAlgorithm.Rd @@ -0,0 +1,155 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ImmuneAlgorithm.R +\name{ImmuneAlgorithm} +\alias{ImmuneAlgorithm} +\title{ImmuneAlgorithm} +\description{ +Abstract R6 base class for all immune-inspired algorithms. +Subclasses must implement the \code{fit} method. +} +\examples{ +# ImmuneAlgorithm is abstract; use AINet for concrete instances +algo <- ImmuneAlgorithm$new() +print(algo) + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoire}}{An \code{\link{ImmuneRepertoire}} object.} + +\item{\code{config}}{Named list of algorithm hyperparameters.} + +\item{\code{modules}}{Named list of injected module instances +(SHMEngine, IdiotypicNetwork, GerminalCenter, etc.).} + +\item{\code{history}}{List of per-iteration metrics.} + +\item{\code{result}}{The result from the last call to \code{fit()}.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ImmuneAlgorithm-new}{\code{ImmuneAlgorithm$new()}} +\item \href{#method-ImmuneAlgorithm-fit}{\code{ImmuneAlgorithm$fit()}} +\item \href{#method-ImmuneAlgorithm-predict}{\code{ImmuneAlgorithm$predict()}} +\item \href{#method-ImmuneAlgorithm-print}{\code{ImmuneAlgorithm$print()}} +\item \href{#method-ImmuneAlgorithm-summary}{\code{ImmuneAlgorithm$summary()}} +\item \href{#method-ImmuneAlgorithm-clone}{\code{ImmuneAlgorithm$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneAlgorithm-new}{}}} +\subsection{Method \code{new()}}{ +Create a new ImmuneAlgorithm. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneAlgorithm$new(config = list(), modules = list())}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{config}}{Named list of hyperparameters.} + +\item{\code{modules}}{Named list of module instances.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneAlgorithm-fit}{}}} +\subsection{Method \code{fit()}}{ +Fit the algorithm to data. Must be overridden by subclasses. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneAlgorithm$fit(X, y = NULL, task = NULL, ...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Numeric matrix (n x d).} + +\item{\code{y}}{Optional target vector (factor or numeric).} + +\item{\code{task}}{Character: "clustering", "classification", or "regression".} + +\item{\code{...}}{Additional arguments.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +The algorithm object (invisibly), with \code{result} populated. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneAlgorithm-predict}{}}} +\subsection{Method \code{predict()}}{ +Predict on new data using the trained repertoire. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneAlgorithm$predict(newdata)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{newdata}}{Numeric matrix (n_new x d).} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Predictions (class labels, numeric values, or cluster IDs). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneAlgorithm-print}{}}} +\subsection{Method \code{print()}}{ +Print summary of the algorithm. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneAlgorithm$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneAlgorithm-summary}{}}} +\subsection{Method \code{summary()}}{ +Get a summary of the fitting history. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneAlgorithm$summary()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Data frame of per-iteration metrics. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneAlgorithm-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneAlgorithm$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/ImmuneRepertoire.Rd b/man/ImmuneRepertoire.Rd new file mode 100644 index 0000000..cdf9a29 --- /dev/null +++ b/man/ImmuneRepertoire.Rd @@ -0,0 +1,290 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ImmuneRepertoire.R +\name{ImmuneRepertoire} +\alias{ImmuneRepertoire} +\title{ImmuneRepertoire} +\description{ +R6 class representing a collection of antibodies (immune cells) +with associated metadata. Core data structure for bHIVE algorithms. +} +\details{ +An ImmuneRepertoire holds a matrix of antibody vectors (each row is one +antibody in feature space) plus per-antibody metadata (isotype, state, age, +lineage). All heavy computation is dispatched to C++ via RcppArmadillo. +} +\examples{ +# Create a repertoire from random antibodies +A <- matrix(rnorm(50), nrow = 10, ncol = 5) +rep <- ImmuneRepertoire$new(A) +print(rep) + +# Compute affinity to data +X <- matrix(rnorm(100), nrow = 20, ncol = 5) +aff <- rep$affinity_matrix(X, "gaussian", list(alpha = 1)) +dim(aff) # 20 x 10 + +# Network suppression +rep$suppress(epsilon = 1.5, method = "euclidean") +rep$size() # fewer antibodies after suppression + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{cells}}{Numeric matrix (nAntibodies x nFeatures).} + +\item{\code{metadata}}{Data frame with per-antibody attributes.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ImmuneRepertoire-new}{\code{ImmuneRepertoire$new()}} +\item \href{#method-ImmuneRepertoire-affinity_matrix}{\code{ImmuneRepertoire$affinity_matrix()}} +\item \href{#method-ImmuneRepertoire-distance_matrix}{\code{ImmuneRepertoire$distance_matrix()}} +\item \href{#method-ImmuneRepertoire-suppress}{\code{ImmuneRepertoire$suppress()}} +\item \href{#method-ImmuneRepertoire-size}{\code{ImmuneRepertoire$size()}} +\item \href{#method-ImmuneRepertoire-n_features}{\code{ImmuneRepertoire$n_features()}} +\item \href{#method-ImmuneRepertoire-subset}{\code{ImmuneRepertoire$subset()}} +\item \href{#method-ImmuneRepertoire-add}{\code{ImmuneRepertoire$add()}} +\item \href{#method-ImmuneRepertoire-age_all}{\code{ImmuneRepertoire$age_all()}} +\item \href{#method-ImmuneRepertoire-as_matrix}{\code{ImmuneRepertoire$as_matrix()}} +\item \href{#method-ImmuneRepertoire-print}{\code{ImmuneRepertoire$print()}} +\item \href{#method-ImmuneRepertoire-clone}{\code{ImmuneRepertoire$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-new}{}}} +\subsection{Method \code{new()}}{ +Create a new ImmuneRepertoire. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$new(cells, metadata = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{cells}}{Numeric matrix (nAntibodies x nFeatures).} + +\item{\code{metadata}}{Optional data frame with columns: isotype, state, age, lineage.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-affinity_matrix}{}}} +\subsection{Method \code{affinity_matrix()}}{ +Compute affinity matrix between data X and antibodies. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$affinity_matrix( + X, + method = "gaussian", + params = list(alpha = 1, c = 1, p = 2) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Numeric matrix (n x d) of data points.} + +\item{\code{method}}{Affinity function: "gaussian", "laplace", "polynomial", +"cosine", "hamming".} + +\item{\code{params}}{List with alpha, c, p parameters.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric matrix (n x m) of affinity values. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-distance_matrix}{}}} +\subsection{Method \code{distance_matrix()}}{ +Compute distance matrix between data X and antibodies. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$distance_matrix( + X, + method = "euclidean", + params = list(p = 2, Sigma = NULL) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Numeric matrix (n x d).} + +\item{\code{method}}{Distance function: "euclidean", "manhattan", "minkowski", +"cosine", "mahalanobis", "hamming".} + +\item{\code{params}}{List with p, Sigma parameters.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric matrix (n x m) of distances. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-suppress}{}}} +\subsection{Method \code{suppress()}}{ +Network suppression: remove redundant antibodies. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$suppress( + epsilon, + method = "euclidean", + params = list(p = 2, Sigma = NULL) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{epsilon}}{Distance threshold for suppression.} + +\item{\code{method}}{Distance function for suppression.} + +\item{\code{params}}{List with p, Sigma parameters.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self (modified in place). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-size}{}}} +\subsection{Method \code{size()}}{ +Get number of antibodies. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$size()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Integer. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-n_features}{}}} +\subsection{Method \code{n_features()}}{ +Get number of features. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$n_features()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Integer. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-subset}{}}} +\subsection{Method \code{subset()}}{ +Subset the repertoire. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$subset(idx)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{idx}}{Integer vector of row indices to keep.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self (modified in place). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-add}{}}} +\subsection{Method \code{add()}}{ +Add antibodies to the repertoire. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$add(new_cells, new_metadata = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{new_cells}}{Numeric matrix (k x d) of new antibodies.} + +\item{\code{new_metadata}}{Optional data frame of metadata for new antibodies.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self (modified in place). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-age_all}{}}} +\subsection{Method \code{age_all()}}{ +Increment age of all antibodies. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$age_all()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Invisible self (modified in place). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-as_matrix}{}}} +\subsection{Method \code{as_matrix()}}{ +Convert to plain matrix. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$as_matrix()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Numeric matrix (nAntibodies x nFeatures). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ImmuneRepertoire-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{ImmuneRepertoire$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/MemoryPool.Rd b/man/MemoryPool.Rd new file mode 100644 index 0000000..8a52bb0 --- /dev/null +++ b/man/MemoryPool.Rd @@ -0,0 +1,182 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MemoryPool.R +\name{MemoryPool} +\alias{MemoryPool} +\title{MemoryPool} +\description{ +Manages long-lived memory cells that can be recalled when +distribution shifts are detected. Implements the immunological distinction +between short-lived effector cells and long-lived memory cells. +} +\examples{ +# Archive and recall memory cells +data(iris) +X <- as.matrix(iris[, 1:4]) +A <- X[sample(150, 10), ] +mp <- MemoryPool$new(archive_threshold = 0.01) +rep <- ImmuneRepertoire$new(A) +mp$archive(rep, X) +mp$size() # number of archived memories +recalled <- mp$recall(X[1:5, ]) +nrow(recalled) # memories relevant to query + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{memory_cells}}{Numeric matrix of archived memory antibodies.} + +\item{\code{memory_metadata}}{Data frame of metadata for memory cells.} + +\item{\code{archive_threshold}}{Affinity threshold for archiving (only high-quality +antibodies become memory).} + +\item{\code{max_memory}}{Maximum number of memory cells to store.} + +\item{\code{recall_threshold}}{Threshold for triggering memory recall.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-MemoryPool-new}{\code{MemoryPool$new()}} +\item \href{#method-MemoryPool-archive}{\code{MemoryPool$archive()}} +\item \href{#method-MemoryPool-recall}{\code{MemoryPool$recall()}} +\item \href{#method-MemoryPool-size}{\code{MemoryPool$size()}} +\item \href{#method-MemoryPool-print}{\code{MemoryPool$print()}} +\item \href{#method-MemoryPool-clone}{\code{MemoryPool$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-MemoryPool-new}{}}} +\subsection{Method \code{new()}}{ +Create a new MemoryPool. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{MemoryPool$new( + archive_threshold = 0.5, + max_memory = 100, + recall_threshold = 0.3 +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{archive_threshold}}{Numeric. Minimum average affinity to archive.} + +\item{\code{max_memory}}{Integer. Maximum memory cells.} + +\item{\code{recall_threshold}}{Numeric. Minimum affinity to recall a memory.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-MemoryPool-archive}{}}} +\subsection{Method \code{archive()}}{ +Archive high-performing antibodies as memory cells. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{MemoryPool$archive( + repertoire, + X, + affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoire}}{An \code{\link{ImmuneRepertoire}}.} + +\item{\code{X}}{Training data matrix.} + +\item{\code{affinityFunc}}{Character. Affinity function.} + +\item{\code{affinityParams}}{List. Affinity parameters.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Integer. Number of new memory cells archived. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-MemoryPool-recall}{}}} +\subsection{Method \code{recall()}}{ +Recall memory cells relevant to current data. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{MemoryPool$recall( + X, + affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Data matrix to match against memory.} + +\item{\code{affinityFunc}}{Character. Affinity function.} + +\item{\code{affinityParams}}{List. Affinity parameters.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric matrix of recalled memory cells (may be empty). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-MemoryPool-size}{}}} +\subsection{Method \code{size()}}{ +Get current memory pool size. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{MemoryPool$size()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +Integer. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-MemoryPool-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{MemoryPool$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-MemoryPool-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{MemoryPool$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/Microenvironment.Rd b/man/Microenvironment.Rd new file mode 100644 index 0000000..be9cda8 --- /dev/null +++ b/man/Microenvironment.Rd @@ -0,0 +1,162 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Microenvironment.R +\name{Microenvironment} +\alias{Microenvironment} +\title{Microenvironment} +\description{ +Models local microenvironment cues that influence antibody +behavior based on the density and structure of nearby data points. +} +\details{ +In real immunity, B cell fate is strongly influenced by local signals: +chemokines, cytokines, and interactions with stromal cells in specific +tissue microenvironments. This module translates that concept into +density-dependent adaptation: + +\itemize{ + \item \strong{High density zones}: Promote memory formation (stabilize + antibodies, reduce mutation rate) + \item \strong{Low density zones}: Promote exploration (increase mutation + rate for broader search) + \item \strong{Boundary zones}: Trigger class switching (change matching + breadth between IgM-like broad and IgG-like specific modes) + \item \strong{Chemokine-like gradients}: Bias mutation direction toward + higher-density regions +} +} +\examples{ +# Assess microenvironment around antibodies +data(iris) +X <- as.matrix(iris[, 1:4]) +me <- Microenvironment$new() +rep <- ImmuneRepertoire$new(X[sample(150, 15), ]) +env <- me$assess(rep, X) +table(env$zones) # stable, explore, boundary +env$mutation_modifiers # per-antibody rate scaling + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{density_bandwidth}}{Bandwidth for kernel density estimation.} + +\item{\code{high_density_threshold}}{Density percentile above which antibodies stabilize.} + +\item{\code{low_density_threshold}}{Density percentile below which antibodies explore.} + +\item{\code{stabilization_factor}}{Mutation rate multiplier for high-density zones.} + +\item{\code{exploration_factor}}{Mutation rate multiplier for low-density zones.} + +\item{\code{last_densities}}{Per-antibody local density from last assessment.} + +\item{\code{last_zones}}{Per-antibody zone classification from last assessment.} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-Microenvironment-new}{\code{Microenvironment$new()}} +\item \href{#method-Microenvironment-assess}{\code{Microenvironment$assess()}} +\item \href{#method-Microenvironment-print}{\code{Microenvironment$print()}} +\item \href{#method-Microenvironment-clone}{\code{Microenvironment$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Microenvironment-new}{}}} +\subsection{Method \code{new()}}{ +Create a new Microenvironment module. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Microenvironment$new( + density_bandwidth = NULL, + high_density_threshold = 0.75, + low_density_threshold = 0.25, + stabilization_factor = 0.3, + exploration_factor = 2 +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{density_bandwidth}}{Numeric. KDE bandwidth (NULL for auto).} + +\item{\code{high_density_threshold}}{Numeric [0,1]. Percentile threshold for stabilization.} + +\item{\code{low_density_threshold}}{Numeric [0,1]. Percentile threshold for exploration.} + +\item{\code{stabilization_factor}}{Numeric. Mutation rate multiplier for stable zones.} + +\item{\code{exploration_factor}}{Numeric. Mutation rate multiplier for exploration zones.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Microenvironment-assess}{}}} +\subsection{Method \code{assess()}}{ +Assess the microenvironment around each antibody. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Microenvironment$assess( + repertoire, + X, + affinityFunc = "gaussian", + affinityParams = list(alpha = 1, c = 1, p = 2) +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{repertoire}}{An \code{\link{ImmuneRepertoire}} object.} + +\item{\code{X}}{Numeric matrix of training data (n x d).} + +\item{\code{affinityFunc}}{Character. Affinity function.} + +\item{\code{affinityParams}}{List. Affinity parameters.} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Named list with densities, zones, and mutation_modifiers per antibody. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Microenvironment-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Microenvironment$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-Microenvironment-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{Microenvironment$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/SHMEngine.Rd b/man/SHMEngine.Rd new file mode 100644 index 0000000..7c07e54 --- /dev/null +++ b/man/SHMEngine.Rd @@ -0,0 +1,176 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SHMEngine.R +\name{SHMEngine} +\alias{SHMEngine} +\title{SHMEngine} +\description{ +Somatic Hypermutation Engine implementing five biologically- +inspired mutation strategies for the bHIVE artificial immune system. +} +\details{ +The five strategies are: +\describe{ + \item{uniform}{Classic random Gaussian noise. Mutation rate = (1-affinity) * + decay^(iter-1). This is the original bHIVE behavior.} + \item{airs}{AIRS-style affinity-proportional mutation. Rate = c * exp(-affinity / T). + From Watkins & Timmis (AIRS2), achieving 50\% better data reduction than uniform.} + \item{hotspot}{Feature-importance-weighted mutation. Features with higher gradient + magnitude mutate more, analogous to AID targeting WRCY motifs in real SHM.} + \item{energy}{Energy-budget-constrained mutation. Total mutation magnitude bounded + by E = E_0 * (1-affinity)^2, inspired by Kleinstein's E_SHM ~ N_Mut^2 model.} + \item{adaptive}{Per-feature adaptive mutation rate with moment tracking, directly + implementing SHM as the Adam optimizer.} +} +} +\examples{ +# Create different SHM engines +shm_uniform <- SHMEngine$new(method = "uniform") +shm_adaptive <- SHMEngine$new(method = "adaptive", base_rate = 0.1) +shm_airs <- SHMEngine$new(method = "airs", temperature = 0.3) +print(shm_adaptive) + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{method}}{Character. One of "uniform", "airs", "hotspot", "energy", "adaptive".} + +\item{\code{params}}{Named list of method-specific parameters.} + +\item{\code{m1_state}}{First moment state matrix (for adaptive method).} + +\item{\code{m2_state}}{Second moment state matrix (for adaptive method).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-SHMEngine-new}{\code{SHMEngine$new()}} +\item \href{#method-SHMEngine-init_state}{\code{SHMEngine$init_state()}} +\item \href{#method-SHMEngine-reset_state}{\code{SHMEngine$reset_state()}} +\item \href{#method-SHMEngine-print}{\code{SHMEngine$print()}} +\item \href{#method-SHMEngine-clone}{\code{SHMEngine$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SHMEngine-new}{}}} +\subsection{Method \code{new()}}{ +Create a new SHMEngine. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{SHMEngine$new( + method = "uniform", + decay = 1, + mutationMin = 0.01, + c_rate = 1, + temperature = 0.5, + E_0 = 1, + base_rate = 0.1, + beta1 = 0.9, + beta2 = 0.999, + adam_epsilon = 1e-08 +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{method}}{Character. Mutation strategy.} + +\item{\code{decay}}{Numeric. Per-iteration mutation rate decay (uniform method).} + +\item{\code{mutationMin}}{Numeric. Minimum mutation rate floor.} + +\item{\code{c_rate}}{Numeric. Scaling constant (airs method).} + +\item{\code{temperature}}{Numeric. Temperature parameter (airs method).} + +\item{\code{E_0}}{Numeric. Energy budget base (energy method).} + +\item{\code{base_rate}}{Numeric. Base mutation rate (hotspot, adaptive methods).} + +\item{\code{beta1}}{Numeric. First moment decay (adaptive method, like Adam).} + +\item{\code{beta2}}{Numeric. Second moment decay (adaptive method, like Adam).} + +\item{\code{adam_epsilon}}{Numeric. Numerical stability (adaptive method).} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SHMEngine-init_state}{}}} +\subsection{Method \code{init_state()}}{ +Initialize internal state for adaptive method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{SHMEngine$init_state(nAntibodies, nFeatures)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nAntibodies}}{Integer. Number of antibodies.} + +\item{\code{nFeatures}}{Integer. Number of features.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SHMEngine-reset_state}{}}} +\subsection{Method \code{reset_state()}}{ +Reset moment states (e.g., after suppression changes antibody count). +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{SHMEngine$reset_state(nAntibodies, nFeatures, kept_idx = NULL)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nAntibodies}}{Integer. New number of antibodies.} + +\item{\code{nFeatures}}{Integer. Number of features.} + +\item{\code{kept_idx}}{Integer vector. Indices of antibodies that were kept.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SHMEngine-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{SHMEngine$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-SHMEngine-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{SHMEngine$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/VDJLibrary.Rd b/man/VDJLibrary.Rd new file mode 100644 index 0000000..c1d86ad --- /dev/null +++ b/man/VDJLibrary.Rd @@ -0,0 +1,159 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/VDJLibrary.R +\name{VDJLibrary} +\alias{VDJLibrary} +\title{VDJLibrary} +\description{ +V(D)J gene library initialization for antibody populations. +Instead of random sampling, antibodies are generated by combinatorial +assembly of gene segments, mimicking the V(D)J recombination that generates +antibody diversity in real immune systems. +} +\details{ +The feature space is decomposed into V, D, and J segments (subsets of +dimensions). Each segment is discretized into "alleles" by clustering. +New antibodies are generated by combinatorial sampling of one allele from +each segment, producing structured coverage of the data manifold. + +Methods: +\itemize{ + \item \code{"pca"}: PCA components define gene segments + \item \code{"cluster"}: k-means within dimension groups creates alleles + \item \code{"random_partition"}: Random feature grouping +} +} +\examples{ +# Generate antibodies via V(D)J combinatorial assembly +data(iris) +X <- as.matrix(iris[, 1:4]) +vdj <- VDJLibrary$new(nV = 5, nD = 3, nJ = 3, method = "pca") +A <- vdj$generate(20, X) +dim(A) # 20 x 4 +print(vdj) + +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nV}}{Number of V gene segments.} + +\item{\code{nD}}{Number of D gene segments.} + +\item{\code{nJ}}{Number of J gene segments.} + +\item{\code{method}}{Decomposition method.} + +\item{\code{library}}{The computed gene library (list of allele matrices).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-VDJLibrary-new}{\code{VDJLibrary$new()}} +\item \href{#method-VDJLibrary-build}{\code{VDJLibrary$build()}} +\item \href{#method-VDJLibrary-generate}{\code{VDJLibrary$generate()}} +\item \href{#method-VDJLibrary-print}{\code{VDJLibrary$print()}} +\item \href{#method-VDJLibrary-clone}{\code{VDJLibrary$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-VDJLibrary-new}{}}} +\subsection{Method \code{new()}}{ +Create a new VDJLibrary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{VDJLibrary$new(nV = 10, nD = 5, nJ = 4, method = "pca")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nV}}{Integer. Number of V gene alleles.} + +\item{\code{nD}}{Integer. Number of D gene alleles.} + +\item{\code{nJ}}{Integer. Number of J gene alleles.} + +\item{\code{method}}{Character. "pca", "cluster", or "random_partition".} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-VDJLibrary-build}{}}} +\subsection{Method \code{build()}}{ +Build the gene library from training data. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{VDJLibrary$build(X)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{X}}{Numeric matrix (n x d).} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Invisible self. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-VDJLibrary-generate}{}}} +\subsection{Method \code{generate()}}{ +Generate antibodies by combinatorial V(D)J assembly. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{VDJLibrary$generate(nAntibodies, X)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{nAntibodies}}{Integer. Number of antibodies to generate.} + +\item{\code{X}}{Numeric matrix. Training data (used to build library if needed).} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +Numeric matrix (nAntibodies x d). +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-VDJLibrary-print}{}}} +\subsection{Method \code{print()}}{ +Print summary. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{VDJLibrary$print(...)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{...}}{Not used.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-VDJLibrary-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{VDJLibrary$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/bHIVE-package.Rd b/man/bHIVE-package.Rd new file mode 100644 index 0000000..1b6ef07 --- /dev/null +++ b/man/bHIVE-package.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bHIVE-package.R +\docType{package} +\name{bHIVE-package} +\alias{bHIVE-package} +\title{bHIVE: B-cell Hybrid Immune Variant Engine} +\description{ +\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} + +The bHIVE package implements an Artificial Immune Network (AI-Net) algorithm for clustering, classification, and regression tasks. Inspired by biological immune systems, it employs clonal selection, mutation, and network suppression to analyze and model datasets. This package provides flexible functionality, including affinity metrics, mutation strategies, and hyperparameter tuning. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://www.borch.dev/uploads/bhive/} + \item Report bugs at \url{https://github.com/BorchLab/bHIVE/issues} +} + +} +\author{ +\strong{Maintainer}: Nick Borcherding \email{ncborch@gmail.com} + +} +\keyword{internal} diff --git a/man/bHIVEModel.Rd b/man/bHIVEModel.Rd index 874296d..bdb83f7 100644 --- a/man/bHIVEModel.Rd +++ b/man/bHIVEModel.Rd @@ -118,6 +118,12 @@ probabilities <- predict(tunedModel, newdata = X, type = "prob") } } +\examples{ +# View model structure +bHIVEmodel$label +bHIVEmodel$parameters + +} \seealso{ \code{\link[caret]{train}}, \code{\link[caret]{trainControl}} } diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 0000000..5925f3b Binary files /dev/null and b/man/figures/logo.png differ diff --git a/man/null-coalesce.Rd b/man/null-coalesce.Rd new file mode 100644 index 0000000..cdc8f29 --- /dev/null +++ b/man/null-coalesce.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{null-coalesce} +\alias{null-coalesce} +\alias{\%||\%} +\title{Null-coalesce operator} +\usage{ +x \%||\% y +} +\arguments{ +\item{x}{Value to test.} + +\item{y}{Default value if \code{x} is NULL.} +} +\value{ +\code{x} if not NULL, otherwise \code{y}. +} +\description{ +Returns \code{x} if it is not \code{NULL}, otherwise returns \code{y}. +Used internally by bHIVE R6 classes for parameter defaults. +} +\keyword{internal} diff --git a/man/refineB.Rd b/man/refineB.Rd index 0699bd4..470e32b 100644 --- a/man/refineB.Rd +++ b/man/refineB.Rd @@ -116,3 +116,19 @@ This function performs gradient-based updates on each antibody using the selecte Depending on the chosen \code{optimizer}, it may use plain SGD, momentum-based updates, Adagrad, Adam, or RMSProp. } +\examples{ +data(iris) +X <- as.matrix(iris[, 1:4]) +y <- iris$Species +res <- bHIVE(X, y, task = "classification", nAntibodies = 10, + maxIter = 5, verbose = FALSE) +assignments <- as.integer(factor(res$assignments, + levels = unique(res$assignments))) +A_refined <- refineB(res$antibodies, X, y = y, + assignments = assignments, + task = "classification", + loss = "mse", optimizer = "adam", + steps = 3, lr = 0.01, verbose = FALSE) +dim(A_refined) + +} diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..e9d9765 --- /dev/null +++ b/src/Makevars @@ -0,0 +1 @@ +CXX_STD = CXX17 diff --git a/src/Makevars.win b/src/Makevars.win new file mode 100644 index 0000000..e9d9765 --- /dev/null +++ b/src/Makevars.win @@ -0,0 +1 @@ +CXX_STD = CXX17 diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp new file mode 100644 index 0000000..b492c39 --- /dev/null +++ b/src/RcppExports.cpp @@ -0,0 +1,204 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include "bHIVE_types.h" +#include +#include + +using namespace Rcpp; + +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + +// compute_affinity_matrix +arma::mat compute_affinity_matrix(const arma::mat& X, const arma::mat& A, const std::string& affinity_type, double alpha, double c, double p); +RcppExport SEXP _bHIVE_compute_affinity_matrix(SEXP XSEXP, SEXP ASEXP, SEXP affinity_typeSEXP, SEXP alphaSEXP, SEXP cSEXP, SEXP pSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const std::string& >::type affinity_type(affinity_typeSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type c(cSEXP); + Rcpp::traits::input_parameter< double >::type p(pSEXP); + rcpp_result_gen = Rcpp::wrap(compute_affinity_matrix(X, A, affinity_type, alpha, c, p)); + return rcpp_result_gen; +END_RCPP +} +// compute_distance_matrix +arma::mat compute_distance_matrix(const arma::mat& X, const arma::mat& A, const std::string& dist_type, double p, const arma::mat& Sigma_inv); +RcppExport SEXP _bHIVE_compute_distance_matrix(SEXP XSEXP, SEXP ASEXP, SEXP dist_typeSEXP, SEXP pSEXP, SEXP Sigma_invSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const std::string& >::type dist_type(dist_typeSEXP); + Rcpp::traits::input_parameter< double >::type p(pSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Sigma_inv(Sigma_invSEXP); + rcpp_result_gen = Rcpp::wrap(compute_distance_matrix(X, A, dist_type, p, Sigma_inv)); + return rcpp_result_gen; +END_RCPP +} +// compute_pairwise_distance +arma::mat compute_pairwise_distance(const arma::mat& A, const std::string& dist_type, double p, const arma::mat& Sigma_inv); +RcppExport SEXP _bHIVE_compute_pairwise_distance(SEXP ASEXP, SEXP dist_typeSEXP, SEXP pSEXP, SEXP Sigma_invSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const std::string& >::type dist_type(dist_typeSEXP); + Rcpp::traits::input_parameter< double >::type p(pSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Sigma_inv(Sigma_invSEXP); + rcpp_result_gen = Rcpp::wrap(compute_pairwise_distance(A, dist_type, p, Sigma_inv)); + return rcpp_result_gen; +END_RCPP +} +// clonal_selection_iteration_cpp +Rcpp::List clonal_selection_iteration_cpp(arma::mat A, const arma::mat& X, const arma::vec& y_num, int task_int, int k, double beta, double maxClones, double mutationDecay, double mutationMin, int iter, const std::string& affinity_type, double alpha, double c_param, double p_param, int nClasses); +RcppExport SEXP _bHIVE_clonal_selection_iteration_cpp(SEXP ASEXP, SEXP XSEXP, SEXP y_numSEXP, SEXP task_intSEXP, SEXP kSEXP, SEXP betaSEXP, SEXP maxClonesSEXP, SEXP mutationDecaySEXP, SEXP mutationMinSEXP, SEXP iterSEXP, SEXP affinity_typeSEXP, SEXP alphaSEXP, SEXP c_paramSEXP, SEXP p_paramSEXP, SEXP nClassesSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< arma::mat >::type A(ASEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type y_num(y_numSEXP); + Rcpp::traits::input_parameter< int >::type task_int(task_intSEXP); + Rcpp::traits::input_parameter< int >::type k(kSEXP); + Rcpp::traits::input_parameter< double >::type beta(betaSEXP); + Rcpp::traits::input_parameter< double >::type maxClones(maxClonesSEXP); + Rcpp::traits::input_parameter< double >::type mutationDecay(mutationDecaySEXP); + Rcpp::traits::input_parameter< double >::type mutationMin(mutationMinSEXP); + Rcpp::traits::input_parameter< int >::type iter(iterSEXP); + Rcpp::traits::input_parameter< const std::string& >::type affinity_type(affinity_typeSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type c_param(c_paramSEXP); + Rcpp::traits::input_parameter< double >::type p_param(p_paramSEXP); + Rcpp::traits::input_parameter< int >::type nClasses(nClassesSEXP); + rcpp_result_gen = Rcpp::wrap(clonal_selection_iteration_cpp(A, X, y_num, task_int, k, beta, maxClones, mutationDecay, mutationMin, iter, affinity_type, alpha, c_param, p_param, nClasses)); + return rcpp_result_gen; +END_RCPP +} +// final_assignment_cpp +Rcpp::List final_assignment_cpp(const arma::mat& X, const arma::mat& A, const std::string& affinity_type, const std::string& dist_type, int task_int, double alpha, double c_param, double p_param, const arma::mat& Sigma_inv, const arma::vec& antibody_values, double overall_mean); +RcppExport SEXP _bHIVE_final_assignment_cpp(SEXP XSEXP, SEXP ASEXP, SEXP affinity_typeSEXP, SEXP dist_typeSEXP, SEXP task_intSEXP, SEXP alphaSEXP, SEXP c_paramSEXP, SEXP p_paramSEXP, SEXP Sigma_invSEXP, SEXP antibody_valuesSEXP, SEXP overall_meanSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const std::string& >::type affinity_type(affinity_typeSEXP); + Rcpp::traits::input_parameter< const std::string& >::type dist_type(dist_typeSEXP); + Rcpp::traits::input_parameter< int >::type task_int(task_intSEXP); + Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); + Rcpp::traits::input_parameter< double >::type c_param(c_paramSEXP); + Rcpp::traits::input_parameter< double >::type p_param(p_paramSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Sigma_inv(Sigma_invSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type antibody_values(antibody_valuesSEXP); + Rcpp::traits::input_parameter< double >::type overall_mean(overall_meanSEXP); + rcpp_result_gen = Rcpp::wrap(final_assignment_cpp(X, A, affinity_type, dist_type, task_int, alpha, c_param, p_param, Sigma_inv, antibody_values, overall_mean)); + return rcpp_result_gen; +END_RCPP +} +// init_kmeanspp_cpp +arma::mat init_kmeanspp_cpp(const arma::mat& X, int nCenters); +RcppExport SEXP _bHIVE_init_kmeanspp_cpp(SEXP XSEXP, SEXP nCentersSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< int >::type nCenters(nCentersSEXP); + rcpp_result_gen = Rcpp::wrap(init_kmeanspp_cpp(X, nCenters)); + return rcpp_result_gen; +END_RCPP +} +// idiotypic_dynamics_cpp +Rcpp::List idiotypic_dynamics_cpp(const arma::mat& A, const std::string& affinity_type, double aff_alpha, double aff_c, double aff_p, double theta_low, double theta_high, double source_rate, double decay_rate, double dt, int timeSteps, double survival_threshold); +RcppExport SEXP _bHIVE_idiotypic_dynamics_cpp(SEXP ASEXP, SEXP affinity_typeSEXP, SEXP aff_alphaSEXP, SEXP aff_cSEXP, SEXP aff_pSEXP, SEXP theta_lowSEXP, SEXP theta_highSEXP, SEXP source_rateSEXP, SEXP decay_rateSEXP, SEXP dtSEXP, SEXP timeStepsSEXP, SEXP survival_thresholdSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const std::string& >::type affinity_type(affinity_typeSEXP); + Rcpp::traits::input_parameter< double >::type aff_alpha(aff_alphaSEXP); + Rcpp::traits::input_parameter< double >::type aff_c(aff_cSEXP); + Rcpp::traits::input_parameter< double >::type aff_p(aff_pSEXP); + Rcpp::traits::input_parameter< double >::type theta_low(theta_lowSEXP); + Rcpp::traits::input_parameter< double >::type theta_high(theta_highSEXP); + Rcpp::traits::input_parameter< double >::type source_rate(source_rateSEXP); + Rcpp::traits::input_parameter< double >::type decay_rate(decay_rateSEXP); + Rcpp::traits::input_parameter< double >::type dt(dtSEXP); + Rcpp::traits::input_parameter< int >::type timeSteps(timeStepsSEXP); + Rcpp::traits::input_parameter< double >::type survival_threshold(survival_thresholdSEXP); + rcpp_result_gen = Rcpp::wrap(idiotypic_dynamics_cpp(A, affinity_type, aff_alpha, aff_c, aff_p, theta_low, theta_high, source_rate, decay_rate, dt, timeSteps, survival_threshold)); + return rcpp_result_gen; +END_RCPP +} +// network_suppression_cpp +Rcpp::LogicalVector network_suppression_cpp(const arma::mat& A, const std::string& dist_type, double epsilon, double p, const arma::mat& Sigma_inv); +RcppExport SEXP _bHIVE_network_suppression_cpp(SEXP ASEXP, SEXP dist_typeSEXP, SEXP epsilonSEXP, SEXP pSEXP, SEXP Sigma_invSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const std::string& >::type dist_type(dist_typeSEXP); + Rcpp::traits::input_parameter< double >::type epsilon(epsilonSEXP); + Rcpp::traits::input_parameter< double >::type p(pSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Sigma_inv(Sigma_invSEXP); + rcpp_result_gen = Rcpp::wrap(network_suppression_cpp(A, dist_type, epsilon, p, Sigma_inv)); + return rcpp_result_gen; +END_RCPP +} +// shm_mutate_cpp +Rcpp::List shm_mutate_cpp(const arma::mat& A, const arma::mat& X, const arma::vec& affinities, const arma::uvec& top_k_indices, const arma::uvec& data_indices, const std::string& method, int iter, double decay, double mutationMin, double c_rate, double temperature, double E_0, double base_rate, double beta1, double beta2, double adam_epsilon, arma::mat m1_state, arma::mat m2_state, const std::string& affinity_type, double aff_alpha, double aff_c, double aff_p); +RcppExport SEXP _bHIVE_shm_mutate_cpp(SEXP ASEXP, SEXP XSEXP, SEXP affinitiesSEXP, SEXP top_k_indicesSEXP, SEXP data_indicesSEXP, SEXP methodSEXP, SEXP iterSEXP, SEXP decaySEXP, SEXP mutationMinSEXP, SEXP c_rateSEXP, SEXP temperatureSEXP, SEXP E_0SEXP, SEXP base_rateSEXP, SEXP beta1SEXP, SEXP beta2SEXP, SEXP adam_epsilonSEXP, SEXP m1_stateSEXP, SEXP m2_stateSEXP, SEXP affinity_typeSEXP, SEXP aff_alphaSEXP, SEXP aff_cSEXP, SEXP aff_pSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const arma::mat& >::type A(ASEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type affinities(affinitiesSEXP); + Rcpp::traits::input_parameter< const arma::uvec& >::type top_k_indices(top_k_indicesSEXP); + Rcpp::traits::input_parameter< const arma::uvec& >::type data_indices(data_indicesSEXP); + Rcpp::traits::input_parameter< const std::string& >::type method(methodSEXP); + Rcpp::traits::input_parameter< int >::type iter(iterSEXP); + Rcpp::traits::input_parameter< double >::type decay(decaySEXP); + Rcpp::traits::input_parameter< double >::type mutationMin(mutationMinSEXP); + Rcpp::traits::input_parameter< double >::type c_rate(c_rateSEXP); + Rcpp::traits::input_parameter< double >::type temperature(temperatureSEXP); + Rcpp::traits::input_parameter< double >::type E_0(E_0SEXP); + Rcpp::traits::input_parameter< double >::type base_rate(base_rateSEXP); + Rcpp::traits::input_parameter< double >::type beta1(beta1SEXP); + Rcpp::traits::input_parameter< double >::type beta2(beta2SEXP); + Rcpp::traits::input_parameter< double >::type adam_epsilon(adam_epsilonSEXP); + Rcpp::traits::input_parameter< arma::mat >::type m1_state(m1_stateSEXP); + Rcpp::traits::input_parameter< arma::mat >::type m2_state(m2_stateSEXP); + Rcpp::traits::input_parameter< const std::string& >::type affinity_type(affinity_typeSEXP); + Rcpp::traits::input_parameter< double >::type aff_alpha(aff_alphaSEXP); + Rcpp::traits::input_parameter< double >::type aff_c(aff_cSEXP); + Rcpp::traits::input_parameter< double >::type aff_p(aff_pSEXP); + rcpp_result_gen = Rcpp::wrap(shm_mutate_cpp(A, X, affinities, top_k_indices, data_indices, method, iter, decay, mutationMin, c_rate, temperature, E_0, base_rate, beta1, beta2, adam_epsilon, m1_state, m2_state, affinity_type, aff_alpha, aff_c, aff_p)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_bHIVE_compute_affinity_matrix", (DL_FUNC) &_bHIVE_compute_affinity_matrix, 6}, + {"_bHIVE_compute_distance_matrix", (DL_FUNC) &_bHIVE_compute_distance_matrix, 5}, + {"_bHIVE_compute_pairwise_distance", (DL_FUNC) &_bHIVE_compute_pairwise_distance, 4}, + {"_bHIVE_clonal_selection_iteration_cpp", (DL_FUNC) &_bHIVE_clonal_selection_iteration_cpp, 15}, + {"_bHIVE_final_assignment_cpp", (DL_FUNC) &_bHIVE_final_assignment_cpp, 11}, + {"_bHIVE_init_kmeanspp_cpp", (DL_FUNC) &_bHIVE_init_kmeanspp_cpp, 2}, + {"_bHIVE_idiotypic_dynamics_cpp", (DL_FUNC) &_bHIVE_idiotypic_dynamics_cpp, 12}, + {"_bHIVE_network_suppression_cpp", (DL_FUNC) &_bHIVE_network_suppression_cpp, 5}, + {"_bHIVE_shm_mutate_cpp", (DL_FUNC) &_bHIVE_shm_mutate_cpp, 22}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_bHIVE(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/RcppExports.o b/src/RcppExports.o new file mode 100644 index 0000000..6a090f2 Binary files /dev/null and b/src/RcppExports.o differ diff --git a/src/affinity_distance.cpp b/src/affinity_distance.cpp new file mode 100644 index 0000000..b2b8f84 --- /dev/null +++ b/src/affinity_distance.cpp @@ -0,0 +1,314 @@ +// [[Rcpp::depends(RcppArmadillo)]] +#include "affinity_distance.h" + +// ============================================================ +// AFFINITY FUNCTIONS (bulk matrix computation) +// ============================================================ + +// Gaussian/RBF: exp(-alpha * ||x-y||^2) +// Uses BLAS trick: ||x-y||^2 = ||x||^2 + ||y||^2 - 2*x.y +static arma::mat affinity_gaussian(const arma::mat& X, const arma::mat& A, + double alpha) { + arma::uword n = X.n_rows; + arma::uword m = A.n_rows; + + // Squared norms + arma::vec x_sq = arma::sum(X % X, 1); // n x 1 + arma::vec a_sq = arma::sum(A % A, 1); // m x 1 + + // Squared distance matrix: n x m + // D_sq(i,j) = ||x_i||^2 + ||a_j||^2 - 2 * x_i . a_j + arma::mat D_sq = arma::repmat(x_sq, 1, m) + + arma::repmat(a_sq.t(), n, 1) - + 2.0 * X * A.t(); + + // Clamp to avoid numerical issues with negative values + D_sq.clamp(0.0, arma::datum::inf); + + return arma::exp(-alpha * D_sq); +} + +// Laplace: exp(-alpha * ||x-y||_1) +static arma::mat affinity_laplace(const arma::mat& X, const arma::mat& A, + double alpha) { + arma::uword n = X.n_rows; + arma::uword m = A.n_rows; + arma::mat result(n, m); + + for (arma::uword j = 0; j < m; ++j) { + arma::rowvec a_j = A.row(j); + for (arma::uword i = 0; i < n; ++i) { + double l1 = arma::accu(arma::abs(X.row(i) - a_j)); + result(i, j) = std::exp(-alpha * l1); + } + } + return result; +} + +// Polynomial: (x.y + c)^p +static arma::mat affinity_polynomial(const arma::mat& X, const arma::mat& A, + double c, double p) { + arma::mat dot = X * A.t(); // n x m via BLAS + return arma::pow(dot + c, p); +} + +// Cosine similarity: (x.y) / (||x|| * ||y||) +static arma::mat affinity_cosine(const arma::mat& X, const arma::mat& A) { + arma::vec x_norm = arma::sqrt(arma::sum(X % X, 1)); // n x 1 + arma::vec a_norm = arma::sqrt(arma::sum(A % A, 1)); // m x 1 + + arma::mat dot = X * A.t(); // n x m via BLAS + + // Denominator matrix + arma::mat denom = x_norm * a_norm.t(); + + // Avoid division by zero (1e-12 is safer than 1e-15 across platforms) + arma::mat result = dot / (denom + 1e-12); + result.clamp(-1.0, 1.0); + + return result; +} + +// Hamming similarity: proportion of matching elements +static arma::mat affinity_hamming(const arma::mat& X, const arma::mat& A) { + const arma::uword n = X.n_rows; + const arma::uword m = A.n_rows; + const arma::uword d = X.n_cols; + const double inv_d = 1.0 / static_cast(d); + arma::mat result(n, m); + + // Pre-convert both matrices to integer once (avoids per-pair conversion) + const arma::imat X_int = arma::conv_to::from(X); + const arma::imat A_int = arma::conv_to::from(A); + + for (arma::uword j = 0; j < m; ++j) { + const arma::irowvec a_int = A_int.row(j); + for (arma::uword i = 0; i < n; ++i) { + result(i, j) = static_cast(arma::accu(X_int.row(i) == a_int)) * inv_d; + } + } + return result; +} + +// Dispatcher (bulk matrix) +arma::mat affinity_matrix_cpp(const arma::mat& X, const arma::mat& A, + AffinityType type, double alpha, double c, + double p) { + switch (type) { + case AffinityType::GAUSSIAN: + return affinity_gaussian(X, A, alpha); + case AffinityType::LAPLACE: + return affinity_laplace(X, A, alpha); + case AffinityType::POLYNOMIAL: + return affinity_polynomial(X, A, c, p); + case AffinityType::COSINE: + return affinity_cosine(X, A); + case AffinityType::HAMMING: + return affinity_hamming(X, A); + default: + Rcpp::stop("Unknown affinity type"); + return arma::mat(); // unreachable + } +} + + +// ============================================================ +// SCALAR AFFINITY (single point-to-point, no matrix allocation) +// Critical for hot-path clone/mutate evaluations +// ============================================================ + +double affinity_scalar_cpp(const arma::rowvec& x, const arma::rowvec& a, + AffinityType type, double alpha, double c, + double p) { + switch (type) { + case AffinityType::GAUSSIAN: { + const double dist2 = arma::accu(arma::square(x - a)); + return std::exp(-alpha * dist2); + } + case AffinityType::LAPLACE: { + const double l1 = arma::accu(arma::abs(x - a)); + return std::exp(-alpha * l1); + } + case AffinityType::POLYNOMIAL: { + return std::pow(arma::dot(x, a) + c, p); + } + case AffinityType::COSINE: { + const double denom = arma::norm(x) * arma::norm(a); + if (denom < 1e-12) return 0.0; + double cs = arma::dot(x, a) / denom; + return std::max(-1.0, std::min(1.0, cs)); + } + case AffinityType::HAMMING: { + const arma::uword d = x.n_elem; + const arma::irowvec xi = arma::conv_to::from(x); + const arma::irowvec ai = arma::conv_to::from(a); + return static_cast(arma::accu(xi == ai)) / static_cast(d); + } + default: + return 0.0; + } +} + + +// ============================================================ +// DISTANCE FUNCTIONS (bulk matrix computation) +// ============================================================ + +// Euclidean distance: sqrt(||x-y||^2) +static arma::mat distance_euclidean(const arma::mat& X, const arma::mat& A) { + arma::vec x_sq = arma::sum(X % X, 1); + arma::vec a_sq = arma::sum(A % A, 1); + arma::uword n = X.n_rows; + arma::uword m = A.n_rows; + + arma::mat D_sq = arma::repmat(x_sq, 1, m) + + arma::repmat(a_sq.t(), n, 1) - + 2.0 * X * A.t(); + D_sq.clamp(0.0, arma::datum::inf); + return arma::sqrt(D_sq); +} + +// Manhattan distance: ||x-y||_1 +static arma::mat distance_manhattan(const arma::mat& X, const arma::mat& A) { + arma::uword n = X.n_rows; + arma::uword m = A.n_rows; + arma::mat result(n, m); + + for (arma::uword j = 0; j < m; ++j) { + arma::rowvec a_j = A.row(j); + for (arma::uword i = 0; i < n; ++i) { + result(i, j) = arma::accu(arma::abs(X.row(i) - a_j)); + } + } + return result; +} + +// Minkowski distance: (sum |x-y|^p)^(1/p) +static arma::mat distance_minkowski(const arma::mat& X, const arma::mat& A, + double p) { + // For p=2, delegate to Euclidean (BLAS-optimized) + if (std::abs(p - 2.0) < 1e-10) return distance_euclidean(X, A); + + const arma::uword n = X.n_rows; + const arma::uword m = A.n_rows; + arma::mat result(n, m); + const double inv_p = 1.0 / p; + + for (arma::uword j = 0; j < m; ++j) { + const arma::rowvec a_j = A.row(j); + for (arma::uword i = 0; i < n; ++i) { + // Clamp to >= 0 before pow to prevent NaN from numerical error + double val = arma::accu(arma::pow(arma::abs(X.row(i) - a_j), p)); + result(i, j) = std::pow(std::max(0.0, val), inv_p); + } + } + return result; +} + +// Cosine distance: 1 - cosine_similarity +static arma::mat distance_cosine(const arma::mat& X, const arma::mat& A) { + return 1.0 - affinity_cosine(X, A); +} + +// Mahalanobis distance: sqrt((x-y)^T Sigma^{-1} (x-y)) +// Sigma_inv is pre-computed inverse covariance matrix +static arma::mat distance_mahalanobis(const arma::mat& X, const arma::mat& A, + const arma::mat& Sigma_inv) { + arma::uword n = X.n_rows; + arma::uword m = A.n_rows; + arma::mat result(n, m); + + for (arma::uword j = 0; j < m; ++j) { + arma::rowvec a_j = A.row(j); + for (arma::uword i = 0; i < n; ++i) { + arma::rowvec diff = X.row(i) - a_j; + arma::rowvec tmp = diff * Sigma_inv; + result(i, j) = std::sqrt(arma::accu(tmp % diff)); + } + } + return result; +} + +// Hamming distance: count of mismatches +static arma::mat distance_hamming(const arma::mat& X, const arma::mat& A) { + const arma::uword n = X.n_rows; + const arma::uword m = A.n_rows; + arma::mat result(n, m); + + // Pre-convert both matrices to integer once + const arma::imat X_int = arma::conv_to::from(X); + const arma::imat A_int = arma::conv_to::from(A); + + for (arma::uword j = 0; j < m; ++j) { + const arma::irowvec a_int = A_int.row(j); + for (arma::uword i = 0; i < n; ++i) { + result(i, j) = static_cast(arma::accu(X_int.row(i) != a_int)); + } + } + return result; +} + +// Dispatcher +arma::mat distance_matrix_cpp(const arma::mat& X, const arma::mat& A, + DistanceType type, double p, + const arma::mat& Sigma_inv) { + switch (type) { + case DistanceType::EUCLIDEAN: + return distance_euclidean(X, A); + case DistanceType::MANHATTAN: + return distance_manhattan(X, A); + case DistanceType::MINKOWSKI: + return distance_minkowski(X, A, p); + case DistanceType::COSINE: + return distance_cosine(X, A); + case DistanceType::MAHALANOBIS: + return distance_mahalanobis(X, A, Sigma_inv); + case DistanceType::HAMMING: + return distance_hamming(X, A); + default: + Rcpp::stop("Unknown distance type"); + return arma::mat(); // unreachable + } +} + + +// ============================================================ +// PAIRWISE DISTANCE (antibody-antibody, m x m) +// ============================================================ + +arma::mat pairwise_distance_cpp(const arma::mat& A, DistanceType type, + double p, const arma::mat& Sigma_inv) { + return distance_matrix_cpp(A, A, type, p, Sigma_inv); +} + + +// ============================================================ +// R-EXPOSED FUNCTIONS +// ============================================================ + +// [[Rcpp::export]] +arma::mat compute_affinity_matrix(const arma::mat& X, const arma::mat& A, + const std::string& affinity_type, + double alpha = 1.0, double c = 1.0, + double p = 2.0) { + AffinityType type = parse_affinity_type(affinity_type); + return affinity_matrix_cpp(X, A, type, alpha, c, p); +} + +// [[Rcpp::export]] +arma::mat compute_distance_matrix(const arma::mat& X, const arma::mat& A, + const std::string& dist_type, + double p, + const arma::mat& Sigma_inv) { + DistanceType type = parse_distance_type(dist_type); + return distance_matrix_cpp(X, A, type, p, Sigma_inv); +} + +// [[Rcpp::export]] +arma::mat compute_pairwise_distance(const arma::mat& A, + const std::string& dist_type, + double p, + const arma::mat& Sigma_inv) { + DistanceType type = parse_distance_type(dist_type); + return pairwise_distance_cpp(A, type, p, Sigma_inv); +} diff --git a/src/affinity_distance.h b/src/affinity_distance.h new file mode 100644 index 0000000..50c903f --- /dev/null +++ b/src/affinity_distance.h @@ -0,0 +1,41 @@ +#ifndef BHIVE_AFFINITY_DISTANCE_H +#define BHIVE_AFFINITY_DISTANCE_H + +#include "bHIVE_types.h" + +// ============================================================ +// Bulk affinity matrix: returns n x m matrix of affinities +// X is n x d (data), A is m x d (antibodies) +// ============================================================ + +arma::mat affinity_matrix_cpp(const arma::mat& X, const arma::mat& A, + AffinityType type, double alpha, double c, + double p); + +// ============================================================ +// Scalar affinity: returns single affinity value between two vectors +// Critical for hot-path single-point evaluations in clone/mutate +// ============================================================ + +double affinity_scalar_cpp(const arma::rowvec& x, const arma::rowvec& a, + AffinityType type, double alpha, double c, + double p); + +// ============================================================ +// Bulk distance matrix: returns n x m matrix of distances +// X is n x d (data), A is m x d (antibodies) +// ============================================================ + +arma::mat distance_matrix_cpp(const arma::mat& X, const arma::mat& A, + DistanceType type, double p, + const arma::mat& Sigma_inv); + +// ============================================================ +// Pairwise distance matrix: returns m x m matrix (antibody-antibody) +// Used for network suppression and idiotypic interactions +// ============================================================ + +arma::mat pairwise_distance_cpp(const arma::mat& A, DistanceType type, + double p, const arma::mat& Sigma_inv); + +#endif // BHIVE_AFFINITY_DISTANCE_H diff --git a/src/affinity_distance.o b/src/affinity_distance.o new file mode 100644 index 0000000..bda1401 Binary files /dev/null and b/src/affinity_distance.o differ diff --git a/src/bHIVE.so b/src/bHIVE.so new file mode 100755 index 0000000..eeed963 Binary files /dev/null and b/src/bHIVE.so differ diff --git a/src/bHIVE_types.h b/src/bHIVE_types.h new file mode 100644 index 0000000..0bffb6c --- /dev/null +++ b/src/bHIVE_types.h @@ -0,0 +1,59 @@ +#ifndef BHIVE_TYPES_H +#define BHIVE_TYPES_H + +#include + +enum class AffinityType { + GAUSSIAN = 0, + LAPLACE = 1, + POLYNOMIAL = 2, + COSINE = 3, + HAMMING = 4 +}; + +enum class DistanceType { + EUCLIDEAN = 0, + MANHATTAN = 1, + MINKOWSKI = 2, + COSINE = 3, + MAHALANOBIS = 4, + HAMMING = 5 +}; + +enum class TaskType { + CLUSTERING = 0, + CLASSIFICATION = 1, + REGRESSION = 2 +}; + +// Convert R string to enum +inline AffinityType parse_affinity_type(const std::string& s) { + if (s == "gaussian") return AffinityType::GAUSSIAN; + if (s == "laplace") return AffinityType::LAPLACE; + if (s == "polynomial") return AffinityType::POLYNOMIAL; + if (s == "cosine") return AffinityType::COSINE; + if (s == "hamming") return AffinityType::HAMMING; + Rcpp::stop("Invalid affinity type: " + s); + return AffinityType::GAUSSIAN; // unreachable +} + +inline DistanceType parse_distance_type(const std::string& s) { + if (s == "euclidean") return DistanceType::EUCLIDEAN; + if (s == "manhattan") return DistanceType::MANHATTAN; + if (s == "minkowski") return DistanceType::MINKOWSKI; + if (s == "cosine") return DistanceType::COSINE; + if (s == "mahalanobis") return DistanceType::MAHALANOBIS; + if (s == "hamming") return DistanceType::HAMMING; + Rcpp::stop("Invalid distance type: " + s); + return DistanceType::EUCLIDEAN; // unreachable +} + +inline TaskType parse_task_type(const std::string& s) { + if (s == "clustering") return TaskType::CLUSTERING; + if (s == "classification") return TaskType::CLASSIFICATION; + if (s == "regression") return TaskType::REGRESSION; + Rcpp::stop("Invalid task type: " + s); + return TaskType::CLUSTERING; // unreachable +} + +#endif // BHIVE_TYPES_H diff --git a/src/clonal_selection.cpp b/src/clonal_selection.cpp new file mode 100644 index 0000000..a617b5c --- /dev/null +++ b/src/clonal_selection.cpp @@ -0,0 +1,260 @@ +// [[Rcpp::depends(RcppArmadillo)]] +#include "affinity_distance.h" + +// Core clonal selection + mutation loop for one iteration +// +// For each data point: +// 1. Compute affinity to all antibodies (uses pre-computed affinity matrix) +// 2. Identify top-k antibodies +// 3. Clone and mutate top-k, keeping improvements +// 4. Accumulate task-specific statistics +// +// Returns a list with: +// - A: updated antibody matrix +// - class_counts: (classification) weighted vote matrix +// - sum_y: (regression) weighted sum of y values +// - sum_aff: (regression) sum of affinities +// +// [[Rcpp::export]] +Rcpp::List clonal_selection_iteration_cpp( + arma::mat A, // passed by value (modified in place) + const arma::mat& X, + const arma::vec& y_num, // numeric y (regression) or 0-indexed class int + int task_int, // 0=clustering, 1=classification, 2=regression + int k, + double beta, + double maxClones, + double mutationDecay, + double mutationMin, + int iter, + const std::string& affinity_type, + double alpha, + double c_param, + double p_param, + int nClasses) { + + const arma::uword n = X.n_rows; + const arma::uword m = A.n_rows; + const arma::uword d = X.n_cols; + const TaskType task = static_cast(task_int); + const AffinityType aff_type = parse_affinity_type(affinity_type); + + // Task-specific accumulators + arma::mat class_counts; + arma::vec sum_y, sum_aff; + + if (task == TaskType::CLASSIFICATION) { + class_counts = arma::zeros(m, nClasses); + } else if (task == TaskType::REGRESSION) { + sum_y = arma::zeros(m); + sum_aff = arma::zeros(m); + } + + // Compute full affinity matrix (n x m) -- the big BLAS win + arma::mat aff_matrix = affinity_matrix_cpp(X, A, aff_type, alpha, c_param, p_param); + + // Pre-compute decay factor for this iteration + const double decay_factor = std::pow(mutationDecay, iter - 1); + + // Process each data point + for (arma::uword i = 0; i < n; ++i) { + const arma::rowvec aff_values = aff_matrix.row(i); + + const double max_aff = aff_values.max(); + if (max_aff <= 0.0 || !std::isfinite(max_aff)) continue; + + // Top-k indices (descending by affinity) + const arma::uword k2 = std::min(static_cast(k), m); + const arma::uvec sorted_idx = arma::sort_index(aff_values, "descend"); + const arma::uvec top_idx = sorted_idx.head(k2); + + // Task-specific accumulation + if (task == TaskType::CLASSIFICATION) { + const int class_col = static_cast(y_num(i)); + for (arma::uword ki = 0; ki < k2; ++ki) { + const arma::uword jj = top_idx(ki); + class_counts(jj, class_col) += aff_values(jj); + } + } else if (task == TaskType::REGRESSION) { + const double y_val = y_num(i); + for (arma::uword ki = 0; ki < k2; ++ki) { + const arma::uword jj = top_idx(ki); + sum_y(jj) += y_val * aff_values(jj); + sum_aff(jj) += aff_values(jj); + } + } + + // Clone and mutate + const arma::rowvec x_i = X.row(i); + for (arma::uword ki = 0; ki < k2; ++ki) { + const arma::uword jj = top_idx(ki); + const double f_j = aff_values(jj); + const int nClonesInt = std::min( + static_cast(maxClones), + static_cast(std::floor(beta * (f_j / max_aff))) + ); + if (nClonesInt <= 0) continue; + + for (int clone_id = 0; clone_id < nClonesInt; ++clone_id) { + // Decayed mutation rate + const double mutation_rate = std::max( + (1.0 - f_j) * decay_factor, + mutationMin + ); + + // Generate mutated antibody + arma::rowvec noise(d); + for (arma::uword dd = 0; dd < d; ++dd) { + noise(dd) = R::rnorm(0.0, mutation_rate); + } + const arma::rowvec mutated = A.row(jj) + noise; + + // Use scalar affinity (no matrix allocation in hot path) + const double f_mutated = affinity_scalar_cpp( + x_i, mutated, aff_type, alpha, c_param, p_param + ); + + if (std::isfinite(f_mutated) && f_mutated > f_j) { + A.row(jj) = mutated; + // Update the affinity column for this antibody going forward + // Use bulk computation since it updates for all n data points + arma::mat mutated_mat(1, d); + mutated_mat.row(0) = mutated; + aff_matrix.col(jj) = affinity_matrix_cpp(X, mutated_mat, aff_type, + alpha, c_param, p_param).col(0); + } + } + } + } + + return Rcpp::List::create( + Rcpp::Named("A") = A, + Rcpp::Named("class_counts") = class_counts, + Rcpp::Named("sum_y") = sum_y, + Rcpp::Named("sum_aff") = sum_aff + ); +} + + +// Final assignment computation +// For clustering/regression: assign each point to nearest antibody by distance +// For classification: assign each point to antibody with highest affinity +// [[Rcpp::export]] +Rcpp::List final_assignment_cpp( + const arma::mat& X, + const arma::mat& A, + const std::string& affinity_type, + const std::string& dist_type, + int task_int, + double alpha, + double c_param, + double p_param, + const arma::mat& Sigma_inv, + const arma::vec& antibody_values, + double overall_mean) { + + const arma::uword n = X.n_rows; + const TaskType task = static_cast(task_int); + + if (task == TaskType::CLUSTERING) { + const DistanceType dtype = parse_distance_type(dist_type); + const arma::mat D = distance_matrix_cpp(X, A, dtype, p_param, Sigma_inv); + arma::uvec assignments(n); + for (arma::uword i = 0; i < n; ++i) { + assignments(i) = D.row(i).index_min(); + } + const arma::ivec result = arma::conv_to::from(assignments) + 1; + return Rcpp::List::create(Rcpp::Named("assignments") = result); + + } else if (task == TaskType::CLASSIFICATION) { + const AffinityType atype = parse_affinity_type(affinity_type); + const arma::mat aff = affinity_matrix_cpp(X, A, atype, alpha, c_param, p_param); + arma::uvec best_idx(n); + for (arma::uword i = 0; i < n; ++i) { + best_idx(i) = aff.row(i).index_max(); + } + const arma::ivec result = arma::conv_to::from(best_idx) + 1; + return Rcpp::List::create(Rcpp::Named("best_antibody_idx") = result); + + } else { + // Regression: weighted average + nearest cluster + const AffinityType atype = parse_affinity_type(affinity_type); + const arma::mat aff = affinity_matrix_cpp(X, A, atype, alpha, c_param, p_param); + arma::vec predictions(n); + + for (arma::uword i = 0; i < n; ++i) { + const arma::rowvec aff_row = aff.row(i); + const double s_aff = arma::accu(aff_row); + if (s_aff <= 0.0) { + predictions(i) = overall_mean; + } else { + predictions(i) = arma::accu(aff_row.t() % antibody_values) / s_aff; + } + } + + const DistanceType dtype = parse_distance_type(dist_type); + const arma::mat D = distance_matrix_cpp(X, A, dtype, p_param, Sigma_inv); + arma::uvec cluster_assign(n); + for (arma::uword i = 0; i < n; ++i) { + cluster_assign(i) = D.row(i).index_min(); + } + const arma::ivec cluster_result = arma::conv_to::from(cluster_assign) + 1; + + return Rcpp::List::create( + Rcpp::Named("predictions") = predictions, + Rcpp::Named("cluster_assign") = cluster_result + ); + } +} + + +// kmeans++ initialization in C++ +// [[Rcpp::export]] +arma::mat init_kmeanspp_cpp(const arma::mat& X, int nCenters) { + const arma::uword n = X.n_rows; + const arma::uword d = X.n_cols; + arma::mat centers(nCenters, d); + + // Choose first center uniformly at random (safe int sampling) + arma::uword idx = static_cast(std::floor(R::runif(0.0, static_cast(n)))); + if (idx >= n) idx = n - 1; // guard against runif returning exactly n + centers.row(0) = X.row(idx); + + if (nCenters > 1) { + arma::vec dists(n); + + for (int cId = 1; cId < nCenters; ++cId) { + // Compute min squared distance to any chosen center + const arma::mat chosen = centers.rows(0, cId - 1); + arma::mat D = distance_matrix_cpp(X, chosen, DistanceType::EUCLIDEAN, + 2.0, arma::mat()); + D = D % D; // square distances + + // Min distance to any center for each point + dists = arma::min(D, 1); + + // Sample proportional to squared distance + const double total = arma::accu(dists); + if (total <= 0.0) { + idx = static_cast(std::floor(R::runif(0.0, static_cast(n)))); + if (idx >= n) idx = n - 1; + } else { + const arma::vec probs = dists / total; + const double r = R::runif(0.0, 1.0); + double cumsum = 0.0; + idx = 0; + for (arma::uword i = 0; i < n; ++i) { + cumsum += probs(i); + if (r <= cumsum) { + idx = i; + break; + } + idx = i; // fallback: last element if rounding prevents reaching 1.0 + } + } + centers.row(cId) = X.row(idx); + } + } + + return centers; +} diff --git a/src/clonal_selection.o b/src/clonal_selection.o new file mode 100644 index 0000000..56f61c6 Binary files /dev/null and b/src/clonal_selection.o differ diff --git a/src/idiotypic.cpp b/src/idiotypic.cpp new file mode 100644 index 0000000..2f7cd77 --- /dev/null +++ b/src/idiotypic.cpp @@ -0,0 +1,102 @@ +// [[Rcpp::depends(RcppArmadillo)]] +#include "affinity_distance.h" +#include + +// Idiotypic Network Dynamics -- C++ backend +// Implements Varela & Coutinho's second-generation immune network model +// with bell-shaped (double-threshold) activation function + +// Bell-shaped activation: too little = death, moderate = activation, too much = suppression +// Returns: -1 (suppressed), 0 (neutral), +1 (activated) +static double bell_activation(double stimulation, double theta_low, double theta_high) { + if (stimulation < theta_low) return -1.0; // insufficient stimulation -> death + if (stimulation > theta_high) return -1.0; // over-stimulation -> suppression + // In the activation window: scale linearly from 0 to 1 at peak, back to 0 + const double mid = (theta_low + theta_high) / 2.0; + const double half_width = (theta_high - theta_low) / 2.0; + if (half_width <= 0.0) return 0.0; // degenerate case: theta_low == theta_high + return 1.0 - std::abs(stimulation - mid) / half_width; +} + + +// Simulate idiotypic network dynamics +// Population dynamics: dA_i/dt = source - decay*A_i + A_i * sum(activation) - A_i * sum(suppression) +// +// Returns: +// - population: final population levels per antibody +// - keep: logical vector of which antibodies survive +// - Ab_Ab_affinity: the antibody-antibody affinity matrix +// +// [[Rcpp::export]] +Rcpp::List idiotypic_dynamics_cpp(const arma::mat& A, + const std::string& affinity_type, + double aff_alpha, + double aff_c, + double aff_p, + double theta_low, + double theta_high, + double source_rate, + double decay_rate, + double dt, + int timeSteps, + double survival_threshold) { + arma::uword m = A.n_rows; + + // Compute Ab-Ab affinity matrix + AffinityType at = parse_affinity_type(affinity_type); + arma::mat Ab_Ab = affinity_matrix_cpp(A, A, at, aff_alpha, aff_c, aff_p); + + // Zero out diagonal (no self-interaction) + Ab_Ab.diag().zeros(); + + // Initialize population levels (all start at 1.0) + arma::vec population = arma::ones(m); + + // Simulate dynamics + for (int t = 0; t < timeSteps; ++t) { + arma::vec dp = arma::zeros(m); + + for (arma::uword i = 0; i < m; ++i) { + double activation_sum = 0.0; + double suppression_sum = 0.0; + + for (arma::uword j = 0; j < m; ++j) { + if (i == j) continue; + + // Total stimulation from j to i = affinity * population_j + double stimulation = Ab_Ab(i, j) * population(j); + double response = bell_activation(stimulation, theta_low, theta_high); + + if (response > 0) { + activation_sum += response; + } else if (response < 0) { + suppression_sum += std::abs(response); + } + } + + // Population dynamics ODE + dp(i) = source_rate + - decay_rate * population(i) + + population(i) * activation_sum + - population(i) * suppression_sum; + } + + // Euler step + population += dt * dp; + + // Clamp to non-negative + population.clamp(0.0, arma::datum::inf); + } + + // Determine survivors + Rcpp::LogicalVector keep(m); + for (arma::uword i = 0; i < m; ++i) { + keep[i] = population(i) > survival_threshold; + } + + return Rcpp::List::create( + Rcpp::Named("population") = population, + Rcpp::Named("keep") = keep, + Rcpp::Named("Ab_Ab_affinity") = Ab_Ab + ); +} diff --git a/src/idiotypic.o b/src/idiotypic.o new file mode 100644 index 0000000..d6c607f Binary files /dev/null and b/src/idiotypic.o differ diff --git a/src/network_suppression.cpp b/src/network_suppression.cpp new file mode 100644 index 0000000..24fe7a5 --- /dev/null +++ b/src/network_suppression.cpp @@ -0,0 +1,70 @@ +// [[Rcpp::depends(RcppArmadillo)]] +#include "affinity_distance.h" + +// Network suppression: remove antibodies within epsilon distance of each other +// Returns logical vector indicating which antibodies to keep +// [[Rcpp::export]] +Rcpp::LogicalVector network_suppression_cpp(const arma::mat& A, + const std::string& dist_type, + double epsilon, + double p, + const arma::mat& Sigma_inv) { + arma::uword m = A.n_rows; + DistanceType dtype = parse_distance_type(dist_type); + + std::vector keep(m, true); + + // Compute pairwise distances only as needed (upper triangle) + for (arma::uword u = 0; u < m; ++u) { + if (!keep[u]) continue; + for (arma::uword v = u + 1; v < m; ++v) { + if (!keep[v]) continue; + + double dist_uv; + arma::rowvec diff = A.row(u) - A.row(v); + + switch (dtype) { + case DistanceType::EUCLIDEAN: + dist_uv = std::sqrt(arma::accu(diff % diff)); + break; + case DistanceType::MANHATTAN: + dist_uv = arma::accu(arma::abs(diff)); + break; + case DistanceType::MINKOWSKI: + dist_uv = std::pow(arma::accu(arma::pow(arma::abs(diff), p)), 1.0 / p); + break; + case DistanceType::COSINE: { + double dot = arma::accu(A.row(u) % A.row(v)); + double nu = std::sqrt(arma::accu(A.row(u) % A.row(u))); + double nv = std::sqrt(arma::accu(A.row(v) % A.row(v))); + double denom = nu * nv; + dist_uv = (denom < 1e-15) ? 1.0 : (1.0 - dot / denom); + break; + } + case DistanceType::MAHALANOBIS: { + arma::rowvec tmp = diff * Sigma_inv; + dist_uv = std::sqrt(arma::accu(tmp % diff)); + break; + } + case DistanceType::HAMMING: { + arma::irowvec u_int = arma::conv_to::from(A.row(u)); + arma::irowvec v_int = arma::conv_to::from(A.row(v)); + dist_uv = static_cast(arma::accu(u_int != v_int)); + break; + } + default: + Rcpp::stop("Unknown distance type in suppression"); + } + + if (dist_uv < epsilon) { + keep[v] = false; + } + } + } + + Rcpp::LogicalVector result(m); + for (arma::uword i = 0; i < m; ++i) { + result[i] = keep[i]; + } + return result; +} diff --git a/src/network_suppression.o b/src/network_suppression.o new file mode 100644 index 0000000..5bc25c4 Binary files /dev/null and b/src/network_suppression.o differ diff --git a/src/shm.cpp b/src/shm.cpp new file mode 100644 index 0000000..5a74ec8 --- /dev/null +++ b/src/shm.cpp @@ -0,0 +1,181 @@ +// [[Rcpp::depends(RcppArmadillo)]] +#include "affinity_distance.h" +#include + +// Somatic Hypermutation Engine -- C++ backend +// Five mutation strategies inspired by real immunological mechanisms + +// Strategy 1: UNIFORM -- classic random Gaussian noise (current bHIVE behavior) +// mutation_rate = max((1 - affinity) * decay^(iter-1), mutationMin) +// mutated = antibody + N(0, mutation_rate) +static arma::rowvec mutate_uniform(const arma::rowvec& antibody, + double affinity, int iter, + double decay, double mutationMin) { + arma::uword d = antibody.n_elem; + double rate = std::max((1.0 - affinity) * std::pow(decay, iter - 1), mutationMin); + arma::rowvec noise(d); + for (arma::uword i = 0; i < d; ++i) { + noise(i) = R::rnorm(0.0, rate); + } + return antibody + noise; +} + +// Strategy 2: AIRS -- affinity-proportional mutation (Watkins & Timmis, AIRS2) +// rate = c * exp(-affinity / temperature) +// High affinity -> fine-tuning, low affinity -> broad exploration +static arma::rowvec mutate_airs(const arma::rowvec& antibody, + double affinity, double c_rate, + double temperature) { + arma::uword d = antibody.n_elem; + double rate = c_rate * std::exp(-affinity / temperature); + arma::rowvec noise(d); + for (arma::uword i = 0; i < d; ++i) { + noise(i) = R::rnorm(0.0, rate); + } + return antibody + noise; +} + +// Strategy 3: HOTSPOT -- feature-importance-weighted mutation +// Features with higher gradient magnitude mutate more (analogous to AID targeting) +// gradient = data_point - antibody (direction of improvement) +static arma::rowvec mutate_hotspot(const arma::rowvec& antibody, + const arma::rowvec& data_point, + double affinity, double base_rate) { + arma::uword d = antibody.n_elem; + arma::rowvec gradient = data_point - antibody; + arma::rowvec grad_mag = arma::abs(gradient); + + // Normalize gradient magnitudes to get per-feature weights + double total = arma::accu(grad_mag) + 1e-15; + arma::rowvec weights = grad_mag / total; + + // Scale mutation rate by feature importance + double overall_rate = (1.0 - affinity) * base_rate; + arma::rowvec noise(d); + for (arma::uword i = 0; i < d; ++i) { + double feature_rate = overall_rate * (1.0 + d * weights(i)); + noise(i) = R::rnorm(0.0, feature_rate); + } + return antibody + noise; +} + +// Strategy 4: ENERGY -- mutation budget constrained by E = E_0 * (1-affinity)^2 +// Inspired by Kleinstein's E_SHM ~ N_Mut^2 model +// Total mutation magnitude is bounded, distributed across features +static arma::rowvec mutate_energy(const arma::rowvec& antibody, + double affinity, double E_0) { + arma::uword d = antibody.n_elem; + + // Energy budget + double E_budget = E_0 * std::pow(1.0 - affinity, 2); + + // Generate random direction, then scale to energy budget + arma::rowvec direction(d); + double norm_sq = 0.0; + for (arma::uword i = 0; i < d; ++i) { + direction(i) = R::rnorm(0.0, 1.0); + norm_sq += direction(i) * direction(i); + } + double norm = std::sqrt(norm_sq + 1e-15); + + // Magnitude sampled from [0, sqrt(E_budget)] + double max_magnitude = std::sqrt(E_budget); + double magnitude = R::runif(0.0, max_magnitude); + + return antibody + (magnitude / norm) * direction; +} + +// Strategy 5: ADAPTIVE -- per-feature adaptive rate with moment tracking +// Each feature maintains running mean (m1) and variance (m2) of past gradients +// Mutation rate per feature adapts like Adam optimizer +// [[Rcpp::export]] +Rcpp::List shm_mutate_cpp(const arma::mat& A, + const arma::mat& X, + const arma::vec& affinities, + const arma::uvec& top_k_indices, + const arma::uvec& data_indices, + const std::string& method, + int iter, + double decay, + double mutationMin, + double c_rate, + double temperature, + double E_0, + double base_rate, + double beta1, + double beta2, + double adam_epsilon, + arma::mat m1_state, + arma::mat m2_state, + const std::string& affinity_type, + double aff_alpha, + double aff_c, + double aff_p) { + arma::uword d = A.n_cols; + arma::mat A_out = A; + + for (arma::uword idx = 0; idx < data_indices.n_elem; ++idx) { + arma::uword i = data_indices(idx); // data point index + arma::rowvec x_i = X.row(i); + + for (arma::uword ki = 0; ki < top_k_indices.n_elem; ++ki) { + arma::uword j = top_k_indices(ki); // antibody index + double aff = affinities(ki); + + arma::rowvec mutated; + + if (method == "uniform") { + mutated = mutate_uniform(A_out.row(j), aff, iter, decay, mutationMin); + } else if (method == "airs") { + mutated = mutate_airs(A_out.row(j), aff, c_rate, temperature); + } else if (method == "hotspot") { + mutated = mutate_hotspot(A_out.row(j), x_i, aff, base_rate); + } else if (method == "energy") { + mutated = mutate_energy(A_out.row(j), aff, E_0); + } else if (method == "adaptive") { + // Adam-inspired adaptive per-feature mutation + arma::rowvec gradient = x_i - A_out.row(j); + + // Update moments + m1_state.row(j) = beta1 * m1_state.row(j) + (1.0 - beta1) * gradient; + m2_state.row(j) = beta2 * m2_state.row(j) + (1.0 - beta2) * (gradient % gradient); + + // Bias correction + double bc1 = 1.0 - std::pow(beta1, iter); + double bc2 = 1.0 - std::pow(beta2, iter); + arma::rowvec m1_hat = m1_state.row(j) / bc1; + arma::rowvec m2_hat = m2_state.row(j) / bc2; + + // Adaptive step size (like Adam learning rate) + double lr = (1.0 - aff) * base_rate; + arma::rowvec step = lr * m1_hat / (arma::sqrt(m2_hat) + adam_epsilon); + + // Add stochastic noise scaled by adaptive rate + arma::rowvec noise(d); + for (arma::uword dd = 0; dd < d; ++dd) { + double feat_rate = std::abs(step(dd)) + mutationMin; + noise(dd) = R::rnorm(0.0, feat_rate); + } + mutated = A_out.row(j) + step + 0.1 * noise; + } else { + Rcpp::stop("Unknown SHM method: " + method); + } + + // Evaluate mutated antibody using scalar function (no matrix allocation) + const AffinityType at = parse_affinity_type(affinity_type); + const double f_mutated = affinity_scalar_cpp( + x_i, mutated, at, aff_alpha, aff_c, aff_p + ); + + if (std::isfinite(f_mutated) && f_mutated > aff) { + A_out.row(j) = mutated; + } + } + } + + return Rcpp::List::create( + Rcpp::Named("A") = A_out, + Rcpp::Named("m1_state") = m1_state, + Rcpp::Named("m2_state") = m2_state + ); +} diff --git a/src/shm.o b/src/shm.o new file mode 100644 index 0000000..339d97f Binary files /dev/null and b/src/shm.o differ diff --git a/vignettes/articles/advanced-workflows.Rmd b/vignettes/articles/advanced-workflows.Rmd new file mode 100644 index 0000000..9001ffa --- /dev/null +++ b/vignettes/articles/advanced-workflows.Rmd @@ -0,0 +1,443 @@ +--- +title: "Advanced Tuning & Workflows" +author: + name: Nick Borcherding + email: ncborch@gmail.com + affiliation: Washington University in St. Louis, School of Medicine, St. Louis, MO, USA +date: "`r Sys.Date()`" +output: + BiocStyle::html_document: + toc_float: true +--- + +```{r, echo=FALSE, results="hide", message=FALSE} +knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE, tidy = FALSE) +library(bHIVE) +library(ggplot2) +library(viridis) +``` + +# Introduction + +This vignette covers advanced usage patterns for bHIVE: + +1. Hyperparameter tuning with `swarmbHIVE()` across tasks +2. Multilayer hierarchical refinement with `honeycombHIVE()` +3. Gradient-based prototype refinement with `refineB()` +4. Integration with the `caret` framework +5. Visualization with `visualizeHIVE()` + +# Hyperparameter Tuning with swarmbHIVE + +`swarmbHIVE()` performs grid search over bHIVE hyperparameters and evaluates +each combination using a task-specific metric. + +## Available Metrics + +| Task | Metrics | Direction | +|:-----|:--------|:----------| +| Clustering | `"silhouette"`, `"davies_bouldin"`, `"calinski_harabasz"` | Higher is better (except D-B) | +| Classification | `"accuracy"`, `"balanced_accuracy"`, `"f1"`, `"kappa"` | Higher is better | +| Regression | `"rmse"`, `"mae"`, `"r2"` | Lower is better (except R^2) | + +## Clustering Tuning + +```{r swarm-clustering, fig.width=7, fig.height=4} +data(iris) +X <- as.matrix(iris[, 1:4]) + +grid <- expand.grid( + nAntibodies = c(10, 20, 30), + beta = c(3, 5), + epsilon = c(0.01, 0.05) +) + +set.seed(42) +tuning <- swarmbHIVE(X = X, task = "clustering", grid = grid, + metric = "silhouette", maxIter = 15, verbose = FALSE) + +ggplot(tuning$results, aes(x = nAntibodies, y = metric_value, + color = factor(beta))) + + geom_line() + + geom_point(aes(shape = factor(epsilon)), size = 3) + + labs(title = "Clustering: Silhouette Score", + x = "Number of Antibodies", y = "Silhouette", + color = "Beta", shape = "Epsilon") + + scale_color_viridis(discrete = TRUE) + + theme_minimal() +``` + +```{r swarm-best} +tuning$best_params +``` + +## Classification Tuning + +```{r swarm-classification} +y <- iris$Species +grid_cls <- expand.grid( + nAntibodies = c(15, 30, 50), + beta = c(3, 5, 10), + epsilon = c(0.01, 0.05) +) + +set.seed(42) +tuning_cls <- swarmbHIVE(X = X, y = y, task = "classification", + grid = grid_cls, metric = "accuracy", + maxIter = 15, verbose = FALSE) +tuning_cls$best_params +``` + +## Parallelization with BiocParallel + +`swarmbHIVE()` accepts a `BPPARAM` argument for parallel execution. This +is especially useful when the grid is large: + +```{r parallel-example, eval=FALSE} +library(BiocParallel) + +# Use 4 cores +tuning <- swarmbHIVE( + X = X, y = y, task = "classification", + grid = grid_cls, metric = "accuracy", + BPPARAM = MulticoreParam(4), + verbose = FALSE +) +``` + +# Multilayer Refinement with honeycombHIVE + +`honeycombHIVE()` runs bHIVE iteratively across multiple layers. Each layer +produces prototypes that become the input for the next layer, creating a +hierarchical compression of the data. + +## Basic Multilayer Clustering + +```{r honeycomb-clustering, fig.width=8, fig.height=4} +set.seed(42) +res <- honeycombHIVE(X = X, task = "clustering", + layers = 3, nAntibodies = 30, + beta = 5, epsilon = 0.05, + maxIter = 10, verbose = FALSE) + +# Each layer progressively refines cluster structure +layer_info <- sapply(res, function(r) { + c(n_prototypes = nrow(r$antibodies), + n_clusters = length(unique(r$membership))) +}) +layer_info +``` + +## Collapse Methods + +The `collapseMethod` parameter controls how clusters are summarized into +prototypes for the next layer: + +| Method | Description | +|:-------|:------------| +| `"centroid"` | Mean of cluster members (default) | +| `"medoid"` | Actual data point closest to center | +| `"median"` | Coordinate-wise median | + +```{r collapse-comparison} +set.seed(42) +res_centroid <- honeycombHIVE(X = X, task = "clustering", layers = 2, + nAntibodies = 20, collapseMethod = "centroid", + verbose = FALSE) +res_medoid <- honeycombHIVE(X = X, task = "clustering", layers = 2, + nAntibodies = 20, collapseMethod = "medoid", + verbose = FALSE) + +cat("Centroid clusters:", length(unique(res_centroid[[2]]$membership)), "\n") +cat("Medoid clusters: ", length(unique(res_medoid[[2]]$membership)), "\n") +``` + +## With Gradient Refinement + +Setting `refine = TRUE` applies gradient-based updates via `refineB()` +after each layer's bHIVE pass. This fine-tunes prototype positions +before collapsing. + +```{r honeycomb-refine} +library(MASS) +data(Boston) +X_bos <- as.matrix(Boston[, -14]) +y_bos <- Boston$medv + +set.seed(42) +res_plain <- honeycombHIVE(X = X_bos, y = y_bos, task = "regression", + layers = 3, nAntibodies = 50, verbose = FALSE) + +res_refined <- honeycombHIVE(X = X_bos, y = y_bos, task = "regression", + layers = 3, nAntibodies = 50, + refine = TRUE, refineOptimizer = "adam", + refineLoss = "mse", refineSteps = 5, + refineLR = 0.01, verbose = FALSE) + +cor_plain <- cor(res_plain[[3]]$predictions, y_bos) +cor_refined <- cor(res_refined[[3]]$predictions, y_bos) +cat("Correlation (plain): ", round(cor_plain, 3), "\n") +cat("Correlation (refined):", round(cor_refined, 3), "\n") +``` + +# Gradient Refinement with refineB + +`refineB()` takes trained antibody positions and fine-tunes them using +gradient-based optimization. It supports 5 optimizers and 8 loss functions. + +## Optimizers + +| Optimizer | Key Parameter | Notes | +|:----------|:--------------|:------| +| `"sgd"` | `lr` | Simple, good baseline | +| `"momentum"` | `momentum_coef` | Accelerates SGD | +| `"adagrad"` | -- | Per-parameter adaptive learning rates | +| `"adam"` | `beta1`, `beta2` | Best general-purpose optimizer | +| `"rmsprop"` | `rmsprop_decay` | Good for non-stationary objectives | + +## Loss Functions + +| Loss | Tasks | Description | +|:-----|:------|:------------| +| `"mse"` | All | Mean squared error | +| `"mae"` | All | Mean absolute error | +| `"categorical_crossentropy"` | Classification | Cross-entropy loss | +| `"binary_crossentropy"` | Classification | Binary cross-entropy | +| `"kullback_leibler"` | Classification | KL divergence | +| `"cosine"` | Classification | Cosine distance loss | +| `"poisson"` | Regression | Poisson deviance | +| `"huber"` | Regression | Huber loss (robust to outliers) | + +## Comparing Optimizers + +```{r refine-comparison, fig.width=8, fig.height=4} +X <- as.matrix(iris[, 1:4]) +y <- iris$Species + +set.seed(42) +res <- bHIVE(X, y, task = "classification", nAntibodies = 10, + beta = 5, epsilon = 0.05, initMethod = "random", + k = 4, verbose = FALSE) + +Ab <- res$antibodies +colnames(Ab) <- colnames(X) +assignments <- as.integer(factor(res$assignments, + levels = unique(res$assignments))) + +pca <- prcomp(X, scale. = TRUE) +X_pca <- pca$x[, 1:2] +A_orig_pca <- predict(pca, Ab) + +optimizers <- c("sgd", "adam", "rmsprop") +refined_list <- lapply(optimizers, function(opt) { + Ab_refined <- refineB(A = Ab, X = X, y = y, + assignments = assignments, + task = "classification", + loss = "categorical_crossentropy", + optimizer = opt, steps = 5, lr = 0.01, + verbose = FALSE) + colnames(Ab_refined) <- colnames(X) + A_ref_pca <- predict(pca, Ab_refined) + data.frame(optimizer = opt, + PC1_after = A_ref_pca[, 1], + PC2_after = A_ref_pca[, 2]) +}) + +refined_df <- do.call(rbind, refined_list) +refined_df$ID <- factor(rep(seq_len(nrow(Ab)), times = length(optimizers))) + +proto_df <- data.frame(ID = factor(seq_len(nrow(A_orig_pca))), + PC1_before = A_orig_pca[, 1], + PC2_before = A_orig_pca[, 2]) + +merged <- merge(proto_df, refined_df, by = "ID") + +ggplot() + + geom_point(data = data.frame(PC1 = X_pca[, 1], PC2 = X_pca[, 2], + Cluster = factor(assignments)), + aes(x = PC1, y = PC2, color = Cluster), alpha = 0.4) + + geom_point(data = proto_df, + aes(x = PC1_before, y = PC2_before), + color = "#440154", size = 4) + + geom_point(data = merged, + aes(x = PC1_after, y = PC2_after), + color = "#FDE725", size = 4) + + geom_segment(data = merged, + aes(x = PC1_before, y = PC2_before, + xend = PC1_after, yend = PC2_after), + arrow = arrow(length = unit(0.2, "cm"), type = "closed"), + color = "black", linewidth = 0.5) + + scale_color_viridis(discrete = TRUE) + + labs(title = "Prototype Movement by Optimizer", + subtitle = "Purple = before, Yellow = after refinement", + x = "PC1", y = "PC2") + + facet_wrap(~optimizer, nrow = 1, + labeller = as_labeller(c(sgd = "SGD", adam = "Adam", + rmsprop = "RMSProp"))) + + theme_minimal() +``` + +# caret Integration + +bHIVE provides two caret-compatible model objects for use with `train()`: + +- `bHIVEmodel` -- single-pass bHIVE with tuning over `nAntibodies`, + `beta`, and `epsilon` +- `honeycombHIVEmodel` -- multilayer bHIVE with additional tuning over + `layers`, `refineOptimizer`, `refineSteps`, and `refineLR` + +## Basic caret Workflow + +```{r caret-basic, fig.width=7, fig.height=5} +library(caret) + +data(Boston) +X_bos <- as.matrix(Boston[, -14]) +y_bos <- Boston$medv + +set.seed(42) +idx <- sample(nrow(X_bos), nrow(X_bos) * 0.7) +X_train <- X_bos[idx, ] +X_test <- X_bos[-idx, ] +y_train <- y_bos[idx] +y_test <- y_bos[-idx] + +ctrl <- trainControl(method = "cv", number = 3) + +model <- train( + x = X_train, y = y_train, + method = bHIVEmodel, + trControl = ctrl, + tuneGrid = expand.grid( + nAntibodies = c(10, 20), + beta = c(3, 5), + epsilon = c(0.01, 0.05) + ), + verbose = FALSE +) + +ggplot(model) + + labs(title = "caret Cross-Validation Results") + + scale_color_viridis(discrete = TRUE) + + theme_minimal() +``` + +## Prediction on Held-Out Data + +```{r caret-predict, fig.width=5, fig.height=4} +preds <- predict(model, newdata = X_test) + +ggplot(data.frame(Actual = y_test, Predicted = preds), + aes(x = Actual, y = Predicted)) + + geom_point(alpha = 0.6) + + geom_smooth(method = "lm", color = "steelblue", se = FALSE) + + labs(title = "caret bHIVE: Predicted vs Actual", + x = "Actual (medv)", y = "Predicted") + + theme_minimal() +``` + +## honeycombHIVE caret Model + +The multilayer caret model adds layer count and refinement parameters to +the tuning grid: + +```{r caret-honeycomb, eval=FALSE} +model_hc <- train( + x = X_train, y = y_train, + method = honeycombHIVEmodel, + trControl = trainControl(method = "cv", number = 3), + tuneGrid = expand.grid( + nAntibodies = c(20, 30), + beta = 5, + epsilon = 0.05, + layers = c(2, 3), + refineOptimizer = "adam", + refineSteps = 5, + refineLR = 0.01, + refineHuberDelta = 1.0 + ), + verbose = FALSE +) +``` + +# Visualization with visualizeHIVE + +The `visualizeHIVE()` function produces publication-ready ggplot2 figures +for bHIVE and honeycombHIVE results. + +## Plot Types + +| Type | Description | +|:-----|:------------| +| `"scatter"` | 2D scatter with optional dimensionality reduction (PCA, UMAP, t-SNE) | +| `"boxplot"` | Feature distributions by group with prototype overlay | +| `"violin"` | Violin plots with prototype markers | +| `"density"` | Density curves with prototype reference lines | + +## Scatter Plot with PCA + +```{r viz-scatter, fig.width=7, fig.height=5} +X <- as.matrix(iris[, 1:4]) + +set.seed(42) +res <- honeycombHIVE(X = X, task = "clustering", layers = 3, + nAntibodies = 30, beta = 5, epsilon = 0.05, + maxIter = 10, verbose = FALSE) + +visualizeHIVE(result = res, X = iris[, 1:4], + plot_type = "scatter", + transformation_method = "PCA", + title = "Clustering (Layer 2)", + layer = 2, task = "clustering") +``` + +## Violin Plot for Classification + +```{r viz-violin, fig.width=7, fig.height=4} +set.seed(42) +res_cls <- honeycombHIVE(X = X, y = iris$Species, + task = "classification", + layers = 2, nAntibodies = 15, + beta = 5, maxIter = 10, verbose = FALSE) + +visualizeHIVE(result = res_cls, X = iris[, 1:4], + plot_type = "violin", feature = "Petal.Length", + title = "Petal Length by Predicted Class", + layer = 1, task = "classification") +``` + +## Density Plot for Regression + +```{r viz-density, fig.width=7, fig.height=4} +set.seed(42) +X_scaled <- as.data.frame(scale(X)) +res_reg <- honeycombHIVE(X = X_scaled, + y = as.numeric(iris$Sepal.Length), + task = "regression", layers = 2, + nAntibodies = 12, beta = 5, + maxIter = 10, verbose = FALSE) + +visualizeHIVE(result = res_reg, X = X_scaled, + plot_type = "density", feature = "Petal.Width", + title = "Density: Petal Width", + layer = 2, task = "regression") +``` + +# Combining Modules with honeycombHIVE + +The R6 API with modules and the functional `honeycombHIVE()` API serve +different use cases. For maximum control, use `AINet` directly with modules +for single-pass analysis. For hierarchical refinement, use `honeycombHIVE()` +with its built-in layer management and gradient refinement. + +A typical advanced workflow: + +1. **Explore** with `bHIVE()` to establish baseline performance +2. **Tune** with `swarmbHIVE()` to find good hyperparameters +3. **Compose** modules via `AINet` to add biological mechanisms +4. **Layer** with `honeycombHIVE()` for hierarchical refinement +5. **Visualize** with `visualizeHIVE()` to understand results + +```{r} +sessionInfo() +``` diff --git a/vignettes/articles/foundations.Rmd b/vignettes/articles/foundations.Rmd new file mode 100644 index 0000000..eb363eb --- /dev/null +++ b/vignettes/articles/foundations.Rmd @@ -0,0 +1,649 @@ +--- +title: "Algorithm & Biological Foundations" +author: + name: Nick Borcherding + email: ncborch@gmail.com + affiliation: Washington University in St. Louis, School of Medicine, St. Louis, MO, USA +date: "`r Sys.Date()`" +output: + BiocStyle::html_document: + toc_float: true +--- + +```{r, echo=FALSE, results="hide", message=FALSE} +knitr::opts_chunk$set(echo=FALSE, message=FALSE, warning=FALSE, tidy=FALSE, + fig.width=7, fig.height=5) +library(bHIVE) +library(ggplot2) +library(viridis) +``` + +# Overview + +This article describes the mathematical formulation and biological +motivation for every component of bHIVE. It is intended as a reference +companion to the tutorial vignettes and is organized to mirror the +algorithm's execution flow: initialization, affinity computation, clonal +selection, mutation, regulation, and assignment. + +Throughout, we use the following notation: + +| Symbol | Meaning | +|:-------|:--------| +| $\mathbf{X} \in \mathbb{R}^{n \times d}$ | Data matrix ($n$ observations, $d$ features) | +| $\mathbf{A} \in \mathbb{R}^{m \times d}$ | Antibody (prototype) matrix ($m$ antibodies) | +| $\mathbf{x}_i$ | The $i$-th data point (row of $\mathbf{X}$) | +| $\mathbf{a}_j$ | The $j$-th antibody (row of $\mathbf{A}$) | +| $f(\mathbf{x}, \mathbf{a})$ | Affinity between a data point and an antibody | +| $d(\mathbf{x}, \mathbf{a})$ | Distance between a data point and an antibody | + +--- + +# Shape Space and Affinity + +## Biological context + +In immunology, **shape space** (Perelson & Oster 1979) is the abstract +space in which an antibody's binding site (paratope) and an antigen's +epitope are represented as points. Affinity is a function of distance in +this space, with closer shapes bind more tightly. The shape space framework +provides the geometric foundation for all AIS algorithms: data points are +antigens, prototypes are antibodies, and learning is the process of moving +antibodies through shape space to maximize affinity to the data. + +## Affinity kernels + +bHIVE computes an $n \times m$ affinity matrix $\mathbf{F}$ where +$F_{ij} = f(\mathbf{x}_i, \mathbf{a}_j)$. Five kernels are available: + +### Gaussian (RBF) + +$$f_{\text{gauss}}(\mathbf{x}, \mathbf{a}) = \exp\!\bigl(-\alpha \|\mathbf{x} - \mathbf{a}\|^2\bigr)$$ + +The default kernel. The bandwidth parameter $\alpha$ controls how quickly +affinity decays with distance. Larger $\alpha$ produces sharper, more +localized affinity peaks. + +### Laplace + +$$f_{\text{lap}}(\mathbf{x}, \mathbf{a}) = \exp\!\bigl(-\alpha \|\mathbf{x} - \mathbf{a}\|\bigr)$$ + +Similar to Gaussian but decays with L1-like (linear) distance rather than +squared distance. Produces heavier tails, making it more robust to +outliers. + +### Polynomial + +$$f_{\text{poly}}(\mathbf{x}, \mathbf{a}) = (\mathbf{x} \cdot \mathbf{a} + c)^p$$ + +A dot-product kernel where $c$ is a bias term and $p$ is the polynomial +degree. Captures non-Euclidean similarity structure. Useful when the +relevant signal is in the angle and magnitude of feature vectors rather +than their Euclidean distance. + +### Cosine + +$$f_{\text{cos}}(\mathbf{x}, \mathbf{a}) = \frac{\mathbf{x} \cdot \mathbf{a}}{\|\mathbf{x}\| \|\mathbf{a}\| + \epsilon}$$ + +Direction-based similarity, invariant to vector magnitude. The small +$\epsilon = 10^{-12}$ prevents division by zero. Appropriate when feature +magnitudes are uninformative (e.g., normalized expression profiles). + +### Hamming + +$$f_{\text{ham}}(\mathbf{x}, \mathbf{a}) = 1 - \frac{1}{d}\sum_{k=1}^d \mathbf{1}[x_k \neq a_k]$$ + +Proportion of matching features. Used for categorical or binary data. + +## Distance functions + +For clustering assignment and network suppression, bHIVE uses distance +rather than affinity. Six metrics are available: + +| Distance | Formula | Notes | +|:---------|:--------|:------| +| Euclidean | $\sqrt{\sum_k (x_k - a_k)^2}$ | Default; L2 norm | +| Manhattan | $\sum_k |x_k - a_k|$ | L1 norm; robust to outliers | +| Minkowski | $\bigl(\sum_k |x_k - a_k|^p\bigr)^{1/p}$ | Generalized; $p=2$ delegates to Euclidean | +| Cosine | $1 - f_{\text{cos}}(\mathbf{x}, \mathbf{a})$ | Angular distance | +| Mahalanobis | $\sqrt{(\mathbf{x}-\mathbf{a})^\top \Sigma^{-1} (\mathbf{x}-\mathbf{a})}$ | Accounts for feature covariance | +| Hamming | $\frac{1}{d}\sum_k \mathbf{1}[x_k \neq a_k]$ | Categorical features | + +--- + +# Initialization + +## Standard methods + +bHIVE provides four initialization strategies for the starting antibody +population: + +| Method | Algorithm | When to use | +|:-------|:----------|:------------| +| `"sample"` | Random rows from $\mathbf{X}$ | Default; fast, data-representative | +| `"random"` | $\mathbf{a} \sim \mathcal{N}(\bar{\mathbf{x}}, \text{diag}(\sigma^2_k))$ | When you want diversity beyond data support | +| `"random_uniform"` | $a_k \sim U(\min_i x_{ik}, \max_i x_{ik})$ | Uniform coverage of feature ranges | +| `"kmeans++"` | D2-weighted sampling | Better coverage; $O(nmd)$ cost | + +The **kmeans++** initialization (Arthur & Vassilvitskii 2007) selects the +first antibody uniformly at random, then each subsequent antibody with +probability proportional to its squared distance to the nearest existing +antibody: $P(\mathbf{x}_i) \propto \min_{j < \text{current}} \|\mathbf{x}_i - \mathbf{a}_j\|^2$. + +## V(D)J Gene Library (`VDJLibrary`) + +### Biological context + +Real antibody diversity is generated by **V(D)J recombination**: the +variable region of an immunoglobulin gene is assembled by randomly +combining one V (variable), one D (diversity), and one J (joining) gene +segment from a germline library. This combinatorial mechanism generates +approximately $10^{11}$ distinct antibodies from only $\sim$300 gene +segments. + +### Algorithm + +`VDJLibrary` translates this to feature space: + +1. **Segment the feature space** into three groups (V, D, J) by splitting + the $d$ dimensions. Three methods are available: + - **PCA**: The first $\lfloor d/3 \rfloor$ principal components form V, + the next form D, the remainder form J + - **Cluster**: k-means clustering within each dimension group creates + alleles + - **Random partition**: Dimensions randomly assigned to V, D, J + +2. **Create alleles** for each segment by clustering the data projected + onto that segment's dimensions (using k-means with `nV`, `nD`, `nJ` + centers respectively) + +3. **Generate antibodies** by combinatorial sampling: select one V allele, + one D allele, and one J allele, then concatenate to form a complete + $d$-dimensional antibody vector + +This produces structured coverage of the data manifold with $n_V \times +n_D \times n_J$ potential combinations, analogous to the combinatorial +diversity of the real immune system. + +--- + +# The Core Loop: Clonal Selection + +## Biological context + +**Clonal selection theory** (Burnet 1959) states that when an antigen +enters the body, the B cells whose receptors best match that antigen are +selected to proliferate (clonal expansion) and undergo mutation (somatic +hypermutation). The result is an iterative refinement process: the +population of antibodies evolves toward higher affinity for the antigen. + +This is the mechanism that de Castro & Von Zuben (2001) formalized in the +AI-Net algorithm and that bHIVE implements with C++ acceleration. + +## Algorithm + +For each iteration $t = 1, \ldots, T$: + +**Step 1: Affinity computation.** Compute the $n \times m$ affinity matrix +$\mathbf{F}$ using the chosen kernel. This is the most expensive step and +is dispatched to BLAS via RcppArmadillo. + +**Step 2: Clonal expansion and mutation.** For each data point +$\mathbf{x}_i$: + +1. Select the $k$ antibodies with highest affinity: + $\mathcal{K}_i = \text{top-}k_j\, f(\mathbf{x}_i, \mathbf{a}_j)$ + +2. For each selected antibody $\mathbf{a}_j \in \mathcal{K}_i$, generate + $n_c$ clones where: + $$n_c = \text{round}\!\left(\beta \cdot f(\mathbf{x}_i, \mathbf{a}_j)\right), \quad n_c \leq \texttt{maxClones}$$ + +3. Mutate each clone: + $$\mathbf{a}' = \mathbf{a}_j + \boldsymbol{\epsilon}$$ + where $\boldsymbol{\epsilon}$ is drawn from the active SHM strategy + (see below). If the mutant has higher affinity than the parent, it + replaces the parent. + +**Step 3: Network regulation.** Suppress redundant antibodies (see +Idiotypic Network section) or apply simple $\epsilon$-threshold +suppression: remove $\mathbf{a}_j$ if +$d(\mathbf{a}_i, \mathbf{a}_j) < \epsilon$ for any $i < j$. + +**Step 4: Task-specific assignment.** + +- **Clustering**: each data point assigned to the nearest antibody by + distance +- **Classification**: each antibody labeled by majority vote of its + assigned data points; predictions via affinity-weighted nearest antibody +- **Regression**: each antibody stores the mean target value of its + assigned points; predictions via affinity-weighted interpolation + +**Step 5: Convergence check.** Early stopping if the antibody population +size has not changed by more than `stopTolerance` for +`noImprovementLimit` consecutive iterations. + +--- + +# Somatic Hypermutation (SHM) + +## Biological context + +**Somatic hypermutation** is the process by which activated B cells +introduce point mutations into their antibody genes at a rate approximately +$10^6$ times higher than the background mutation rate. The enzyme +activation-induced cytidine deaminase (AID) preferentially targets WRCY/RGYW +DNA motifs (hotspots), creating a non-uniform mutation landscape. Mutations +that improve antigen binding are selected for; those that reduce it lead to +cell death. Over multiple rounds of mutation and selection (affinity +maturation), the antibody population converges on high-affinity binders. + +## Five strategies + +### 1. Uniform + +$$\mathbf{a}' = \mathbf{a} + \mathcal{N}\!\left(0,\; (1 - f) \cdot \gamma^{t-1}\right)$$ + +where $f$ is the affinity of the parent antibody, $\gamma$ is +`mutationDecay`, and $t$ is the iteration. The mutation rate decreases +with both affinity (better antibodies mutate less) and time (the population +stabilizes). + +**When to use:** Default baseline. Simple and robust. + +### 2. AIRS (Affinity-Proportional) + +$$\text{rate} = c \cdot \exp(-f / T)$$ + +where $c$ is a scaling constant (`c_rate`) and $T$ is a temperature +parameter. From Watkins & Timmis (2004), this achieves approximately 50% +better data reduction than uniform mutation by concentrating mutation on +low-affinity antibodies. + +**When to use:** When you want the mutation rate to be strictly controlled +by affinity, with a tunable temperature. + +### 3. Hotspot (Feature-Weighted) + +$$\epsilon_k = \text{base\_rate} \cdot |g_k| \cdot \mathcal{N}(0, 1)$$ + +where $g_k = x_k - a_k$ is the per-feature "gradient" toward the matched +data point. Features with larger discrepancies mutate more, analogous to +AID targeting specific DNA motifs. + +**When to use:** When some features are more informative than others and +you want the algorithm to discover this automatically. + +### 4. Energy-Budget + +$$\|\boldsymbol{\epsilon}\|^2 \leq E_0 \cdot (1 - f)^2$$ + +The total mutation magnitude is bounded by an energy budget that shrinks +as affinity increases. Inspired by Kleinstein's observation that the +energetic cost of SHM scales quadratically with the number of mutations +($E_{\text{SHM}} \sim N_{\text{mut}}^2$). Individual feature perturbations +are sampled uniformly, then rescaled to satisfy the budget constraint. + +**When to use:** When you want hard bounds on how much any single +mutation event can perturb an antibody. + +### 5. Adaptive (Adam-Like) + +Implementation of a convergence hypothesis. For each feature $k$ of each antibody: + +$$g_k = x_k - a_k \quad \text{(gradient toward matched antigen)}$$ + +$$m_k \leftarrow \beta_1 \, m_k + (1 - \beta_1)\, g_k \quad \text{(first moment)}$$ + +$$v_k \leftarrow \beta_2 \, v_k + (1 - \beta_2)\, g_k^2 \quad \text{(second moment)}$$ + +$$\epsilon_k = \text{base\_rate} \cdot \frac{m_k}{\sqrt{v_k} + \epsilon_{\text{adam}}}$$ + +This is identical to the Adam optimizer (Kingma & Ba 2015) applied +per-feature. The biological interpretation: $m_k$ represents the cell's +"memory" of which direction to mutate (accumulated selection pressure), +while $v_k$ tracks the variance of past selection signals (stability of +the gradient). Features with consistent directional pressure get larger +mutations; features with noisy signals are damped. + +**When to use:** Complex landscapes where different features have +different scales and signal-to-noise ratios. Best general-purpose choice. + +--- + +# Idiotypic Network Regulation + +## Biological context + +Jerne's **idiotypic network theory** (1974) proposes that antibodies +don't just recognize antigens, but also recognize each other. The +variable region of one antibody (its idiotype) can serve as an epitope +for another antibody (an anti-idiotype). This creates a regulatory network +with emergent properties: memory, tolerance, and self-organized repertoire +structure. + +Varela & Coutinho (1991) formalized this as a "second generation" immune +network with a **bell-shaped activation function**: below a lower +threshold $\theta_{\text{low}}$, insufficient stimulation leads to cell +death (neglect); between thresholds, moderate stimulation leads to +activation; above $\theta_{\text{high}}$, excessive stimulation leads to +suppression. + +This is biologically more realistic than the simple $\epsilon$-threshold +suppression used in classical AIS, and it produces qualitatively different +repertoire dynamics. + +## Algorithm + +Given the antibody matrix $\mathbf{A}$ with $m$ rows: + +**Step 1:** Compute the $m \times m$ pairwise affinity matrix +$\mathbf{S}$ where $S_{ij} = f(\mathbf{a}_i, \mathbf{a}_j)$. + +**Step 2:** For each pair $(i, j)$, compute the activation signal: + +$$h_{ij} = \begin{cases} +-1 & \text{if } S_{ij} < \theta_{\text{low}} \quad \text{(death signal)} \\ ++1 & \text{if } \theta_{\text{low}} \leq S_{ij} \leq \theta_{\text{high}} \quad \text{(activation)} \\ +-1 & \text{if } S_{ij} > \theta_{\text{high}} \quad \text{(suppression)} +\end{cases}$$ + +**Step 3:** Euler-integrate the population dynamics ODE for $T$ steps +with step size $\Delta t$: + +$$\frac{dA_i}{dt} = s - \delta \cdot A_i + A_i \sum_{j: h_{ij} > 0} S_{ij} - A_i \sum_{j: h_{ij} < 0} S_{ij}$$ + +where: +- $s$ = `source_rate` (basal production of new cells) +- $\delta$ = `decay_rate` (natural death) +- The activation and suppression terms are weighted by the actual affinity values + +**Step 4:** Remove antibodies whose population level $A_i$ falls below +`survival_threshold`. + +### Parameter guidance + +| Parameter | Default | Effect of increasing | +|:----------|:--------|:---------------------| +| `theta_low` | 0.01 | More antibodies die from neglect | +| `theta_high` | 0.5 | Narrower activation window; more suppression | +| `source_rate` | 0.5 | More new cells enter; larger final repertoire | +| `decay_rate` | 0.1 | Faster natural death; smaller repertoire | +| `dt` | 0.1 | Larger integration steps (less stable if too large) | +| `timeSteps` | 20 | Longer simulation; more time for dynamics to settle | +| `survival_threshold` | 0.5 | Stricter survival; fewer antibodies remain | + +--- + +# Germinal Center Selection + +## Biological context + +The **germinal center** (GC) is a specialized microstructure within lymph +nodes where B cells undergo iterative rounds of mutation (in the dark zone) +and selection (in the light zone). In the light zone, B cells compete for +help from **T follicular helper (Tfh) cells**. A Tfh cell evaluates the +quality of the B cell's antigen presentation and provides survival signals +only to the best competitors. B cells that fail to receive Tfh help undergo +apoptosis. + +This is a quality-control bottleneck: it ensures that only improved +antibodies survive each round. + +## Algorithm + +The `GerminalCenter` module implements this as resource competition: + +1. Compute a **quality score** $q_j$ for each antibody $\mathbf{a}_j$. + The scoring is task-aware: + - **Classification**: $q_j$ = proportion of assigned data points whose + label matches the antibody's majority label + - **Regression**: $q_j$ = $1 / (1 + \text{MSE}_j)$ where MSE is computed + over assigned data points + - **Clustering**: $q_j$ = mean affinity to assigned data points (cohesion) + +2. Apply selection pressure $p \in [0, 1]$ to compute a survival threshold: + $$\tau = q_{\min} + p \cdot (q_{\max} - q_{\min})$$ + +3. For each of `nTfh` helper cells, the Tfh selects the antibody with + highest quality among those not yet helped. Antibodies below the + threshold $\tau$ that do not receive Tfh help are removed. + +4. Repeat for `rounds` cycles. + +### Parameter guidance + +| Parameter | Default | Effect | +|:----------|:--------|:-------| +| `nTfh` | 10 | Determines maximum survivors per round | +| `selectionPressure` | 0.5 | 0 = everything survives; 1 = only the best | +| `rounds` | 1 | Multiple rounds compound the selection effect | + +--- + +# Microenvironment + +## Biological context + +B cell fate is strongly influenced by the **tissue microenvironment**: +chemokine gradients direct migration, cytokines influence differentiation, +and local cell density affects competition for resources. In the germinal +center, the dark zone (proliferation/mutation) and light zone (selection) +create spatially distinct functional regions. + +## Algorithm + +The `Microenvironment` module classifies each antibody's local context by +computing kernel density estimates from the data: + +$$\rho_j = \frac{1}{n} \sum_{i=1}^n f(\mathbf{x}_i, \mathbf{a}_j)$$ + +Based on the percentile of $\rho_j$ within the antibody population: + +| Zone | Condition | Mutation modifier | Biological analog | +|:-----|:----------|:------------------|:------------------| +| **Stable** | $\rho_j > P_{\text{high}}$ | $\times 0.3$ (reduce) | Light zone; protect good solutions | +| **Explore** | $\rho_j < P_{\text{low}}$ | $\times 2.0$ (increase) | Dark zone; search sparse regions | +| **Boundary** | otherwise | $\times 1.0$ (no change) | Transitional; potential class switch | + +where $P_{\text{high}}$ and $P_{\text{low}}$ are the `high_density_threshold` +and `low_density_threshold` percentiles, respectively. + +### Chemokine-like gradients + +The module also computes a directional gradient for each antibody, +pointing toward the local center of mass of nearby data: + +$$\mathbf{g}_j = \frac{\sum_i f(\mathbf{x}_i, \mathbf{a}_j) \cdot \mathbf{x}_i}{\sum_i f(\mathbf{x}_i, \mathbf{a}_j)} - \mathbf{a}_j$$ + +This gradient can bias mutation direction toward higher-density regions, +analogous to chemokine-directed B cell migration. + +--- + +# Two-Signal Activation Gate + +## Biological context + +In real immunity, B cell activation requires **two signals**: + +- **Signal 1**: Specific antigen recognition by the B cell receptor + (affinity-dependent) +- **Signal 2**: Costimulatory signals from helper T cells, danger signals + (DAMPs/PAMPs), or cytokines + +This **two-signal model** (Bretscher & Cohn 1970, formalized for AIS by +Freitas 2006) prevents autoimmune activation: a B cell that binds +self-antigen without costimulation is anergized (silenced) rather than +activated. It serves as a biologically-principled regularization mechanism. + +## Algorithm + +An antibody-antigen interaction $(i, j)$ is activated only if both signals +exceed their thresholds: + +$$\text{activated}_{ij} = \mathbf{1}\!\left[f(\mathbf{x}_i, \mathbf{a}_j) > \tau_1\right] \;\wedge\; \mathbf{1}\!\left[\text{Signal2}_j > \tau_2\right]$$ + +Three options for Signal 2: + +| Type | Computation | Use case | +|:-----|:------------|:---------| +| `"density"` | Local data density around $\mathbf{a}_j$ | Default; prevents activation in sparse regions | +| `"danger"` | User-provided per-data-point danger scores | When you have external quality annotations | +| `"entropy"` | Local label entropy among $\mathbf{a}_j$'s neighbors | Classification; high entropy = uncertain region | + +--- + +# Class Switching + +## Biological context + +During an immune response, B cells can **switch** the constant region of +their antibody (isotype) while retaining the same antigen-binding variable +region. The functional consequence is a change in effector properties: + +- **IgM**: Pentameric; broad, low-affinity binding. First responder. +- **IgG**: Monomeric; high-affinity, highly specific. Dominant in + secondary responses. +- **IgA**: Dimeric; secreted at mucosal surfaces. Boundary patrol. + +## Algorithm + +In bHIVE, class switching changes the effective **kernel width** of the +affinity function, modifying the matching breadth without changing the +antibody's position in feature space: + +| Isotype | $\alpha$ | Matching behavior | +|:--------|:---------|:------------------| +| IgM | 0.1 (broad) | Large receptive field; captures general patterns | +| IgG | 5.0 (narrow) | Small receptive field; fine-grained discrimination | +| IgA | 1.0 (medium) | Intermediate; boundary patrol | + +Switching rules are driven by the `Microenvironment` zone assignments: + +- **Stable zone** $\to$ IgG: data-dense regions benefit from specific matching +- **Explore zone** $\to$ IgM: sparse regions need broad coverage to find signal +- **Boundary zone** $\to$ IgA: intermediate regions need balanced matching + +--- + +# Memory Pool + +## Biological context + +After an immune response resolves, a subset of activated B cells +differentiate into long-lived **memory B cells** that persist for years. +Upon re-encounter with the same antigen, memory cells mount a faster and +stronger secondary response. + +## Algorithm + +The `MemoryPool` module maintains an archive of high-performing antibodies: + +**Archive**: After each iteration, antibodies whose mean affinity to +their assigned data exceeds `archive_threshold` are copied into the memory +pool, up to `max_memory` total cells. If the pool is full, the lowest- +affinity memories are evicted. + +**Recall**: When new data arrive (or when the data distribution shifts), +memory antibodies whose affinity to the new data exceeds +`recall_threshold` are reintroduced into the active repertoire. This +provides a warm start for the next round of adaptation. + +--- + +# Convergent Selection + +## Biological context + +In adaptive immunology, certain TCR/BCR sequences appear in multiple +unrelated individuals responding to the same pathogen. These **public +clonotypes** are driven by convergent selection: the structure of the +antigen imposes such strong selective pressure that independent immune +systems arrive at the same solution. + +## Algorithm + +The `ConvergentSelector` module finds public antibodies across multiple +independent bHIVE runs: + +1. Run $R$ independent bHIVE analyses (with different random seeds) on the + same data +2. For each antibody from run 1, check how many other runs contain an + antibody within distance `tolerance` +3. Antibodies that appear in $\geq$ `min_appearances` runs are declared + **public** and retained as consensus prototypes + +This implements a biologically-motivated ensemble: rather than averaging +predictions, it identifies the prototypes that multiple independent +optimization trajectories converge on. + +--- + +# Parameter Guidance + +The tables below provide practical starting points and tuning advice for +the most important parameters. These are rules of thumb -- the optimal +settings depend on your data. + +## Core Parameters + +| Parameter | Default | Start here | Tune if... | +|:----------|:--------|:-----------|:-----------| +| `nAntibodies` | 20 | $\sqrt{n}$ to $2\sqrt{n}$ | Underfitting (too few) or slow (too many) | +| `beta` | 5 | 3--10 | Low affinity at convergence (increase) | +| `epsilon` | 0.01 | 0.01--0.1 | Too many or too few final antibodies | +| `maxIter` | 50 | 20--100 | Not converged (increase) or slow (decrease) | +| `k` | 3 | 2--5 | Underfitting (increase) or overfitting (decrease) | +| `affinityFunc` | `"gaussian"` | `"gaussian"` | Non-Euclidean data (try cosine, polynomial) | +| `alpha` | 1 | $1/(2\sigma^2_{\text{data}})$ | All affinities $\approx 0$ (decrease) or $\approx 1$ (increase) | + +## SHM Parameters + +| Parameter | Default | When to change | +|:----------|:--------|:---------------| +| `method` | `"uniform"` | Try `"adaptive"` for complex landscapes | +| `decay` | 1.0 | Set $<1$ if population oscillates and won't converge | +| `mutationMin` | 0.01 | Increase if algorithm stalls at local optima | +| `temperature` | 0.5 (airs) | Lower = less mutation on high-affinity antibodies | +| `base_rate` | 0.1 (adaptive) | Scale with feature magnitudes | + +## Module Interactions + +| Combination | Effect | Recommendation | +|:------------|:-------|:---------------| +| SHM adaptive + Microenvironment | Zone-dependent adaptive mutation rates | Strong synergy; recommended for complex data | +| Idiotypic + GerminalCenter | Network regulation + quality selection | Complementary; idiotypic controls diversity, GC controls quality | +| VDJLibrary + ActivationGate | Structured init + regularized activation | Good for high-dimensional data | +| ClassSwitcher + Microenvironment | Zone-driven kernel width changes | Requires Microenvironment to define zones | + +--- + +# References + +- Arthur, D. & Vassilvitskii, S. (2007). k-means++: The advantages of + careful seeding. *SODA*. +- Bretscher, P. & Cohn, M. (1970). A theory of self-nonself discrimination. + *Science*, 169, 1042--1049. +- Burnet, F.M. (1959). *The Clonal Selection Theory of Acquired Immunity*. + Cambridge University Press. +- de Castro, L.N. & Von Zuben, F.J. (2001). aiNet: Artificial immune + network for data analysis. In *Data Mining: A Heuristic Approach*, 231--260. +- de Castro, L.N. & Timmis, J. (2003). Artificial immune systems as a + novel soft computing paradigm. *Soft Computing*, 7, 526--544. +- Freitas, A.A. (2006). *Immunoinformatics*. Springer. +- Jerne, N.K. (1974). Towards a network theory of the immune system. + *Annales d'Immunologie (Institut Pasteur)*, 125C, 373--389. +- Kingma, D.P. & Ba, J. (2015). Adam: A method for stochastic optimization. + *ICLR*. +- Perelson, A.S. & Oster, G.F. (1979). Theoretical studies of clonal + selection: Minimal antibody repertoire size and reliability of self-non-self + discrimination. *Journal of Theoretical Biology*, 81, 645--670. +- Varela, F.J. & Coutinho, A. (1991). Second generation immune networks. + *Immunology Today*, 12, 159--166. +- Watkins, A. & Timmis, J. (2004). Artificial immune recognition system + (AIRS): An immune-inspired supervised learning algorithm. *Genetic + Programming and Evolvable Machines*, 5, 291--317. + +```{r} +sessionInfo() +``` diff --git a/vignettes/articles/immune-modules.Rmd b/vignettes/articles/immune-modules.Rmd new file mode 100644 index 0000000..5339c96 --- /dev/null +++ b/vignettes/articles/immune-modules.Rmd @@ -0,0 +1,320 @@ +--- +title: "Composing Immune Modules" +author: + name: Nick Borcherding + email: ncborch@gmail.com + affiliation: Washington University in St. Louis, School of Medicine, St. Louis, MO, USA +date: "`r Sys.Date()`" +output: + BiocStyle::html_document: + toc_float: true +--- + +```{r, echo=FALSE, results="hide", message=FALSE} +knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE, tidy = FALSE) +library(bHIVE) +library(ggplot2) +library(viridis) +``` + +# Introduction + +The bHIVE package provides two equivalent interfaces: a **functional API** +(`bHIVE()`) for quick, one-liner analysis and an **R6 class API** (`AINet`) +for full compositional control. This vignette focuses on the R6 API and +the 9 immune modules that can be injected into `AINet` to customize every +stage of the algorithm. + +Each module is an independent R6 class that encapsulates a single +immunological mechanism. You create modules separately and pass them to +`AINet$new()`, allowing you to mix and match any combination. + +```{r module-overview, eval=FALSE} +model <- AINet$new( + nAntibodies = 20, + shm = SHMEngine$new(method = "adaptive"), + idiotypic = IdiotypicNetwork$new(), + germinalCenter = GerminalCenter$new(), + microenvironment = Microenvironment$new(), + init = VDJLibrary$new(method = "pca"), + activation = ActivationGate$new(), + memory = MemoryPool$new() +) +``` + +# Somatic Hypermutation Engine + +The `SHMEngine` controls how antibodies mutate after cloning. Five +strategies are available, each grounded in a different aspect of SHM biology: + +| Method | Biological Analogy | When to Use | +|:-------|:-------------------|:------------| +| `"uniform"` | Random point mutations | Baseline; simple and robust | +| `"airs"` | Affinity-proportional mutation (AIRS) | When high-affinity antibodies should explore less | +| `"hotspot"` | AID targets WRCY motifs | When some features matter more than others | +| `"energy"` | E_SHM ~ N_Mut^2 budget constraint | When you want bounded total perturbation | +| `"adaptive"` | Per-feature Adam-like moment tracking | Best for complex landscapes; most novel | + +The **adaptive** method is the most distinctive in which somatic hypermutation and the Adam optimizer function the same. Each feature dimension maintains running mean and variance of past gradients. + +```{r shm-comparison, fig.width=8, fig.height=4} +data(iris) +X <- as.matrix(iris[, 1:4]) +y <- iris$Species + +methods <- c("uniform", "airs", "adaptive") +results <- lapply(methods, function(m) { + shm <- SHMEngine$new(method = m) + model <- AINet$new(nAntibodies = 15, + maxIter = 15, + shm = shm, + verbose = FALSE) + model$fit(X, y, task = "classification") + data.frame( + method = m, + accuracy = mean(model$result$assignments == as.character(y)), + n_ab = nrow(model$repertoire$as_matrix()) + ) +}) +do.call(rbind, results) +``` + +# Idiotypic Network Regulation + +In classical AIS, similar antibodies are suppressed via a simple distance +threshold (epsilon). The `IdiotypicNetwork` module replaces this with +principled network dynamics inspired by Varela & Coutinho's (1991) +second-generation immune network theory. + +The key insight is a **bell-shaped activation function**: too little +stimulation from other antibodies leads to death, moderate stimulation +leads to activation, and excessive stimulation leads to suppression. This +creates emergent self-organized repertoire structure. + +```{r idiotypic-demo} +# Standard epsilon suppression (default) +model_std <- AINet$new(nAntibodies = 25, maxIter = 15, verbose = FALSE) +model_std$fit(X, task = "clustering") + +# Idiotypic network regulation +model_idi <- AINet$new( + nAntibodies = 25, maxIter = 15, + idiotypic = IdiotypicNetwork$new( + theta_low = 0.01, + theta_high = 0.5, + source_rate = 0.5, + timeSteps = 20 + ), + verbose = FALSE +) +model_idi$fit(X, task = "clustering") + +cat("Standard suppression:", model_std$repertoire$size(), "antibodies\n") +cat("Idiotypic regulation:", model_idi$repertoire$size(), "antibodies\n") +``` + +The idiotypic network typically produces a more parsimonious repertoire +because regulation is based on the *network* of antibody-antibody +interactions rather than pairwise distance alone. + +# Germinal Center Selection + +The `GerminalCenter` module models the competition for T follicular helper +(Tfh) cell help that occurs in real germinal centers. Only antibodies that +receive Tfh help survive -- this implements quality-based selection pressure. + +The quality metric is task-aware: + +- **Classification**: proportion of correctly matched labels +- **Regression**: inverse prediction error +- **Clustering**: local density (sum of affinities to assigned data points) + +```{r gc-demo} +gc <- GerminalCenter$new( + nTfh = 10, # number of Tfh selectors + selectionPressure = 0.5, # 0 = no selection, 1 = very strict + rounds = 1 +) + +model_gc <- AINet$new( + nAntibodies = 30, + maxIter = 15, + germinalCenter = gc, + verbose = FALSE +) +model_gc$fit(X, y, task = "classification") +cat("Antibodies after GC selection:", model_gc$repertoire$size(), "\n") +cat("Accuracy:", mean(model_gc$result$assignments == as.character(y)), "\n") +``` + +# Microenvironment + +The `Microenvironment` module classifies the local context around each +antibody into three zones and adapts mutation rates accordingly: + +- **Stable** (high local density): reduce mutation rate to preserve + good antibodies +- **Explore** (low local density): increase mutation rate to search + sparse regions +- **Boundary** (intermediate): moderate mutation, potential trigger for + class switching + +```{r microenv-demo} +me <- Microenvironment$new( + high_density_threshold = 0.75, + low_density_threshold = 0.25, + stabilization_factor = 0.3, # reduce mutation to 30% in stable zones + exploration_factor = 2.0 # double mutation in exploration zones +) + +rep <- ImmuneRepertoire$new(X[sample(150, 15), ]) +env <- me$assess(rep, X) +table(env$zones) +``` + +# V(D)J Gene Library Initialization + +Instead of random sampling, `VDJLibrary` generates antibodies by +combinatorial assembly of gene segments, mimicking V(D)J recombination. +The feature space is split into three segments (V, D, J), each discretized +into alleles, and new antibodies are built by combining one allele from +each segment. + +```{r vdj-demo} +vdj <- VDJLibrary$new(nV = 5, + nD = 3, + nJ = 3, + method = "pca") +A <- vdj$generate(20, X) +dim(A) # 20 antibodies x 4 features +print(vdj) +``` + +Three decomposition methods are available: + +- `"pca"`: PCA components define gene segments (best for correlated features) +- `"cluster"`: k-means within dimension groups creates alleles +- `"random_partition"`: random feature grouping (simplest) + +# Two-Signal Activation Gate + +In real immunity, B cells require two signals to activate: antigen +recognition (Signal 1) *and* costimulatory context (Signal 2). The +`ActivationGate` module prevents spurious cloning on isolated outliers. + +Signal 2 options: + +- `"density"`: local data density around the antibody +- `"danger"`: user-provided danger score per data point +- `"entropy"`: local label entropy (classification only) + +```{r activation-demo} +gate <- ActivationGate$new( + signal2_type = "density", + threshold1 = 0.1, # minimum affinity (Signal 1) + threshold2 = 0.3 # density percentile (Signal 2) +) + +A <- X[sample(150, 10), ] +rep <- ImmuneRepertoire$new(A) +aff <- rep$affinity_matrix(X, "gaussian") +activated <- gate$evaluate(aff, X, A) +cat("Activated interactions:", sum(activated), "/", length(activated), "\n") +``` + +# Memory Pool + +The `MemoryPool` archives high-performing antibodies and recalls them +when relevant to current data. This is useful for streaming or +non-stationary data where distribution shifts may occur. + +```{r memory-demo} +mp <- MemoryPool$new( + archive_threshold = 0.01, # minimum average affinity to archive + max_memory = 50, + recall_threshold = 0.01 +) + +rep <- ImmuneRepertoire$new(X[sample(150, 10), ]) +n_archived <- mp$archive(rep, X) +cat("Archived:", n_archived, "memory cells\n") +cat("Pool size:", mp$size(), "\n") + +# Recall memories relevant to a subset +recalled <- mp$recall(X[1:30, ]) +cat("Recalled:", nrow(recalled), "cells for the query\n") +``` + +# Class Switcher + +The `ClassSwitcher` module changes the effective kernel width of antibodies +based on their microenvironment zone, analogous to real B cell isotype +switching: + +| Isotype | Kernel Width | Matching Style | +|:--------|:-------------|:---------------| +| IgM | alpha = 0.1 | Broad (exploration) | +| IgG | alpha = 5.0 | Specific (discrimination) | +| IgA | alpha = 1.0 | Intermediate (boundary patrol) | + +```{r classswitcher-demo} +cs <- ClassSwitcher$new(alpha_IgM = 0.1, alpha_IgG = 5.0, alpha_IgA = 1.0) + +rep <- ImmuneRepertoire$new(X[sample(150, 10), ]) +zones <- sample(c("stable", "explore", "boundary"), 10, replace = TRUE) +alphas <- cs$switch_isotypes(rep, zones) +data.frame(zone = zones, isotype = rep$metadata$isotype, alpha = alphas) +``` + +# Convergent Selection + +The `ConvergentSelector` identifies "public antibodies" -- antibodies that +appear across multiple independent bHIVE runs. This implements the +immunological concept of public clonotypes as a biologically-motivated +ensemble method. + +```{r convergent-demo} +# Run 3 independent bHIVE analyses +set.seed(42) +results <- lapply(1:3, function(i) { + m <- AINet$new(nAntibodies = 15, maxIter = 10, verbose = FALSE) + m$fit(X, task = "clustering") + m$result +}) + +conv <- ConvergentSelector$new(tolerance = 1.0, min_appearances = 2) +public <- conv$from_results(results) +cat("Public antibodies:", nrow(public), "\n") +``` + +# Putting It All Together + +Here is a full example combining multiple modules for a classification task: + +```{r full-example} +model <- AINet$new( + nAntibodies = 25, + maxIter = 20, + k = 3, + beta = 5, + shm = SHMEngine$new(method = "adaptive", base_rate = 0.1), + idiotypic = IdiotypicNetwork$new(theta_low = 0.01, theta_high = 0.5), + germinalCenter = GerminalCenter$new(nTfh = 8, selectionPressure = 0.4), + verbose = FALSE +) + +model$fit(X, y, task = "classification") + +cat("Final antibodies:", model$repertoire$size(), "\n") +cat("Accuracy:", mean(model$result$assignments == as.character(y)), "\n") +``` + +Not every module combination will improve every dataset. The modular design +lets you experiment with different biological mechanisms and find the +combination that works best for your data and task. + +# Session Information + +```{r} +sessionInfo() +``` diff --git a/vignettes/bHIVE.Rmd b/vignettes/bHIVE.Rmd index aea5047..0173b81 100644 --- a/vignettes/bHIVE.Rmd +++ b/vignettes/bHIVE.Rmd @@ -16,7 +16,7 @@ vignette: > --- ```{r, echo=FALSE, results="hide", message=FALSE} -knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) +knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE, tidy = FALSE) library(BiocStyle) library(bHIVE) library(ggplot2) @@ -61,8 +61,6 @@ The behavior of the `bHIVE` function can be fine-tuned using a range of hyperpar | `seed` | Random seed for reproducibility. | | `verbose` | Logical. If `TRUE`, prints progress messages for each iteration. | ---- - ### How `bHIVE` Works 1. **Affinity Calculation**: Measures the similarity between each data point and antibodies using a specified `affinityFunc`. @@ -176,7 +174,6 @@ table(Predicted = res$assignments, Actual = y) **Regression** is a supervised learning task where the goal is to predict continuous numeric outcomes. In this example, we use the Boston housing dataset to predict the median home value (medv) using `bHIVE`. ```{r tidy = FALSE} -if (!requireNamespace("MASS", quietly = TRUE)) install.packages("MASS") library(MASS) data(Boston) X <- as.matrix(Boston[, -14])