es-ude · LeoBuron · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -103,6 +103,66 @@ jobs:
       - name: Test
         run: ctest --preset unit_test_asan
 
+  c-bit-parity:
+    runs-on: ubuntu-latest
+    # Builds the v2 C examples, runs them with BIT_PARITY=1 and diffs their outputs against PyTorch.
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: sudo apt-get update && sudo apt-get install -y cmake ninja-build gcc
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      - name: Sync Python deps
+        run: uv sync
+      # Reference side: prepare the datasets, then train the PyTorch models.
+      - name: Prepare HAR data
+        run: uv run examples/har_classifier/prepare_data.py
+
+      - name: Prepare ECG data
+        run: uv run examples/ecg_anomaly_ae/prepare_data.py
+
+      - name: Train PyTorch HAR (produces reference predictions + weights)
+        run: uv run examples/har_classifier/train_pytorch.py
+
+      - name: Train PyTorch ECG (produces reference reconstructions + weights)
+        run: uv run examples/ecg_anomaly_ae/train_pytorch.py
+      # C side: configure the examples preset and build only the two v2 targets.
+      - name: Configure
+        run: cmake --preset examples
+
+      - name: Build v2 binaries
+        run: |
+          cmake --build --preset examples --target train_c_har_classifier_v2
+          cmake --build --preset examples --target train_c_ecg_anomaly_ae_v2
+      # BIT_PARITY=1 is passed through the environment of each v2 binary.
+      - name: Run HAR v2 in BIT_PARITY mode
+        run: BIT_PARITY=1 build/examples/examples/har_classifier_v2/train_c_har_classifier_v2
+
+      - name: Run ECG v2 in BIT_PARITY mode
+        run: BIT_PARITY=1 build/examples/examples/ecg_anomaly_ae_v2/train_c_ecg_anomaly_ae_v2
+      # compare_predictions.py exits 1 on mismatch, which fails the step.
+      - name: Diff HAR predictions (int32, exact match required)
+        run: |
+          uv run examples/_shared/compare_predictions.py \
+            --pytorch examples/har_classifier/outputs/pytorch_predictions.npy \
+            --c examples/har_classifier_v2/outputs/c_predictions.npy \
+            --dtype int32
+
+      - name: Diff ECG reconstructions (float32, allclose)
+        run: |
+          uv run examples/_shared/compare_predictions.py \
+            --pytorch examples/ecg_anomaly_ae/outputs/pytorch_reconstructions.npy \
+            --c examples/ecg_anomaly_ae_v2/outputs/c_reconstructions.npy \
+            --dtype float32 \
+            --rtol 1e-4 \
+            --atol 1e-5
+
   python-test:
     runs-on: ubuntu-latest
 

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -1,3 +1,5 @@
 add_subdirectory(_shared)
 add_subdirectory(har_classifier)
+add_subdirectory(har_classifier_v2)
 add_subdirectory(ecg_anomaly_ae)
+add_subdirectory(ecg_anomaly_ae_v2)
diff --git a/examples/_shared/compare_predictions.py b/examples/_shared/compare_predictions.py
@@ -0,0 +1,63 @@
+"""Compare C-side predictions/reconstructions against PyTorch reference outputs.
+
+Used by the bit-parity CI step. Exits 0 on match, 1 on mismatch.
+
+Usage:
+    uv run examples/_shared/compare_predictions.py \\
+        --pytorch <path-to-pytorch.npy> \\
+        --c <path-to-c.npy> \\
+        --dtype {int32,float32} \\
+        [--rtol 1e-4] [--atol 1e-5]
+"""
+
+import argparse
+import sys
+import numpy as np
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--pytorch", required=True, help="PyTorch reference .npy")
+    parser.add_argument("--c", required=True, help="C-side .npy")
+    parser.add_argument("--dtype", required=True, choices=["int32", "float32"])
+    parser.add_argument("--rtol", type=float, default=1e-4)
+    parser.add_argument("--atol", type=float, default=1e-5)
+    args = parser.parse_args()
+
+    py = np.load(args.pytorch)
+    c = np.load(args.c)
+
+    if py.shape != c.shape:
+        print(f"FAIL: shape mismatch — pytorch={py.shape}, c={c.shape}", file=sys.stderr)
+        return 1
+
+    if args.dtype == "int32":
+        if not np.array_equal(py, c):
+            mismatches = np.flatnonzero(py != c)
+            print(f"FAIL: int32 mismatch at {mismatches.size}/{py.size} positions",
+                  file=sys.stderr)
+            for idx in mismatches[:5]:
+                print(f"  idx={idx}: pytorch={py.flat[idx]}, c={c.flat[idx]}", file=sys.stderr)
+            return 1
+        print(f"PASS: int32 arrays bit-identical ({py.size} elements)")
+        return 0
+
+    # float32
+    if not np.allclose(py, c, rtol=args.rtol, atol=args.atol):
+        diffs = np.abs(py - c)
+        max_diff = diffs.max()
+        rel_diffs = diffs / (np.abs(py) + args.atol)
+        max_rel = rel_diffs.max()
+        print(f"FAIL: float32 mismatch — max_abs={max_diff:.6e}, "
+              f"max_rel={max_rel:.6e}, rtol={args.rtol}, atol={args.atol}", file=sys.stderr)
+        worst = np.argmax(diffs)
+        print(f"  worst idx={worst}: pytorch={py.flat[worst]:.6e}, c={c.flat[worst]:.6e}",
+              file=sys.stderr)
+        return 1
+    print(f"PASS: float32 arrays close (rtol={args.rtol}, atol={args.atol}, "
+          f"{py.size} elements)")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/examples/ecg_anomaly_ae/train_c.c b/examples/ecg_anomaly_ae/train_c.c
@@ -271,7 +271,7 @@ static void buildModel(layer_t **model) {
     parameter_t *e1_w =
         buildParam(XAVIER_UNIFORM, e1_w_data, e1_w_dims, 3, IN_CHANNELS * E1_K, E1_OUT * E1_K);
     parameter_t *e1_b = buildParam(ZEROS, e1_b_data, e1_b_dims, 1, 1, E1_OUT);
-    model[0] = conv1dLayerInit(e1_w, e1_b, e1k, q, q, q, q);
+    model[0] = conv1dLayerInitLegacy(e1_w, e1_b, e1k, q, q, q, q);
     model[1] = reluLayerInitLegacy(quantizationInitFloat(), quantizationInitFloat());
 
     /* Block P1: MaxPool1d(K=2, S=2). 70 → 35. */
@@ -283,8 +283,9 @@ static void buildModel(layer_t **model) {
     parameter_t *e2_w =
         buildParam(XAVIER_UNIFORM, e2_w_data, e2_w_dims, 3, E1_OUT * E2_K, E2_OUT * E2_K);
     parameter_t *e2_b = buildParam(ZEROS, e2_b_data, e2_b_dims, 1, 1, E2_OUT);
-    model[3] = conv1dLayerInit(e2_w, e2_b, e2k, quantizationInitFloat(), quantizationInitFloat(),
-                               quantizationInitFloat(), quantizationInitFloat());
+    model[3] =
+        conv1dLayerInitLegacy(e2_w, e2_b, e2k, quantizationInitFloat(), quantizationInitFloat(),
+                              quantizationInitFloat(), quantizationInitFloat());
     model[4] = reluLayerInitLegacy(quantizationInitFloat(), quantizationInitFloat());
 
     /* Block P2: AvgPool1d(K=5, S=5). 35 → 7 (bottleneck). */

diff --git a/examples/ecg_anomaly_ae/train_pytorch.py b/examples/ecg_anomaly_ae/train_pytorch.py
@@ -188,6 +188,33 @@ def main() -> None:
     np.save(OUTPUTS / "pytorch_train_recons.npy", pt_train_recons.astype(np.float32))
     print(f"FINAL test_loss={test_loss:.6f}", flush=True)
 
+    # Save per-layer weights for the C-side BIT_PARITY mode.
+    # C-side expects: examples/ecg_anomaly_ae/weights/<name>.{weight,bias}.npy
+    # where <name> is one of {e1, e2, d1, d2, d3}, matching the order in v2's buildModel.
+    import os
+
+    weights_dir = HERE / "weights"
+    os.makedirs(weights_dir, exist_ok=True)
+
+    # Keys match C-side loadStateDictFromDir() names; values are actual PyTorch attrs.
+    layer_map = {
+        "e1": model.enc1,   # Conv1d(1->8, K=7, S=2)
+        "e2": model.enc2,   # Conv1d(8->16, K=5)
+        "d1": model.dec1,   # ConvTranspose1d(16->8, K=5, S=5)
+        "d2": model.dec2,   # ConvTranspose1d(8->4, K=2, S=2)
+        "d3": model.dec3,   # ConvTranspose1d(4->1, K=2, S=2)
+    }
+
+    print("Saving per-layer weights:", flush=True)
+    for name, layer in layer_map.items():
+        w = layer.weight.detach().cpu().numpy().astype(np.float32)
+        np.save(weights_dir / f"{name}.weight.npy", w)
+        if layer.bias is not None:
+            b = layer.bias.detach().cpu().numpy().astype(np.float32)
+            np.save(weights_dir / f"{name}.bias.npy", b)
+        has_bias = f" + {name}.bias.npy" if layer.bias is not None else ""
+        print(f"  wrote {name}.weight.npy shape={w.shape}{has_bias}", flush=True)
+
 
 if __name__ == "__main__":
     main()
diff --git a/examples/ecg_anomaly_ae_v2/CMakeLists.txt b/examples/ecg_anomaly_ae_v2/CMakeLists.txt
@@ -0,0 +1,56 @@
+add_executable(train_c_ecg_anomaly_ae_v2 train_c.c)
+# Library targets this example links against, grouped by component.
+target_link_libraries(train_c_ecg_anomaly_ae_v2 PRIVATE
+        DataLoaderApi
+        DataLoader
+        NPYLoaderApi
+        NPYLoader
+
+        Layer
+
+        Conv1dApi
+        Conv1d
+
+        Conv1dTransposedApi
+        Conv1dTransposed
+
+        ReluApi
+        Relu
+
+        Pool1dApi
+        MaxPool1d
+        AvgPool1d
+
+        QuantizationApi
+        Quantization
+
+        TensorApi
+        Tensor
+        Rounding
+
+        TrainingLoopApi
+        CalculateGradsSequential
+        TrainingBatchDefault
+        TrainingEpochDefault
+        Optimizer
+
+        LossFunction
+        MSE
+
+        Sgd
+        SgdApi
+
+        InferenceApi
+
+        StateDictApi
+        LayerWeightsApi
+        LayerQuant
+        LayerCommon
+        Distributions
+
+        Common
+        StorageApi
+        RNG
+
+        examples_shared
+)