diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 11f9dbb1..dca06709 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -141,13 +141,21 @@ jobs:
     uses: ./.github/workflows/pytorchsim_test.yml
     with:
       image_name: ghcr.io/psal-postech/torchsim-test:${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      vector_lane: 128
-      spad_size: 128
+      togsim_config: /workspace/PyTorchSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.yml
+      run_accuracy: true
 
   test-pytorchsim-wrapper2:
     needs: build-and-test
     uses: ./.github/workflows/pytorchsim_test.yml
     with:
       image_name: ghcr.io/psal-postech/torchsim-test:${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      vector_lane: 32
-      spad_size: 32
+      togsim_config: /workspace/PyTorchSim/configs/systolic_ws_32x32_c1_simple_noc_tpuv3.yml
+      run_accuracy: false  # skips the test_accuracy job (same as the input's default)
+
+  test-pytorchsim-wrapper3:  # same reusable test workflow, run against the 8x8 config
+    needs: build-and-test
+    uses: ./.github/workflows/pytorchsim_test.yml
+    with:
+      image_name: ghcr.io/psal-postech/torchsim-test:${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      togsim_config: /workspace/PyTorchSim/configs/systolic_ws_8x8_c1_simple_noc_tpuv3.yml
+      run_accuracy: false  # skips the test_accuracy job (same as the input's default)
diff --git a/.github/workflows/pytorchsim_test.yml b/.github/workflows/pytorchsim_test.yml
index 33e279fe..345c716e 100644
--- a/.github/workflows/pytorchsim_test.yml
+++ b/.github/workflows/pytorchsim_test.yml
@@ -6,14 +6,15 @@ on:
       image_name:
         required: true
         type: string
-      vector_lane:
-        description: "Vector lane size (use empty string for server TPU)"
+      togsim_config:
+        description: "TOGSim hardware config YAML (single source of truth; drives both codegen and the cycle sim)"
         required: true
-        type: number
-      spad_size:
-        description: "SPAD size (use empty string for server TPU)"
-        required: true
-        type: number
+        type: string  # forwarded unchanged to the test containers as the TOGSIM_CONFIG env var
+      run_accuracy:
+        description: "Run the accuracy + speedup artifact job (only meaningful for the 128x128 config)"
+        required: false
+        default: false  # opt-in: the test_accuracy job is gated on this input
+        type: boolean
 
 # Runner policy: the CPU-only CI image is small enough to pull on GitHub-hosted
 # runners, so op and model tests run on ubuntu-latest. The memory/time-intensive
@@ -35,8 +36,7 @@ jobs:
         run: |
           echo "Running test_add.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_add.py
 
   test_transcendental:
@@ -54,8 +54,7 @@ jobs:
         run: |
           echo "Running test_transcendental.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_transcendental.py
 
   test_activation:
@@ -73,8 +72,7 @@ jobs:
         run: |
           echo "Running test_activation.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_activation.py
 
   test_batchnorm:
@@ -92,8 +90,7 @@ jobs:
         run: |
           echo "Running test_batchnorm.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_batchnorm.py
 
   test_bmm:
@@ -111,8 +108,7 @@ jobs:
         run: |
           echo "Running test_bmm.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/gemm/test_bmm.py
 
   test_cnn:
@@ -130,8 +126,7 @@ jobs:
         run: |
           echo "Running test_cnn.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_cnn.py
 
   test_conv2d:
@@ -149,8 +144,7 @@ jobs:
         run: |
           echo "Running test_conv2d.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_conv2d.py
 
   test_cat:
@@ -168,8 +162,7 @@ jobs:
         run: |
           echo "Running test_cat.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_cat.py
 
   test_floormod_axis_split:
@@ -187,8 +180,7 @@ jobs:
         run: |
           echo "Running test_floormod_axis_split.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_floormod_axis_split.py
 
   test_widen_dtype:
@@ -206,8 +198,7 @@ jobs:
         run: |
           echo "Running test_widen_dtype.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/misc/test_widen_dtype.py
 
   test_matmul:
@@ -225,8 +216,7 @@ jobs:
         run: |
           echo "Running test_matmul.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/gemm/test_matmul.py
 
   test_reduce:
@@ -244,8 +234,7 @@ jobs:
         run: |
           echo "Running test_reduce.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_reduce.py
 
   test_softmax:
@@ -263,8 +252,7 @@ jobs:
         run: |
           echo "Running test_softmax.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_softmax.py
 
   test_transpose2D:
@@ -282,8 +270,7 @@ jobs:
         run: |
           echo "Running test_transpose2D.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_transpose2D.py
 
   test_view3D_2D:
@@ -301,8 +288,7 @@ jobs:
         run: |
           echo "Running test_view3D_2D.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_view3D_2D.py
 
   test_layernorm:
@@ -320,8 +306,7 @@ jobs:
         run: |
           echo "Running test_layernorm.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/reduce/test_layernorm.py
 
   test_mlp:
@@ -339,8 +324,7 @@ jobs:
         run: |
           echo "Running test_mlp.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_mlp.py
 
   test_resnet:
@@ -358,16 +342,14 @@ jobs:
         run: |
           echo "Running test_resnet.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_resnet.py
 
       - name: Run test_resnet50.py
         run: |
-          echo "Running test_resnet.py"
+          echo "Running test_resnet50.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_resnet.py --model_type resnet50
 
   test_mobilenet:
@@ -385,8 +367,7 @@ jobs:
         run: |
           echo "Running test_mobilenet.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/MobileNet/test_mobilenet.py
 
   test_transformer:
@@ -404,8 +385,7 @@ jobs:
         run: |
           echo "Running test_transformer.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_transformer.py
 
   test_transpose3D:
@@ -423,8 +403,7 @@ jobs:
         run: |
           echo "Running test_transpose3D.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/view/test_transpose3D.py
 
   test_sparsity:
@@ -442,8 +421,7 @@ jobs:
         run: |
           echo "Running test_sparsity.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/sparsity/test_sparsity.py
 
   test_pool:
@@ -461,8 +439,7 @@ jobs:
         run: |
           echo "Running test_pool.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/conv/test_pool.py
 
   test_perceptron:
@@ -480,8 +457,7 @@ jobs:
         run: |
           echo "Running test_single_perceptron.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_single_perceptron.py
 
   test_fusion:
@@ -499,80 +475,70 @@ jobs:
         run: |
           echo "Running test_addmm_residual.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_addmm_residual.py
 
       - name: Run test_matmul_activation.py
         run: |
           echo "Running test_matmul_activation.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_activation.py
 
       - name: Run test_matmul_scalar.py
         run: |
           echo "Running test_matmul_scalar.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_scalar.py
 
       - name: Run test_matmul_reduction.py
         run: |
           echo "Running test_matmul_reduction.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_reduction.py
 
       - name: Run test_bmm_reduction.py
         run: |
           echo "Running test_bmm_reduction.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_bmm_reduction.py
 
       - name: Run test_prologue_fusion.py
         run: |
           echo "Running test_prologue_fusion.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_prologue_fusion.py
 
       - name: Run test_transformer_fusion.py
         run: |
           echo "Running test_transformer_fusion.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_transformer_fusion.py
 
       - name: Run test_conv_fusion.py
         run: |
           echo "Running test_conv_fusion.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_conv_fusion.py
 
       - name: Run test_attention_fusion.py
         run: |
           echo "Running test_attention_fusion.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_attention_fusion.py
 
       - name: Run test_matmul_vector.py
         run: |
           echo "Running test_matmul_vector.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/fusion/test_matmul_vector.py
 
   test_moe:
@@ -590,8 +556,7 @@ jobs:
         run: |
           echo "Running test_moe.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/MoE/test_moe.py
 
   test_mistral:
@@ -609,8 +574,7 @@ jobs:
         run: |
           echo "Running test_mistral.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Mixtral8x7B/test_attention.py
 
   test_vit:
@@ -628,8 +592,7 @@ jobs:
         run: |
           echo "Running test_vit.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/test_vit.py
 
   test_diffusion:
@@ -649,8 +612,7 @@ jobs:
         run: |
           echo "Running test_diffusion.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Diffusion/test_diffusion.py
 
   test_indirect:
@@ -668,8 +630,7 @@ jobs:
         run: |
           echo "Running test_indirect.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/misc/test_indirect_access.py
 
   test_scheduler:
@@ -687,8 +648,7 @@ jobs:
         run: |
           echo "Running test_scheduler.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/system/test_scheduler.py
 
   test_llama:
@@ -706,8 +666,7 @@ jobs:
         run: |
           echo "Running test_llama.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Llama/test_llama.py
 
   test_yolov5:
@@ -725,8 +684,7 @@ jobs:
         run: |
           echo "Running test_yolov5.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/Yolov5/test_yolov5.py
 
   test_deepseek:
@@ -746,8 +704,7 @@ jobs:
         run: |
           echo "Running test_deepseek_v3_base.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/models/DeepSeek/test_deepseek_v3_base.py
 
   test_eager:
@@ -765,8 +722,7 @@ jobs:
         run: |
           echo "Running test_eager.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/system/test_eager.py
 
   test_exponent:
@@ -784,8 +740,7 @@ jobs:
         run: |
           echo "Running test_exponent.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/elementwise/test_exponent.py
 
   test_sort:
@@ -803,8 +758,7 @@ jobs:
         run: |
           echo "Running test_sort.py"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             ${{ inputs.image_name }} python3 PyTorchSim/tests/ops/sort/test_sort.py
 
   test_accuracy:
@@ -812,7 +766,7 @@ jobs:
     # Accuracy + speedup runs many model simulations end to end; it is the most
     # time- and memory-intensive job, so keep it on a self-hosted runner.
     runs-on: self-hosted
-    if: inputs.vector_lane == 128
+    if: inputs.run_accuracy
     steps:
       - name: Log in to GitHub Container Registry
         uses: docker/login-action@v3
@@ -830,8 +784,7 @@ jobs:
           set -o pipefail
           mkdir -p "$ART_DIR"
           docker run --rm \
-            -e vpu_num_lanes="${{ inputs.vector_lane }}" \
-            -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \
+            -e TOGSIM_CONFIG="${{ inputs.togsim_config }}" \
             -e SKIP_ILS=1 \
             -e SPEEDUP_ITERS=2 \
             -v "$ART_DIR:/artifacts" \
diff --git a/configs/systolic_ws_32x32_c1_simple_noc_tpuv3.yml b/configs/systolic_ws_32x32_c1_simple_noc_tpuv3.yml
new file mode 100644
index 00000000..7bcbd763
--- /dev/null
+++ b/configs/systolic_ws_32x32_c1_simple_noc_tpuv3.yml
@@ -0,0 +1,28 @@
+num_cores: 1  # presumably the "c1" in the file name
+core_freq_mhz: 940
+core_stats_print_period_cycles: 10000
+num_systolic_array_per_core: 2
+
+vpu_num_lanes: 32  # vector lane count; presumably the "32x32" in the file name - TODO confirm
+vpu_spad_size_kb_per_lane: 32  # scratchpad (SPAD) size per lane, in KB
+vpu_vector_length_bits: 256
+
+dram_type: ramulator2
+dram_freq_mhz: 940  # same clock value as core_freq_mhz
+dram_channels: 16
+dram_stats_print_period_cycles: 10000
+ramulator_config_path: ../configs/ramulator2_configs/HBM2_TPUv3.yaml  # relative path; NOTE(review): confirm the directory it is resolved against
+
+icnt_type: simple  # presumably the "simple_noc" in the file name
+icnt_latency_cycles: 10
+icnt_freq_mhz: 940  # same clock value as core_freq_mhz
+icnt_injection_ports_per_core: 16
+
+pytorchsim_functional_mode: 1
+pytorchsim_timing_mode: 1
+
+codegen_mapping_strategy: heuristic
+codegen_external_mapping_file: ''  # empty string: no file specified
+codegen_autotune_max_retry: 10
+codegen_autotune_template_topk: 4
+codegen_compiler_optimization: all
diff --git a/configs/systolic_ws_8x8_c1_simple_noc_tpuv3.yml b/configs/systolic_ws_8x8_c1_simple_noc_tpuv3.yml
new file mode 100644
index 00000000..0353ef45
--- /dev/null
+++ b/configs/systolic_ws_8x8_c1_simple_noc_tpuv3.yml
@@ -0,0 +1,28 @@
+num_cores: 1  # presumably the "c1" in the file name
+core_freq_mhz: 940
+core_stats_print_period_cycles: 10000
+num_systolic_array_per_core: 2
+
+vpu_num_lanes: 8  # vector lane count; presumably the "8x8" in the file name - TODO confirm
+vpu_spad_size_kb_per_lane: 32  # NOTE(review): left at 32 KB per lane although the lane count is 8 - confirm intended
+vpu_vector_length_bits: 256
+
+dram_type: ramulator2
+dram_freq_mhz: 940  # same clock value as core_freq_mhz
+dram_channels: 16
+dram_stats_print_period_cycles: 10000
+ramulator_config_path: ../configs/ramulator2_configs/HBM2_TPUv3.yaml  # relative path; NOTE(review): confirm the directory it is resolved against
+
+icnt_type: simple  # presumably the "simple_noc" in the file name
+icnt_latency_cycles: 10
+icnt_freq_mhz: 940  # same clock value as core_freq_mhz
+icnt_injection_ports_per_core: 16
+
+pytorchsim_functional_mode: 1
+pytorchsim_timing_mode: 1
+
+codegen_mapping_strategy: heuristic
+codegen_external_mapping_file: ''  # empty string: no file specified
+codegen_autotune_max_retry: 10
+codegen_autotune_template_topk: 4
+codegen_compiler_optimization: all