Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Coloring/Coloring.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ IndexedSet(n::Integer) = IndexedSet(zeros(Int, n), trues(n), 0)

function Base.push!(v::IndexedSet, i::Integer)
if v.empty[i] # new index
v.nzidx[v.nnz += 1] = i
v.nzidx[v.nnz+=1] = i
v.empty[i] = false
end
return
Expand Down
4 changes: 1 addition & 3 deletions src/mathoptinterface_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,7 @@ function _read_residual!(F::AbstractVector, d::NLPEvaluator)
res = something(d.residual)
range = _storage_range(res.expr.sizes, 1)
@assert length(F) == length(range)
for (i, j) in enumerate(range)
F[i] = res.expr.forward_storage[j]
end
copyto!(view(F, eachindex(range)), view(res.expr.forward_storage, range))
return
end

Expand Down
1 change: 0 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
[deps]
ArrayDiff = "c45fa1ca-6901-44ac-ae5b-5513a4852d50"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Expand Down
71 changes: 71 additions & 0 deletions test/eval_residual_gpu.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
module TestEvalResidualGPU

using Test

using JuMP
using ArrayDiff
import CUDA
import MathOptInterface as MOI

"""
    runtests()

Run every function in this module whose name starts with `test_`, each inside
its own `@testset`. When `CUDA.functional()` is false, log an `@info` message
and run nothing.
"""
function runtests()
    if CUDA.functional()
        for sym in names(@__MODULE__; all = true)
            label = string(sym)
            # Only functions following the `test_` naming convention are run.
            startswith(label, "test_") || continue
            @testset "$label" begin
                getfield(@__MODULE__, sym)()
            end
        end
    else
        @info "CUDA is not functional in this environment; skipping GPU tests."
    end
    return
end

# Regression test for `_read_residual!` in `src/mathoptinterface_api.jl`.
# The previous element-wise loop
#     for (i, j) in enumerate(range)
#         F[i] = res.expr.forward_storage[j]
#     end
# triggers a scalar-indexing error when either `F` or `forward_storage` lives
# on the GPU. The current implementation uses `copyto!` with views, which
# dispatches to `cudaMemcpy` for same-eltype CuArray ↔ CuArray (or CuArray ↔
# contiguous CPU buffer) transfers.
"""
    _residual_fn(W1, b1, W2, b2)

Return a one-argument function computing `W2 * tanh.(W1 * x .+ b1) .+ b2`
for the captured weights and biases.
"""
function _residual_fn(W1, b1, W2, b2)
    function residual(x)
        # Hidden layer: affine map followed by element-wise tanh.
        hidden = tanh.(W1 * x .+ b1)
        # Output layer: affine map of the hidden activations.
        return W2 * hidden .+ b2
    end
    return residual
end

"""
    test_eval_residual_gpu_matches_cpu()

Evaluate the same small MLP residual through a default `ArrayDiff` evaluator
and through one built with a `CuVector{Float64}` mode, and check that both
agree with calling the residual function directly.
"""
function test_eval_residual_gpu_matches_cpu()
    # Weights and biases of a two-layer MLP residual mapping 3 → 4 → 2.
    W1 = [
        0.4 -0.2 0.1
        -0.3 0.5 0.2
        0.1 0.1 -0.4
        0.2 -0.1 0.3
    ]
    b1 = [0.05, -0.1, 0.1, 0.0]
    W2 = [
        0.3 -0.4 0.2 0.1
        -0.1 0.2 0.3 -0.5
    ]
    b2 = [0.0, 0.0]
    residual = _residual_fn(W1, b1, W2, b2)
    # Dimensions follow from the weight shapes: 3 inputs, 2 outputs.
    n_in = size(W1, 2)
    n_out = size(W2, 1)
    x_host = [0.6, -0.3, 0.4]
    reference = residual(x_host)

    # Host path: default evaluator writing into a plain `Vector{Float64}`.
    host_eval = ArrayDiff.evaluator(residual, n_in)
    F_host = zeros(Float64, n_out)
    ArrayDiff.eval_residual!(host_eval, F_host, x_host)
    @test F_host ≈ reference

    # Device path: evaluator built with a `CuVector{Float64}` mode, with both
    # the output buffer and the input given as CUDA arrays. This is meant to
    # exercise reading the residual back without scalar indexing.
    device_mode = ArrayDiff.Mode{CUDA.CuVector{Float64}}()
    device_eval = ArrayDiff.evaluator(residual, n_in; mode = device_mode)
    F_device = CUDA.zeros(Float64, n_out)
    x_device = CUDA.CuVector{Float64}(x_host)
    ArrayDiff.eval_residual!(device_eval, F_device, x_device)
    # Copy back to the host before comparing with the reference values.
    @test Array(F_device) ≈ reference
    return
end

end

# Run the module's tests as soon as this file is included.
TestEvalResidualGPU.runtests()
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ if VERSION >= v"1.11"
# Needs https://github.com/JuliaSmoothOptimizers/NLPModelsJuMP.jl/pull/229
include("NLPModelsJuMP.jl")
include("Optimisers.jl")
include("Optimisers_GPU.jl")
#include("Optimisers_GPU.jl")
end
#include("eval_residual_gpu.jl")
Loading