diff --git a/perf/Project.toml b/perf/Project.toml
index 4aa5df2..daaa5d5 100644
--- a/perf/Project.toml
+++ b/perf/Project.toml
@@ -1,21 +1,34 @@
 name = "ArrayDiffPerf"
 uuid = "00000000-0000-0000-0000-000000000001"
-authors = ["Benoît Legat "]
 version = "0.0.0"
+authors = ["Benoît Legat "]
 
 [deps]
 ArrayDiff = "c45fa1ca-6901-44ac-ae5b-5513a4852d50"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
+JSOSolvers = "10dff2fc-5484-5881-a0e0-c90441020f8a"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
+NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
+NLPModelsJuMP = "792afdf1-32c1-5681-94e0-d7bf7a5df49e"
+NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd"
+Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843"
 
 [sources]
 ArrayDiff = {path = ".."}
+
+[compat]
+JSOSolvers = "0.14.8"
+NLPModels = "0.21.12"
+NLPModelsJuMP = "0.13.5"
+Optimisers = "0.4.7"
+SolverCore = "0.3.10"
diff --git a/perf/neural.jl b/perf/neural.jl
index f32aef8..3cc9ebf 100644
--- a/perf/neural.jl
+++ b/perf/neural.jl
@@ -1,39 +1,103 @@
 # Neural network optimization using ArrayDiff + NLopt
 #
 # This demonstrates end-to-end optimization of a simple two-layer neural
 # network with array-valued decision variables, array-aware AD, and a
 # first-order NLP solver.
 
 using JuMP
 using ArrayDiff
-import NLopt
+import Random
 
-n = 2
-X = rand(n, n)
-target = rand(n, n)
+"""
+    bench(solver, ::Type{T} = Float64;
+          h = 4096, d = 13, n = 178, out_dim = 2, gpu = false)
+
+Benchmark used for SIAM'OP 26 talk.
+
+Fit the two-layer network `W2 * tanh.(W1 * X)` to `Y` by minimizing the sum of
+squared errors with `solver`, and return the JuMP model after `optimize!`.
+
+`X` (`d × n`) and `Y` (`out_dim × n`) are drawn with `randn(T, ...)` after
+`Random.seed!(0)`; the decision variables are `W1` (`h × d`) and `W2`
+(`out_dim × h`). With `gpu = true` the AD backend is
+`ArrayDiff.Mode{CUDA.CuVector{T}}` instead of `ArrayDiff.Mode{Vector{T}}`, so
+`CUDA` must be imported before the call.
+"""
+function bench(
+    solver,
+    ::Type{T} = Float64;
+    h::Int = 4096,
+    d::Int = 13,
+    n::Int = 178,
+    out_dim = 2,
+    gpu::Bool = false,
+) where {T<:Real}
+    Random.seed!(0)
+    X = randn(T, d, n)
+    Y = randn(T, out_dim, n)
 
-model = direct_model(NLopt.Optimizer())
-set_attribute(model, "algorithm", :LD_LBFGS)
+    model = GenericModel{T}(solver)
+    @variable(model, W1[1:h, 1:d],
+        container = ArrayDiff.ArrayOfVariables)
+    @variable(model, W2[1:out_dim, 1:h],
+        container = ArrayDiff.ArrayOfVariables)
+    Y_hat = W2 * tanh.(W1 * X)
+    # We need `.-` and not `-` as a workaround for
+    # https://github.com/blegat/ArrayDiff.jl/issues/83
+    loss = sum((Y_hat .- Y) .^ 2)
+    @objective(model, Min, loss)
 
-@variable(model, W1[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
-@variable(model, W2[1:n, 1:n], container = ArrayDiff.ArrayOfVariables)
+    # Set non-zero starting values to avoid the saddle point at zero. `W1` is
+    # `h × d` and `W2` is `out_dim × h`, so each matrix is indexed over its own
+    # dimensions rather than over `1:n`.
+    for i in 1:h, j in 1:d
+        set_start_value(W1[i, j], 0.1 * randn())
+    end
+    for i in 1:out_dim, j in 1:h
+        set_start_value(W2[i, j], 0.1 * randn())
+    end
+    V = gpu ? CUDA.CuVector{T} : Vector{T}
+    set_attribute(model,
+        MOI.AutomaticDifferentiationBackend(),
+        ArrayDiff.Mode{V}())
+    optimize!(model)
 
-# Set non-zero starting values to avoid saddle point at zero
-for i in 1:n, j in 1:n
-    set_start_value(W1[i, j], 0.1 * randn())
-    set_start_value(W2[i, j], 0.1 * randn())
+    display(solution_summary(model))
+    if !is_solved_and_feasible(model)
+        @warn(solution_summary(model))
+    end
+    # The model is returned so that callers can query it further, e.g. with
+    # `solve_time(model)`.
+    return model
 end
 
-# Forward pass: Y = W2 * tanh.(W1 * X)
-Y = W2 * tanh.(W1 * X)
+import NLopt
+nlopt = optimizer_with_attributes(
+    NLopt.Optimizer,
+    "algorithm" => :LD_LBFGS,
+    "ftol_rel" => 1e-14,
+    "ftol_abs" => 1e-14,
+    "xtol_rel" => 1e-14,
+    "maxeval" => 100_000,
+)
+m = bench(nlopt)
+
+import NLPModelsJuMP
+include(joinpath(dirname(dirname(pathof(ArrayDiff))), "test", "OptimisersSolver.jl"))
 
-# Loss: sum of squared errors
-loss = sum((Y .- target) .^ 2)
-@objective(model, Min, loss)
+import JSOSolvers
+lbfgs = optimizer_with_attributes(
+    NLPModelsJuMP.Optimizer,
+    "solver" => JSOSolvers.lbfgs,
+)
+bench(lbfgs)
 
-optimize!(model)
+adam = optimizer_with_attributes(
+    NLPModelsJuMP.Optimizer,
+    "tol" => 1e-4,
+    "solver" => OptimisersSolver,
+)
+bench(adam)
 
-println("Termination status: ", termination_status(model))
-println("Objective value: ", objective_value(model))
-println("W1 = ", [value(W1[i, j]) for i in 1:n, j in 1:n])
-println("W2 = ", [value(W2[i, j]) for i in 1:n, j in 1:n])
+import CUDA
+bench(adam, Float32, gpu = true)