Merge branch 'main' into ap/loss_functions
avik-pal authored Jun 17, 2024
2 parents aa7497c + 012c232 commit 762fd7a
Showing 14 changed files with 77 additions and 47 deletions.
4 changes: 2 additions & 2 deletions .buildkite/testing.yml
@@ -42,7 +42,7 @@ steps:
cuda: "*"
env:
RETESTITEMS_NWORKERS: 2
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.pull_request.labels includes "run downstream tests"
timeout_in_minutes: 240
matrix:
setup:
@@ -103,7 +103,7 @@ steps:
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/
if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip downstream\]/ && build.message !~ /\[skip ci\]/ && build.pull_request.labels includes "run downstream tests"
timeout_in_minutes: 60
matrix:
setup:
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
@@ -72,7 +72,7 @@ jobs:

downstream:
name: Downstream ${{ matrix.package.repo }}/${{ matrix.package.group }}
if: ${{ !contains(github.event.head_commit.message, '[skip tests]') }} && github.base_ref == github.event.repository.default_branch
if: ${{ !contains(github.event.head_commit.message, '[skip tests]') }} && github.base_ref == github.event.repository.default_branch && contains(github.event.pull_request.labels.*.name, 'run downstream test')
runs-on: ${{ matrix.os }}
timeout-minutes: 60
env:
12 changes: 2 additions & 10 deletions Project.toml
@@ -73,7 +73,7 @@ ArrayInterface = "7.9"
CUDA = "5.3.2"
ChainRules = "1.68"
ChainRulesCore = "1.23"
Compat = "4.10.0"
Compat = "4.12.0"
ComponentArrays = "0.15.11"
ConcreteStructs = "0.2.3"
ConstructionBase = "1.5"
@@ -90,7 +90,6 @@ GPUArraysCore = "0.1.6"
LinearAlgebra = "1.10"
Logging = "1.10"
LuxAMDGPU = "0.2.2"
LuxCUDA = "0.3.2"
LuxCore = "0.1.14"
LuxDeviceUtils = "0.1.19"
LuxLib = "0.3.23"
@@ -130,17 +129,10 @@ DynamicExpressions = "a40a106e-89c9-4ca8-8020-a735e8728b6b"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
LuxAMDGPU = "83120cb1-ca15-4f04-bf3b-6967d2e6b60b"
LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
NCCL = "3fe64909-d7a1-4096-9b7d-7a0f12cf0f6b"
OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
@@ -153,4 +145,4 @@ Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Aqua", "ComponentArrays", "Documenter", "DynamicExpressions", "Enzyme", "ExplicitImports", "FiniteDifferences", "Flux", "ForwardDiff", "Logging", "LuxAMDGPU", "LuxCUDA", "LuxTestUtils", "MLUtils", "MPI", "Metalhead", "NCCL", "OneHotArrays", "Optimisers", "Pkg", "ReTestItems", "ReverseDiff", "SimpleChains", "StableRNGs", "Statistics", "Test", "Tracker", "Zygote"]
test = ["Aqua", "ComponentArrays", "Documenter", "DynamicExpressions", "Enzyme", "ExplicitImports", "FiniteDifferences", "ForwardDiff", "Logging", "LuxTestUtils", "MLUtils", "Optimisers", "Pkg", "ReTestItems", "ReverseDiff", "SimpleChains", "StableRNGs", "Statistics", "Test", "Tracker", "Zygote"]
6 changes: 3 additions & 3 deletions ext/LuxMPIExt.jl
@@ -3,7 +3,7 @@ module LuxMPIExt
using Lux: MPIBackend, NCCLBackend, DistributedUtils, __unwrap_val, MPI_CUDA_AWARE,
MPI_ROCM_AWARE
using LuxDeviceUtils: AbstractLuxDevice, LuxCUDADevice, LuxAMDGPUDevice, cpu_device,
set_device!, __is_functional
set_device!, functional
using MPI: MPI

function DistributedUtils.__initialize(
@@ -14,7 +14,7 @@ function DistributedUtils.__initialize(

local_rank = MPI.Comm_rank(MPI.COMM_WORLD)

if cuda_devices !== missing && __is_functional(LuxCUDADevice)
if cuda_devices !== missing && functional(LuxCUDADevice)
if cuda_devices === nothing
set_device!(LuxCUDADevice, nothing, local_rank + 1)
else
@@ -24,7 +24,7 @@ function DistributedUtils.__initialize(
error(lazy"CUDA devices are not functional (or `LuxCUDA.jl` not loaded) and `force_cuda` is set to `true`. This is caused by backend: $(caller).")
end

if amdgpu_devices !== missing && __is_functional(LuxAMDGPUDevice)
if amdgpu_devices !== missing && functional(LuxAMDGPUDevice)
if amdgpu_devices === nothing
set_device!(LuxAMDGPUDevice, nothing, local_rank + 1)
else
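The extension now relies on the public functional check from LuxDeviceUtils instead of the internal __is_functional. A minimal sketch of how that check gates per-rank device selection, using the same calls as the hunks above (the standalone MPI setup is added here only for illustration):

    using MPI
    using LuxDeviceUtils: LuxCUDADevice, functional, set_device!

    MPI.Init()
    local_rank = MPI.Comm_rank(MPI.COMM_WORLD)

    # Bind a CUDA device to this rank only if the CUDA backend is actually
    # usable (LuxCUDA loaded and a working GPU visible).
    if functional(LuxCUDADevice)
        set_device!(LuxCUDADevice, nothing, local_rank + 1)
    end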
16 changes: 12 additions & 4 deletions src/layers/extension.jl
@@ -67,11 +67,19 @@ julia> x = [1.0f0 2.0f0 3.0f0
1.0 2.0 3.0
4.0 5.0 6.0
julia> layer(x, ps, st)
(Float32[0.6967068 -0.4544041 -2.8266668; 1.5 -4.5 -12.5], (layer_1 = (layer_1 = NamedTuple(), layer_2 = NamedTuple()), layer_2 = NamedTuple()))
julia> layer(x, ps, st)[1] ≈ Float32[0.6967068 -0.4544041 -2.8266668; 1.5 -4.5 -12.5]
true
julia> Zygote.gradient(Base.Fix1(sum, abs2) ∘ first ∘ layer, x, ps, st)
(Float32[-14.0292 54.206482 180.32669; -0.9995737 10.7700815 55.6814], (layer_1 = (layer_1 = (params = Float32[-6.451908],), layer_2 = (params = Float32[-31.0, 90.0],)), layer_2 = nothing), nothing)
julia> ∂x, ∂ps, _ = Zygote.gradient(Base.Fix1(sum, abs2) ∘ first ∘ layer, x, ps, st);
julia> ∂x ≈ Float32[-14.0292 54.206482 180.32669; -0.9995737 10.7700815 55.6814]
true
julia> ∂ps.layer_1.layer_1.params ≈ Float32[-6.451908]
true
julia> ∂ps.layer_1.layer_2.params ≈ Float32[-31.0, 90.0]
true
```
"""
@kwdef @concrete struct DynamicExpressionsLayer <: AbstractExplicitLayer
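The doctest now compares results against reference values with ≈ instead of matching the printed tuple verbatim, which keeps it stable across printing and floating-point changes. For readability, the composed loss Base.Fix1(sum, abs2) ∘ first ∘ layer used above is equivalent to the explicit form below (illustrative, not part of the diff):

    # `first` drops the returned layer state; Base.Fix1(sum, abs2) is y -> sum(abs2, y).
    loss(x, ps, st) = sum(abs2, first(layer(x, ps, st)))

    # Gradients are returned positionally; the gradient w.r.t. the state st is `nothing`.
    ∂x, ∂ps, ∂st = Zygote.gradient(loss, x, ps, st)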
8 changes: 7 additions & 1 deletion test/distributed/common_distributedtest.jl
@@ -1,7 +1,13 @@
using Lux, MPI, NCCL, Test
using LuxAMDGPU, LuxCUDA

const input_args = length(ARGS) == 2 ? ARGS : ("CPU", "mpi")
if input_args[1] == "CUDA"
using LuxCUDA
end
if input_args[1] == "AMDGPU"
using LuxAMDGPU
end

const backend_type = input_args[2] == "nccl" ? NCCLBackend : MPIBackend
const dev = input_args[1] == "CPU" ? LuxCPUDevice() :
(input_args[1] == "CUDA" ? LuxCUDADevice() : LuxAMDGPUDevice())
8 changes: 7 additions & 1 deletion test/distributed/data_distributedtest.jl
@@ -1,7 +1,13 @@
using Lux, MLUtils, MPI, NCCL, Random, Test
using LuxAMDGPU, LuxCUDA

const input_args = length(ARGS) == 2 ? ARGS : ("CPU", "mpi")
if input_args[1] == "CUDA"
using LuxCUDA
end
if input_args[1] == "AMDGPU"
using LuxAMDGPU
end

const backend_type = input_args[2] == "nccl" ? NCCLBackend : MPIBackend
const dev = input_args[1] == "CPU" ? LuxCPUDevice() :
(input_args[1] == "CUDA" ? LuxCUDADevice() : LuxAMDGPUDevice())
8 changes: 7 additions & 1 deletion test/distributed/optimizer_distributedtest.jl
@@ -1,7 +1,13 @@
using Lux, MPI, NCCL, Optimisers, Random, Test
using LuxAMDGPU, LuxCUDA

const input_args = length(ARGS) == 2 ? ARGS : ("CPU", "mpi")
if input_args[1] == "CUDA"
using LuxCUDA
end
if input_args[1] == "AMDGPU"
using LuxAMDGPU
end

const backend_type = input_args[2] == "nccl" ? NCCLBackend : MPIBackend
const dev = input_args[1] == "CPU" ? LuxCPUDevice() :
(input_args[1] == "CUDA" ? LuxCUDADevice() : LuxAMDGPUDevice())
8 changes: 7 additions & 1 deletion test/distributed/synchronize_distributedtest.jl
@@ -1,7 +1,13 @@
using ComponentArrays, Lux, MPI, NCCL, Optimisers, Random, Test
using LuxAMDGPU, LuxCUDA

const input_args = length(ARGS) == 2 ? ARGS : ("CPU", "mpi")
if input_args[1] == "CUDA"
using LuxCUDA
end
if input_args[1] == "AMDGPU"
using LuxAMDGPU
end

const backend_type = input_args[2] == "nccl" ? NCCLBackend : MPIBackend
const dev = input_args[1] == "CPU" ? LuxCPUDevice() :
(input_args[1] == "CUDA" ? LuxCUDADevice() : LuxAMDGPUDevice())
6 changes: 6 additions & 0 deletions test/qa_tests.jl
@@ -7,6 +7,9 @@
end

@testitem "Explicit Imports: Quality Assurance" tags=[:others] begin
import Pkg
Pkg.add(["Flux", "LuxAMDGPU"])

# Load all trigger packages
import Lux, ComponentArrays, ReverseDiff, Flux, LuxAMDGPU, SimpleChains, Tracker,
Zygote, Enzyme
@@ -26,6 +29,9 @@ end
@testitem "doctests: Quality Assurance" tags=[:others] skip=:(!Sys.islinux()) begin
using Documenter

import Pkg
Pkg.add("Flux")

doctestexpr = quote
using SimpleChains: static
using Flux: Flux
7 changes: 4 additions & 3 deletions test/runtests.jl
@@ -1,4 +1,4 @@
using ReTestItems
using ReTestItems, Pkg, Test

const LUX_TEST_GROUP = lowercase(get(ENV, "LUX_TEST_GROUP", "all"))
@info "Running tests for group: $LUX_TEST_GROUP"
@@ -11,9 +11,10 @@ else
end

# Distributed Tests
using MPI, Pkg, Test

if LUX_TEST_GROUP == "all" || LUX_TEST_GROUP == "distributed"
Pkg.add(["MPI", "NCCL"])
using MPI

nprocs_str = get(ENV, "JULIA_MPI_TEST_NPROCS", "")
nprocs = nprocs_str == "" ? clamp(Sys.CPU_THREADS, 2, 4) : parse(Int, nprocs_str)
testdir = @__DIR__
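MPI and NCCL are now added only when the distributed test group is selected, and the distributed scripts above run in separate Julia processes under MPI. The launch command itself is not part of this diff; the sketch below is an assumption about how such a script is typically driven with MPI.jl's mpiexec, reusing nprocs and testdir as defined above:

    # Assumed launcher sketch (not from this diff): run one distributed test
    # script on `nprocs` ranks, passing the backend and communication type
    # that the scripts expect in ARGS.
    script = joinpath(testdir, "distributed", "common_distributedtest.jl")
    run(`$(MPI.mpiexec()) -n $nprocs $(Base.julia_cmd()) --project=$testdir $script CPU mpi`)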
24 changes: 15 additions & 9 deletions test/setup_modes.jl
@@ -1,9 +1,19 @@
using Lux, LuxCUDA, LuxAMDGPU
using Lux, GPUArraysCore, Pkg

CUDA.allowscalar(false)
GPUArraysCore.allowscalar(false)

const BACKEND_GROUP = get(ENV, "BACKEND_GROUP", "All")

if BACKEND_GROUP == "All" || BACKEND_GROUP == "CUDA"
Pkg.add("LuxCUDA")
using LuxCUDA
end

if BACKEND_GROUP == "All" || BACKEND_GROUP == "AMDGPU"
Pkg.add("LuxAMDGPU")
using LuxAMDGPU
end

cpu_testing() = BACKEND_GROUP == "All" || BACKEND_GROUP == "CPU"
cuda_testing() = (BACKEND_GROUP == "All" || BACKEND_GROUP == "CUDA") && LuxCUDA.functional()
function amdgpu_testing()
@@ -12,14 +22,10 @@ end

const MODES = begin
# Mode, Array Type, Device Function, GPU?
cpu_mode = ("CPU", Array, LuxCPUDevice(), false)
cuda_mode = ("CUDA", CuArray, LuxCUDADevice(), true)
amdgpu_mode = ("AMDGPU", ROCArray, LuxAMDGPUDevice(), true)

modes = []
cpu_testing() && push!(modes, cpu_mode)
cuda_testing() && push!(modes, cuda_mode)
amdgpu_testing() && push!(modes, amdgpu_mode)
cpu_testing() && push!(modes, ("CPU", Array, LuxCPUDevice(), false))
cuda_testing() && push!(modes, ("CUDA", CuArray, LuxCUDADevice(), true))
amdgpu_testing() && push!(modes, ("AMDGPU", ROCArray, LuxAMDGPUDevice(), true))

modes
end
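MODES is a vector of (mode name, array type, device, on-GPU flag) tuples built only for the backends that are enabled. Downstream test files iterate over it; an illustrative consumption pattern (the loop body is not from this diff and assumes setup_modes.jl has been included):

    using Test

    for (mode, aType, dev, ongpu) in MODES
        x = rand(Float32, 4, 2) |> dev    # move test data to this mode's device
        @test x isa aType                 # Array / CuArray / ROCArray depending on the mode
    end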
12 changes: 1 addition & 11 deletions test/shared_testsetup.jl
@@ -20,18 +20,8 @@ get_stable_rng(seed=12345) = StableRNG(seed)

__display(args...) = (println(); display(args...))

# AMDGPU Specifics
function _rocRAND_functional()
try
get_default_rng("AMDGPU")
return true
catch
return false
end
end

export @jet, @test_gradients, check_approx
export BACKEND_GROUP, MODES, cpu_testing, cuda_testing, amdgpu_testing, get_default_rng,
get_stable_rng, __display, _rocRAND_functional
get_stable_rng, __display

end
3 changes: 3 additions & 0 deletions test/transform/flux_tests.jl
@@ -1,4 +1,7 @@
@testitem "FromFluxAdaptor" setup=[SharedTestSetup] tags=[:others] begin
import Pkg
Pkg.add("Flux")

import Flux

from_flux = fdevice(::Lux.LuxCPUDevice) = Flux.cpu
