diff --git a/.buildkite/benchmarks.yml b/.buildkite/benchmarks.yml index 0ca52de2..487dba56 100644 --- a/.buildkite/benchmarks.yml +++ b/.buildkite/benchmarks.yml @@ -15,6 +15,7 @@ steps: command: | julia --project=benchmarks -e 'println("--- :julia: Instantiating project") using Pkg + Pkg.instantiate() Pkg.develop([PackageSpec(path=pwd())])' julia --project=benchmarks -e 'println("--- :julia: Run Benchmarks") @@ -38,10 +39,12 @@ steps: command: | julia --project=benchmarks -e 'println("--- :julia: Instantiating project") using Pkg + Pkg.instantiate() Pkg.develop([PackageSpec(path=pwd())])' julia --project=benchmarks -e 'println("--- :julia: Add AMDGPU to benchmarks environment") using Pkg + Pkg.instantiate() Pkg.add("AMDGPU")' julia --project=benchmarks -e 'println("--- :julia: Run Benchmarks") @@ -62,10 +65,12 @@ steps: command: | julia --project=benchmarks -e 'println("--- :julia: Instantiating project") using Pkg + Pkg.instantiate() Pkg.develop([PackageSpec(path=pwd())])' julia --project=benchmarks -e 'println("--- :julia: Add CUDA to benchmarks environment") using Pkg + Pkg.instantiate() Pkg.add("LuxCUDA")' julia --project=benchmarks -e 'println("--- :julia: Run Benchmarks") @@ -88,10 +93,12 @@ steps: command: | julia --project=benchmarks -e 'println("--- :julia: Instantiating project") using Pkg + Pkg.instantiate() Pkg.develop([PackageSpec(path=pwd())])' julia --project=benchmarks -e 'println("--- :julia: Add Metal to benchmarks environment") using Pkg + Pkg.instantiate() Pkg.add("Metal")' julia --project=benchmarks -e 'println("--- :julia: Run Benchmarks") @@ -114,10 +121,12 @@ steps: command: | julia --project=benchmarks -e 'println("--- :julia: Instantiating project") using Pkg + Pkg.instantiate() Pkg.develop([PackageSpec(path=pwd())])' julia --project=benchmarks -e 'println("--- :julia: Add oneAPI to benchmarks environment") using Pkg + Pkg.instantiate() Pkg.add("oneAPI")' julia --project=benchmarks -e 'println("--- :julia: Run Benchmarks") diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 00000000..bc333e0a --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,732 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.4" +manifest_format = "2.0" +project_hash = "66a375ed9a663e61aa18273bb404cb5b24ccab73" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "6a55b747d1812e699320963ffde36f1ebdda4099" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "4.0.4" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.ArrayInterface]] +deps = ["Adapt", "LinearAlgebra"] +git-tree-sha1 = "f54c23a5d304fb87110de62bace7777d59088c34" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "7.15.0" + + [deps.ArrayInterface.extensions] + ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" + ArrayInterfaceChainRulesExt = "ChainRules" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceReverseDiffExt = "ReverseDiff" + ArrayInterfaceSparseArraysExt = "SparseArrays" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Atomix]] +deps = ["UnsafeAtomics"] +git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" +uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" +version = "0.1.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.BitTwiddlingConvenienceFunctions]] +deps = ["Static"] +git-tree-sha1 = "f21cfd4950cb9f0587d5067e69405ad2acd27b87" +uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" +version = "0.1.6" + +[[deps.CEnum]] +git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.5.0" + +[[deps.CPUSummary]] +deps = ["CpuId", "IfElse", "PrecompileTools", "Static"] +git-tree-sha1 = "5a97e67919535d6841172016c9530fd69494e5ec" +uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +version = "0.2.6" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "71acdbf594aab5bbb2cec89b208c41b4c411e49f" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.24.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CloseOpenIntervals]] +deps = ["Static", "StaticArrayInterface"] +git-tree-sha1 = "05ba0d07cd4fd8b7a39541e31a7b0254704ea581" +uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" +version = "0.1.13" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.CommonWorldInvalidations]] +git-tree-sha1 = "ae52d1c52048455e85a387fbee9be553ec2b68d0" +uuid = "f70d9fcc-98c5-4d4a-abd7-e4cdeebd8ca8" +version = "1.0.0" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.16.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.ConstructionBase]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "a33b7ced222c6165f624a3f2b55945fac5a598d9" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.7" + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseStaticArraysExt = "StaticArrays" + + [deps.ConstructionBase.weakdeps] + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.DispatchDoctor]] +deps = ["MacroTools", "Preferences"] +git-tree-sha1 = "c2acd1de2c4c357928f9fb6b60b402d914621378" +uuid = "8d63f2c5-f18a-4cf2-ba9d-b3f60fc568c8" +version = "0.4.14" +weakdeps = ["ChainRulesCore", "EnzymeCore"] + + [deps.DispatchDoctor.extensions] + DispatchDoctorChainRulesCoreExt = "ChainRulesCore" + DispatchDoctorEnzymeCoreExt = "EnzymeCore" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.EnzymeCore]] +git-tree-sha1 = "8f205a601760f4798a10f138c3940f0451d95188" +uuid = "f151be2c-9106-41f4-ab19-57ee4f262869" +version = "0.7.8" +weakdeps = ["Adapt"] + + [deps.EnzymeCore.extensions] + AdaptExt = "Adapt" + +[[deps.FastClosures]] +git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" +uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" +version = "0.3.2" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.36" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.Functors]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "64d8e93700c7a3f28f717d265382d52fac9fa1c1" +uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" +version = "0.4.12" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.6" + +[[deps.HostCPUFeatures]] +deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] +git-tree-sha1 = "8e070b599339d622e9a081d17230d74a5c473293" +uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" +version = "0.1.17" + +[[deps.Hwloc]] +deps = ["CEnum", "Hwloc_jll", "Printf"] +git-tree-sha1 = "6a3d80f31ff87bc94ab22a7b8ec2f263f9a6a583" +uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d" +version = "3.3.0" + + [deps.Hwloc.extensions] + HwlocTrees = "AbstractTrees" + + [deps.Hwloc.weakdeps] + AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" + +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "5e19e1e4fa3e71b774ce746274364aef0234634e" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.11.1+0" + +[[deps.IfElse]] +git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.1" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + +[[deps.KernelAbstractions]] +deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] +git-tree-sha1 = "2d8be6e915f96ecdf33b134795f4e82bbbd01326" +repo-rev = "vc/simd_loop" +repo-url = "https://github.com/JuliaGPU/KernelAbstractions.jl.git" +uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +version = "0.9.22" +weakdeps = ["EnzymeCore"] + + [deps.KernelAbstractions.extensions] + EnzymeExt = "EnzymeCore" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] +git-tree-sha1 = "2470e69781ddd70b8878491233cd09bc1bd7fc96" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "8.1.0" + + [deps.LLVM.extensions] + BFloat16sExt = "BFloat16s" + + [deps.LLVM.weakdeps] + BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "597d1c758c9ae5d985ba4202386a607c675ee700" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.31+0" + +[[deps.LayoutPointers]] +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "a9eaadb366f5493a5654e843864c13d8b107548c" +uuid = "10f19ff3-798f-405d-979b-55457f8fc047" +version = "0.1.17" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.4.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.6.4+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "a2d09619db4e765091ee5c6ffe8872849de0feea" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.28" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.LoopVectorization]] +deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "8084c25a250e00ae427a379a5b607e7aed96a2dd" +uuid = "bdcacae8-1622-11e9-2a5c-532679323890" +version = "0.12.171" +weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] + + [deps.LoopVectorization.extensions] + ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.LuxCore]] +deps = ["Compat", "DispatchDoctor", "Functors", "Random", "Setfield"] +git-tree-sha1 = "43aa0d448e99c16d452bfbac1f27acd2cbcf4ef1" +uuid = "bb33d45b-7691-41d6-9220-0943567d0623" +version = "0.1.25" + + [deps.LuxCore.extensions] + LuxCoreArrayInterfaceReverseDiffExt = ["ArrayInterface", "ReverseDiff"] + LuxCoreArrayInterfaceTrackerExt = ["ArrayInterface", "Tracker"] + LuxCoreChainRulesCoreExt = "ChainRulesCore" + LuxCoreEnzymeCoreExt = "EnzymeCore" + LuxCoreMLDataDevicesExt = "MLDataDevices" + + [deps.LuxCore.weakdeps] + ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.MLDataDevices]] +deps = ["Adapt", "ChainRulesCore", "Functors", "Preferences", "Random", "UnrolledUtilities"] +git-tree-sha1 = "d1c1dcb3499f0c7f7f4167db5f7f68bfba6ed939" +uuid = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" +version = "1.0.3" + + [deps.MLDataDevices.extensions] + MLDataDevicesAMDGPUExt = "AMDGPU" + MLDataDevicesCUDAExt = "CUDA" + MLDataDevicesFillArraysExt = "FillArrays" + MLDataDevicesGPUArraysExt = "GPUArrays" + MLDataDevicesMetalExt = ["GPUArrays", "Metal"] + MLDataDevicesRecursiveArrayToolsExt = "RecursiveArrayTools" + MLDataDevicesReverseDiffExt = "ReverseDiff" + MLDataDevicesSparseArraysExt = "SparseArrays" + MLDataDevicesTrackerExt = "Tracker" + MLDataDevicesZygoteExt = "Zygote" + MLDataDevicescuDNNExt = ["CUDA", "cuDNN"] + MLDataDevicesoneAPIExt = ["GPUArrays", "oneAPI"] + + [deps.MLDataDevices.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" + GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" + Metal = "dde4c033-4e86-420c-a63e-0dd931031962" + RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" + oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.13" + +[[deps.ManualMemory]] +git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" +uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" +version = "0.1.8" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+1" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.1.10" + +[[deps.NNlib]] +deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"] +git-tree-sha1 = "ae52c156a63bb647f80c26319b104e99e5977e51" +uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +version = "0.9.22" + + [deps.NNlib.extensions] + NNlibAMDGPUExt = "AMDGPU" + NNlibCUDACUDNNExt = ["CUDA", "cuDNN"] + NNlibCUDAExt = "CUDA" + NNlibEnzymeCoreExt = "EnzymeCore" + NNlibFFTWExt = "FFTW" + + [deps.NNlib.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" + cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.Octavian]] +deps = ["CPUSummary", "IfElse", "LoopVectorization", "ManualMemory", "PolyesterWeave", "PrecompileTools", "Static", "StaticArrayInterface", "ThreadingUtilities", "VectorizationBase"] +git-tree-sha1 = "92410e147bdcaf9e2f982a7cc9b1341fc5dd1a77" +uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" +version = "0.3.28" + + [deps.Octavian.extensions] + ForwardDiffExt = "ForwardDiff" + HyperDualNumbersExt = "HyperDualNumbers" + + [deps.Octavian.weakdeps] + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97" + +[[deps.OffsetArrays]] +git-tree-sha1 = "1a27764e945a152f7ca7efa04de513d473e9542e" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.14.1" +weakdeps = ["Adapt"] + + [deps.OffsetArrays.extensions] + OffsetArraysAdaptExt = "Adapt" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.23+4" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+2" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.10.0" + +[[deps.Polyester]] +deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Static", "StaticArrayInterface", "StrideArraysCore", "ThreadingUtilities"] +git-tree-sha1 = "6d38fea02d983051776a856b7df75b30cf9a3c1f" +uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" +version = "0.7.16" + +[[deps.PolyesterWeave]] +deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] +git-tree-sha1 = "645bed98cd47f72f67316fd42fc47dee771aefcd" +uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" +version = "0.2.2" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SIMDTypes]] +git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" +uuid = "94e857df-77ce-4151-89e5-788b33177be4" +version = "0.1.0" + +[[deps.SLEEFPirates]] +deps = ["IfElse", "Static", "VectorizationBase"] +git-tree-sha1 = "456f610ca2fbd1c14f5fcf31c6bfadc55e7d66e0" +uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" +version = "0.6.43" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.10.0" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "2f5d4697f21388cbe1ff299430dd169ef97d7e14" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.4.0" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.Static]] +deps = ["CommonWorldInvalidations", "IfElse", "PrecompileTools"] +git-tree-sha1 = "87d51a3ee9a4b0d2fe054bdd3fc2436258db2603" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "1.1.1" + +[[deps.StaticArrayInterface]] +deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "PrecompileTools", "Static"] +git-tree-sha1 = "96381d50f1ce85f2663584c8e886a6ca97e60554" +uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" +version = "1.8.0" +weakdeps = ["OffsetArrays", "StaticArrays"] + + [deps.StaticArrayInterface.extensions] + StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" + StaticArrayInterfaceStaticArraysExt = "StaticArrays" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "eeafab08ae20c62c44c8399ccb9354a04b80db50" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.7" +weakdeps = ["ChainRulesCore", "Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.3" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.10.0" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StrideArraysCore]] +deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"] +git-tree-sha1 = "f35f6ab602df8413a50c4a25ca14de821e8605fb" +uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" +version = "0.5.7" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.2.1+1" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.ThreadingUtilities]] +deps = ["ManualMemory"] +git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27" +uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" +version = "0.5.2" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.UnrolledUtilities]] +git-tree-sha1 = "b73f7a7c25a2618c5052c80ed32b07e471cc6cb0" +uuid = "0fe1646c-419e-43be-ac14-22321958931b" +version = "0.1.2" + +[[deps.UnsafeAtomics]] +git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" +uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" +version = "0.2.1" + +[[deps.UnsafeAtomicsLLVM]] +deps = ["LLVM", "UnsafeAtomics"] +git-tree-sha1 = "bf2c553f25e954a9b38c9c0593a59bb13113f9e5" +uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" +version = "0.1.5" + +[[deps.VectorizationBase]] +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "e7f5b81c65eb858bed630fe006837b935518aca5" +uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +version = "0.21.70" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.8.0+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.52.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" diff --git a/benchmarks/Manifest.toml b/benchmarks/Manifest.toml new file mode 100644 index 00000000..cd4570d3 --- /dev/null +++ b/benchmarks/Manifest.toml @@ -0,0 +1,919 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.10.4" +manifest_format = "2.0" +project_hash = "74d2967fd38cf01b43e118897b8c2259a7785591" + +[[deps.AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.5.0" + + [deps.AbstractFFTs.extensions] + AbstractFFTsChainRulesCoreExt = "ChainRulesCore" + AbstractFFTsTestExt = "Test" + + [deps.AbstractFFTs.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "6a55b747d1812e699320963ffde36f1ebdda4099" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "4.0.4" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.ArrayInterface]] +deps = ["Adapt", "LinearAlgebra"] +git-tree-sha1 = "f54c23a5d304fb87110de62bace7777d59088c34" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "7.15.0" + + [deps.ArrayInterface.extensions] + ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceCUDSSExt = "CUDSS" + ArrayInterfaceChainRulesExt = "ChainRules" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceReverseDiffExt = "ReverseDiff" + ArrayInterfaceSparseArraysExt = "SparseArrays" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Atomix]] +deps = ["UnsafeAtomics"] +git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" +uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" +version = "0.1.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.BenchmarkTools]] +deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"] +git-tree-sha1 = "f1dff6729bc61f4d49e140da1af55dcd1ac97b2f" +uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +version = "1.5.0" + +[[deps.BitTwiddlingConvenienceFunctions]] +deps = ["Static"] +git-tree-sha1 = "f21cfd4950cb9f0587d5067e69405ad2acd27b87" +uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" +version = "0.1.6" + +[[deps.CEnum]] +git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.5.0" + +[[deps.CPUSummary]] +deps = ["CpuId", "IfElse", "PrecompileTools", "Static"] +git-tree-sha1 = "5a97e67919535d6841172016c9530fd69494e5ec" +uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +version = "0.2.6" + +[[deps.ChainRules]] +deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"] +git-tree-sha1 = "227985d885b4dbce5e18a96f9326ea1e836e5a03" +uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" +version = "1.69.0" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "71acdbf594aab5bbb2cec89b208c41b4c411e49f" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.24.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CloseOpenIntervals]] +deps = ["Static", "StaticArrayInterface"] +git-tree-sha1 = "05ba0d07cd4fd8b7a39541e31a7b0254704ea581" +uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" +version = "0.1.13" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.CommonWorldInvalidations]] +git-tree-sha1 = "ae52d1c52048455e85a387fbee9be553ec2b68d0" +uuid = "f70d9fcc-98c5-4d4a-abd7-e4cdeebd8ca8" +version = "1.0.0" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.16.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.ConstructionBase]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "a33b7ced222c6165f624a3f2b55945fac5a598d9" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.7" + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseStaticArraysExt = "StaticArrays" + + [deps.ConstructionBase.weakdeps] + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.16.0" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.DispatchDoctor]] +deps = ["MacroTools", "Preferences"] +git-tree-sha1 = "c2acd1de2c4c357928f9fb6b60b402d914621378" +uuid = "8d63f2c5-f18a-4cf2-ba9d-b3f60fc568c8" +version = "0.4.14" +weakdeps = ["ChainRulesCore", "EnzymeCore"] + + [deps.DispatchDoctor.extensions] + DispatchDoctorChainRulesCoreExt = "ChainRulesCore" + DispatchDoctorEnzymeCoreExt = "EnzymeCore" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.EnzymeCore]] +git-tree-sha1 = "8f205a601760f4798a10f138c3940f0451d95188" +uuid = "f151be2c-9106-41f4-ab19-57ee4f262869" +version = "0.7.8" +weakdeps = ["Adapt"] + + [deps.EnzymeCore.extensions] + AdaptExt = "Adapt" + +[[deps.FastClosures]] +git-tree-sha1 = "acebe244d53ee1b461970f8910c235b259e772ef" +uuid = "9aa1b823-49e4-5ca5-8b0f-3971ec8bab6a" +version = "0.3.2" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.FillArrays]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "fd0002c0b5362d7eb952450ad5eb742443340d6e" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.12.0" + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + + [deps.FillArrays.weakdeps] + PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.36" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.Functors]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "64d8e93700c7a3f28f717d265382d52fac9fa1c1" +uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" +version = "0.4.12" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[deps.GPUArrays]] +deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] +git-tree-sha1 = "a74c3f1cf56a3dfcdef0605f8cdb7015926aae30" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "10.3.0" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.6" + +[[deps.HostCPUFeatures]] +deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] +git-tree-sha1 = "8e070b599339d622e9a081d17230d74a5c473293" +uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" +version = "0.1.17" + +[[deps.Hwloc]] +deps = ["CEnum", "Hwloc_jll", "Printf"] +git-tree-sha1 = "6a3d80f31ff87bc94ab22a7b8ec2f263f9a6a583" +uuid = "0e44f5e4-bd66-52a0-8798-143a42290a1d" +version = "3.3.0" + + [deps.Hwloc.extensions] + HwlocTrees = "AbstractTrees" + + [deps.Hwloc.weakdeps] + AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" + +[[deps.Hwloc_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "5e19e1e4fa3e71b774ce746274364aef0234634e" +uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" +version = "2.11.1+0" + +[[deps.IRTools]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "950c3717af761bc3ff906c2e8e52bd83390b6ec2" +uuid = "7869d1d1-7146-5819-86e3-90919afe41df" +version = "0.4.14" + +[[deps.IfElse]] +git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.1" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.KernelAbstractions]] +deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] +git-tree-sha1 = "2d8be6e915f96ecdf33b134795f4e82bbbd01326" +repo-rev = "vc/simd_loop" +repo-url = "https://github.com/JuliaGPU/KernelAbstractions.jl.git" +uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +version = "0.9.22" +weakdeps = ["EnzymeCore"] + + [deps.KernelAbstractions.extensions] + EnzymeExt = "EnzymeCore" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] +git-tree-sha1 = "2470e69781ddd70b8878491233cd09bc1bd7fc96" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "8.1.0" + + [deps.LLVM.extensions] + BFloat16sExt = "BFloat16s" + + [deps.LLVM.weakdeps] + BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "597d1c758c9ae5d985ba4202386a607c675ee700" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.31+0" + +[[deps.LayoutPointers]] +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "a9eaadb366f5493a5654e843864c13d8b107548c" +uuid = "10f19ff3-798f-405d-979b-55457f8fc047" +version = "0.1.17" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.4.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.6.4+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "a2d09619db4e765091ee5c6ffe8872849de0feea" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.28" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.LoopVectorization]] +deps = ["ArrayInterface", "CPUSummary", "CloseOpenIntervals", "DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "8084c25a250e00ae427a379a5b607e7aed96a2dd" +uuid = "bdcacae8-1622-11e9-2a5c-532679323890" +version = "0.12.171" +weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] + + [deps.LoopVectorization.extensions] + ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.LuxCore]] +deps = ["Compat", "DispatchDoctor", "Functors", "Random", "Setfield"] +git-tree-sha1 = "43aa0d448e99c16d452bfbac1f27acd2cbcf4ef1" +uuid = "bb33d45b-7691-41d6-9220-0943567d0623" +version = "0.1.25" + + [deps.LuxCore.extensions] + LuxCoreArrayInterfaceReverseDiffExt = ["ArrayInterface", "ReverseDiff"] + LuxCoreArrayInterfaceTrackerExt = ["ArrayInterface", "Tracker"] + LuxCoreChainRulesCoreExt = "ChainRulesCore" + LuxCoreEnzymeCoreExt = "EnzymeCore" + LuxCoreMLDataDevicesExt = "MLDataDevices" + + [deps.LuxCore.weakdeps] + ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.LuxLib]] +deps = ["ArrayInterface", "ChainRulesCore", "Compat", "CpuId", "DispatchDoctor", "EnzymeCore", "FastClosures", "ForwardDiff", "Hwloc", "KernelAbstractions", "LinearAlgebra", "LoopVectorization", "LuxCore", "MLDataDevices", "Markdown", "NNlib", "Octavian", "Polyester", "Random", "Reexport", "SLEEFPirates", "Static", "StaticArraysCore", "Statistics", "UnrolledUtilities"] +git-tree-sha1 = "69aa4114d67d522724eb059f2918898108a0a1ad" +uuid = "82251201-b29d-42c6-8e01-566dec8acb11" +version = "0.3.48" + + [deps.LuxLib.extensions] + LuxLibAppleAccelerateExt = "AppleAccelerate" + LuxLibBLISBLASExt = "BLISBLAS" + LuxLibCUDAExt = "CUDA" + LuxLibMKLExt = "MKL" + LuxLibReverseDiffExt = "ReverseDiff" + LuxLibTrackerAMDGPUExt = ["AMDGPU", "Tracker"] + LuxLibTrackerExt = "Tracker" + LuxLibcuDNNExt = ["CUDA", "cuDNN"] + + [deps.LuxLib.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + AppleAccelerate = "13e28ba4-7ad8-5781-acae-3021b1ed3924" + BLISBLAS = "6f275bd8-fec0-4d39-945b-7e95a765fa1e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" + +[[deps.MLDataDevices]] +deps = ["Adapt", "ChainRulesCore", "Functors", "Preferences", "Random", "UnrolledUtilities"] +git-tree-sha1 = "d1c1dcb3499f0c7f7f4167db5f7f68bfba6ed939" +uuid = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40" +version = "1.0.3" + + [deps.MLDataDevices.extensions] + MLDataDevicesAMDGPUExt = "AMDGPU" + MLDataDevicesCUDAExt = "CUDA" + MLDataDevicesFillArraysExt = "FillArrays" + MLDataDevicesGPUArraysExt = "GPUArrays" + MLDataDevicesMetalExt = ["GPUArrays", "Metal"] + MLDataDevicesRecursiveArrayToolsExt = "RecursiveArrayTools" + MLDataDevicesReverseDiffExt = "ReverseDiff" + MLDataDevicesSparseArraysExt = "SparseArrays" + MLDataDevicesTrackerExt = "Tracker" + MLDataDevicesZygoteExt = "Zygote" + MLDataDevicescuDNNExt = ["CUDA", "cuDNN"] + MLDataDevicesoneAPIExt = ["GPUArrays", "oneAPI"] + + [deps.MLDataDevices.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" + GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" + Metal = "dde4c033-4e86-420c-a63e-0dd931031962" + RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" + ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" + oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.13" + +[[deps.ManualMemory]] +git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" +uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" +version = "0.1.8" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+1" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.1.10" + +[[deps.NNlib]] +deps = ["Adapt", "Atomix", "ChainRulesCore", "GPUArraysCore", "KernelAbstractions", "LinearAlgebra", "Pkg", "Random", "Requires", "Statistics"] +git-tree-sha1 = "ae52c156a63bb647f80c26319b104e99e5977e51" +uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +version = "0.9.22" + + [deps.NNlib.extensions] + NNlibAMDGPUExt = "AMDGPU" + NNlibCUDACUDNNExt = ["CUDA", "cuDNN"] + NNlibCUDAExt = "CUDA" + NNlibEnzymeCoreExt = "EnzymeCore" + NNlibFFTWExt = "FFTW" + + [deps.NNlib.weakdeps] + AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" + cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.Octavian]] +deps = ["CPUSummary", "IfElse", "LoopVectorization", "ManualMemory", "PolyesterWeave", "PrecompileTools", "Static", "StaticArrayInterface", "ThreadingUtilities", "VectorizationBase"] +git-tree-sha1 = "92410e147bdcaf9e2f982a7cc9b1341fc5dd1a77" +uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4" +version = "0.3.28" + + [deps.Octavian.extensions] + ForwardDiffExt = "ForwardDiff" + HyperDualNumbersExt = "HyperDualNumbers" + + [deps.Octavian.weakdeps] + ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" + HyperDualNumbers = "50ceba7f-c3ee-5a84-a6e8-3ad40456ec97" + +[[deps.OffsetArrays]] +git-tree-sha1 = "1a27764e945a152f7ca7efa04de513d473e9542e" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.14.1" +weakdeps = ["Adapt"] + + [deps.OffsetArrays.extensions] + OffsetArraysAdaptExt = "Adapt" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.23+4" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+2" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.3" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.1" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.10.0" + +[[deps.Polyester]] +deps = ["ArrayInterface", "BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "ManualMemory", "PolyesterWeave", "Static", "StaticArrayInterface", "StrideArraysCore", "ThreadingUtilities"] +git-tree-sha1 = "6d38fea02d983051776a856b7df75b30cf9a3c1f" +uuid = "f517fe37-dbe3-4b94-8317-1923a5111588" +version = "0.7.16" + +[[deps.PolyesterWeave]] +deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] +git-tree-sha1 = "645bed98cd47f72f67316fd42fc47dee771aefcd" +uuid = "1d0040c9-8b98-4ee7-8388-3f51789ca0ad" +version = "0.2.2" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.Profile]] +deps = ["Printf"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.RealDot]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9" +uuid = "c1ae055f-0cd5-4b69-90a6-9a35b1a98df9" +version = "0.1.0" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SIMDTypes]] +git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" +uuid = "94e857df-77ce-4151-89e5-788b33177be4" +version = "0.1.0" + +[[deps.SLEEFPirates]] +deps = ["IfElse", "Static", "VectorizationBase"] +git-tree-sha1 = "456f610ca2fbd1c14f5fcf31c6bfadc55e7d66e0" +uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" +version = "0.6.43" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.10.0" + +[[deps.SparseInverseSubset]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "52962839426b75b3021296f7df242e40ecfc0852" +uuid = "dc90abb0-5640-4711-901d-7e5b23a2fada" +version = "0.1.2" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "2f5d4697f21388cbe1ff299430dd169ef97d7e14" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.4.0" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.StableRNGs]] +deps = ["Random"] +git-tree-sha1 = "83e6cce8324d49dfaf9ef059227f91ed4441a8e5" +uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" +version = "1.0.2" + +[[deps.Static]] +deps = ["CommonWorldInvalidations", "IfElse", "PrecompileTools"] +git-tree-sha1 = "87d51a3ee9a4b0d2fe054bdd3fc2436258db2603" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "1.1.1" + +[[deps.StaticArrayInterface]] +deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "PrecompileTools", "Static"] +git-tree-sha1 = "96381d50f1ce85f2663584c8e886a6ca97e60554" +uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" +version = "1.8.0" +weakdeps = ["OffsetArrays", "StaticArrays"] + + [deps.StaticArrayInterface.extensions] + StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" + StaticArrayInterfaceStaticArraysExt = "StaticArrays" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "eeafab08ae20c62c44c8399ccb9354a04b80db50" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.7" +weakdeps = ["ChainRulesCore", "Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.3" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.10.0" + +[[deps.StrideArraysCore]] +deps = ["ArrayInterface", "CloseOpenIntervals", "IfElse", "LayoutPointers", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface", "ThreadingUtilities"] +git-tree-sha1 = "f35f6ab602df8413a50c4a25ca14de821e8605fb" +uuid = "7792a7ef-975c-4747-a70f-980b88e8d1da" +version = "0.5.7" + +[[deps.StructArrays]] +deps = ["ConstructionBase", "DataAPI", "Tables"] +git-tree-sha1 = "f4dc295e983502292c4c3f951dbb4e985e35b3be" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.6.18" +weakdeps = ["Adapt", "GPUArraysCore", "SparseArrays", "StaticArrays"] + + [deps.StructArrays.extensions] + StructArraysAdaptExt = "Adapt" + StructArraysGPUArraysCoreExt = "GPUArraysCore" + StructArraysSparseArraysExt = "SparseArrays" + StructArraysStaticArraysExt = "StaticArrays" + +[[deps.SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.2.1+1" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.12.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.ThreadingUtilities]] +deps = ["ManualMemory"] +git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27" +uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" +version = "0.5.2" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.UnrolledUtilities]] +git-tree-sha1 = "b73f7a7c25a2618c5052c80ed32b07e471cc6cb0" +uuid = "0fe1646c-419e-43be-ac14-22321958931b" +version = "0.1.2" + +[[deps.UnsafeAtomics]] +git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" +uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" +version = "0.2.1" + +[[deps.UnsafeAtomicsLLVM]] +deps = ["LLVM", "UnsafeAtomics"] +git-tree-sha1 = "bf2c553f25e954a9b38c9c0593a59bb13113f9e5" +uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" +version = "0.1.5" + +[[deps.VectorizationBase]] +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "e7f5b81c65eb858bed630fe006837b935518aca5" +uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +version = "0.21.70" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.Zygote]] +deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "GPUArrays", "GPUArraysCore", "IRTools", "InteractiveUtils", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "PrecompileTools", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "ZygoteRules"] +git-tree-sha1 = "19c586905e78a26f7e4e97f81716057bd6b1bc54" +uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" +version = "0.6.70" + + [deps.Zygote.extensions] + ZygoteColorsExt = "Colors" + ZygoteDistancesExt = "Distances" + ZygoteTrackerExt = "Tracker" + + [deps.Zygote.weakdeps] + Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" + Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.ZygoteRules]] +deps = ["ChainRulesCore", "MacroTools"] +git-tree-sha1 = "27798139afc0a2afa7b1824c206d5e87ea587a00" +uuid = "700de1a5-db45-46bc-99cf-38207098b444" +version = "0.2.5" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.8.0+1" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.52.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" diff --git a/src/impl/batchnorm.jl b/src/impl/batchnorm.jl index 9ef017e6..76a96854 100644 --- a/src/impl/batchnorm.jl +++ b/src/impl/batchnorm.jl @@ -74,138 +74,10 @@ end end function batchnorm_affine_normalize_internal!( - y::AbstractArray{yT, 3}, opmode::LoopedArrayOp, act::F, x::AbstractArray{xT, 3}, - μ::AbstractVector, σ²::AbstractVector, γ::Optional{<:AbstractVector}, - β::Optional{<:AbstractVector}, ϵ::Real, - γ′::Optional{<:AbstractVector}=nothing) where {F, xT, yT} - N = size(y, 2) - γ′ = γ′ === nothing ? - similar(x, promote_type(Utils.eltype(γ), Utils.eltype(σ²), Utils.eltype(ϵ)), N) : - γ′ - β′ = similar(x, promote_type(Utils.eltype(β), Utils.eltype(σ²), Utils.eltype(ϵ)), N) - - compute_batchnorm_scale_bias!(γ′, β′, γ, β, μ, σ², ϵ) - - if Utils.known(Traits.fuse_cpu_activation(act)) - apply_batchnorm_scale_bias_act_cpu!(y, γ′, β′, x, act) - else - apply_batchnorm_scale_bias_cpu!(y, γ′, β′, x) - activation!(y, opmode, act, y) - end - - return -end - -function compute_batchnorm_scale_bias!(γ′, β′, γ, β, μ, σ², ϵ) - if γ === nothing && β === nothing - @simd ivdep for J in indices((γ′, β′, μ, σ²)) - @fastmath @inbounds γ′[J] = inv(sqrt(σ²[J] + ϵ)) - @fastmath @inbounds β′[J] = -μ[J] * γ′[J] - end - else - @simd ivdep for J in indices((γ′, β′, γ, β, μ, σ²)) - @fastmath @inbounds γ′[J] = γ[J] / sqrt(σ²[J] + ϵ) - @fastmath @inbounds β′[J] = β[J] - μ[J] * γ′[J] - end - end -end - -function apply_batchnorm_scale_bias_act_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}, σ::F) where {F, xT, yT} - if size(y, 1) == 1 - apply_batchnorm_scale_bias_act_2d_serial_cpu!(y, γ′, β′, x, σ) - else - apply_batchnorm_scale_bias_act_3d_threaded_cpu!(y, γ′, β′, x, σ) - end -end - -@inline function apply_batchnorm_scale_bias_act_2d_serial_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}, σ::F) where {F, xT, yT} - for K in indices((x, y), 3) - @simd ivdep for J in indices((x, y, γ′, β′), (2, 2, 1, 1)) - @fastmath @inbounds y[1, J, K] = σ(x[1, J, K] * γ′[J] + β′[J]) - end - end -end - -@inline function apply_batchnorm_scale_bias_act_3d_threaded_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}, σ::F) where {F, xT, yT} - @batch for K in indices((x, y), 3) - for J in indices((x, y, γ′, β′), (2, 2, 1, 1)) - @simd ivdep for I in indices((x, y), 1) - @fastmath @inbounds y[I, J, K] = σ(x[I, J, K] * γ′[J] + β′[J]) - end - end - end -end - -@inline function apply_batchnorm_scale_bias_act_3d_serial_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}, σ::F) where {F, xT, yT} - for K in indices((x, y), 3) - for J in indices((x, y, γ′, β′), (2, 2, 1, 1)) - @simd ivdep for I in indices((x, y), 1) - @fastmath @inbounds y[I, J, K] = σ(x[I, J, K] * γ′[J] + β′[J]) - end - end - end -end - -Utils.@enzyme_reverse_alternative apply_batchnorm_scale_bias_act_3d_threaded_cpu! apply_batchnorm_scale_bias_act_3d_serial_cpu! - -function apply_batchnorm_scale_bias_cpu!(y::AbstractArray{yT, 3}, γ′::AbstractVector, - β′::AbstractVector, x::AbstractArray{xT, 3}) where {xT, yT} - if size(y, 1) == 1 - apply_batchnorm_scale_bias_2d_serial_cpu!(y, γ′, β′, x) - else - apply_batchnorm_scale_bias_3d_threaded_cpu!(y, γ′, β′, x) - end -end - -@inline function apply_batchnorm_scale_bias_2d_serial_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}) where {xT, yT} - for K in indices((x, y), 3) - @simd ivdep for J in indices((x, y, γ′, β′), (2, 2, 1, 1)) - @fastmath @inbounds y[1, J, K] = x[1, J, K] * γ′[J] + β′[J] - end - end -end - -@inline function apply_batchnorm_scale_bias_3d_threaded_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}) where {xT, yT} - @batch for K in indices((x, y), 3) - for J in indices((x, y, γ′, β′), (2, 2, 1, 1)) - @simd ivdep for I in indices((x, y), 1) - @fastmath @inbounds y[I, J, K] = x[I, J, K] * γ′[J] + β′[J] - end - end - end -end - -@inline function apply_batchnorm_scale_bias_3d_serial_cpu!( - y::AbstractArray{yT, 3}, γ′::AbstractVector, β′::AbstractVector, - x::AbstractArray{xT, 3}) where {xT, yT} - for K in indices((x, y), 3) - for J in indices((x, y, γ′, β′), (2, 2, 1, 1)) - @simd ivdep for I in indices((x, y), 1) - @fastmath @inbounds y[I, J, K] = x[I, J, K] * γ′[J] + β′[J] - end - end - end -end - -Utils.@enzyme_reverse_alternative apply_batchnorm_scale_bias_3d_threaded_cpu! apply_batchnorm_scale_bias_3d_serial_cpu! - -function batchnorm_affine_normalize_internal!( - y::AbstractArray{yT, 3}, ::GPUBroadcastOp, act::F, x::AbstractArray{xT, 3}, - μ::AbstractVector, σ²::AbstractVector, γ::Optional{<:AbstractVector}, - β::Optional{<:AbstractVector}, ϵ::Real, - γ′::Optional{<:AbstractVector}=nothing) where {F, xT, yT} + y::AbstractArray{yT, 3}, ::Union{GPUBroadcastOp, LoopedArrayOp}, act::F, + x::AbstractArray{xT, 3}, μ::AbstractVector, σ²::AbstractVector, + γ::Optional{<:AbstractVector}, β::Optional{<:AbstractVector}, + ϵ::Real, γ′::Optional{<:AbstractVector}=nothing) where {F, yT, xT} backend = KA.get_backend(y) Utils.run_ka_kernel( batchnorm_affine_normalize_internal_kernel!, backend, nothing, size(y), @@ -213,7 +85,7 @@ function batchnorm_affine_normalize_internal!( KA.synchronize(backend) end -@kernel cpu=false inbounds=true function batchnorm_affine_normalize_internal_kernel!( +@kernel inbounds=true function batchnorm_affine_normalize_internal_kernel!( y::AbstractArray{<:Number, 3}, @Const(γ′::Nothing), @Const(f), @Const(x), @Const(μ), @Const(σ²), @Const(γ::Nothing), @Const(β::Nothing), @Const(ϵ)) @@ -223,7 +95,7 @@ end y[i, j, k] = f(muladd(x[i, j, k], γ′′, β′)) end -@kernel cpu=false inbounds=true function batchnorm_affine_normalize_internal_kernel!( +@kernel inbounds=true function batchnorm_affine_normalize_internal_kernel!( y::AbstractArray{<:Number, 3}, γ′::AbstractVector{<:Number}, @Const(f), @Const(x), @Const(μ), @Const(σ²), @Const(γ::Nothing), @Const(β::Nothing), @Const(ϵ)) @@ -233,7 +105,7 @@ end y[i, j, k] = f(muladd(x[i, j, k], γ′[j], β′)) end -@kernel cpu=false inbounds=true function batchnorm_affine_normalize_internal_kernel!( +@kernel inbounds=true function batchnorm_affine_normalize_internal_kernel!( y::AbstractArray{<:Number, 3}, @Const(γ′::Nothing), @Const(f), @Const(x), @Const(μ), @Const(σ²), @Const(γ), @Const(β), @Const(ϵ)) @@ -243,7 +115,7 @@ end y[i, j, k] = f(muladd(x[i, j, k], γ′′, β′)) end -@kernel cpu=false inbounds=true function batchnorm_affine_normalize_internal_kernel!( +@kernel inbounds=true function batchnorm_affine_normalize_internal_kernel!( y::AbstractArray{<:Number, 3}, γ′::AbstractVector{<:Number}, @Const(f), @Const(x), @Const(μ), @Const(σ²), @Const(γ), @Const(β), @Const(ϵ)) @@ -281,107 +153,6 @@ function CRC.rrule( return z, ∇batchnorm_affine_normalize_internal end -function ∇batchnorm_affine_normalize(opmode::LoopedArrayOp, ∂y::AbstractArray{∂yT, 3}, - x::AbstractArray{xT, 3}, μ::AbstractVector, σ²::AbstractVector, - γ::Optional{<:AbstractVector}, β::Optional{<:AbstractVector}, ϵ::Real, - γ′::AbstractVector) where {∂yT, xT} - ∂x, ∂μ, ∂σ² = similar(x), similar(μ), similar(σ²) - ∂γ = γ === nothing ? nothing : similar(γ) - ∂β = β === nothing ? nothing : similar(β) - - ∇batchnorm_affine_normalize_cpu!(∂x, ∂μ, ∂σ², ∂γ, ∂β, ∂y, x, μ, σ², γ, ϵ, γ′) - - ∂γ = γ === nothing ? ∂∅ : ∂γ - ∂β = β === nothing ? ∂∅ : ∂β - - return ∂x, ∂μ, ∂σ², ∂γ, ∂β -end - -function ∇batchnorm_affine_normalize_cpu!( - ∂x::AbstractArray{∂xT, 3}, ∂μ::AbstractVector{∂μT}, - ∂σ²::AbstractVector{∂σ²T}, ::Nothing, ::Nothing, ∂y::AbstractArray{∂yT, 3}, - x::AbstractArray{xT, 3}, μ::AbstractVector, σ²::AbstractVector, ::Nothing, - ϵ::Real, γ′::AbstractVector) where {∂xT, ∂μT, ∂σ²T, ∂yT, xT} - half = eltype(∂σ²)(0.5) - - fill!(∂μ, 0) - fill!(∂σ², 0) - - if size(∂y, 1) == 1 - @fastmath @inbounds for K in indices(∂y, 3) - @simd for J in indices(∂y, 2) - idenom = γ′[J] - idenom² = idenom^2 - - xμ = x[1, J, K] - μ[J] - - ∂x[1, J, K] = ∂y[1, J, K] * idenom - ∂μ[J] -= ∂x[1, J, K] - ∂σ²[J] -= ∂x[1, J, K] * xμ * half * idenom² - end - end - else - @fastmath @inbounds for K in indices(∂y, 3), J in indices(∂y, 2) - idenom = γ′[J] - idenom² = idenom^2 - - @simd for I in indices(∂y, 1) - xμ = x[I, J, K] - μ[J] - - ∂x[I, J, K] = ∂y[I, J, K] * idenom - ∂μ[J] -= ∂x[I, J, K] - ∂σ²[J] -= ∂x[I, J, K] * xμ * half * idenom² - end - end - end -end - -function ∇batchnorm_affine_normalize_cpu!( - ∂x::AbstractArray{∂xT, 3}, ∂μ::AbstractVector{∂μT}, - ∂σ²::AbstractVector{∂σ²T}, ∂γ::AbstractVector{∂γT}, - ∂β::AbstractVector{∂βT}, ∂y::AbstractArray{∂yT, 3}, x::AbstractArray{xT, 3}, - μ::AbstractVector, σ²::AbstractVector, γ::AbstractVector, ϵ::Real, - γ′::AbstractVector) where {∂xT, ∂μT, ∂σ²T, ∂γT, ∂βT, ∂yT, xT} - half = eltype(∂σ²)(0.5) - - fill!(∂μ, 0) - fill!(∂σ², 0) - fill!(∂γ, 0) - fill!(∂β, 0) - - if size(∂y, 1) == 1 - @fastmath @inbounds for K in indices(∂y, 3) - @simd for J in indices(∂y, 2) - idenom = inv(sqrt(σ²[J] + ϵ)) - idenom² = idenom^2 - - xμ = x[1, J, K] - μ[J] - - ∂x[1, J, K] = ∂y[1, J, K] * γ′[J] - ∂μ[J] -= ∂x[1, J, K] - ∂σ²[J] -= ∂x[1, J, K] * xμ * half * idenom² - ∂γ[J] += ∂y[1, J, K] * xμ * idenom - ∂β[J] += ∂y[1, J, K] - end - end - else - @fastmath @inbounds for K in indices(∂y, 3), J in indices(∂y, 2) - idenom = inv(sqrt(σ²[J] + ϵ)) - idenom² = idenom^2 - - @simd for I in indices(∂y, 1) - xμ = x[I, J, K] - μ[J] - - ∂x[I, J, K] = ∂y[I, J, K] * γ′[J] - ∂μ[J] -= ∂x[I, J, K] - ∂σ²[J] -= ∂x[I, J, K] * xμ * half * idenom² - ∂γ[J] += ∂y[I, J, K] * xμ * idenom - ∂β[J] += ∂y[I, J, K] - end - end - end -end - function ∇batchnorm_affine_normalize( opmode::AbstractInternalArrayOpMode, ∂y::AbstractArray{∂yT, 3}, x::AbstractArray{xT, 3}, μ::AbstractVector, σ²::AbstractVector, @@ -402,7 +173,7 @@ end function ∇batchnorm_affine_normalize!( ∂x::AbstractArray{∂xT, 3}, ∂σ²::AbstractArray{∂σ²T, 3}, - ∂γ::Optional{<:AbstractArray{<:Any, 3}}, ::GPUBroadcastOp, + ∂γ::Optional{<:AbstractArray{<:Any, 3}}, ::Union{GPUBroadcastOp, LoopedArrayOp}, ∂y::AbstractArray{∂yT, 3}, x::AbstractArray{xT, 3}, μ::AbstractVector, σ²::AbstractVector, γ::Optional{<:AbstractVector}, ϵ::Real, γ′::AbstractVector) where {∂xT, ∂σ²T, ∂yT, xT} @@ -413,7 +184,7 @@ function ∇batchnorm_affine_normalize!( KA.synchronize(backend) end -@kernel cpu=false inbounds=true function ∇batchnorm_affine_normalize_kernel!( +@kernel inbounds=true function ∇batchnorm_affine_normalize_kernel!( ∂x, ∂σ², @Const(∂γ::Nothing), @Const(∂y), @Const(x), @Const(μ), @Const(σ²), @Const(ϵ), @Const(γ′)) i, j, k = @index(Global, NTuple) @@ -426,7 +197,7 @@ end ∂σ²[i, j, k] = -∂x[i, j, k] * xμ * idenom² / 2 end -@kernel cpu=false inbounds=true function ∇batchnorm_affine_normalize_kernel!( +@kernel inbounds=true function ∇batchnorm_affine_normalize_kernel!( ∂x, ∂σ², ∂γ, @Const(∂y), @Const(x), @Const(μ), @Const(σ²), @Const(ϵ), @Const(γ′)) i, j, k = @index(Global, NTuple) diff --git a/src/impl/groupnorm.jl b/src/impl/groupnorm.jl index b736aa8b..71d75cb3 100644 --- a/src/impl/groupnorm.jl +++ b/src/impl/groupnorm.jl @@ -66,150 +66,10 @@ end end function groupnorm_affine_normalize_internal!( - y::AbstractArray{yT, 4}, opmode::LoopedArrayOp, act::F, x::AbstractArray{xT, 4}, - μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, - γ::Optional{<:AbstractArray{<:Any, 4}}, β::Optional{<:AbstractArray{<:Any, 4}}, - ϵ::Real) where {F, xT, yT, μT, σ²T} - if Utils.known(Traits.fuse_cpu_activation(act)) - groupnorm_affine_normalize_act_cpu!(y, x, μ, σ², γ, β, ϵ, act) - else - groupnorm_affine_normalize_cpu!(y, x, μ, σ², γ, β, ϵ) - activation!(y, opmode, act, y) - end - return -end - -function groupnorm_affine_normalize_act_cpu!( - y::AbstractArray{yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, - σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, - β::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real, act::F) where {F, xT, yT, μT, σ²T} - if size(y, 1) == 1 - groupnorm_affine_normalize_act_3d_serial_cpu!(y, x, μ, σ², γ, β, ϵ, act) - else - groupnorm_affine_normalize_act_4d_serial_cpu!(y, x, μ, σ², γ, β, ϵ, act) - end -end - -function groupnorm_affine_normalize_act_3d_serial_cpu!( - y::AbstractArray{yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, - σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, - β::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real, σ::F) where {F, xT, yT, μT, σ²T} - if γ === nothing && β === nothing - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - γ′ = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - β′ = -μ[1, 1, K, L] * γ′ - @simd ivdep for J in indices(y, 2) - y[1, J, K, L] = σ(x[1, J, K, L] * γ′ + β′) - end - end - else - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - @simd for J in indices(y, 2) - γ′ = γ[1, J, K, 1] * idenom - β′ = β[1, J, K, 1] - μ[1, 1, K, L] * γ′ - y[1, J, K, L] = σ(x[1, J, K, L] * γ′ + β′) - end - end - end -end - -function groupnorm_affine_normalize_act_4d_serial_cpu!( - y::AbstractArray{yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, - σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, - β::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real, σ::F) where {F, xT, yT, μT, σ²T} - if γ === nothing && β === nothing - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - γ′ = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - β′ = -μ[1, 1, K, L] * γ′ - for J in indices(y, 2) - @simd ivdep for I in indices(y, 1) - y[I, J, K, L] = σ(x[I, J, K, L] * γ′ + β′) - end - end - end - else - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - for J in indices(y, 2) - γ′ = γ[1, J, K, 1] * idenom - β′ = β[1, J, K, 1] - μ[1, 1, K, L] * γ′ - @simd ivdep for I in indices(y, 1) - y[I, J, K, L] = σ(x[I, J, K, L] * γ′ + β′) - end - end - end - end -end - -function groupnorm_affine_normalize_cpu!( - y::AbstractArray{yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, - σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, - β::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real) where {xT, yT, μT, σ²T} - if size(y, 1) == 1 - groupnorm_affine_normalize_3d_serial_cpu!(y, x, μ, σ², γ, β, ϵ) - else - groupnorm_affine_normalize_4d_serial_cpu!(y, x, μ, σ², γ, β, ϵ) - end -end - -@inline function groupnorm_affine_normalize_3d_serial_cpu!( - y::AbstractArray{yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, - σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, - β::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real) where {xT, yT, μT, σ²T} - if γ === nothing && β === nothing - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - γ′ = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - β′ = -μ[1, 1, K, L] * γ′ - @simd ivdep for J in indices(y, 2) - y[1, J, K, L] = x[1, J, K, L] * γ′ + β′ - end - end - else - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - @simd for J in indices(y, 2) - γ′ = γ[1, J, K, 1] * idenom - β′ = β[1, J, K, 1] - μ[1, 1, K, L] * γ′ - y[1, J, K, L] = x[1, J, K, L] * γ′ + β′ - end - end - end -end - -@inline function groupnorm_affine_normalize_4d_serial_cpu!( - y::AbstractArray{yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, - σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, - β::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real) where {xT, yT, μT, σ²T} - if γ === nothing && β === nothing - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - γ′ = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - β′ = -μ[1, 1, K, L] * γ′ - for J in indices(y, 2) - @simd ivdep for I in indices(y, 1) - y[I, J, K, L] = x[I, J, K, L] * γ′ + β′ - end - end - end - else - @fastmath @inbounds for L in indices(y, 4), K in indices(y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - for J in indices(y, 2) - γ′ = γ[1, J, K, 1] * idenom - β′ = β[1, J, K, 1] - μ[1, 1, K, L] * γ′ - @simd ivdep for I in indices(y, 1) - y[I, J, K, L] = x[I, J, K, L] * γ′ + β′ - end - end - end - end -end - -function groupnorm_affine_normalize_internal!( - y::AbstractArray{yT, 4}, ::GPUBroadcastOp, act::F, x::AbstractArray{xT, 4}, - μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, + y::AbstractArray{yT, 4}, ::Union{GPUBroadcastOp, LoopedArrayOp}, act::F, + x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, β::Optional{<:AbstractArray{<:Any, 4}}, - ϵ::Real) where {F, xT, yT, μT, σ²T} + ϵ::Real) where {F, yT, xT, μT, σ²T} backend = KA.get_backend(y) Utils.run_ka_kernel( groupnorm_affine_normalize_kernel!, backend, nothing, size(y), @@ -217,7 +77,7 @@ function groupnorm_affine_normalize_internal!( KA.synchronize(backend) end -@kernel cpu=false inbounds=true function groupnorm_affine_normalize_kernel!( +@kernel inbounds=true function groupnorm_affine_normalize_kernel!( y::AbstractArray{<:Number, 4}, @Const(f), @Const(x), @Const(μ), @Const(σ²), @Const(γ::Nothing), @Const(β::Nothing), @Const(ϵ)) i, j, k, l = @index(Global, NTuple) @@ -226,7 +86,7 @@ end y[i, j, k, l] = f(muladd(x[i, j, k, l], γ′, β′)) end -@kernel cpu=false inbounds=true function groupnorm_affine_normalize_kernel!( +@kernel inbounds=true function groupnorm_affine_normalize_kernel!( y::AbstractArray{<:Number, 4}, @Const(f), @Const(x), @Const(μ), @Const(σ²), @Const(γ), @Const(β), @Const(ϵ)) i, j, k, l = @index(Global, NTuple) @@ -279,117 +139,9 @@ function ∇groupnorm_affine_normalize( return ∂x, ∂μ, ∂σ², ∂γ, ∂β end -function ∇groupnorm_affine_normalize(::LoopedArrayOp, ∂y::AbstractArray{∂yT, 4}, - x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, - γ::Optional{<:AbstractArray{<:Any, 4}}, β::Optional{<:AbstractArray{<:Any, 4}}, - ϵ::Real) where {∂yT, xT, μT, σ²T} - ∂x, ∂μ, ∂σ² = similar(x), similar(μ), similar(σ²) - ∂γ = γ === nothing ? nothing : similar(γ) - ∂β = β === nothing ? nothing : similar(β) - - ∇groupnorm_affine_normalize_cpu!(∂x, ∂μ, ∂σ², ∂γ, ∂β, ∂y, x, μ, σ², γ, ϵ) - - ∂γ = γ === nothing ? ∂∅ : ∂γ - ∂β = β === nothing ? ∂∅ : ∂β - - return ∂x, ∂μ, ∂σ², ∂γ, ∂β -end - -function ∇groupnorm_affine_normalize_cpu!( - ∂x::AbstractArray{∂xT, 4}, ∂μ::AbstractArray{∂μT, 4}, ∂σ²::AbstractArray{∂σ²T, 4}, - ::Nothing, ::Nothing, ∂y::AbstractArray{∂yT, 4}, x::AbstractArray{xT, 4}, - μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, ::Nothing, - ϵ::Real) where {∂xT, ∂μT, ∂σ²T, ∂yT, xT, μT, σ²T} - half = eltype(∂σ²)(0.5) - - fill!(∂μ, 0) - fill!(∂σ², 0) - - if size(∂y, 1) == 1 - @fastmath @inbounds for L in indices(∂y, 4), K in indices(∂y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - idenom² = idenom^2 - - @simd for J in indices(∂y, 2) - xμ = x[1, J, K, L] - μ[1, 1, K, L] - - ∂x[1, J, K, L] = ∂y[1, J, K, L] * idenom - ∂μ[1, 1, K, L] -= ∂x[1, J, K, L] - ∂σ²[1, 1, K, L] -= ∂x[1, J, K, L] * xμ * half * idenom² - end - end - else - @fastmath @inbounds for L in indices(∂y, 4), K in indices(∂y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - idenom² = idenom^2 - - for J in indices(∂y, 2) - @simd for I in indices(∂y, 1) - xμ = x[I, J, K, L] - μ[1, 1, K, L] - - ∂x[I, J, K, L] = ∂y[I, J, K, L] * idenom - ∂μ[1, 1, K, L] -= ∂x[I, J, K, L] - ∂σ²[1, 1, K, L] -= ∂x[I, J, K, L] * xμ * half * idenom² - end - end - end - end -end - -function ∇groupnorm_affine_normalize_cpu!( - ∂x::AbstractArray{∂xT, 4}, ∂μ::AbstractArray{∂μT, 4}, ∂σ²::AbstractArray{∂σ²T, 4}, - ∂γ::AbstractArray{∂γT, 4}, ∂β::AbstractArray{∂βT, 4}, ∂y::AbstractArray{∂yT, 4}, - x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, - γ::AbstractArray{γT, 4}, - ϵ::Real) where {∂xT, ∂μT, ∂σ²T, ∂γT, ∂βT, ∂yT, xT, μT, σ²T, γT} - half = eltype(∂σ²)(0.5) - - fill!(∂μ, 0) - fill!(∂σ², 0) - fill!(∂γ, 0) - fill!(∂β, 0) - - if size(∂y, 1) == 1 - @fastmath @inbounds for L in indices(∂y, 4), K in indices(∂y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - idenom² = idenom^2 - - @simd for J in indices(∂y, 2) - γ′ = γ[1, J, K, 1] * idenom - - xμ = x[1, J, K, L] - μ[1, 1, K, L] - - ∂x[1, J, K, L] = ∂y[1, J, K, L] * γ′ - ∂μ[1, 1, K, L] -= ∂x[1, J, K, L] - ∂σ²[1, 1, K, L] -= ∂x[1, J, K, L] * xμ * half * idenom² - ∂γ[1, J, K, 1] += ∂y[1, J, K, L] * xμ * idenom - ∂β[1, J, K, 1] += ∂y[1, J, K, L] - end - end - else - @fastmath @inbounds for L in indices(∂y, 4), K in indices(∂y, 3) - idenom = inv(sqrt(σ²[1, 1, K, L] + ϵ)) - idenom² = idenom^2 - - for J in indices(∂y, 2) - γ′ = γ[1, J, K, 1] * idenom - @simd for I in indices(∂y, 1) - xμ = x[I, J, K, L] - μ[1, 1, K, L] - - ∂x[I, J, K, L] = ∂y[I, J, K, L] * γ′ - ∂μ[1, 1, K, L] -= ∂x[I, J, K, L] - ∂σ²[1, 1, K, L] -= ∂x[I, J, K, L] * xμ * half * idenom² - ∂γ[1, J, K, 1] += ∂y[I, J, K, L] * xμ * idenom - ∂β[1, J, K, 1] += ∂y[I, J, K, L] - end - end - end - end -end - function ∇groupnorm_affine_normalize!( ∂x::AbstractArray{∂xT, 4}, ∂σ²::AbstractArray{∂σ²T, 4}, - ∂γ::Optional{<:AbstractArray{<:Any, 4}}, ::GPUBroadcastOp, + ∂γ::Optional{<:AbstractArray{<:Any, 4}}, ::Union{GPUBroadcastOp, LoopedArrayOp}, ∂y::AbstractArray{∂yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4}, σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}}, ϵ::Real) where {∂xT, ∂σ²T, ∂yT, xT, μT, σ²T} @@ -400,7 +152,7 @@ function ∇groupnorm_affine_normalize!( KA.synchronize(backend) end -@kernel cpu=false inbounds=true function ∇groupnorm_affine_normalize_kernel!( +@kernel inbounds=true function ∇groupnorm_affine_normalize_kernel!( ∂x, ∂σ², @Const(∂γ::Nothing), @Const(∂y), @Const(x), @Const(μ), @Const(σ²), @Const(ϵ), @Const(γ::Nothing)) i, j, k, l = @index(Global, NTuple) @@ -410,7 +162,7 @@ end ∂σ²[i, j, k, l] = ∂x[i, j, k, l] * (μ[1, 1, k, l] - x[i, j, k, l]) * idenom * idenom / 2 end -@kernel cpu=false inbounds=true function ∇groupnorm_affine_normalize_kernel!( +@kernel inbounds=true function ∇groupnorm_affine_normalize_kernel!( ∂x, ∂σ², ∂γ, @Const(∂y), @Const(x), @Const(μ), @Const(σ²), @Const(ϵ), @Const(γ)) i, j, k, l = @index(Global, NTuple) diff --git a/src/impl/normalization.jl b/src/impl/normalization.jl index 0e7ef4c6..b3b406fa 100644 --- a/src/impl/normalization.jl +++ b/src/impl/normalization.jl @@ -35,21 +35,8 @@ end CRC.@non_differentiable update_running_statistics(::Any...) -function update_running_statistics!(rμₙ, rσ²ₙ, ::LoopedArrayOp, rμ, rσ², μ, σ², m₁, m₂, m₃) - update_running_statistics_simd_loop!( - rμₙ, rσ²ₙ, LoopedArrayOp(), rμ, rσ², μ, σ², m₁, m₂, m₃) - return -end - -function update_running_statistics_simd_loop!( - rμₙ, rσ²ₙ, ::LoopedArrayOp, rμ, rσ², μ, σ², m₁, m₂, m₃) - @simd ivdep for I in indices((rμₙ, rσ²ₙ)) - rμₙ[I] = m₃ * rμ[I] + m₁ * μ[I] - rσ²ₙ[I] = m₃ * rσ²[I] + m₂ * σ²[I] - end -end - -function update_running_statistics!(rμₙ, rσ²ₙ, ::GPUBroadcastOp, rμ, rσ², μ, σ², m₁, m₂, m₃) +function update_running_statistics!( + rμₙ, rσ²ₙ, ::Union{LoopedArrayOp, GPUBroadcastOp}, rμ, rσ², μ, σ², m₁, m₂, m₃) backend = KA.get_backend(rμₙ) Utils.run_ka_kernel( update_running_statistics_kernel!, backend, nothing, size(rμₙ), @@ -58,7 +45,7 @@ function update_running_statistics!(rμₙ, rσ²ₙ, ::GPUBroadcastOp, rμ, rσ return end -@kernel cpu=false inbounds=true function update_running_statistics_kernel!( +@kernel inbounds=true function update_running_statistics_kernel!( rμₙ, rσ²ₙ, @Const(rμ), @Const(rσ²), @Const(μ), @Const(σ²), @Const(m₁), @Const(m₂), @Const(m₃)) I = @index(Global)