From 4992c1dcb7f339d405549132deb10b69a66355cd Mon Sep 17 00:00:00 2001 From: github-action-benchmark Date: Sat, 2 Nov 2024 21:32:31 +0000 Subject: [PATCH] add Lux Benchmarks (julia) benchmark result for 699c8d8fd1112cb1d7801f7073c85c51c96cac1d --- benchmarks/data.js | 2402 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2401 insertions(+), 1 deletion(-) diff --git a/benchmarks/data.js b/benchmarks/data.js index 1ca218fc19..b992d990c4 100644 --- a/benchmarks/data.js +++ b/benchmarks/data.js @@ -1,5 +1,5 @@ window.BENCHMARK_DATA = { - "lastUpdate": 1730265968818, + "lastUpdate": 1730583151465, "repoUrl": "https://github.com/LuxDL/Lux.jl", "entries": { "Lux Benchmarks": [ @@ -139202,6 +139202,2406 @@ window.BENCHMARK_DATA = { "extra": "gctime=131571042\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" } ] + }, + { + "commit": { + "author": { + "email": "martinuzzi.francesco@gmail.com", + "name": "Francesco Martinuzzi", + "username": "MartinuzziFrancesco" + }, + "committer": { + "email": "noreply@github.com", + "name": "GitHub", + "username": "web-flow" + }, + "distinct": true, + "id": "699c8d8fd1112cb1d7801f7073c85c51c96cac1d", + "message": "docs: fix broken link in Recurrence docs (#1001)", + "timestamp": "2024-11-02T16:20:42-04:00", + "tree_id": "6d8c27531ab5cc4c1f8559d225abd0fbb9fdfcdd", + "url": "https://github.com/LuxDL/Lux.jl/commit/699c8d8fd1112cb1d7801f7073c85c51c96cac1d" + }, + "date": 1730583131260, + "tool": "julia", + "benches": [ + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/2 thread(s)", + "value": 411375, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/4 thread(s)", + "value": 323084, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/8 thread(s)", + "value": 241750, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/CPU/1 thread(s)", + "value": 742584, + "unit": "ns", + "extra": "gctime=0\nmemory=262192\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/forward/GPU/CUDA", + "value": 43670.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 638083, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 521459, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 403792, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 908000, + "unit": "ns", + "extra": "gctime=0\nmemory=1837696\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/zygote/GPU/CUDA", + "value": 188991, + "unit": "ns", + "extra": "gctime=0\nmemory=11536\nallocs=441\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 744083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1575600\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 624667, + "unit": "ns", + "extra": "gctime=0\nmemory=1575616\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 521562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1575632\nallocs=19\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 1006750, + "unit": "ns", + "extra": "gctime=0\nmemory=1575584\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1618667, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1189854.5, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1358375, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2360458, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 211422.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12284958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13466816\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9550979.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15131392\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9390791, + "unit": "ns", + "extra": "gctime=0\nmemory=18460288\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 18060041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12911392\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1906624.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28400\nallocs=833\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17280916, + "unit": "ns", + "extra": "gctime=0\nmemory=13463104\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14329167, + "unit": "ns", + "extra": "gctime=0\nmemory=15127680\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14463083, + "unit": "ns", + "extra": "gctime=0\nmemory=18456576\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21088375, + "unit": "ns", + "extra": "gctime=0\nmemory=12907680\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 121038500, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 174268209, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 155647417, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 103289458, + "unit": "ns", + "extra": "gctime=251541\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5459016, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 592681937.5, + "unit": "ns", + "extra": "gctime=112354083.5\nmemory=556544784\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 540116125, + "unit": "ns", + "extra": "gctime=64682000\nmemory=609688144\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 460022146, + "unit": "ns", + "extra": "gctime=45921458.5\nmemory=715974864\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 623412250, + "unit": "ns", + "extra": "gctime=3029166\nmemory=538829680\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 38146652, + "unit": "ns", + "extra": "gctime=0\nmemory=28408\nallocs=850\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 751859749.5, + "unit": "ns", + "extra": "gctime=109686312.5\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 667614542, + "unit": "ns", + "extra": "gctime=16242667\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 606980437.5, + "unit": "ns", + "extra": "gctime=27235875\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 744028250, + "unit": "ns", + "extra": "gctime=3324791\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/2 thread(s)", + "value": 861145.5, + "unit": "ns", + "extra": "gctime=0\nmemory=947952\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/4 thread(s)", + "value": 826334, + "unit": "ns", + "extra": "gctime=0\nmemory=1239856\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/8 thread(s)", + "value": 1164604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=1823664\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/CPU/1 thread(s)", + "value": 959395.5, + "unit": "ns", + "extra": "gctime=0\nmemory=850000\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/forward/GPU/CUDA", + "value": 263975.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/2 thread(s)", + "value": 2730708, + "unit": "ns", + "extra": "gctime=0\nmemory=2951280\nallocs=548\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/4 thread(s)", + "value": 2455708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3535088\nallocs=588\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/8 thread(s)", + "value": 3317604.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4702704\nallocs=668\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/CPU/1 thread(s)", + "value": 3286521.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2755376\nallocs=528\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 32)/zygote/GPU/CUDA", + "value": 1038213, + "unit": "ns", + "extra": "gctime=0\nmemory=140432\nallocs=2599\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 6779291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 6365500, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 6531583, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 7635875, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 210025, + "unit": "ns", + "extra": "gctime=0\nmemory=6112\nallocs=208\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 24055375, + "unit": "ns", + "extra": "gctime=0\nmemory=21335088\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 21237625, + "unit": "ns", + "extra": "gctime=0\nmemory=22999664\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 21535792, + "unit": "ns", + "extra": "gctime=355791\nmemory=26328560\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 29721771, + "unit": "ns", + "extra": "gctime=0\nmemory=20779664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1973993, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 37426416, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 34385895.5, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 45888792, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 49367041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 13355875, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 12430958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 12600937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 15122729, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 518849, + "unit": "ns", + "extra": "gctime=0\nmemory=6128\nallocs=209\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 47134500, + "unit": "ns", + "extra": "gctime=283541\nmemory=42658560\nallocs=135\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 41671875, + "unit": "ns", + "extra": "gctime=269563\nmemory=45983808\nallocs=155\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 41125499.5, + "unit": "ns", + "extra": "gctime=398562.5\nmemory=52634048\nallocs=195\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 58336333, + "unit": "ns", + "extra": "gctime=0\nmemory=41549664\nallocs=125\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3218047, + "unit": "ns", + "extra": "gctime=0\nmemory=30824\nallocs=1000\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 74376750, + "unit": "ns", + "extra": "gctime=305520.5\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 68965000, + "unit": "ns", + "extra": "gctime=571583\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 91496292, + "unit": "ns", + "extra": "gctime=551833.5\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 98399104, + "unit": "ns", + "extra": "gctime=597375\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 286107083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 339607208, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 321183396, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 268796333, + "unit": "ns", + "extra": "gctime=251917\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 7107764, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 971792250, + "unit": "ns", + "extra": "gctime=124619083\nmemory=682500624\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 922480542, + "unit": "ns", + "extra": "gctime=77627333\nmemory=735643984\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 835684104, + "unit": "ns", + "extra": "gctime=51040916\nmemory=841930704\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 1117474583, + "unit": "ns", + "extra": "gctime=133068980\nmemory=664785520\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 33742759, + "unit": "ns", + "extra": "gctime=0\nmemory=29352\nallocs=937\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 1448964667, + "unit": "ns", + "extra": "gctime=132657209\nmemory=430580528\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 1371326875, + "unit": "ns", + "extra": "gctime=16619979\nmemory=483723888\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 1656412041, + "unit": "ns", + "extra": "gctime=0\nmemory=590010608\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 1663889000, + "unit": "ns", + "extra": "gctime=3358167\nmemory=412865424\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/2 thread(s)", + "value": 1528208, + "unit": "ns", + "extra": "gctime=0\nmemory=3180288\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/4 thread(s)", + "value": 1277937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3472192\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/8 thread(s)", + "value": 1635937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=4056000\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/CPU/1 thread(s)", + "value": 2136917, + "unit": "ns", + "extra": "gctime=0\nmemory=3082336\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/forward/GPU/CUDA", + "value": 277390.5, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/2 thread(s)", + "value": 7872250, + "unit": "ns", + "extra": "gctime=0\nmemory=9515248\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/4 thread(s)", + "value": 6588000, + "unit": "ns", + "extra": "gctime=0\nmemory=10099056\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/8 thread(s)", + "value": 7229396.5, + "unit": "ns", + "extra": "gctime=0\nmemory=11266672\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/CPU/1 thread(s)", + "value": 10478041, + "unit": "ns", + "extra": "gctime=0\nmemory=9319344\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 128)/zygote/GPU/CUDA", + "value": 1130644, + "unit": "ns", + "extra": "gctime=0\nmemory=141600\nallocs=2648\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/2 thread(s)", + "value": 177405459, + "unit": "ns", + "extra": "gctime=14936042\nmemory=89286640\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/4 thread(s)", + "value": 132546709, + "unit": "ns", + "extra": "gctime=14940000\nmemory=109329168\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/8 thread(s)", + "value": 130053917, + "unit": "ns", + "extra": "gctime=10719500.5\nmemory=149414224\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/CPU/1 thread(s)", + "value": 165568083, + "unit": "ns", + "extra": "gctime=14704437.5\nmemory=82601568\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/forward/GPU/CUDA", + "value": 4878153.5, + "unit": "ns", + "extra": "gctime=0\nmemory=139824\nallocs=4453\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/2 thread(s)", + "value": 643663333, + "unit": "ns", + "extra": "gctime=163966165\nmemory=466623152\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/4 thread(s)", + "value": 496969000, + "unit": "ns", + "extra": "gctime=88178583\nmemory=506708208\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/8 thread(s)", + "value": 558568375, + "unit": "ns", + "extra": "gctime=129629917\nmemory=586878320\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/CPU/1 thread(s)", + "value": 654929750, + "unit": "ns", + "extra": "gctime=156807500\nmemory=453253008\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 32)/zygote/GPU/CUDA", + "value": 18110009, + "unit": "ns", + "extra": "gctime=0\nmemory=814664\nallocs=13275\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/2 thread(s)", + "value": 1068292, + "unit": "ns", + "extra": "gctime=0\nmemory=1691904\nallocs=141\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/4 thread(s)", + "value": 983291, + "unit": "ns", + "extra": "gctime=0\nmemory=1983808\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/8 thread(s)", + "value": 1327542, + "unit": "ns", + "extra": "gctime=0\nmemory=2567616\nallocs=201\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/CPU/1 thread(s)", + "value": 1373792, + "unit": "ns", + "extra": "gctime=0\nmemory=1593952\nallocs=131\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/forward/GPU/CUDA", + "value": 281111, + "unit": "ns", + "extra": "gctime=0\nmemory=20704\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/2 thread(s)", + "value": 6002271, + "unit": "ns", + "extra": "gctime=0\nmemory=5138672\nallocs=554\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/4 thread(s)", + "value": 4660958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=5722480\nallocs=594\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/8 thread(s)", + "value": 5006354, + "unit": "ns", + "extra": "gctime=0\nmemory=6890096\nallocs=674\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/CPU/1 thread(s)", + "value": 5624708, + "unit": "ns", + "extra": "gctime=0\nmemory=4942768\nallocs=534\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "lenet(28, 28, 1, 64)/zygote/GPU/CUDA", + "value": 1151478.5, + "unit": "ns", + "extra": "gctime=0\nmemory=140656\nallocs=2613\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23602937.5, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 34462041.5, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 41206708, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34998812.5, + "unit": "ns", + "extra": "gctime=290646\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1861561, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 184955020.5, + "unit": "ns", + "extra": "gctime=3024895.5\nmemory=215263264\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 159249771, + "unit": "ns", + "extra": "gctime=2992542\nmemory=241836896\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 150499917, + "unit": "ns", + "extra": "gctime=1383417\nmemory=294984160\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 390550250, + "unit": "ns", + "extra": "gctime=120868250\nmemory=206404736\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 16472871, + "unit": "ns", + "extra": "gctime=0\nmemory=28656\nallocs=849\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 286689500, + "unit": "ns", + "extra": "gctime=19624750\nmemory=215259424\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 244388646, + "unit": "ns", + "extra": "gctime=5823708\nmemory=241833056\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 296120917, + "unit": "ns", + "extra": "gctime=65048166.5\nmemory=294980320\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 440533417, + "unit": "ns", + "extra": "gctime=120379875\nmemory=206400896\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/2 thread(s)", + "value": 624998521, + "unit": "ns", + "extra": "gctime=156018479.5\nmemory=316766112\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/4 thread(s)", + "value": 477642917, + "unit": "ns", + "extra": "gctime=102671541\nmemory=336808640\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/8 thread(s)", + "value": 411867812.5, + "unit": "ns", + "extra": "gctime=79387499.5\nmemory=376893696\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/CPU/1 thread(s)", + "value": 656030104, + "unit": "ns", + "extra": "gctime=76004604.5\nmemory=310081040\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/forward/GPU/CUDA", + "value": 12477905, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/2 thread(s)", + "value": 1873735437.5, + "unit": "ns", + "extra": "gctime=153558562.5\nmemory=1360017936\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/4 thread(s)", + "value": 1636021583, + "unit": "ns", + "extra": "gctime=100415208\nmemory=1400102992\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/8 thread(s)", + "value": 1558895000, + "unit": "ns", + "extra": "gctime=111921563\nmemory=1480273104\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/CPU/1 thread(s)", + "value": 2103890062.5, + "unit": "ns", + "extra": "gctime=151298500\nmemory=1346647792\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 128)/zygote/GPU/CUDA", + "value": 49609571, + "unit": "ns", + "extra": "gctime=0\nmemory=815736\nallocs=13318\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 3064313, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2106875, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2301542, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4944708.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 586671, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 25694166, + "unit": "ns", + "extra": "gctime=856042\nmemory=34790400\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 20092625.5, + "unit": "ns", + "extra": "gctime=874271\nmemory=38115648\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 19545895.5, + "unit": "ns", + "extra": "gctime=1234063\nmemory=44765888\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 36568812, + "unit": "ns", + "extra": "gctime=787583\nmemory=33681504\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3200820, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 35138250, + "unit": "ns", + "extra": "gctime=639417\nmemory=26914064\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 28420084, + "unit": "ns", + "extra": "gctime=0\nmemory=30239312\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 30280062.5, + "unit": "ns", + "extra": "gctime=979396\nmemory=36889552\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 42544854.5, + "unit": "ns", + "extra": "gctime=710895.5\nmemory=25805168\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)", + "value": 1650167, + "unit": "ns", + "extra": "gctime=0\nmemory=4494384\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)", + "value": 1195708, + "unit": "ns", + "extra": "gctime=0\nmemory=5326672\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)", + "value": 1388458, + "unit": "ns", + "extra": "gctime=0\nmemory=6991120\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)", + "value": 2498125, + "unit": "ns", + "extra": "gctime=0\nmemory=4216672\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/GPU/CUDA", + "value": 218867, + "unit": "ns", + "extra": "gctime=0\nmemory=6592\nallocs=197\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)", + "value": 12700771, + "unit": "ns", + "extra": "gctime=0\nmemory=17403184\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)", + "value": 9962124.5, + "unit": "ns", + "extra": "gctime=0\nmemory=19067760\nallocs=180\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)", + "value": 9800459, + "unit": "ns", + "extra": "gctime=0\nmemory=22396656\nallocs=220\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)", + "value": 18403354, + "unit": "ns", + "extra": "gctime=0\nmemory=16847760\nallocs=150\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA", + "value": 1957280, + "unit": "ns", + "extra": "gctime=0\nmemory=29864\nallocs=912\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)", + "value": 17702708, + "unit": "ns", + "extra": "gctime=0\nmemory=13463120\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)", + "value": 14737000, + "unit": "ns", + "extra": "gctime=0\nmemory=15127696\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)", + "value": 14865041, + "unit": "ns", + "extra": "gctime=0\nmemory=18456592\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)", + "value": 21477333.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12907696\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 23644021, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 34568146, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 41693959, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 34878583, + "unit": "ns", + "extra": "gctime=286916.5\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 1840287, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 188357375, + "unit": "ns", + "extra": "gctime=0\nmemory=278243472\nallocs=161\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 233488333, + "unit": "ns", + "extra": "gctime=70821250\nmemory=304817104\nallocs=181\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 202742250, + "unit": "ns", + "extra": "gctime=47810208\nmemory=357964368\nallocs=221\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 429823895.5, + "unit": "ns", + "extra": "gctime=154247208\nmemory=269384944\nallocs=151\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 13939550, + "unit": "ns", + "extra": "gctime=0\nmemory=30104\nallocs=927\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 291377187.5, + "unit": "ns", + "extra": "gctime=19832000\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 249397167, + "unit": "ns", + "extra": "gctime=5256583\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 300701042, + "unit": "ns", + "extra": "gctime=65049979\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 446062833, + "unit": "ns", + "extra": "gctime=119370208\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 3387083, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 3112854, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 2905708, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 3940000, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/forward/GPU/CUDA", + "value": 570283, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 7636021, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 7442000, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 7380521, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 8212750, + "unit": "ns", + "extra": "gctime=0\nmemory=16023200\nallocs=705\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/zygote/GPU/CUDA", + "value": 1364212, + "unit": "ns", + "extra": "gctime=0\nmemory=366272\nallocs=3997\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 13685833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 19094334, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 19126041, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 15649500.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 69459, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 69875, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 72083, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 68812.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/forward/GPU/CUDA", + "value": 47850, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 318833.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 285875.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 326000, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 319625, + "unit": "ns", + "extra": "gctime=0\nmemory=2101616\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/zygote/GPU/CUDA", + "value": 210144, + "unit": "ns", + "extra": "gctime=0\nmemory=13048\nallocs=519\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 447500, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 437791, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 413375, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 328959, + "unit": "ns", + "extra": "gctime=0\nmemory=1837984\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)", + "value": 3055292, + "unit": "ns", + "extra": "gctime=0\nmemory=8984112\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)", + "value": 2092833, + "unit": "ns", + "extra": "gctime=0\nmemory=10646736\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)", + "value": 2283687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=13971856\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)", + "value": 4895416.5, + "unit": "ns", + "extra": "gctime=0\nmemory=8429664\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/GPU/CUDA", + "value": 585359, + "unit": "ns", + "extra": "gctime=0\nmemory=6608\nallocs=198\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)", + "value": 23561833, + "unit": "ns", + "extra": "gctime=0\nmemory=26917776\nallocs=157\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)", + "value": 18085229, + "unit": "ns", + "extra": "gctime=0\nmemory=30243024\nallocs=177\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)", + "value": 18562458, + "unit": "ns", + "extra": "gctime=1003666\nmemory=36893264\nallocs=217\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)", + "value": 35017833, + "unit": "ns", + "extra": "gctime=0\nmemory=25808880\nallocs=147\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/GPU/CUDA", + "value": 3105298.5, + "unit": "ns", + "extra": "gctime=0\nmemory=28416\nallocs=834\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)", + "value": 33378229, + "unit": "ns", + "extra": "gctime=0\nmemory=26914048\nallocs=139\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)", + "value": 27662145.5, + "unit": "ns", + "extra": "gctime=0\nmemory=30239296\nallocs=159\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)", + "value": 27887458, + "unit": "ns", + "extra": "gctime=0\nmemory=36889536\nallocs=199\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)", + "value": 41809854.5, + "unit": "ns", + "extra": "gctime=556354\nmemory=25805152\nallocs=129\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)", + "value": 120765334, + "unit": "ns", + "extra": "gctime=0\nmemory=143677872\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)", + "value": 174275666, + "unit": "ns", + "extra": "gctime=0\nmemory=170249552\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)", + "value": 156098417, + "unit": "ns", + "extra": "gctime=0\nmemory=223392912\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)", + "value": 103997770.5, + "unit": "ns", + "extra": "gctime=319125\nmemory=134820320\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/GPU/CUDA", + "value": 5461795.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6736\nallocs=206\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)", + "value": 471697125, + "unit": "ns", + "extra": "gctime=4303542\nmemory=430584480\nallocs=158\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)", + "value": 468205208, + "unit": "ns", + "extra": "gctime=4021854\nmemory=483727840\nallocs=178\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)", + "value": 455789333, + "unit": "ns", + "extra": "gctime=52705145.5\nmemory=590014560\nallocs=218\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)", + "value": 728998166, + "unit": "ns", + "extra": "gctime=130283625\nmemory=412869376\nallocs=148\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/GPU/CUDA", + "value": 35173763, + "unit": "ns", + "extra": "gctime=0\nmemory=26960\nallocs=772\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)", + "value": 640412562.5, + "unit": "ns", + "extra": "gctime=0\nmemory=430580512\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)", + "value": 655505917, + "unit": "ns", + "extra": "gctime=22497646\nmemory=483723872\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)", + "value": 590476187.5, + "unit": "ns", + "extra": "gctime=22979542\nmemory=590010592\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)", + "value": 732032000, + "unit": "ns", + "extra": "gctime=3520375\nmemory=412865408\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1249541, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/4 thread(s)", + "value": 949958.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/8 thread(s)", + "value": 764125, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/CPU/1 thread(s)", + "value": 2000458, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/forward/GPU/CUDA", + "value": 568299.5, + "unit": "ns", + "extra": "gctime=0\nmemory=26048\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 2960792, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2611021, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2513020.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3690271, + "unit": "ns", + "extra": "gctime=0\nmemory=14447328\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/zygote/GPU/CUDA", + "value": 1319857, + "unit": "ns", + "extra": "gctime=0\nmemory=359712\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 6641791, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 6504791, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 6489375, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 4443166, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/2 thread(s)", + "value": 104249.5, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/4 thread(s)", + "value": 105166, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/8 thread(s)", + "value": 105250, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/CPU/1 thread(s)", + "value": 105625, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/forward/GPU/CUDA", + "value": 28456, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 236750, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 236541, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 237667, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 249625, + "unit": "ns", + "extra": "gctime=0\nmemory=394272\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/zygote/GPU/CUDA", + "value": 217310.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14216\nallocs=576\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 330167, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 742062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 748209, + "unit": "ns", + "extra": "gctime=0\nmemory=328800\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 721792, + "unit": "ns", + "extra": "gctime=0\nmemory=328784\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/2 thread(s)", + "value": 13583, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/4 thread(s)", + "value": 14250, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/8 thread(s)", + "value": 14354, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/CPU/1 thread(s)", + "value": 13791, + "unit": "ns", + "extra": "gctime=0\nmemory=65776\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/forward/GPU/CUDA", + "value": 28098, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 25333.5, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 25750, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 25667, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 25750, + "unit": "ns", + "extra": "gctime=0\nmemory=330416\nallocs=37\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/zygote/GPU/CUDA", + "value": 206637.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12728\nallocs=499\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 45583.5, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 45875, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 46000, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 28209, + "unit": "ns", + "extra": "gctime=0\nmemory=263392\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/2 thread(s)", + "value": 309099062.5, + "unit": "ns", + "extra": "gctime=20037500\nmemory=165113248\nallocs=987\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/4 thread(s)", + "value": 232469666.5, + "unit": "ns", + "extra": "gctime=22043854.5\nmemory=185155776\nallocs=1117\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/8 thread(s)", + "value": 216377833, + "unit": "ns", + "extra": "gctime=20789000\nmemory=225240832\nallocs=1377\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/CPU/1 thread(s)", + "value": 308762583, + "unit": "ns", + "extra": "gctime=17970750\nmemory=158428176\nallocs=922\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/forward/GPU/CUDA", + "value": 7672114, + "unit": "ns", + "extra": "gctime=0\nmemory=139952\nallocs=4461\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/2 thread(s)", + "value": 1103432604, + "unit": "ns", + "extra": "gctime=181215104\nmemory=764421648\nallocs=3373\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/4 thread(s)", + "value": 1001458208, + "unit": "ns", + "extra": "gctime=213475271.5\nmemory=804506704\nallocs=3633\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/8 thread(s)", + "value": 901919771, + "unit": "ns", + "extra": "gctime=123775104.5\nmemory=884676816\nallocs=4153\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/CPU/1 thread(s)", + "value": 1293921625, + "unit": "ns", + "extra": "gctime=319395375\nmemory=751051504\nallocs=3243\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "vgg16(32, 32, 3, 64)/zygote/GPU/CUDA", + "value": 27115979, + "unit": "ns", + "extra": "gctime=0\nmemory=814792\nallocs=13283\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/2 thread(s)", + "value": 414208.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/4 thread(s)", + "value": 415583, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/8 thread(s)", + "value": 416958, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/CPU/1 thread(s)", + "value": 418375.5, + "unit": "ns", + "extra": "gctime=0\nmemory=262384\nallocs=6\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/forward/GPU/CUDA", + "value": 48086, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/2 thread(s)", + "value": 1344667, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/4 thread(s)", + "value": 1315687, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/8 thread(s)", + "value": 1294125, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/1 thread(s)", + "value": 1745083.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2362080\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/zygote/GPU/CUDA", + "value": 221906, + "unit": "ns", + "extra": "gctime=0\nmemory=14536\nallocs=596\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/2 thread(s)", + "value": 1836104.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2100000\nallocs=21\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/4 thread(s)", + "value": 3473770.5, + "unit": "ns", + "extra": "gctime=0\nmemory=2100016\nallocs=22\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/8 thread(s)", + "value": 3450771, + "unit": "ns", + "extra": "gctime=0\nmemory=2100032\nallocs=23\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/1 thread(s)", + "value": 3660083, + "unit": "ns", + "extra": "gctime=0\nmemory=2099984\nallocs=20\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/2 thread(s)", + "value": 1396583.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/4 thread(s)", + "value": 1097333, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/8 thread(s)", + "value": 939062.5, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/CPU/1 thread(s)", + "value": 2231792, + "unit": "ns", + "extra": "gctime=0\nmemory=3174736\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/forward/GPU/CUDA", + "value": 574483.5, + "unit": "ns", + "extra": "gctime=0\nmemory=25664\nallocs=978\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/2 thread(s)", + "value": 2873417, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/4 thread(s)", + "value": 2715208, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/8 thread(s)", + "value": 2626645.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/1 thread(s)", + "value": 3813542, + "unit": "ns", + "extra": "gctime=0\nmemory=14451296\nallocs=687\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/zygote/GPU/CUDA", + "value": 1401203, + "unit": "ns", + "extra": "gctime=0\nmemory=363360\nallocs=3667\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/2 thread(s)", + "value": 8821895.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373552\nallocs=470\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/4 thread(s)", + "value": 8770604, + "unit": "ns", + "extra": "gctime=0\nmemory=9373632\nallocs=475\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/8 thread(s)", + "value": 8763666.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373712\nallocs=480\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/1 thread(s)", + "value": 6350229.5, + "unit": "ns", + "extra": "gctime=0\nmemory=9373456\nallocs=464\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/2 thread(s)", + "value": 2250, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/4 thread(s)", + "value": 2583, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/8 thread(s)", + "value": 3333, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/CPU/1 thread(s)", + "value": 2583, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/forward/GPU/CUDA", + "value": 24886, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 7292, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 7042, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 7375, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 6959, + "unit": "ns", + "extra": "gctime=0\nmemory=36928\nallocs=32\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/zygote/GPU/CUDA", + "value": 184871.5, + "unit": "ns", + "extra": "gctime=0\nmemory=12728\nallocs=499\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 8479.5, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 8667, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 8625, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 6000, + "unit": "ns", + "extra": "gctime=0\nmemory=27040\nallocs=18\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/2 thread(s)", + "value": 13291, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/4 thread(s)", + "value": 13750, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/8 thread(s)", + "value": 14521, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/CPU/1 thread(s)", + "value": 13458, + "unit": "ns", + "extra": "gctime=0\nmemory=8640\nallocs=5\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/forward/GPU/CUDA", + "value": 25102, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 29250, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 28959, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 29167, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 29208.5, + "unit": "ns", + "extra": "gctime=0\nmemory=43648\nallocs=12\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/zygote/GPU/CUDA", + "value": 194866.5, + "unit": "ns", + "extra": "gctime=0\nmemory=14216\nallocs=576\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 43333, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 94750, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 93687.5, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 90834, + "unit": "ns", + "extra": "gctime=0\nmemory=35296\nallocs=15\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/2 thread(s)", + "value": 27916, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/4 thread(s)", + "value": 28500, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/8 thread(s)", + "value": 27166, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/CPU/1 thread(s)", + "value": 46166, + "unit": "ns", + "extra": "gctime=0\nmemory=65584\nallocs=2\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/forward/GPU/CUDA", + "value": 26285, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/2 thread(s)", + "value": 44541, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/4 thread(s)", + "value": 44250, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/8 thread(s)", + "value": 44666, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/CPU/1 thread(s)", + "value": 63625, + "unit": "ns", + "extra": "gctime=0\nmemory=263104\nallocs=14\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/zygote/GPU/CUDA", + "value": 167275, + "unit": "ns", + "extra": "gctime=0\nmemory=11216\nallocs=421\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/2 thread(s)", + "value": 68458, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/4 thread(s)", + "value": 68125, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/8 thread(s)", + "value": 68708, + "unit": "ns", + "extra": "gctime=0\nmemory=197616\nallocs=17\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/1 thread(s)", + "value": 68208, + "unit": "ns", + "extra": "gctime=0\nmemory=197600\nallocs=16\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/2 thread(s)", + "value": 1834, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/4 thread(s)", + "value": 2042, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/8 thread(s)", + "value": 2250, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/CPU/1 thread(s)", + "value": 1958, + "unit": "ns", + "extra": "gctime=0\nmemory=8448\nallocs=1\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/forward/GPU/CUDA", + "value": 23492, + "unit": "ns", + "extra": "gctime=0\nmemory=1056\nallocs=47\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/2 thread(s)", + "value": 5416, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/4 thread(s)", + "value": 5333, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/8 thread(s)", + "value": 5375, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/CPU/1 thread(s)", + "value": 5291.5, + "unit": "ns", + "extra": "gctime=0\nmemory=26752\nallocs=10\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/zygote/GPU/CUDA", + "value": 171557, + "unit": "ns", + "extra": "gctime=0\nmemory=11216\nallocs=421\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/2 thread(s)", + "value": 8312.5, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/4 thread(s)", + "value": 8250, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/8 thread(s)", + "value": 8208, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/1 thread(s)", + "value": 5667, + "unit": "ns", + "extra": "gctime=0\nmemory=18384\nallocs=13\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)", + "value": 106272125, + "unit": "ns", + "extra": "gctime=0\nmemory=71841200\nallocs=49\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)", + "value": 117220895.5, + "unit": "ns", + "extra": "gctime=0\nmemory=85128016\nallocs=59\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)", + "value": 123891541, + "unit": "ns", + "extra": "gctime=0\nmemory=111701648\nallocs=79\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)", + "value": 117462292, + "unit": "ns", + "extra": "gctime=288228.5\nmemory=67411936\nallocs=44\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/GPU/CUDA", + "value": 2638590.5, + "unit": "ns", + "extra": "gctime=0\nmemory=6240\nallocs=216\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)", + "value": 390984854, + "unit": "ns", + "extra": "gctime=19982749.5\nmemory=341219216\nallocs=136\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)", + "value": 370181584, + "unit": "ns", + "extra": "gctime=20888584\nmemory=367792848\nallocs=156\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)", + "value": 344393625, + "unit": "ns", + "extra": "gctime=0\nmemory=420940112\nallocs=196\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)", + "value": 481330584, + "unit": "ns", + "extra": "gctime=20584416\nmemory=332360688\nallocs=126\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA", + "value": 15192721.5, + "unit": "ns", + "extra": "gctime=0\nmemory=31048\nallocs=1014\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)", + "value": 619409458, + "unit": "ns", + "extra": "gctime=21081375\nmemory=215259440\nallocs=140\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)", + "value": 668415479, + "unit": "ns", + "extra": "gctime=96589188\nmemory=241833072\nallocs=160\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)", + "value": 816519375, + "unit": "ns", + "extra": "gctime=72565999\nmemory=294980336\nallocs=200\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + }, + { + "name": "Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)", + "value": 916595917, + "unit": "ns", + "extra": "gctime=130700916\nmemory=206400912\nallocs=130\nparams={\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":false,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":20,\"overhead\":0,\"memory_tolerance\":0.01}" + } + ] } ] }