From e468c07c18ccfdd61c7e5ce4fb13e5b97a58ac2b Mon Sep 17 00:00:00 2001 From: Fabrice Normandin Date: Wed, 20 Nov 2024 20:41:56 +0000 Subject: [PATCH] Update regression files Signed-off-by: Fabrice Normandin --- .../cifar10_jax_cnn_jax_image_classifier.yaml | 8 +- ...ifar10_jax_fcnet_jax_image_classifier.yaml | 8 +- ...on_mnist_jax_cnn_jax_image_classifier.yaml | 4 +- ..._mnist_jax_fcnet_jax_image_classifier.yaml | 8 +- .../cuda/llm_finetuning.yaml | 660 ++++++++++++++---- 5 files changed, 542 insertions(+), 146 deletions(-) diff --git a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_cnn_jax_image_classifier.yaml b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_cnn_jax_image_classifier.yaml index bdd5022e..ff422c2a 100644 --- a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_cnn_jax_image_classifier.yaml +++ b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_cnn_jax_image_classifier.yaml @@ -75,20 +75,20 @@ grads.network.params.5: grads.network.params.6: device: cuda:0 max: '2.984e-02' - mean: '-1.211e-09' + mean: '-5.588e-10' min: '-2.597e-02' shape: - 10 - sum: '-1.211e-08' + sum: '-5.588e-09' grads.network.params.7: device: cuda:0 max: '4.361e-02' - mean: '-3.26e-10' + mean: '-2.154e-10' min: '-4.662e-02' shape: - 256 - 10 - sum: '-8.345e-07' + sum: '-5.513e-07' outputs.logits: device: cuda:0 max: '9.608e-01' diff --git a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_fcnet_jax_image_classifier.yaml b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_fcnet_jax_image_classifier.yaml index ab334819..2fe6e1fa 100644 --- a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_fcnet_jax_image_classifier.yaml +++ b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/cifar10_jax_fcnet_jax_image_classifier.yaml @@ -37,20 +37,20 @@ grads.network.params.1: grads.network.params.2: device: cuda:0 max: '6.868e-02' - mean: '-7.451e-10' + mean: '0.e+00' min: '-3.458e-02' shape: - 10 - sum: '-7.451e-09' + sum: '0.e+00' grads.network.params.3: device: cuda:0 max: '1.497e-01' - mean: '-4.191e-10' + mean: '-2.445e-10' min: '-1.415e-01' shape: - 256 - 10 - sum: '-1.073e-06' + sum: '-6.258e-07' outputs.logits: device: cuda:0 max: '2.380e+00' diff --git a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_cnn_jax_image_classifier.yaml b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_cnn_jax_image_classifier.yaml index 97164706..7b7a7623 100644 --- a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_cnn_jax_image_classifier.yaml +++ b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_cnn_jax_image_classifier.yaml @@ -83,12 +83,12 @@ grads.network.params.6: grads.network.params.7: device: cuda:0 max: '1.382e-01' - mean: '-4.657e-10' + mean: '-1.775e-10' min: '-1.376e-01' shape: - 256 - 10 - sum: '-1.192e-06' + sum: '-4.545e-07' outputs.logits: device: cuda:0 max: '1.032e+00' diff --git a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_fcnet_jax_image_classifier.yaml b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_fcnet_jax_image_classifier.yaml index 91422898..7a36defc 100644 --- a/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_fcnet_jax_image_classifier.yaml +++ b/.regression_files/project/algorithms/jax_image_classifier_test/test_backward_pass_is_reproducible/fashion_mnist_jax_fcnet_jax_image_classifier.yaml @@ -37,20 +37,20 @@ grads.network.params.1: grads.network.params.2: device: cuda:0 max: '1.375e-01' - mean: '1.676e-09' + mean: '0.e+00' min: '-9.162e-02' shape: - 10 - sum: '1.676e-08' + sum: '0.e+00' grads.network.params.3: device: cuda:0 max: '3.990e-01' - mean: '2.328e-10' + mean: '-1.106e-10' min: '-2.054e-01' shape: - 256 - 10 - sum: '5.960e-07' + sum: '-2.831e-07' outputs.logits: device: cuda:0 max: '2.656e+00' diff --git a/.regression_files/project/algorithms/llm_finetuning_test/test_forward_pass_is_reproducible/cuda/llm_finetuning.yaml b/.regression_files/project/algorithms/llm_finetuning_test/test_forward_pass_is_reproducible/cuda/llm_finetuning.yaml index a75e1e85..41f33102 100644 --- a/.regression_files/project/algorithms/llm_finetuning_test/test_forward_pass_is_reproducible/cuda/llm_finetuning.yaml +++ b/.regression_files/project/algorithms/llm_finetuning_test/test_forward_pass_is_reproducible/cuda/llm_finetuning.yaml @@ -42,135 +42,531 @@ out.loss: min: '4.05e+00' shape: [] sum: '4.05e+00' -out.past_key_values: - '0': - '0': - device: cuda:0 - hash: -5597283837606595630 - max: '1.824e+00' - mean: '-3.677e-03' - min: '-2.004e+00' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '-7.711e+03' - '1': - device: cuda:0 - hash: -5038052215002921505 - max: '1.91e-01' - mean: '6.668e-05' - min: '-1.719e-01' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '1.398e+02' - length: 2 - '1': - '0': - device: cuda:0 - hash: 1296227023590222554 - max: '1.150e+01' - mean: '5.521e-03' - min: '-1.144e+01' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '1.158e+04' - '1': - device: cuda:0 - hash: 7673183268564812739 - max: '4.35e+00' - mean: '2.593e-03' - min: '-4.527e+00' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '5.439e+03' - length: 2 - '2': - '0': - device: cuda:0 - hash: 8593970087358618549 - max: '1.074e+01' - mean: '6.862e-02' - min: '-1.063e+01' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '1.439e+05' - '1': - device: cuda:0 - hash: -4879008825285192049 - max: '4.396e+00' - mean: '2.223e-03' - min: '-4.462e+00' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '4.662e+03' - length: 2 - '3': - '0': - device: cuda:0 - hash: -4641278451346103211 - max: '1.142e+01' - mean: '4.512e-02' - min: '-1.147e+01' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '9.462e+04' - '1': - device: cuda:0 - hash: -1495399951870456760 - max: '4.416e+00' - mean: '-3.978e-04' - min: '-4.476e+00' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '-8.342e+02' - length: 2 - '4': - '0': - device: cuda:0 - hash: -3802337921208132183 - max: '1.193e+01' - mean: '-3.041e-02' - min: '-1.091e+01' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '-6.377e+04' - '1': - device: cuda:0 - hash: 9041939600569860586 - max: '4.839e+00' - mean: '-4.185e-04' - min: '-5.120e+00' - shape: - - 8 - - 16 - - 256 - - 64 - sum: '-8.776e+02' - length: 2 - length: 24 +out.past_key_values.0.0: + device: cuda:0 + max: '1.824e+00' + mean: '-3.677e-03' + min: '-2.004e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-7.711e+03' +out.past_key_values.0.1: + device: cuda:0 + max: '1.91e-01' + mean: '6.668e-05' + min: '-1.719e-01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.398e+02' +out.past_key_values.1.0: + device: cuda:0 + max: '1.150e+01' + mean: '5.521e-03' + min: '-1.144e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.158e+04' +out.past_key_values.1.1: + device: cuda:0 + max: '4.35e+00' + mean: '2.593e-03' + min: '-4.527e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '5.439e+03' +out.past_key_values.10.0: + device: cuda:0 + max: '9.741e+00' + mean: '5.765e-02' + min: '-1.030e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.209e+05' +out.past_key_values.10.1: + device: cuda:0 + max: '5.526e+00' + mean: '1.023e-02' + min: '-5.248e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '2.145e+04' +out.past_key_values.11.0: + device: cuda:0 + max: '9.2e+00' + mean: '4.524e-02' + min: '-8.32e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '9.488e+04' +out.past_key_values.11.1: + device: cuda:0 + max: '4.676e+00' + mean: '7.994e-03' + min: '-4.337e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.676e+04' +out.past_key_values.12.0: + device: cuda:0 + max: '8.099e+00' + mean: '-4.339e-03' + min: '-8.358e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-9.101e+03' +out.past_key_values.12.1: + device: cuda:0 + max: '5.357e+00' + mean: '7.804e-03' + min: '-5.152e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.637e+04' +out.past_key_values.13.0: + device: cuda:0 + max: '8.449e+00' + mean: '-9.491e-03' + min: '-8.29e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-1.990e+04' +out.past_key_values.13.1: + device: cuda:0 + max: '4.555e+00' + mean: '3.872e-03' + min: '-5.178e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '8.120e+03' +out.past_key_values.14.0: + device: cuda:0 + max: '7.696e+00' + mean: '-4.042e-02' + min: '-8.394e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-8.477e+04' +out.past_key_values.14.1: + device: cuda:0 + max: '5.031e+00' + mean: '3.803e-03' + min: '-5.123e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '7.976e+03' +out.past_key_values.15.0: + device: cuda:0 + max: '8.108e+00' + mean: '2.572e-02' + min: '-1.000e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '5.394e+04' +out.past_key_values.15.1: + device: cuda:0 + max: '4.85e+00' + mean: '-8.774e-03' + min: '-4.855e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-1.840e+04' +out.past_key_values.16.0: + device: cuda:0 + max: '8.927e+00' + mean: '-1.676e-02' + min: '-8.144e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-3.515e+04' +out.past_key_values.16.1: + device: cuda:0 + max: '4.793e+00' + mean: '-1.081e-02' + min: '-5.854e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-2.268e+04' +out.past_key_values.17.0: + device: cuda:0 + max: '1.004e+01' + mean: '2.810e-02' + min: '-9.726e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '5.893e+04' +out.past_key_values.17.1: + device: cuda:0 + max: '5.284e+00' + mean: '5.285e-03' + min: '-5.681e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.108e+04' +out.past_key_values.18.0: + device: cuda:0 + max: '8.982e+00' + mean: '5.052e-02' + min: '-8.762e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.059e+05' +out.past_key_values.18.1: + device: cuda:0 + max: '4.748e+00' + mean: '-1.694e-03' + min: '-4.891e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-3.554e+03' +out.past_key_values.19.0: + device: cuda:0 + max: '9.813e+00' + mean: '1.273e-02' + min: '-9.707e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '2.670e+04' +out.past_key_values.19.1: + device: cuda:0 + max: '4.619e+00' + mean: '-1.924e-02' + min: '-4.700e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-4.036e+04' +out.past_key_values.2.0: + device: cuda:0 + max: '1.074e+01' + mean: '6.862e-02' + min: '-1.063e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.439e+05' +out.past_key_values.2.1: + device: cuda:0 + max: '4.396e+00' + mean: '2.223e-03' + min: '-4.462e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '4.662e+03' +out.past_key_values.20.0: + device: cuda:0 + max: '1.106e+01' + mean: '5.73e-02' + min: '-1.099e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.202e+05' +out.past_key_values.20.1: + device: cuda:0 + max: '4.813e+00' + mean: '6.246e-03' + min: '-5.477e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.31e+04' +out.past_key_values.21.0: + device: cuda:0 + max: '1.079e+01' + mean: '4.522e-02' + min: '-1.039e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '9.484e+04' +out.past_key_values.21.1: + device: cuda:0 + max: '4.631e+00' + mean: '1.379e-02' + min: '-4.818e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '2.891e+04' +out.past_key_values.22.0: + device: cuda:0 + max: '1.065e+01' + mean: '4.017e-02' + min: '-1.125e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '8.425e+04' +out.past_key_values.22.1: + device: cuda:0 + max: '5.105e+00' + mean: '5.328e-03' + min: '-4.445e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.117e+04' +out.past_key_values.23.0: + device: cuda:0 + max: '9.464e+00' + mean: '1.056e-02' + min: '-8.453e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '2.214e+04' +out.past_key_values.23.1: + device: cuda:0 + max: '4.379e+00' + mean: '-1.464e-03' + min: '-4.951e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-3.069e+03' +out.past_key_values.3.0: + device: cuda:0 + max: '1.142e+01' + mean: '4.512e-02' + min: '-1.147e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '9.462e+04' +out.past_key_values.3.1: + device: cuda:0 + max: '4.416e+00' + mean: '-3.978e-04' + min: '-4.476e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-8.342e+02' +out.past_key_values.4.0: + device: cuda:0 + max: '1.193e+01' + mean: '-3.041e-02' + min: '-1.091e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-6.377e+04' +out.past_key_values.4.1: + device: cuda:0 + max: '4.839e+00' + mean: '-4.185e-04' + min: '-5.120e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-8.776e+02' +out.past_key_values.5.0: + device: cuda:0 + max: '1.230e+01' + mean: '4.608e-02' + min: '-1.164e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '9.664e+04' +out.past_key_values.5.1: + device: cuda:0 + max: '5.191e+00' + mean: '1.398e-03' + min: '-4.402e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '2.932e+03' +out.past_key_values.6.0: + device: cuda:0 + max: '1.248e+01' + mean: '6.588e-03' + min: '-1.322e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.382e+04' +out.past_key_values.6.1: + device: cuda:0 + max: '4.148e+00' + mean: '5.169e-03' + min: '-4.295e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.084e+04' +out.past_key_values.7.0: + device: cuda:0 + max: '1.326e+01' + mean: '-1.400e-02' + min: '-1.272e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-2.936e+04' +out.past_key_values.7.1: + device: cuda:0 + max: '4.043e+00' + mean: '5.246e-03' + min: '-3.823e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '1.100e+04' +out.past_key_values.8.0: + device: cuda:0 + max: '1.329e+01' + mean: '1.543e-02' + min: '-1.222e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '3.235e+04' +out.past_key_values.8.1: + device: cuda:0 + max: '4.179e+00' + mean: '-1.275e-03' + min: '-4.191e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-2.674e+03' +out.past_key_values.9.0: + device: cuda:0 + max: '1.514e+01' + mean: '-1.051e-01' + min: '-1.701e+01' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '-2.204e+05' +out.past_key_values.9.1: + device: cuda:0 + max: '4.456e+00' + mean: '3.825e-04' + min: '-4.440e+00' + shape: + - 8 + - 16 + - 256 + - 64 + sum: '8.022e+02'