Skip to content

Commit

Permalink
Raise Training operator timeout to 60 minutes
Browse files Browse the repository at this point in the history
  • Loading branch information
sutaakar authored and openshift-merge-bot[bot] committed Aug 9, 2024
1 parent bfc80ac commit ec54bd7
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions tests/kfto/core/kfto_kueue_sft_GPU_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func runMultiGpuPytorchjob(t *testing.T, modelConfigFile string) {
if IsOpenShift(test) {
// Check that GPUs were utilized recently
// That alone doesn't guarantee that the PyTorchJob generated the GPU load, but it is the best we can achieve for now
test.Eventually(openShiftPrometheusGpuUtil(test, namespace), 30*time.Minute).
test.Eventually(openShiftPrometheusGpuUtil(test, namespace), 60*time.Minute).
Should(
And(
HaveLen(numberOfGpus),
Expand All @@ -111,7 +111,7 @@ func runMultiGpuPytorchjob(t *testing.T, modelConfigFile string) {
}

// Make sure the PyTorch job succeeds
test.Eventually(PytorchJob(test, namespace, tuningJob.Name), 30*time.Minute).Should(WithTransform(PytorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
test.Eventually(PytorchJob(test, namespace, tuningJob.Name), 60*time.Minute).Should(WithTransform(PytorchJobConditionSucceeded, Equal(corev1.ConditionTrue)))
test.T().Logf("PytorchJob %s/%s ran successfully", tuningJob.Namespace, tuningJob.Name)
}

Expand Down

0 comments on commit ec54bd7

Please sign in to comment.