Skip to content

Commit

Permalink
fix: add additional Wait for Ray API to be responsive
Browse files Browse the repository at this point in the history
This attempts to prevent the following test failure:

```
[ERROR] io.odh.test.e2e.standard.DistributedST.testDistributedWorkloadWithKueue -- Time elapsed: 402.4 s <<< ERROR!
java.io.IOException: HTTP/1.1 header parser received no bytes
```
  • Loading branch information
jiridanek committed Jun 27, 2024
1 parent 7c3ec64 commit 74051c2
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
15 changes: 15 additions & 0 deletions src/main/java/io/odh/test/platform/RayClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import lombok.SneakyThrows;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
Expand Down Expand Up @@ -42,6 +43,20 @@ public RayClient(HttpClient httpClient, String baseUrl, String oauthToken) {
this.oauthToken = oauthToken;
}

/**
 * Probes the Ray jobs endpoint to find out whether the API server answers requests.
 *
 * <p>Sends a body-less {@code HEAD} request to {@code baseUrl + "/api/jobs/"} and discards
 * whatever response body comes back; only the status code is inspected.
 *
 * <p>Because of {@code @SneakyThrows}, checked exceptions raised while sending the request
 * are rethrown to the caller without being declared.
 *
 * @return {@code true} if the response status is {@link HttpURLConnection#HTTP_OK},
 *         {@code false} for any other status
 */
@SneakyThrows
public boolean isLive() {
    final HttpRequest probe = buildRequest()
            .uri(URI.create(baseUrl + "/api/jobs/"))
            .method("HEAD", HttpRequest.BodyPublishers.noBody())
            .build();

    // The body is irrelevant for a liveness probe, so it is discarded.
    final HttpResponse<Void> reply = httpClient.send(probe, HttpResponse.BodyHandlers.discarding());
    final int status = reply.statusCode();

    return HttpURLConnection.HTTP_OK == status;
}

/**
* @return Job ID
*/
Expand Down
11 changes: 9 additions & 2 deletions src/test/java/io/odh/test/e2e/standard/DistributedST.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import io.skodjob.annotations.SuiteDoc;
import io.skodjob.annotations.TestDoc;
import io.skodjob.testframe.resources.KubeResourceManager;
import io.skodjob.testframe.wait.Wait;
import io.x_k8s.kueue.v1beta1.ClusterQueue;
import io.x_k8s.kueue.v1beta1.ClusterQueueBuilder;
import io.x_k8s.kueue.v1beta1.LocalQueue;
Expand All @@ -62,6 +63,8 @@
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;

import static io.odh.test.TestConstants.GLOBAL_POLL_INTERVAL_1_SEC;
import static io.odh.test.TestConstants.GLOBAL_STABILITY_TIME;
import static io.odh.test.TestConstants.GLOBAL_TIMEOUT;

@SuiteDoc(
Expand Down Expand Up @@ -325,11 +328,15 @@ void testDistributedWorkloadWithKueue() throws Exception {
.sslContext(TlsUtils.getSSLContextFromSecret(signingKey))
.build();

Allure.step("Wait for service availability");
Allure.step("Wait for OpenShift service availability");
TestUtils.waitForServiceNotUnavailable(httpClient, url);

RayClient ray = new RayClient(httpClient, url, oauthToken);

Allure.step("Wait for Ray API availability");
Wait.until("Ray API is available", GLOBAL_POLL_INTERVAL_1_SEC, GLOBAL_STABILITY_TIME, ray::isLive);

Allure.step("Run workload through Ray API", () -> {
RayClient ray = new RayClient(httpClient, url, oauthToken);
String jobId = ray.submitJob("expr 3 + 4");
ray.waitForJob(jobId);
String logs = ray.getJobLogs(jobId);
Expand Down

0 comments on commit 74051c2

Please sign in to comment.