From 60afeb7b05323f5626b395dc8ffd07eb5d773c15 Mon Sep 17 00:00:00 2001 From: Ankit Kala Date: Thu, 15 Jun 2023 17:54:28 +0530 Subject: [PATCH] Enforce 512 byte document ID limit in bulk updates (#8039) * Enforce doc id limit on UpdateRequests as well Signed-off-by: Ankit Kala * PR comments Signed-off-by: Ankit Kala * Address comments 2 Signed-off-by: Ankit Kala * Move changelog entry Signed-off-by: Andrew Ross --------- Signed-off-by: Ankit Kala Signed-off-by: Andrew Ross Co-authored-by: Andrew Ross --- CHANGELOG.md | 1 + .../action/bulk/BulkIntegrationIT.java | 35 ++++++++++++++ .../opensearch/action/DocWriteRequest.java | 20 ++++++++ .../opensearch/action/index/IndexRequest.java | 8 +--- .../action/update/UpdateRequest.java | 3 ++ .../action/bulk/BulkRequestTests.java | 46 +++++++++++++++++++ 6 files changed, 106 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e532ab57a470c..b964dc5844757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -127,6 +127,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fixing error: adding a new/forgotten parameter to the configuration for checking the config on startup in plugins/repository-s3 #7924 +- Enforce 512 byte document ID limit in bulk updates ([#8039](https://github.com/opensearch-project/OpenSearch/pull/8039)) ### Security diff --git a/server/src/internalClusterTest/java/org/opensearch/action/bulk/BulkIntegrationIT.java b/server/src/internalClusterTest/java/org/opensearch/action/bulk/BulkIntegrationIT.java index 2489fc4894742..ec8d02a8c57b7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/bulk/BulkIntegrationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/bulk/BulkIntegrationIT.java @@ -43,6 +43,7 @@ import org.opensearch.action.ingest.PutPipelineRequest; import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.action.support.replication.ReplicationRequest; +import org.opensearch.action.update.UpdateRequest; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.bytes.BytesReference; import org.opensearch.core.xcontent.XContentBuilder; @@ -218,4 +219,38 @@ public void testDeleteIndexWhileIndexing() throws Exception { assertFalse(thread.isAlive()); } + public void testDocIdTooLong() { + String index = "testing"; + createIndex(index); + String validId = String.join("", Collections.nCopies(512, "a")); + String invalidId = String.join("", Collections.nCopies(513, "a")); + + // Index Request + IndexRequest indexRequest = new IndexRequest(index).source(Collections.singletonMap("foo", "baz")); + // Valid id shouldn't throw any exception + assertFalse(client().prepareBulk().add(indexRequest.id(validId)).get().hasFailures()); + // Invalid id should throw the ActionRequestValidationException + validateDocIdLimit(() -> client().prepareBulk().add(indexRequest.id(invalidId)).get()); + + // Update Request + UpdateRequest updateRequest = new UpdateRequest(index, validId).doc("reason", "no source"); + // Valid id shouldn't throw any exception + assertFalse(client().prepareBulk().add(updateRequest).get().hasFailures()); + // Invalid id should throw the ActionRequestValidationException + validateDocIdLimit(() -> client().prepareBulk().add(updateRequest.id(invalidId)).get()); + } + + private void validateDocIdLimit(Runnable runner) { + try { + runner.run(); + fail("Request validation for docId didn't fail"); + } catch (ActionRequestValidationException e) { + assertEquals( + 1, + e.validationErrors().stream().filter(msg -> msg.contains("is too long, must be no longer than 512 bytes but was")).count() + ); + } catch (Exception e) { + fail("Request validation for docId failed with different exception: " + e); + } + } } diff --git a/server/src/main/java/org/opensearch/action/DocWriteRequest.java b/server/src/main/java/org/opensearch/action/DocWriteRequest.java index ed59b5e95a01f..2810452cd583d 100644 --- a/server/src/main/java/org/opensearch/action/DocWriteRequest.java +++ b/server/src/main/java/org/opensearch/action/DocWriteRequest.java @@ -32,6 +32,7 @@ package org.opensearch.action; import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.UnicodeUtil; import org.opensearch.action.delete.DeleteRequest; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.support.IndicesOptions; @@ -248,6 +249,25 @@ static DocWriteRequest readDocumentRequest(@Nullable ShardId shardId, StreamI return docWriteRequest; } + /** + * Validates whether the doc id length is under the limit. + * @param id DocId to verify + * @param validationException containing all the validation errors. + * @return validationException + */ + static ActionRequestValidationException validateDocIdLength(String id, ActionRequestValidationException validationException) { + if (id != null) { + int docIdLength = UnicodeUtil.calcUTF16toUTF8Length(id, 0, id.length()); + if (docIdLength > 512) { + return addValidationError( + "id [" + id + "] is too long, must be no longer than 512 bytes but was: " + docIdLength, + validationException + ); + } + } + return validationException; + } + /** write a document write (index/delete/update) request*/ static void writeDocumentRequest(StreamOutput out, DocWriteRequest request) throws IOException { if (request instanceof IndexRequest) { diff --git a/server/src/main/java/org/opensearch/action/index/IndexRequest.java b/server/src/main/java/org/opensearch/action/index/IndexRequest.java index 2182b8d1ffa4c..1cce14ef447f5 100644 --- a/server/src/main/java/org/opensearch/action/index/IndexRequest.java +++ b/server/src/main/java/org/opensearch/action/index/IndexRequest.java @@ -65,7 +65,6 @@ import org.opensearch.index.shard.ShardId; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -228,12 +227,7 @@ public ActionRequestValidationException validate() { validationException = DocWriteRequest.validateSeqNoBasedCASParams(this, validationException); - if (id != null && id.getBytes(StandardCharsets.UTF_8).length > 512) { - validationException = addValidationError( - "id [" + id + "] is too long, must be no longer than 512 bytes but was: " + id.getBytes(StandardCharsets.UTF_8).length, - validationException - ); - } + validationException = DocWriteRequest.validateDocIdLength(id, validationException); if (pipeline != null && pipeline.isEmpty()) { validationException = addValidationError("pipeline cannot be an empty string", validationException); diff --git a/server/src/main/java/org/opensearch/action/update/UpdateRequest.java b/server/src/main/java/org/opensearch/action/update/UpdateRequest.java index 56913b1ec5915..315ec47e30296 100644 --- a/server/src/main/java/org/opensearch/action/update/UpdateRequest.java +++ b/server/src/main/java/org/opensearch/action/update/UpdateRequest.java @@ -231,6 +231,9 @@ public ActionRequestValidationException validate() { if (doc == null && docAsUpsert) { validationException = addValidationError("doc must be specified if doc_as_upsert is enabled", validationException); } + + validationException = DocWriteRequest.validateDocIdLength(id, validationException); + return validationException; } diff --git a/server/src/test/java/org/opensearch/action/bulk/BulkRequestTests.java b/server/src/test/java/org/opensearch/action/bulk/BulkRequestTests.java index 94e866f959e95..1be82e045ec4c 100644 --- a/server/src/test/java/org/opensearch/action/bulk/BulkRequestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/BulkRequestTests.java @@ -53,6 +53,7 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -272,6 +273,51 @@ public void testBulkRequestWithRefresh() throws Exception { ); } + public void testBulkRequestInvalidDocIDDuringCreate() { + String validDocID = String.join("", Collections.nCopies(512, "a")); + String invalidDocID = String.join("", Collections.nCopies(513, "a")); + + // doc id length under limit + IndexRequest indexRequest = new IndexRequest("index").id(validDocID).source(Requests.INDEX_CONTENT_TYPE, "field", "value"); + BulkRequest bulkRequest = new BulkRequest(); + bulkRequest.add(indexRequest); + assertNull(bulkRequest.validate()); + + // doc id length over limit + indexRequest.id(invalidDocID); + ActionRequestValidationException validate = bulkRequest.validate(); + assertThat(validate, notNullValue()); + assertEquals( + 1, + validate.validationErrors() + .stream() + .filter(msg -> msg.contains("is too long, must be no longer than 512 bytes but was: ")) + .count() + ); + } + + public void testBulkRequestInvalidDocIDDuringUpdate() { + String validDocID = String.join("", Collections.nCopies(512, "a")); + String invalidDocID = String.join("", Collections.nCopies(513, "a")); + // doc id length under limit + UpdateRequest updateRequest = new UpdateRequest("index", validDocID).doc("reason", "no source"); + BulkRequest bulkRequest = new BulkRequest(); + bulkRequest.add(updateRequest); + assertNull(bulkRequest.validate()); + + // doc id length over limit + updateRequest.id(invalidDocID); + ActionRequestValidationException validate = bulkRequest.validate(); + assertThat(validate, notNullValue()); + assertEquals( + 1, + validate.validationErrors() + .stream() + .filter(msg -> msg.contains("is too long, must be no longer than 512 bytes but was: ")) + .count() + ); + } + // issue 15120 public void testBulkNoSource() throws Exception { BulkRequest bulkRequest = new BulkRequest();