Skip to content

Commit

Permalink
Add exponential scaling FNV composite value hash algorithm for remote…
Browse files Browse the repository at this point in the history
… path

Signed-off-by: Ashish Singh <[email protected]>
  • Loading branch information
ashking94 committed Apr 12, 2024
1 parent 645b1f1 commit b2ec3ef
Show file tree
Hide file tree
Showing 9 changed files with 213 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
.get();
assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status());
ensureGreen(restoredIndexName1version2);
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);

// Create index with cluster setting cluster.remote_store.index.path.prefix.type as hashed_prefix.
indexSettings = getIndexSettings(1, 0).build();
createIndex(indexName2, indexSettings);
ensureGreen(indexName2);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);

// Validating that custom data has not changed for indexes which were created before the cluster setting got updated
validatePathType(indexName1, PathType.FIXED);
Expand Down Expand Up @@ -309,7 +309,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
ensureGreen(indexName2);

// Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);
}

private void validatePathType(String index, PathType pathType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import static java.util.Collections.unmodifiableMap;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinaryEncodingUsing20Bits;
import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64;

/**
* This class contains the different enums related to remote store like data categories and types, path types
Expand Down Expand Up @@ -216,13 +218,26 @@ public static PathType parseString(String pathType) {
@PublicApi(since = "2.14.0")
public enum PathHashAlgorithm {

FNV_1A(0) {
FNV_1A_BASE64(0) {
@Override
String hash(PathInput pathInput) {
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
.getName();
long hash = FNV1a.hash64(input);
return RemoteStoreUtils.longToUrlBase64(hash);
return longToUrlBase64(hash);
}
},
/**
* This hash algorithm will generate a hash value which will use the first 6 bits to create a base64 character and
* the next 14 bits to create a binary string.
*/
FNV_1A_COMPOSITE(1) {
@Override
String hash(PathInput pathInput) {
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
.getName();
long hash = FNV1a.hash64(input);
return longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(hash);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public RemoteStorePathStrategy get() {
// Min node version check ensures that we are enabling the new prefix type only when all the nodes understand it.
pathType = Version.CURRENT.compareTo(minNodeVersionSupplier.get()) <= 0 ? type : PathType.FIXED;
// If the path type is fixed, hash algorithm is not applicable.
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A;
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A_BASE64;
return new RemoteStorePathStrategy(pathType, pathHashAlgorithm);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@

import org.opensearch.common.collect.Tuple;

import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;

Expand All @@ -26,10 +29,26 @@
public class RemoteStoreUtils {
public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length();

/**
* URL safe base 64 character set. This must not be changed as this is used in deriving the base64 equivalent of binary.
*/
private static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();

/**
 * Reverse lookup for {@code URL_BASE64_CHARSET}: maps each character of the charset to its index in that array.
 * The reference is final and the map is wrapped as unmodifiable, so the table cannot change after class
 * initialization.
 */
private static final Map<Character, Integer> BASE64_CHARSET_IDX_MAP;

static {
    Map<Character, Integer> charToIndexMap = new HashMap<>();
    for (int i = 0; i < URL_BASE64_CHARSET.length; i++) {
        charToIndexMap.put(URL_BASE64_CHARSET[i], i);
    }
    BASE64_CHARSET_IDX_MAP = Collections.unmodifiableMap(charToIndexMap);
}

/**
* This method subtracts given numbers from Long.MAX_VALUE and returns a string representation of the result.
* The resultant string is guaranteed to be of the same length that of Long.MAX_VALUE. If shorter, we add left padding
* of 0s to the string.
*
* @param num number to get the inverted long string for
* @return String value of Long.MAX_VALUE - num
*/
Expand All @@ -46,6 +65,7 @@ public static String invertLong(long num) {

/**
* This method converts the given string into long and subtracts it from Long.MAX_VALUE
*
* @param str long in string format to be inverted
* @return long value of the invert result
*/
Expand All @@ -59,6 +79,7 @@ public static long invertLong(String str) {

/**
* Extracts the segment name from the provided segment file name
*
* @param filename Segment file name to parse
* @return Name of the segment that the segment file belongs to
*/
Expand All @@ -79,10 +100,9 @@ public static String getSegmentName(String filename) {
}

/**
*
* @param mdFiles List of segment/translog metadata files
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
* fn returns null if node id is not part of the file name
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
* fn returns null if node id is not part of the file name
*/
public static void verifyNoMultipleWriters(List<String> mdFiles, Function<String, Tuple<String, String>> fn) {
Map<String, String> nodesByPrimaryTermAndGen = new HashMap<>();
Expand Down Expand Up @@ -116,4 +136,43 @@ static String longToUrlBase64(long value) {
String base64Str = Base64.getUrlEncoder().encodeToString(hashBytes);
return base64Str.substring(0, base64Str.length() - 1);
}

/**
 * Decodes a URL-safe base64 string and reads the leading 8 decoded bytes as a long.
 *
 * @param base64Str URL-safe base64 text to decode
 * @return the long read from the start of the decoded bytes
 */
static long urlBase64ToLong(String base64Str) {
    final ByteBuffer decoded = ByteBuffer.wrap(Base64.getUrlDecoder().decode(base64Str));
    return decoded.getLong();
}

/**
 * Encodes the 20 most significant bits of a 64-bit hash as a composite string with two parts -
 * 1. a single URL-safe base64 character derived from the first 6 bits and 2. a binary string holding the next
 * 14 bits as '0'/'1' characters. For eg - A01000101010001.
 *
 * @param value hash value to encode
 * @return one base64 character followed by 14 binary digits
 */
static String longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(long value) {
    final int encodedBits = 20;
    return longToCompositeBase64AndBinaryEncoding(value, encodedBits);
}

/**
 * Encodes the {@code len} most significant bits of a 64-bit hash as a composite string with two parts -
 * 1. a single URL-safe base64 character derived from the first 6 bits and 2. a binary string holding the
 * following {@code len - 6} bits as '0'/'1' characters. Bits beyond the first {@code len} are not encoded.
 *
 * @param value hash value to encode
 * @param len   number of leading bits of {@code value} to encode, between 7 and 64 (both inclusive)
 * @return one base64 character followed by {@code len - 6} binary digits
 * @throws IllegalArgumentException if {@code len} is outside the range 7 to 64
 */
static String longToCompositeBase64AndBinaryEncoding(long value, int len) {
    final boolean lenInRange = len >= 7 && len <= 64;
    if (!lenInRange) {
        throw new IllegalArgumentException("In longToCompositeBase64AndBinaryEncoding, len must be between 7 and 64 (both inclusive)");
    }
    // The six most significant bits select the leading base64 character.
    final int charsetIndex = (int) (value >>> 58);
    final StringBuilder encoded = new StringBuilder(len - 5);
    encoded.append(URL_BASE64_CHARSET[charsetIndex]);
    // Bits 57 down to (64 - len) are emitted verbatim, most significant first.
    for (int bit = 57; bit >= 64 - len; bit--) {
        encoded.append(((value >>> bit) & 1L) == 0L ? '0' : '1');
    }
    return encoded.toString();
}

/**
 * Decodes a composite string made of one URL-safe base64 character followed by binary digits back into the
 * numeric value of the encoded bits. The leading character is mapped to its index in the base64 charset, that
 * index is written in binary, the remaining characters are appended as is, and the combined binary string is
 * parsed as a base-2 number.
 *
 * @param encodedValue composite encoded string; must start with a character from the URL-safe base64 charset
 * @return the long value of the combined binary string
 * @throws IllegalArgumentException if {@code encodedValue} is null or empty, or if its first character is not
 *                                  part of the URL-safe base64 charset
 */
static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) {
    if (encodedValue == null || encodedValue.isEmpty()) {
        throw new IllegalArgumentException("encodedValue must not be null or empty");
    }
    char ch = encodedValue.charAt(0);
    // Keep the lookup result boxed so an unknown character is reported explicitly instead of failing with a
    // NullPointerException during auto-unboxing.
    Integer base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch);
    if (base64BitsIntValue == null) {
        throw new IllegalArgumentException("Invalid URL base64 character [" + ch + "] at the start of [" + encodedValue + "]");
    }
    // Leading zeros of the base64 index are not padded; they do not affect the parsed numeric value.
    String binaryString = Integer.toBinaryString(base64BitsIntValue) + encodedValue.substring(1);
    return new BigInteger(binaryString, 2).longValue();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1607,7 +1607,7 @@ public void testRemoteCustomData() {
validateRemoteCustomData(
indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY),
PathHashAlgorithm.NAME,
PathHashAlgorithm.FNV_1A.name()
PathHashAlgorithm.FNV_1A_BASE64.name()
);
}

Expand Down
Loading

0 comments on commit b2ec3ef

Please sign in to comment.