Skip to content

Commit

Permalink
support to increase block size to 16k
Browse files Browse the repository at this point in the history
  • Loading branch information
sarthakaggarwal97 committed Jan 26, 2024
1 parent 9f649e0 commit 21f71af
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec.Mode;
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.MapBuilder;
Expand Down Expand Up @@ -68,15 +67,15 @@ public CodecService(@Nullable MapperService mapperService, IndexSettings indexSe
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
assert null != indexSettings;
if (mapperService == null) {
codecs.put(DEFAULT_CODEC, new Lucene99Codec());
codecs.put(LZ4, new Lucene99Codec());
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Mode.BEST_COMPRESSION));
codecs.put(ZLIB, new Lucene99Codec(Mode.BEST_COMPRESSION));
codecs.put(DEFAULT_CODEC, new Lucene99CoreCodec());
codecs.put(LZ4, new Lucene99CoreCodec());
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99CoreCodec(Mode.BEST_COMPRESSION));
codecs.put(ZLIB, new Lucene99CoreCodec(Mode.BEST_COMPRESSION));
} else {
codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(DEFAULT_CODEC, new Lucene99CoreCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(LZ4, new Lucene99CoreCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99CoreCodec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZLIB, new Lucene99CoreCodec(Mode.BEST_COMPRESSION, mapperService, logger));
}
codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
for (String codec : Codec.availableCodecs()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.opensearch.index.mapper.MapperService;

/**
 * A {@link FilterCodec} registered under the name {@code "Lucene99"} that wraps another codec and
 * replaces only its stored fields format with a {@link Lucene99CoreStoredFieldsFormat}.
 */
public class Lucene99CoreCodec extends FilterCodec {

    /** Stored fields format handed out instead of the wrapped codec's own. */
    private final StoredFieldsFormat coreStoredFields;

    /**
     * Wraps a default-constructed {@link Lucene99Codec} and a default-constructed
     * {@link Lucene99CoreStoredFieldsFormat}.
     */
    public Lucene99CoreCodec() {
        super("Lucene99", new Lucene99Codec());
        this.coreStoredFields = new Lucene99CoreStoredFieldsFormat();
    }

    /**
     * Wraps a {@link Lucene99Codec} created with the given mode.
     *
     * @param mode the mode passed to both the wrapped codec and the stored fields format
     */
    public Lucene99CoreCodec(Lucene99Codec.Mode mode) {
        super("Lucene99", new Lucene99Codec(mode));
        this.coreStoredFields = new Lucene99CoreStoredFieldsFormat(mode);
    }

    /**
     * Wraps a {@link PerFieldMappingPostingFormatCodec} created from the given arguments.
     *
     * @param mode          the mode passed to both the wrapped codec and the stored fields format
     * @param mapperService the mapper service handed to the wrapped codec
     * @param logger        the logger handed to the wrapped codec
     */
    public Lucene99CoreCodec(Lucene99Codec.Mode mode, MapperService mapperService, Logger logger) {
        super("Lucene99", new PerFieldMappingPostingFormatCodec(mode, mapperService, logger));
        this.coreStoredFields = new Lucene99CoreStoredFieldsFormat(mode);
    }

    /**
     * @return the {@link Lucene99CoreStoredFieldsFormat} created by the constructor
     */
    @Override
    public StoredFieldsFormat storedFieldsFormat() {
        return this.coreStoredFields;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec;

import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene90.DeflateWithPresetDictCompressionMode;
import org.apache.lucene.codecs.lucene90.LZ4WithPresetDictCompressionMode;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

import java.io.IOException;
import java.util.Objects;

/**
 * Stored fields format that follows the layout of {@link Lucene90StoredFieldsFormat} but builds its
 * own {@link Lucene90CompressingStoredFieldsFormat} instances, using 16kB sub blocks for
 * {@link Lucene99Codec.Mode#BEST_SPEED}.
 *
 * <p>The mode used to write a segment is recorded in the segment attributes under {@link #MODE_KEY}.
 * When reading, a segment carrying {@link Lucene90StoredFieldsFormat#MODE_KEY} is also accepted.
 */
public class Lucene99CoreStoredFieldsFormat extends Lucene90StoredFieldsFormat {

    /**
     * Segment attribute key under which this format records the mode it wrote with.
     */
    public static final String MODE_KEY = Lucene99CoreStoredFieldsFormat.class.getSimpleName() + ".mode";

    // Shoot for 10 sub blocks of 48kB each.
    private static final int BEST_COMPRESSION_BLOCK_LENGTH = 10 * 48 * 1024;

    /**
     * Compression mode for {@link Lucene90StoredFieldsFormat.Mode#BEST_COMPRESSION}
     */
    public static final CompressionMode BEST_COMPRESSION_MODE = new DeflateWithPresetDictCompressionMode();

    // Shoot for 10 sub blocks of 16kB each.
    private static final int BEST_SPEED_BLOCK_LENGTH = 10 * 16 * 1024;

    /**
     * Compression mode for {@link Lucene90StoredFieldsFormat.Mode#BEST_SPEED}
     */
    public static final CompressionMode BEST_SPEED_MODE = new LZ4WithPresetDictCompressionMode();

    private final Lucene99Codec.Mode mode;

    /**
     * Creates a new instance using {@link Lucene99Codec.Mode#BEST_SPEED}.
     */
    public Lucene99CoreStoredFieldsFormat() {
        this(Lucene99Codec.Mode.BEST_SPEED);
    }

    /**
     * Creates a new instance.
     *
     * @param mode the mode to write with, either {@link Lucene99Codec.Mode#BEST_SPEED} or
     *             {@link Lucene99Codec.Mode#BEST_COMPRESSION}; must not be {@code null}
     */
    public Lucene99CoreStoredFieldsFormat(Lucene99Codec.Mode mode) {
        this.mode = Objects.requireNonNull(mode, "mode");
    }

    /**
     * Returns a {@link StoredFieldsReader} to load stored fields.
     *
     * <p>The mode is taken from the segment attributes: {@link Lucene90StoredFieldsFormat#MODE_KEY} is
     * checked first, then {@link #MODE_KEY}.
     *
     * @param directory The index directory.
     * @param si The SegmentInfo that stores segment information.
     * @param fn The fieldInfos.
     * @param context The IOContext that holds additional details on the merge/search context.
     * @return a reader created by the format matching the mode recorded on the segment
     * @throws IllegalStateException if the segment carries neither mode attribute
     * @throws IOException if the underlying format fails to open the reader
     */
    @Override
    public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
        // Look each attribute up once; the Lucene90 key takes precedence, as before.
        final String lucene90Mode = si.getAttribute(Lucene90StoredFieldsFormat.MODE_KEY);
        if (lucene90Mode != null) {
            return impl(Lucene90StoredFieldsFormat.Mode.valueOf(lucene90Mode)).fieldsReader(directory, si, fn, context);
        }

        final String coreMode = si.getAttribute(MODE_KEY);
        if (coreMode != null) {
            return impl(Lucene99Codec.Mode.valueOf(coreMode)).fieldsReader(directory, si, fn, context);
        }

        throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
    }

    /**
     * Returns a {@link StoredFieldsWriter} to write stored fields.
     *
     * <p>Records this format's mode on the segment under {@link #MODE_KEY} before creating the writer.
     *
     * @param directory The index directory.
     * @param si The SegmentInfo that stores segment information.
     * @param context The IOContext that holds additional details on the merge/search context.
     * @return a writer created by the format matching this instance's mode
     * @throws IllegalStateException if the segment already records a different mode under {@link #MODE_KEY}
     * @throws IOException if the underlying format fails to open the writer
     */
    @Override
    public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
        final String previous = si.putAttribute(MODE_KEY, mode.name());
        if (previous != null && previous.equals(mode.name()) == false) {
            // The ", " before "old=" keeps the segment name from running into the old value.
            throw new IllegalStateException(
                "found existing value for " + MODE_KEY + " for segment: " + si.name + ", old=" + previous + ", new=" + mode.name()
            );
        }
        return impl(mode).fieldsWriter(directory, si, context);
    }

    /**
     * Picks the delegate format for a mode read from {@link Lucene90StoredFieldsFormat#MODE_KEY}.
     */
    private StoredFieldsFormat impl(Lucene90StoredFieldsFormat.Mode mode) {
        switch (mode) {
            case BEST_SPEED:
                return getLZ4CompressingStoredFieldsFormat();
            case BEST_COMPRESSION:
                return getZlibCompressingStoredFieldsFormat();
            default:
                throw new AssertionError("unknown mode: " + mode);
        }
    }

    /**
     * Picks the delegate format for a {@link Lucene99Codec.Mode}.
     */
    StoredFieldsFormat impl(Lucene99Codec.Mode mode) {
        switch (mode) {
            case BEST_SPEED:
                return getLZ4CompressingStoredFieldsFormat();
            case BEST_COMPRESSION:
                return getZlibCompressingStoredFieldsFormat();
            default:
                throw new AssertionError("unknown mode: " + mode);
        }
    }

    /**
     * @return the mode this format writes with
     */
    public Lucene99Codec.Mode getMode() {
        return mode;
    }

    /** Builds the delegate format used for the {@code BEST_SPEED} modes. */
    private StoredFieldsFormat getLZ4CompressingStoredFieldsFormat() {
        return new Lucene90CompressingStoredFieldsFormat(
            "Lucene90StoredFieldsFastData",
            BEST_SPEED_MODE,
            BEST_SPEED_BLOCK_LENGTH,
            1024,
            10
        );
    }

    /** Builds the delegate format used for the {@code BEST_COMPRESSION} modes. */
    private StoredFieldsFormat getZlibCompressingStoredFieldsFormat() {
        return new Lucene90CompressingStoredFieldsFormat(
            "Lucene90StoredFieldsHighData",
            BEST_COMPRESSION_MODE,
            BEST_COMPRESSION_BLOCK_LENGTH,
            4096,
            10
        );
    }

}

0 comments on commit 21f71af

Please sign in to comment.