-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Star Tree] Lucene Abstractions for Star Tree File Formats (#15278)
--------- Signed-off-by: Sarthak Aggarwal <[email protected]>
- Loading branch information
1 parent
ed65482
commit 9e5604b
Showing
17 changed files
with
588 additions
and
7 deletions.
There are no files selected for viewing
46 changes: 46 additions & 0 deletions
46
server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumerWrapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.apache.lucene.codecs.lucene90; | ||
|
||
import org.apache.lucene.codecs.DocValuesConsumer; | ||
import org.apache.lucene.index.SegmentWriteState; | ||
|
||
import java.io.Closeable; | ||
import java.io.IOException; | ||
|
||
/** | ||
* This class is an abstraction of the {@link DocValuesConsumer} for the Star Tree index structure. | ||
* It is responsible to consume various types of document values (numeric, binary, sorted, sorted numeric, | ||
* and sorted set) for fields in the Star Tree index. | ||
* | ||
* @opensearch.experimental | ||
*/ | ||
public class Lucene90DocValuesConsumerWrapper implements Closeable { | ||
|
||
private final Lucene90DocValuesConsumer lucene90DocValuesConsumer; | ||
|
||
public Lucene90DocValuesConsumerWrapper( | ||
SegmentWriteState state, | ||
String dataCodec, | ||
String dataExtension, | ||
String metaCodec, | ||
String metaExtension | ||
) throws IOException { | ||
lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension); | ||
} | ||
|
||
public Lucene90DocValuesConsumer getLucene90DocValuesConsumer() { | ||
return lucene90DocValuesConsumer; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
lucene90DocValuesConsumer.close(); | ||
} | ||
} |
46 changes: 46 additions & 0 deletions
46
server/src/main/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducerWrapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.apache.lucene.codecs.lucene90; | ||
|
||
import org.apache.lucene.codecs.DocValuesProducer; | ||
import org.apache.lucene.index.SegmentReadState; | ||
|
||
import java.io.Closeable; | ||
import java.io.IOException; | ||
|
||
/** | ||
* This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure. | ||
* It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric, | ||
* and sorted set) for fields in the Star Tree index. | ||
* | ||
* @opensearch.experimental | ||
*/ | ||
public class Lucene90DocValuesProducerWrapper implements Closeable { | ||
|
||
private final Lucene90DocValuesProducer lucene90DocValuesProducer; | ||
|
||
public Lucene90DocValuesProducerWrapper( | ||
SegmentReadState state, | ||
String dataCodec, | ||
String dataExtension, | ||
String metaCodec, | ||
String metaExtension | ||
) throws IOException { | ||
lucene90DocValuesProducer = new Lucene90DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension); | ||
} | ||
|
||
public DocValuesProducer getLucene90DocValuesProducer() { | ||
return lucene90DocValuesProducer; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
lucene90DocValuesProducer.close(); | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.apache.lucene.index; | ||
|
||
import org.apache.lucene.util.Counter; | ||
|
||
/** | ||
* A wrapper class for writing sorted numeric doc values. | ||
* <p> | ||
* This class provides a convenient way to add sorted numeric doc values to a field | ||
* and retrieve the corresponding {@link SortedNumericDocValues} instance. | ||
* | ||
* @opensearch.experimental | ||
*/ | ||
public class SortedNumericDocValuesWriterWrapper { | ||
|
||
private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter; | ||
|
||
/** | ||
* Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance. | ||
* | ||
* @param fieldInfo the field information for the field being written | ||
* @param counter a counter for tracking memory usage | ||
*/ | ||
public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) { | ||
sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter); | ||
} | ||
|
||
/** | ||
* Adds a value to the sorted numeric doc values for the specified document. | ||
* | ||
* @param docID the document ID | ||
* @param value the value to add | ||
*/ | ||
public void addValue(int docID, long value) { | ||
sortedNumericDocValuesWriter.addValue(docID, value); | ||
} | ||
|
||
/** | ||
* Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values | ||
* | ||
* @return the {@link SortedNumericDocValues} instance | ||
*/ | ||
public SortedNumericDocValues getDocValues() { | ||
return sortedNumericDocValuesWriter.getDocValues(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
...er/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.codec.composite; | ||
|
||
import org.apache.lucene.codecs.DocValuesConsumer; | ||
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper; | ||
import org.apache.lucene.index.SegmentWriteState; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* A factory class that provides a factory method for creating {@link DocValuesConsumer} instances | ||
* for the latest composite codec. | ||
* <p> | ||
* The segments are written using the latest composite codec. The codec | ||
* internally manages calling the appropriate consumer factory for its abstractions. | ||
* <p> | ||
* This design ensures forward compatibility for writing operations | ||
* | ||
* @opensearch.experimental | ||
*/ | ||
public class LuceneDocValuesConsumerFactory { | ||
|
||
public static DocValuesConsumer getDocValuesConsumerForCompositeCodec( | ||
SegmentWriteState state, | ||
String dataCodec, | ||
String dataExtension, | ||
String metaCodec, | ||
String metaExtension | ||
) throws IOException { | ||
try ( | ||
Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper( | ||
state, | ||
dataCodec, | ||
dataExtension, | ||
metaCodec, | ||
metaExtension | ||
) | ||
) { | ||
return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer(); | ||
} | ||
} | ||
|
||
} |
60 changes: 60 additions & 0 deletions
60
...er/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.codec.composite; | ||
|
||
import org.apache.lucene.codecs.DocValuesProducer; | ||
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducerWrapper; | ||
import org.apache.lucene.index.SegmentReadState; | ||
import org.opensearch.index.codec.composite.composite99.Composite99Codec; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* A factory class that provides a factory method for creating {@link DocValuesProducer} instances | ||
* based on the specified composite codec. | ||
* <p> | ||
* In producers, we want to ensure compatibility with older codec versions during the segment reads. | ||
* This approach allows for writing with only the latest codec while maintaining | ||
* the ability to read data encoded with any codec version present in the segment. | ||
* <p> | ||
* This design ensures backward compatibility for reads across different codec versions. | ||
* | ||
* @opensearch.experimental | ||
*/ | ||
public class LuceneDocValuesProducerFactory { | ||
|
||
public static DocValuesProducer getDocValuesProducerForCompositeCodec( | ||
String compositeCodec, | ||
SegmentReadState state, | ||
String dataCodec, | ||
String dataExtension, | ||
String metaCodec, | ||
String metaExtension | ||
) throws IOException { | ||
|
||
switch (compositeCodec) { | ||
case Composite99Codec.COMPOSITE_INDEX_CODEC_NAME: | ||
try ( | ||
Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper( | ||
state, | ||
dataCodec, | ||
dataExtension, | ||
metaCodec, | ||
metaExtension | ||
) | ||
) { | ||
return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer(); | ||
} | ||
default: | ||
throw new IllegalStateException("Invalid composite codec " + "[" + compositeCodec + "]"); | ||
} | ||
|
||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
12 changes: 12 additions & 0 deletions
12
server/src/main/java/org/opensearch/index/codec/composite/composite99/package-info.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
/** | ||
* Responsible for handling all composite index codecs and operations associated with the Composite99 codec. | ||
*/ | ||
package org.opensearch.index.codec.composite.composite99; |
2 changes: 1 addition & 1 deletion
2
server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
org.opensearch.index.codec.composite.Composite99Codec | ||
org.opensearch.index.codec.composite.composite99.Composite99Codec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.