Skip to content

Commit

Permalink
Adding sorted numeric dv for dims and metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Bharathwaj G <[email protected]>
  • Loading branch information
bharath-techie committed Feb 16, 2024
1 parent e48e60c commit 197d659
Show file tree
Hide file tree
Showing 9 changed files with 499 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.EmptyDocValuesProducer;
Expand All @@ -34,6 +35,8 @@
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
Expand All @@ -42,6 +45,7 @@
import org.opensearch.index.codec.freshstartree.aggregator.AggregationFunctionType;
import org.opensearch.index.codec.freshstartree.aggregator.ValueAggregator;
import org.opensearch.index.codec.freshstartree.aggregator.ValueAggregatorFactory;
import org.opensearch.index.codec.freshstartree.codec.SortedNumericDocValuesWriter;
import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues;
import org.opensearch.index.codec.freshstartree.node.StarTreeNode;
import org.opensearch.index.codec.freshstartree.util.BufferedAggregatedDocValues;
Expand Down Expand Up @@ -151,7 +155,7 @@ public abstract class BaseSingleTreeBuilder {
}

// TODO : Removing hardcoding
_maxLeafRecords = 10000; // builderConfig.getMaxLeafRecords();
_maxLeafRecords = 1000; // builderConfig.getMaxLeafRecords();
}

private void constructStarTree(StarTreeBuilderUtils.TreeNode node, int startDocId, int endDocId) throws IOException {
Expand Down Expand Up @@ -257,12 +261,118 @@ public void build(Iterator<Record> recordIterator, boolean isMerge) throws IOExc
logger.info("Finished creating aggregated documents, got aggregated records : {}", numAggregatedRecords);

// Create doc values indices in disk
createDocValuesIndices(_docValuesConsumer);
createSortedDocValuesIndices(_docValuesConsumer);

// Serialize and save in disk
StarTreeBuilderUtils.serializeTree(indexOutput, _rootNode, _dimensionsSplitOrder, _numNodes);
}

private void createSortedDocValuesIndices(DocValuesConsumer docValuesConsumer) throws IOException {
List<SortedNumericDocValuesWriter> dimWriterList = new ArrayList<>();
List<SortedNumericDocValuesWriter> metricWriterList = new ArrayList<>();
FieldInfo[] dimFieldInfoArr = new FieldInfo[_dimensionReaders.length];
FieldInfo[] metricFieldInfoArr = new FieldInfo[_metricReaders.length];
int fieldNum = 0;
for (int i = 0; i < _dimensionReaders.length; i++) {
final FieldInfo fi = new FieldInfo(
_dimensionsSplitOrder[i] + "_dim",
fieldNum,
false,
false,
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.SORTED_NUMERIC,
-1,
Collections.emptyMap(),
0,
0,
0,
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false
);
dimFieldInfoArr[i] = fi;
final SortedNumericDocValuesWriter w = new SortedNumericDocValuesWriter(fi, Counter.newCounter());
dimWriterList.add(w);
fieldNum++;
}
for (int i = 0; i < _metricReaders.length; i++) {
FieldInfo fi = new FieldInfo(
_metrics[i] + "_metric",
fieldNum,
false,
false,
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.SORTED_NUMERIC,
-1,
Collections.emptyMap(),
0,
0,
0,
0,
VectorEncoding.FLOAT32,
VectorSimilarityFunction.EUCLIDEAN,
false
);
final SortedNumericDocValuesWriter w = new SortedNumericDocValuesWriter(fi, Counter.newCounter());
metricWriterList.add(w);
metricFieldInfoArr[i] = fi;
fieldNum++;
}

for (int docId = 0; docId < _numDocs; docId++) {
Record record = getStarTreeRecord(docId);
for (int i = 0; i < record._dimensions.length; i++) {
long val = record._dimensions[i];
dimWriterList.get(i).addValue(docId, val);
}
for (int i = 0; i < record._metrics.length; i++) {
switch (_valueAggregators[i].getAggregatedValueType()) {
case LONG:
long val = (long) record._metrics[i];
metricWriterList.get(i).addValue(docId, val);
break;
// TODO: support this
case DOUBLE:
// double doubleval = (double) record._metrics[i];
// break;
case FLOAT:
case INT:
default:
throw new IllegalStateException("Unsupported value type");
}
}
}

for (int i = 0; i < _dimensionReaders.length; i++) {
final int finalI = i;
DocValuesProducer a1 = new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {

return dimWriterList.get(finalI).getSortedNumericDocValues();
}
};
docValuesConsumer.addSortedNumericField(dimFieldInfoArr[i], a1);
}

for (int i = 0; i < _metricReaders.length; i++) {
final int finalI = i;
DocValuesProducer a1 = new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {

return metricWriterList.get(finalI).getSortedNumericDocValues();
}
};
docValuesConsumer.addSortedNumericField(metricFieldInfoArr[i], a1);
}


}

private void createDocValuesIndices(DocValuesConsumer docValuesConsumer) throws IOException {
PackedLongValues.Builder[] pendingDimArr = new PackedLongValues.Builder[_dimensionReaders.length];
PackedLongValues.Builder[] pendingMetricArr = new PackedLongValues.Builder[_metricReaders.length];
Expand All @@ -280,7 +390,7 @@ private void createDocValuesIndices(DocValuesConsumer docValuesConsumer) throws
false,
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NUMERIC,
DocValuesType.SORTED_NUMERIC,
-1,
Collections.emptyMap(),
0,
Expand All @@ -303,7 +413,7 @@ private void createDocValuesIndices(DocValuesConsumer docValuesConsumer) throws
false,
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NUMERIC,
DocValuesType.SORTED_NUMERIC,
-1,
Collections.emptyMap(),
0,
Expand Down Expand Up @@ -347,24 +457,24 @@ private void createDocValuesIndices(DocValuesConsumer docValuesConsumer) throws
final int finalI = i;
DocValuesProducer a1 = new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {

return new BufferedAggregatedDocValues(pendingDimArr[finalI].build(), docsWithField.iterator());
return DocValues.singleton(new BufferedAggregatedDocValues(pendingDimArr[finalI].build(), docsWithField.iterator()));
}
};
docValuesConsumer.addNumericField(dimFieldInfoArr[i], a1);
docValuesConsumer.addSortedNumericField(dimFieldInfoArr[i], a1);
}

for (int i = 0; i < _metricReaders.length; i++) {
final int finalI = i;
DocValuesProducer a1 = new EmptyDocValuesProducer() {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {

return new BufferedAggregatedDocValues(pendingMetricArr[finalI].build(), docsWithField.iterator());
return DocValues.singleton(new BufferedAggregatedDocValues(pendingDimArr[finalI].build(), docsWithField.iterator()));
}
};
docValuesConsumer.addNumericField(metricFieldInfoArr[i], a1);
docValuesConsumer.addSortedNumericField(metricFieldInfoArr[i], a1);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,14 @@ private Iterator<Record> mergeRecords(List<StarTreeAggregatedValues> aggrList) t
while (!endOfDoc) {
long[] dims = new long[starTree.dimensionValues.size()];
int i = 0;
for (Map.Entry<String, NumericDocValues> dimValue : starTree.dimensionValues.entrySet()) {
endOfDoc = dimValue.getValue().nextDoc() == DocIdSetIterator.NO_MORE_DOCS || dimValue.getValue().longValue() == -1;
for (Map.Entry<String, SortedNumericDocValues> dimValue : starTree.dimensionValues.entrySet()) {
int doc = dimValue.getValue().nextDoc();
long val = dimValue.getValue().nextValue();

endOfDoc = doc == DocIdSetIterator.NO_MORE_DOCS || val == -1;
if (endOfDoc) {
break;
}
long val = dimValue.getValue().longValue();
dims[i] = val;
i++;
}
Expand All @@ -143,9 +145,9 @@ private Iterator<Record> mergeRecords(List<StarTreeAggregatedValues> aggrList) t
}
i = 0;
Object[] metrics = new Object[starTree.metricValues.size()];
for (Map.Entry<String, NumericDocValues> metricValue : starTree.metricValues.entrySet()) {
for (Map.Entry<String, SortedNumericDocValues> metricValue : starTree.metricValues.entrySet()) {
metricValue.getValue().nextDoc();
metrics[i] = metricValue.getValue().longValue();
metrics[i] = metricValue.getValue().nextValue();
i++;
}
Record record = new Record(dims, metrics);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@ private Iterator<Record> mergeRecords(List<StarTreeAggregatedValues> aggrList) t
while (!endOfDoc) {
long[] dims = new long[starTree.dimensionValues.size()];
int i = 0;
for (Map.Entry<String, NumericDocValues> dimValue : starTree.dimensionValues.entrySet()) {
endOfDoc = dimValue.getValue().nextDoc() == DocIdSetIterator.NO_MORE_DOCS || dimValue.getValue().longValue() == -1;
for (Map.Entry<String, SortedNumericDocValues> dimValue : starTree.dimensionValues.entrySet()) {
int doc = dimValue.getValue().nextDoc();
long val = dimValue.getValue().nextValue();
endOfDoc = doc == DocIdSetIterator.NO_MORE_DOCS || val == -1;
if (endOfDoc) {
break;
}
long val = dimValue.getValue().longValue();
dims[i] = val;
i++;
}
Expand All @@ -72,9 +73,9 @@ private Iterator<Record> mergeRecords(List<StarTreeAggregatedValues> aggrList) t
}
i = 0;
Object[] metrics = new Object[starTree.metricValues.size()];
for (Map.Entry<String, NumericDocValues> metricValue : starTree.metricValues.entrySet()) {
for (Map.Entry<String, SortedNumericDocValues> metricValue : starTree.metricValues.entrySet()) {
metricValue.getValue().nextDoc();
metrics[i] = metricValue.getValue().longValue();
metrics[i] = metricValue.getValue().nextValue();
i++;
}
BaseSingleTreeBuilder.Record record = new BaseSingleTreeBuilder.Record(dims, metrics);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ public FieldInfo[] getFieldInfoArr() {
false,
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NUMERIC,
DocValuesType.SORTED_NUMERIC,
-1,
Collections.emptyMap(),
0,
Expand All @@ -195,7 +195,7 @@ public FieldInfo[] getFieldInfoArr() {
false,
true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
DocValuesType.NUMERIC,
DocValuesType.SORTED_NUMERIC,
-1,
Collections.emptyMap(),
0,
Expand Down
Loading

0 comments on commit 197d659

Please sign in to comment.