diff --git a/server/build.gradle b/server/build.gradle index 9c409d77363cb..b8c4b5380a6a9 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -96,6 +96,12 @@ if (!isEclipse) { manifest.attributes('Multi-Release': 'true') } } +repositories { + flatDir { + dirs 'lib' + } +} + dependencies { @@ -107,7 +113,6 @@ dependencies { api project(":libs:opensearch-geo") api project(":libs:opensearch-telemetry") - compileOnly project(':libs:opensearch-plugin-classloader') testRuntimeOnly project(':libs:opensearch-plugin-classloader') diff --git a/server/src/main/java/org/opensearch/action/search/SearchStarTreeAction.java b/server/src/main/java/org/opensearch/action/search/SearchStarTreeAction.java new file mode 100644 index 0000000000000..4fe3ff1a97a42 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchStarTreeAction.java @@ -0,0 +1,4 @@ +package org.opensearch.action.search; + +public class SearchStarTreeAction { +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchStarTreeResponse.java b/server/src/main/java/org/opensearch/action/search/SearchStarTreeResponse.java new file mode 100644 index 0000000000000..764fc9616d041 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchStarTreeResponse.java @@ -0,0 +1,4 @@ +package org.opensearch.action.search; + +public class SearchStarTreeResponse { +} diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchStarTreeAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchStarTreeAction.java new file mode 100644 index 0000000000000..baa113997f243 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchStarTreeAction.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.action.search; + +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.action.StepListener; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.HandledTransportAction; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.action.ActionListener; +import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.search.SearchPhaseResult; +import org.opensearch.search.internal.ShardSearchContextId; +import org.opensearch.tasks.Task; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Transport action for creating PIT reader context + */ +public class TransportCreatePitAction extends HandledTransportAction { + + public static final String CREATE_PIT_ACTION = "create_pit"; + private final TransportService transportService; + private final SearchTransportService searchTransportService; + private final ClusterService clusterService; + private final TransportSearchAction transportSearchAction; + private final NamedWriteableRegistry namedWriteableRegistry; + private final CreatePitController createPitController; + + @Inject + public TransportCreatePitAction( + TransportService transportService, + ActionFilters actionFilters, + SearchTransportService searchTransportService, + ClusterService clusterService, + TransportSearchAction transportSearchAction, + NamedWriteableRegistry namedWriteableRegistry, + CreatePitController createPitController + ) { + super(CreatePitAction.NAME, transportService, actionFilters, in -> new CreatePitRequest(in)); + this.transportService = transportService; + this.searchTransportService = searchTransportService; + this.clusterService = clusterService; + this.transportSearchAction = transportSearchAction; + this.namedWriteableRegistry = namedWriteableRegistry; + this.createPitController = createPitController; + } + + @Override + protected void doExecute(Task task, CreatePitRequest request, ActionListener listener) { + final StepListener createPitListener = new StepListener<>(); + final ActionListener updatePitIdListener = ActionListener.wrap(r -> listener.onResponse(r), e -> { + logger.error( + () -> new ParameterizedMessage( + "PIT creation failed while updating PIT ID for indices [{}]", + Arrays.toString(request.indices()) + ) + ); + listener.onFailure(e); + }); + createPitController.executeCreatePit(request, task, createPitListener, updatePitIdListener); + } + + /** + * Request to create pit reader context with keep alive + */ + public static class CreateReaderContextRequest extends TransportRequest { + private final ShardId shardId; + private final TimeValue keepAlive; + + public CreateReaderContextRequest(ShardId shardId, TimeValue keepAlive) { + this.shardId = shardId; + this.keepAlive = keepAlive; + } + + public ShardId getShardId() { + return shardId; + } + + public TimeValue getKeepAlive() { + return keepAlive; + } + + public CreateReaderContextRequest(StreamInput in) throws IOException { + super(in); + this.shardId = new ShardId(in); + this.keepAlive = in.readTimeValue(); + } + + @Override + public void writeTo(StreamOutput out) throws 
IOException { + super.writeTo(out); + shardId.writeTo(out); + out.writeTimeValue(keepAlive); + } + } + + /** + * Create pit reader context response which holds the contextId + */ + public static class CreateReaderContextResponse extends SearchPhaseResult { + public CreateReaderContextResponse(ShardSearchContextId shardSearchContextId) { + this.contextId = shardSearchContextId; + } + + public CreateReaderContextResponse(StreamInput in) throws IOException { + super(in); + contextId = new ShardSearchContextId(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + contextId.writeTo(out); + } + } + +} diff --git a/server/src/main/java/org/opensearch/common/lucene/Lucene.java b/server/src/main/java/org/opensearch/common/lucene/Lucene.java index c61b1bcc676a6..75743229db887 100644 --- a/server/src/main/java/org/opensearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/opensearch/common/lucene/Lucene.java @@ -1077,6 +1077,12 @@ public SortedDocValues getSortedDocValues(String field) { return null; } + @Override + public Object getAggregatedDocValues() + throws IOException { + return null; + } + public SortedNumericDocValues getSortedNumericDocValues(String field) { return null; } diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index 9b57fe64cbeab..88a107c44462b 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -39,6 +39,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.collect.MapBuilder; import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.freshstartree.codec.StarTreeCodec; import org.opensearch.index.mapper.MapperService; import java.util.Map; @@ -68,8 +69,8 @@ public CodecService(@Nullable MapperService mapperService, IndexSettings indexSe final MapBuilder codecs = MapBuilder.newMapBuilder(); assert null != indexSettings; if (mapperService == null) { - codecs.put(DEFAULT_CODEC, new Lucene95Codec()); - codecs.put(LZ4, new Lucene95Codec()); + codecs.put(DEFAULT_CODEC, new StarTreeCodec()); + codecs.put(LZ4, new StarTreeCodec()); codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION)); codecs.put(ZLIB, new Lucene95Codec(Mode.BEST_COMPRESSION)); } else { @@ -78,7 +79,7 @@ public CodecService(@Nullable MapperService mapperService, IndexSettings indexSe codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); } - codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); + codecs.put(LUCENE_DEFAULT_CODEC, new StarTreeCodec()); for (String codec : Codec.availableCodecs()) { codecs.put(codec, Codec.forName(codec)); } @@ -96,7 +97,7 @@ public CodecService(@Nullable MapperService mapperService, Logger logger) { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); } - codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); + codecs.put(LUCENE_DEFAULT_CODEC, new StarTreeCodec()); for (String codec : Codec.availableCodecs()) { codecs.put(codec, Codec.forName(codec)); } diff --git 
a/server/src/main/java/org/opensearch/index/codec/PerFieldMappingPostingFormatCodec.java b/server/src/main/java/org/opensearch/index/codec/PerFieldMappingPostingFormatCodec.java index d3207557273a5..294aaccff013d 100644 --- a/server/src/main/java/org/opensearch/index/codec/PerFieldMappingPostingFormatCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/PerFieldMappingPostingFormatCodec.java @@ -39,6 +39,7 @@ import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene95.Lucene95Codec; import org.opensearch.common.lucene.Lucene; +import org.opensearch.index.codec.freshstartree.codec.StarTreeDocValuesFormat; import org.opensearch.index.mapper.CompletionFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; @@ -56,7 +57,7 @@ public class PerFieldMappingPostingFormatCodec extends Lucene95Codec { private final Logger logger; private final MapperService mapperService; - private final DocValuesFormat dvFormat = new Lucene90DocValuesFormat(); + private final DocValuesFormat dvFormat = new StarTreeDocValuesFormat(); static { assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCodec.class) @@ -84,4 +85,10 @@ public PostingsFormat getPostingsFormatForField(String field) { public DocValuesFormat getDocValuesFormatForField(String field) { return dvFormat; } + + + @Override + public final DocValuesFormat docValuesFormat() { + return dvFormat; + } } diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/AggregationFunctionColumnPair.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/AggregationFunctionColumnPair.java new file mode 100644 index 0000000000000..4e7b3d21916b6 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/AggregationFunctionColumnPair.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.aggregator; + +import java.util.Comparator; + + +/** Aggregation function, doc values column pair */ +public class AggregationFunctionColumnPair implements Comparable { + public static final String DELIMITER = "__"; + public static final String STAR = "*"; + public static final AggregationFunctionColumnPair COUNT_STAR = + new AggregationFunctionColumnPair(AggregationFunctionType.COUNT, STAR); + + private final AggregationFunctionType _functionType; + private final String _column; + + public AggregationFunctionColumnPair(AggregationFunctionType functionType, String column) { + _functionType = functionType; + if (functionType == AggregationFunctionType.COUNT) { + _column = STAR; + } else { + _column = column; + } + } + + public AggregationFunctionType getFunctionType() { + return _functionType; + } + + public String getColumn() { + return _column; + } + + public String toColumnName() { + return toColumnName(_functionType, _column); + } + + public static String toColumnName(AggregationFunctionType functionType, String column) { + return functionType.getName() + DELIMITER + column; + } + + public static AggregationFunctionColumnPair fromColumnName(String columnName) { + String[] parts = columnName.split(DELIMITER, 2); + return fromFunctionAndColumnName(parts[0], parts[1]); + } + + private static AggregationFunctionColumnPair fromFunctionAndColumnName(String functionName, String columnName) { + AggregationFunctionType functionType = AggregationFunctionType.getAggregationFunctionType(functionName); + if (functionType == AggregationFunctionType.COUNT) { + return COUNT_STAR; + } else { + return new AggregationFunctionColumnPair(functionType, columnName); + } + } + + @Override + public int hashCode() { + return 31 * _functionType.hashCode() + _column.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj instanceof AggregationFunctionColumnPair) { + AggregationFunctionColumnPair anotherPair = (AggregationFunctionColumnPair) obj; + return _functionType == anotherPair._functionType && _column.equals(anotherPair._column); + } + return false; + } + + @Override + public String toString() { + return toColumnName(); + } + + @Override + public int compareTo(AggregationFunctionColumnPair other) { + return Comparator.comparing((AggregationFunctionColumnPair o) -> o._column) + .thenComparing((AggregationFunctionColumnPair o) -> o._functionType).compare(this, other); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/AggregationFunctionType.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/AggregationFunctionType.java new file mode 100644 index 0000000000000..dfb3197a5e979 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/AggregationFunctionType.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
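A quick round-trip sketch of the function__column encoding above, which the builder later relies on when it registers the hardcoded "SUM__status" pair (illustrative only, not part of the diff):

package org.opensearch.index.codec.freshstartree.aggregator;

/** Round-trip sketch for the function__column encoding; illustrative, not part of the diff. */
public class AggregationFunctionColumnPairExample {
    public static void main(String[] args) {
        AggregationFunctionColumnPair sum = new AggregationFunctionColumnPair(AggregationFunctionType.SUM, "status");
        System.out.println(sum.toColumnName()); // "sum__status" -- encoding uses the lowercase display name

        // Decoding splits on the first "__" and resolves the function via Enum.valueOf,
        // so the function part must match the constant name, as in "SUM__status".
        AggregationFunctionColumnPair decoded = AggregationFunctionColumnPair.fromColumnName("SUM__status");
        System.out.println(decoded.getFunctionType() + " on " + decoded.getColumn()); // SUM on status

        // COUNT normalizes its column to "*" and is interned as COUNT_STAR.
        System.out.println(AggregationFunctionColumnPair.fromColumnName("COUNT__anything")); // count__*
    }
}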
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.aggregator; + +/** Aggregated function type */ +public enum AggregationFunctionType { + COUNT("count"), SUM("sum"); + // AVG("avg"); + + private String name; + + AggregationFunctionType(String name) { + this.name = name; + } + + public static AggregationFunctionType getAggregationFunctionType(String functionName) { + return AggregationFunctionType.valueOf(functionName); + } + + public String getName() { + return name; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/CountValueAggregator.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/CountValueAggregator.java new file mode 100644 index 0000000000000..97836196d83e6 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/CountValueAggregator.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
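One caveat in the enum above: getAggregationFunctionType delegates to Enum.valueOf, which matches the constant names ("COUNT", "SUM") but not the lowercase display names, so decoding a name produced by toColumnName (e.g. "sum__status") would throw. A hypothetical case-insensitive variant, not in this diff, could normalize first:

// Hypothetical tolerant lookup; the enum as committed accepts only exact constant names.
public static AggregationFunctionType getAggregationFunctionType(String functionName) {
    return AggregationFunctionType.valueOf(functionName.toUpperCase(java.util.Locale.ROOT));
}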
+ */ +package org.opensearch.index.codec.freshstartree.aggregator; + +/** Count value aggregator */ +public class CountValueAggregator implements ValueAggregator { + public static final DataType AGGREGATED_VALUE_TYPE = DataType.LONG; + + @Override + public AggregationFunctionType getAggregationType() { + return AggregationFunctionType.COUNT; + } + + @Override + public DataType getAggregatedValueType() { + return AGGREGATED_VALUE_TYPE; + } + + @Override + public Long getInitialAggregatedValue(Long rawValue) { + return 1l; + } + + @Override + public Long applyRawValue(Long value, Long rawValue) { + return value + 1; + } + + @Override + public Long applyAggregatedValue(Long value, Long aggregatedValue) { + return value + aggregatedValue; + } + + @Override + public Long cloneAggregatedValue(Long value) { + return value; + } + + @Override + public int getMaxAggregatedValueByteSize() { + return Long.BYTES; + } + + @Override + public byte[] serializeAggregatedValue(Long value) { + throw new UnsupportedOperationException(); + } + + @Override + public Long deserializeAggregatedValue(byte[] bytes) { + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/DataType.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/DataType.java new file mode 100644 index 0000000000000..ee7dc9516fb22 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/DataType.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.aggregator; + +/** Data type of doc values */ +public enum DataType { + INT(Integer.BYTES, true), LONG(Long.BYTES, true), FLOAT(Float.BYTES, true), DOUBLE(Double.BYTES, true); + + private final int _size; + private final boolean _numeric; + + DataType(int size, boolean numeric) { + _size = size; + _numeric = numeric; + } + + /** Returns the number of bytes needed to store the data type. */ + public int size() { + if (_size >= 0) { + return _size; + } + throw new IllegalStateException("Cannot get number of bytes for: " + this); + } + + /** + * Returns {@code true} if the data type is numeric (INT, LONG, FLOAT, DOUBLE, BIG_DECIMAL), + * {@code false} otherwise. + */ + public boolean isNumeric() { + return _numeric; + } + + /** Converts the given string value to the data type. Returns byte[] for BYTES. 
*/ + public Object convert(String value) { + try { + switch (this) { + case INT: + return Integer.valueOf(value); + case LONG: + return Long.valueOf(value); + case FLOAT: + return Float.valueOf(value); + case DOUBLE: + return Double.valueOf(value); + default: + throw new IllegalStateException(); + } + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/SumValueAggregator.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/SumValueAggregator.java new file mode 100644 index 0000000000000..3572643588648 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/SumValueAggregator.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.aggregator; + +/** Sum value aggregator */ +public class SumValueAggregator implements ValueAggregator { + public static final DataType AGGREGATED_VALUE_TYPE = DataType.LONG; + + @Override + public AggregationFunctionType getAggregationType() { + return AggregationFunctionType.SUM; + } + + @Override + public DataType getAggregatedValueType() { + return AGGREGATED_VALUE_TYPE; + } + + @Override + public Long getInitialAggregatedValue(Long rawValue) { + return rawValue; + } + + @Override + public Long applyRawValue(Long value, Long rawValue) { + return value + rawValue; + } + + @Override + public Long applyAggregatedValue(Long value, Long aggregatedValue) { + return value + aggregatedValue; + } + + @Override + public Long cloneAggregatedValue(Long value) { + return value; + } + + @Override + public int getMaxAggregatedValueByteSize() { + return Long.BYTES; + } + + @Override + public byte[] serializeAggregatedValue(Long value) { + throw new UnsupportedOperationException(); + } + + @Override + public Long deserializeAggregatedValue(byte[] bytes) { + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/ValueAggregator.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/ValueAggregator.java new file mode 100644 index 0000000000000..a292aa7d7ba0b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/ValueAggregator.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
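A small illustration of the trimmed DataType enum above (its javadoc still mentions BYTES and BIG_DECIMAL from the Pinot original, which this version does not carry over):

package org.opensearch.index.codec.freshstartree.aggregator;

/** Sketch of DataType parsing and sizing; illustrative, not part of the diff. */
public class DataTypeExample {
    public static void main(String[] args) {
        System.out.println(DataType.LONG.convert("42"));  // 42, boxed as java.lang.Long
        System.out.println(DataType.LONG.size());         // 8
        System.out.println(DataType.FLOAT.isNumeric());   // true
        try {
            DataType.INT.convert("not-a-number");
        } catch (IllegalArgumentException e) {
            System.out.println("rejected, cause: " + e.getCause()); // wraps the NumberFormatException
        }
    }
}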
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.aggregator; + +/** + * A value aggregator that pre-aggregates on the input values for a specific type of aggregation. + */ +public interface ValueAggregator { + + /** Returns the type of the aggregation. */ + AggregationFunctionType getAggregationType(); + + /** Returns the data type of the aggregated value. */ + DataType getAggregatedValueType(); + + /** Returns the initial aggregated value. */ + Long getInitialAggregatedValue(Long rawValue); + + /** + * Applies a raw value to the current aggregated value. + * + *
<p>
NOTE: if value is mutable, will directly modify the value. + */ + Long applyRawValue(Long value, Long rawValue); + + /** + * Applies an aggregated value to the current aggregated value. + * + *
<p>
NOTE: if value is mutable, will directly modify the value. + */ + Long applyAggregatedValue(Long value, Long aggregatedValue); + + /** Clones an aggregated value. */ + Long cloneAggregatedValue(Long value); + + /** Returns the maximum size in bytes of the aggregated values seen so far. */ + int getMaxAggregatedValueByteSize(); + + /** Serializes an aggregated value into a byte array. */ + byte[] serializeAggregatedValue(Long value); + + /** De-serializes an aggregated value from a byte array. */ + Long deserializeAggregatedValue(byte[] bytes); +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/ValueAggregatorFactory.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/ValueAggregatorFactory.java new file mode 100644 index 0000000000000..bd28f52746692 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/ValueAggregatorFactory.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.aggregator; + +/** Value aggregator factory for a given aggregation type */ +public class ValueAggregatorFactory { + private ValueAggregatorFactory() { + } + + /** + * Returns a new instance of value aggregator for the given aggregation type. + * + * @param aggregationType Aggregation type + * @return Value aggregator + */ + public static ValueAggregator getValueAggregator(AggregationFunctionType aggregationType) { + switch (aggregationType) { + case COUNT: + return new CountValueAggregator(); + case SUM: + return new SumValueAggregator(); + // case AVG: + // return new AvgValueAggregator(); + default: + throw new IllegalStateException("Unsupported aggregation type: " + aggregationType); + } + } + + /** + * Returns the data type of the aggregated value for the given aggregation type. 
+ * + * @param aggregationType Aggregation type + * @return Data type of the aggregated value + */ + public static DataType getAggregatedValueType(AggregationFunctionType aggregationType) { + switch (aggregationType) { + case COUNT: + return CountValueAggregator.AGGREGATED_VALUE_TYPE; + case SUM: + return SumValueAggregator.AGGREGATED_VALUE_TYPE; + // case AVG: + // return AvgValueAggregator.AGGREGATED_VALUE_TYPE; + default: + throw new IllegalStateException("Unsupported aggregation type: " + aggregationType); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/package-info.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/package-info.java new file mode 100644 index 0000000000000..d05d4fd5cb5cf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/aggregator/package-info.java @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Aggregator classes for star tree */ +package org.opensearch.index.codec.freshstartree.aggregator; diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/BaseSingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/BaseSingleTreeBuilder.java new file mode 100644 index 0000000000000..ec3ec563bcf54 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/BaseSingleTreeBuilder.java @@ -0,0 +1,620 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
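Before the builder file below, a minimal sketch of how the aggregator package composes end to end: obtain an aggregator from the factory above, seed it with the first raw value, fold further raw values while scanning sorted segment records, then fold pre-aggregated values when merging star-tree records (mirroring mergeSegmentRecord and mergeStarTreeRecord in the builder):

package org.opensearch.index.codec.freshstartree.aggregator;

/** Sketch of the pre-aggregation fold the tree builder performs; illustrative, not part of the diff. */
public class ValueAggregatorExample {
    public static void main(String[] args) {
        ValueAggregator sum = ValueAggregatorFactory.getValueAggregator(AggregationFunctionType.SUM);

        // Fold raw per-document metric values (the sortAndAggregateSegmentRecords path).
        long[] rawValues = {10L, 5L, 7L};
        Long aggregated = sum.getInitialAggregatedValue(rawValues[0]);
        for (int i = 1; i < rawValues.length; i++) {
            aggregated = sum.applyRawValue(aggregated, rawValues[i]);
        }
        System.out.println(aggregated); // 22

        // Fold two already-aggregated values (the star-tree record merge path).
        System.out.println(sum.applyAggregatedValue(aggregated, 8L)); // 30
    }
}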
+ */ +package org.opensearch.index.codec.freshstartree.builder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.DocsWithFieldSet; +import org.apache.lucene.index.EmptyDocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedLongValues; +import org.opensearch.index.codec.freshstartree.aggregator.AggregationFunctionColumnPair; +import org.opensearch.index.codec.freshstartree.aggregator.AggregationFunctionType; +import org.opensearch.index.codec.freshstartree.aggregator.ValueAggregator; +import org.opensearch.index.codec.freshstartree.aggregator.ValueAggregatorFactory; +import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues; +import org.opensearch.index.codec.freshstartree.codec.StarTreeDocValuesWriter; +import org.opensearch.index.codec.freshstartree.node.StarTreeNode; +import org.opensearch.index.codec.freshstartree.util.BufferedAggregatedDocValues; + + +/** Base class for star tree builder */ +public abstract class BaseSingleTreeBuilder { + public static final int STAR_IN_DOC_VALUES_INDEX = -1; + public final static int SECOND = 1000; + public final static int MINUTE = 60 * SECOND; + public final static int HOUR = 60 * 60 * SECOND; + public final static int DAY = 24 * HOUR; + public final static int YEAR = 365 * DAY; + private static final Logger logger = LogManager.getLogger(BaseSingleTreeBuilder.class); + final int _numDimensions; + final String[] _dimensionsSplitOrder; + final Set _skipStarNodeCreationForDimensions; + final int _numMetrics; + // Name of the function-column pairs + final String[] _metrics; + final int _maxLeafRecords; + int _numDocs; + int _totalDocs; + int _numNodes; + final StarTreeBuilderUtils.TreeNode _rootNode = getNewNode(); + IndexOutput indexOutput; + SortedNumericDocValues[] _dimensionReaders; + SortedNumericDocValues[] _metricReaders; + ValueAggregator[] _valueAggregators; + DocValuesConsumer _docValuesConsumer; + + BaseSingleTreeBuilder(IndexOutput output, List dimensionsSplitOrder, + Map docValuesMap, int maxDoc, DocValuesConsumer docValuesConsumer, + SegmentWriteState state) + throws IOException { + + String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, "stttree"); + indexOutput = state.directory.createOutput(docFileName, state.context); + CodecUtil.writeIndexHeader(indexOutput, "STARTreeCodec", 0, state.segmentInfo.getId(), state.segmentSuffix); + dimensionsSplitOrder = new ArrayList<>(); + dimensionsSplitOrder.add("minute"); + 
dimensionsSplitOrder.add("hour"); + dimensionsSplitOrder.add("day"); + dimensionsSplitOrder.add("month"); + //dimensionsSplitOrder.add("year"); + dimensionsSplitOrder.add("status"); + _numDimensions = dimensionsSplitOrder.size(); + _dimensionsSplitOrder = new String[_numDimensions]; + _skipStarNodeCreationForDimensions = new HashSet<>(); + _totalDocs = state.segmentInfo.maxDoc(); + _docValuesConsumer = docValuesConsumer; + List functionColumnPairList = new ArrayList<>(); + + // TODO : pass function column pair - Remove hardcoding + functionColumnPairList.add("SUM__status"); + List aggregationSpecs = new ArrayList<>(); + aggregationSpecs.add(AggregationFunctionColumnPair.fromColumnName("SUM__status")); + //aggregationSpecs.add(AggregationFunctionColumnPair.fromColumnName("COUNT__status")); + + _dimensionReaders = new SortedNumericDocValues[_numDimensions]; + Set skipStarNodeCreationForDimensions = new HashSet<>(); + for (int i = 0; i < _numDimensions; i++) { + String dimension = dimensionsSplitOrder.get(i); + //logger.info("Dimension split order : {}", dimension); + _dimensionsSplitOrder[i] = dimension; + if (skipStarNodeCreationForDimensions.contains(dimension)) { + _skipStarNodeCreationForDimensions.add(i); + } + _dimensionReaders[i] = docValuesMap.get(dimension + "_dim"); + } + _numMetrics = aggregationSpecs.size(); + _metrics = new String[_numMetrics]; + _valueAggregators = new ValueAggregator[_numMetrics]; + + int index = 0; + _metricReaders = new SortedNumericDocValues[_numMetrics]; + for (AggregationFunctionColumnPair aggrPair : aggregationSpecs) { + AggregationFunctionColumnPair functionColumnPair = aggrPair; + _metrics[index] = functionColumnPair.toColumnName() + "_" + functionColumnPair.getFunctionType().getName(); + _valueAggregators[index] = ValueAggregatorFactory.getValueAggregator(functionColumnPair.getFunctionType()); + // Ignore the column for COUNT aggregation function + if (_valueAggregators[index].getAggregationType() != AggregationFunctionType.COUNT) { + String column = functionColumnPair.getColumn(); + _metricReaders[index] = + docValuesMap.get(column + "_" + functionColumnPair.getFunctionType().getName() + "_metric"); + } + + index++; + } + + // TODO : Removing hardcoding + _maxLeafRecords = 100; // builderConfig.getMaxLeafRecords(); + } + + private void constructStarTree(StarTreeBuilderUtils.TreeNode node, int startDocId, int endDocId) + throws IOException { + + int childDimensionId = node._dimensionId + 1; + if (childDimensionId == _numDimensions) { + return; + } + + // Construct all non-star children nodes + node._childDimensionId = childDimensionId; + Map children = + constructNonStarNodes(startDocId, endDocId, childDimensionId); + node._children = children; + + // Construct star-node if required + if (!_skipStarNodeCreationForDimensions.contains(childDimensionId) && children.size() > 1) { + children.put(StarTreeNode.ALL, constructStarNode(startDocId, endDocId, childDimensionId)); + } + + // Further split on child nodes if required + for (StarTreeBuilderUtils.TreeNode child : children.values()) { + if (child._endDocId - child._startDocId > _maxLeafRecords) { + constructStarTree(child, child._startDocId, child._endDocId); + } + } + } + + private Map constructNonStarNodes(int startDocId, int endDocId, + int dimensionId) + throws IOException { + Map nodes = new HashMap<>(); + int nodeStartDocId = startDocId; + long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); + for (int i = startDocId + 1; i < endDocId; i++) { + long dimensionValue = 
getDimensionValue(i, dimensionId); + // System.out.println("Dim value : " + dimensionValue ); + if (dimensionValue != nodeDimensionValue) { + StarTreeBuilderUtils.TreeNode child = getNewNode(); + child._dimensionId = dimensionId; + child._dimensionValue = nodeDimensionValue; + child._startDocId = nodeStartDocId; + child._endDocId = i; + nodes.put(nodeDimensionValue, child); + + nodeStartDocId = i; + nodeDimensionValue = dimensionValue; + } + } + StarTreeBuilderUtils.TreeNode lastNode = getNewNode(); + lastNode._dimensionId = dimensionId; + lastNode._dimensionValue = nodeDimensionValue; + lastNode._startDocId = nodeStartDocId; + lastNode._endDocId = endDocId; + nodes.put(nodeDimensionValue, lastNode); + return nodes; + } + + private StarTreeBuilderUtils.TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException { + StarTreeBuilderUtils.TreeNode starNode = getNewNode(); + starNode._dimensionId = dimensionId; + starNode._dimensionValue = StarTreeNode.ALL; + starNode._startDocId = _numDocs; + Iterator recordIterator = generateRecordsForStarNode(startDocId, endDocId, dimensionId); + while (recordIterator.hasNext()) { + appendToStarTree(recordIterator.next()); + } + starNode._endDocId = _numDocs; + return starNode; + } + + public abstract void build(List aggrList) + throws IOException; + + public void build() + throws IOException { + // TODO: get total docs + int numSegmentRecords = _totalDocs; + + long startTime = System.currentTimeMillis(); + Iterator recordIterator = sortAndAggregateSegmentRecords(numSegmentRecords); + logger.info("Sorting and aggregating star-tree in ms : {}", (System.currentTimeMillis() - startTime)); + // System.out.println( + // "== =============Finished sorting and aggregating star-tree in ms : " + + // (System.currentTimeMillis() + // - startTime)); + + build(recordIterator, false); + } + + public void build(Iterator recordIterator, boolean isMerge) + throws IOException { + int numSegmentRecords = _totalDocs; + + while (recordIterator.hasNext()) { + appendToStarTree(recordIterator.next()); + } + int numStarTreeRecords = _numDocs; + logger.info("Generated star tree records number : [{}] from segment records : [{}]", numStarTreeRecords, + numSegmentRecords); + if (_numDocs == 0) { + StarTreeBuilderUtils.serializeTree(indexOutput, _rootNode, _dimensionsSplitOrder, _numNodes); + return; + } + constructStarTree(_rootNode, 0, _numDocs); + int numRecordsUnderStarNode = _numDocs - numStarTreeRecords; + logger.info("Finished constructing star-tree, got [ {} ] tree nodes and [ {} ] records under star-node", + _numNodes, numRecordsUnderStarNode); + + createAggregatedDocs(_rootNode); + int numAggregatedRecords = _numDocs - numStarTreeRecords - numRecordsUnderStarNode; + logger.info("Finished creating aggregated documents, got aggregated records : {}", numAggregatedRecords); + + // Create doc values indices in disk + createDocValuesIndices(_docValuesConsumer); + + // Serialize and save in disk + StarTreeBuilderUtils.serializeTree(indexOutput, _rootNode, _dimensionsSplitOrder, _numNodes); + } + + private void createDocValuesIndices(DocValuesConsumer docValuesConsumer) + throws IOException { + PackedLongValues.Builder[] pendingDimArr = new PackedLongValues.Builder[_dimensionReaders.length]; + PackedLongValues.Builder[] pendingMetricArr = new PackedLongValues.Builder[_metricReaders.length]; + + FieldInfo[] dimFieldInfoArr = new FieldInfo[_dimensionReaders.length]; + FieldInfo[] metricFieldInfoArr = new FieldInfo[_metricReaders.length]; + int fieldNum 
= 0; + + for (int i = 0; i < _dimensionReaders.length; i++) { + pendingDimArr[fieldNum] = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); + dimFieldInfoArr[fieldNum] = new FieldInfo(_dimensionsSplitOrder[i] + "_dim", fieldNum, false, false, true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, DocValuesType.NUMERIC, -1, + Collections.emptyMap(), 0, 0, 0, 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, false); + fieldNum++; + } + + for (int i = 0; i < _metricReaders.length; i++) { + pendingMetricArr[i] = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); + metricFieldInfoArr[i] = new FieldInfo(_metrics[i] + "_metric", fieldNum, false, false, true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, DocValuesType.NUMERIC, -1, + Collections.emptyMap(), 0, 0, 0, 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, false); + fieldNum++; + } + + DocsWithFieldSet docsWithField = new DocsWithFieldSet(); + + for (int docId = 0; docId < _numDocs; docId++) { + Record record = getStarTreeRecord(docId); + for (int i = 0; i < record._dimensions.length; i++) { + long val = record._dimensions[i]; + pendingDimArr[i].add(val); + } + for (int i = 0; i < record._metrics.length; i++) { + switch (_valueAggregators[i].getAggregatedValueType()) { + case LONG: + long val = (long) record._metrics[i]; + pendingMetricArr[i].add(val); + break; + // TODO: support this + case DOUBLE: + // double doubleval = (double) record._metrics[i]; + // break; + case FLOAT: + case INT: + default: + throw new IllegalStateException("Unsupported value type"); + } + } + docsWithField.add(docId); + } + for (int i = 0; i < _dimensionReaders.length; i++) { + final int finalI = i; + DocValuesProducer a1 = new EmptyDocValuesProducer() { + @Override + public NumericDocValues getNumeric(FieldInfo field) + throws IOException { + + return new BufferedAggregatedDocValues(pendingDimArr[finalI].build(), docsWithField.iterator()); + } + }; + docValuesConsumer.addNumericField(dimFieldInfoArr[i], a1); + } + + for (int i = 0; i < _metricReaders.length; i++) { + final int finalI = i; + DocValuesProducer a1 = new EmptyDocValuesProducer() { + @Override + public NumericDocValues getNumeric(FieldInfo field) + throws IOException { + + return new BufferedAggregatedDocValues(pendingMetricArr[finalI].build(), docsWithField.iterator()); + } + }; + docValuesConsumer.addNumericField(metricFieldInfoArr[i], a1); + } + } + + private StarTreeBuilderUtils.TreeNode getNewNode() { + _numNodes++; + return new StarTreeBuilderUtils.TreeNode(); + } + + private void appendToStarTree(Record record) + throws IOException { + // TODO : uncomment this for sanity +// boolean star = true; +// for(long dim : record._dimensions) { +// if(dim != StarTreeNode.ALL) { +// star = false; +// break; +// } +// } +// if(star) { +// System.out.println("======Overall sum =====" + (long) record._metrics[0]); +// } + appendRecord(record); + _numDocs++; + } + + /** + * Appends a record to the star-tree. + * + * @param record Record to be appended + */ + abstract void appendRecord(Record record) + throws IOException; + + /** + * Returns the record of the given document Id in the star-tree. + * + * @param docId Document Id + * @return Star-tree record + */ + abstract Record getStarTreeRecord(int docId) + throws IOException; + + /** + * Returns the dimension value of the given document and dimension Id in the star-tree. 
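The createDocValuesIndices method above buffers each star-tree document's dimension and metric values in PackedLongValues builders and replays them as NumericDocValues through BufferedAggregatedDocValues; a standalone sketch of that buffering pattern, assuming Lucene 9.x utility classes:

import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

/** Sketch of the PackedLongValues buffering used by createDocValuesIndices; illustrative, not part of the diff. */
public class PackedBufferExample {
    public static void main(String[] args) {
        PackedLongValues.Builder pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
        DocsWithFieldSet docsWithField = new DocsWithFieldSet();

        long[] perDocValues = {3, 3, 4, 7};
        for (int docId = 0; docId < perDocValues.length; docId++) {
            pending.add(perDocValues[docId]);   // one value per star-tree document
            docsWithField.add(docId);           // record which docs carry the field
        }

        // BufferedAggregatedDocValues pairs this value iterator with docsWithField.iterator().
        PackedLongValues.Iterator values = pending.build().iterator();
        while (values.hasNext()) {
            System.out.println(values.next()); // 3, 3, 4, 7
        }
    }
}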
+ * + * @param docId Document Id + * @param dimensionId Dimension Id + * @return Dimension value + */ + abstract long getDimensionValue(int docId, int dimensionId) + throws IOException; + + /** + * Sorts and aggregates the records in the segment, and returns a record iterator for all the + * aggregated records. + * + *
<p>
This method reads records from segment and generates the initial records for the star-tree. + * + * @param numDocs Number of documents in the segment + * @return Iterator for the aggregated records + */ + abstract Iterator sortAndAggregateSegmentRecords(int numDocs) + throws IOException; + + /** + * Generates aggregated records for star-node. + * + *
<p>
This method will do the following steps: + * + * <ul> + *   <li>Creates a temporary buffer for the given range of documents + *   <li>Replaces the value for the given dimension Id to {@code STAR} + *   <li>Sorts the records inside the temporary buffer + *   <li>Aggregates the records with same dimensions + *   <li>Returns an iterator for the aggregated records + * </ul>
+ * + * @param startDocId Start document Id in the star-tree + * @param endDocId End document Id (exclusive) in the star-tree + * @param dimensionId Dimension Id of the star-node + * @return Iterator for the aggregated records + */ + abstract Iterator generateRecordsForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException; + + private Record createAggregatedDocs(StarTreeBuilderUtils.TreeNode node) + throws IOException { + Record aggregatedRecord = null; + if (node._children == null) { + // For leaf node + + if (node._startDocId == node._endDocId - 1) { + // If it has only one document, use it as the aggregated document + aggregatedRecord = getStarTreeRecord(node._startDocId); + node._aggregatedDocId = node._startDocId; + } else { + // If it has multiple documents, aggregate all of them + for (int i = node._startDocId; i < node._endDocId; i++) { + aggregatedRecord = mergeStarTreeRecord(aggregatedRecord, getStarTreeRecord(i)); + } + assert aggregatedRecord != null; + for (int i = node._dimensionId + 1; i < _numDimensions; i++) { + aggregatedRecord._dimensions[i] = + STAR_IN_DOC_VALUES_INDEX; // StarTreeV2Constants.STAR_IN_FORWARD_INDEX; + } + node._aggregatedDocId = _numDocs; + appendToStarTree(aggregatedRecord); + } + } else { + // For non-leaf node + + if (node._children.containsKey(StarTreeNode.ALL)) { + // If it has star child, use the star child aggregated document directly + for (StarTreeBuilderUtils.TreeNode child : node._children.values()) { + if (child._dimensionValue == StarTreeNode.ALL) { + aggregatedRecord = createAggregatedDocs(child); + node._aggregatedDocId = child._aggregatedDocId; + } else { + createAggregatedDocs(child); + } + } + } else { + // If no star child exists, aggregate all aggregated documents from non-star children + for (StarTreeBuilderUtils.TreeNode child : node._children.values()) { + aggregatedRecord = mergeStarTreeRecord(aggregatedRecord, createAggregatedDocs(child)); + } + assert aggregatedRecord != null; + for (int i = node._dimensionId + 1; i < _numDimensions; i++) { + aggregatedRecord._dimensions[i] = + STAR_IN_DOC_VALUES_INDEX; // StarTreeV2Constants.STAR_IN_FORWARD_INDEX; + } + node._aggregatedDocId = _numDocs; + appendToStarTree(aggregatedRecord); + } + } + return aggregatedRecord; + } + + /** + * Merges a segment record (raw) into the aggregated record. + * + *
<p>
Will create a new aggregated record if the current one is {@code null}. + * + * @param aggregatedRecord Aggregated record + * @param segmentRecord Segment record + * @return Merged record + */ + Record mergeSegmentRecord(Record aggregatedRecord, Record segmentRecord) { + if (aggregatedRecord == null) { + long[] dimensions = new long[_numDimensions]; + for (int i = 0; i < _numDimensions; i++) { + dimensions[i] = segmentRecord._dimensions[i]; + } + Object[] metrics = new Object[_numMetrics]; + for (int i = 0; i < _numMetrics; i++) { + // TODO: fill this + metrics[i] = _valueAggregators[i].getInitialAggregatedValue((Long) segmentRecord._metrics[i]); + } + return new Record(dimensions, metrics); + } else { + for (int i = 0; i < _numMetrics; i++) { + aggregatedRecord._metrics[i] = _valueAggregators[i].applyRawValue((Long) aggregatedRecord._metrics[i], + (Long) segmentRecord._metrics[i]); + } + return aggregatedRecord; + } + } + + /** + * Merges a star-tree record (aggregated) into the aggregated record. + * + *
<p>
Will create a new aggregated record if the current one is {@code null}. + * + * @param aggregatedRecord Aggregated record + * @param starTreeRecord Star-tree record + * @return Merged record + */ + Record mergeStarTreeRecord(Record aggregatedRecord, Record starTreeRecord) { + if (aggregatedRecord == null) { + long[] dimensions = new long[_numDimensions]; + for (int i = 0; i < _numDimensions; i++) { + dimensions[i] = starTreeRecord._dimensions[i]; + } + Object[] metrics = new Object[_numMetrics]; + for (int i = 0; i < _numMetrics; i++) { + metrics[i] = _valueAggregators[i].cloneAggregatedValue((Long) starTreeRecord._metrics[i]); + } + return new Record(dimensions, metrics); + } else { + for (int i = 0; i < _numMetrics; i++) { + aggregatedRecord._metrics[i] = + _valueAggregators[i].applyAggregatedValue((Long) starTreeRecord._metrics[i], + (Long) aggregatedRecord._metrics[i]); + } + return aggregatedRecord; + } + } + + Record getNextSegmentRecord() + throws IOException { + long[] dimensions = getNextSegmentRecordDimensions(); + Object[] metrics = new Object[_numMetrics]; + for (int i = 0; i < _numMetrics; i++) { + // Ignore the column for COUNT aggregation function + if (_metricReaders[i] != null) { + _metricReaders[i].nextDoc(); + metrics[i] = _metricReaders[i].nextValue(); + } + } + return new Record(dimensions, metrics); + } + + private long getTimeStampVal(final String fieldName, final long val) { + + switch (fieldName) { + case "minute": + return val / MINUTE; + case "hour": + return val / HOUR; + case "day": + return val / DAY; + case "month": + return val/DAY * 30; // TODO + case "year": + return val / YEAR; + default: + return val; + } + } + + long[] getNextSegmentRecordDimensions() + throws IOException { + long[] dimensions = new long[_numDimensions]; + for (int i = 0; i < _numDimensions; i++) { + _dimensionReaders[i].nextDoc(); + dimensions[i] = getTimeStampVal(_dimensionsSplitOrder[i], _dimensionReaders[i].nextValue()); + } + return dimensions; + } + + public void close() + throws IOException { + boolean success = false; + try { + if (indexOutput != null) { + indexOutput.writeInt(-1); + CodecUtil.writeFooter(indexOutput); // write checksum + } + success = true; + } catch (Exception e) { + throw new RuntimeException(e); + // System.out.println(e.getMessage()); + } finally { + if (success) { + IOUtils.close(indexOutput); + } else { + IOUtils.closeWhileHandlingException(indexOutput); + } + indexOutput = null; + } + } + + /** Star tree record */ + public static class Record { + final long[] _dimensions; + final Object[] _metrics; + + public Record(long[] dimensions, Object[] metrics) { + _dimensions = dimensions; + _metrics = metrics; + } + + @Override + public String toString() { + return Arrays.toString(_dimensions) + " | " + Arrays.toString(_metrics); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/MixedHeapTemporaryFileSingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/MixedHeapTemporaryFileSingleTreeBuilder.java new file mode 100644 index 0000000000000..bfeb3e924e903 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/MixedHeapTemporaryFileSingleTreeBuilder.java @@ -0,0 +1,324 @@ +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. 
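Two details worth noting in getTimeStampVal and the time constants above: YEAR = 365 * DAY overflows int (365 * 86,400,000 exceeds Integer.MAX_VALUE), and since / and * bind left-to-right with equal precedence, the month arm computes (val / DAY) * 30 rather than a 30-day bucket index. A hedged sketch of the apparent intent, assuming epoch-millisecond inputs:

/** Hypothetical corrected bucketing; the diff itself marks the month arm with a TODO. */
public final class TimeBuckets {
    public static final long SECOND = 1000L;
    public static final long MINUTE = 60 * SECOND;
    public static final long HOUR = 60 * MINUTE;
    public static final long DAY = 24 * HOUR;
    public static final long MONTH = 30 * DAY;   // 30-day month approximation
    public static final long YEAR = 365 * DAY;   // long arithmetic avoids the int overflow

    public static long bucket(long epochMillis, long bucketWidthMillis) {
        return epochMillis / bucketWidthMillis;
    }
}

Under those assumptions, the month arm would become TimeBuckets.bucket(val, TimeBuckets.MONTH), consistent with the minute/hour/day arms.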
+// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +//package org.opensearch.index.codec.freshstartree.builder; +// +//import java.io.IOException; +//import java.nio.ByteBuffer; +//import java.nio.ByteOrder; +//import java.nio.file.Path; +//import java.util.ArrayList; +//import java.util.Arrays; +//import java.util.Iterator; +//import java.util.List; +//import java.util.Map; +//import org.apache.lucene.codecs.DocValuesConsumer; +//import org.apache.lucene.index.IndexFileNames; +//import org.apache.lucene.index.SegmentWriteState; +//import org.apache.lucene.index.SortedNumericDocValues; +//import org.apache.lucene.store.IndexInput; +//import org.apache.lucene.store.IndexOutput; +//import org.apache.lucene.store.RandomAccessInput; +//import org.apache.lucene.util.IOUtils; +//import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues; +//import org.opensearch.index.codec.freshstartree.util.QuickSorter; +// +// +///** +// * Sorting and aggregating segment records is done off heap and star records processing is done on +// * heap +// */ +//public class MixedHeapTemporaryFileSingleTreeBuilder extends BaseSingleTreeBuilder { +// private static final String SEGMENT_RECORD_FILE_NAME = "segment.record"; +// private final List _records = new ArrayList<>(); +// private final List _segmentRecordOffsets; +// +// IndexOutput segmentRecordFileOutput; +// RandomAccessInput segmentRandomInput; +// +// SegmentWriteState state; +// +// public MixedHeapTemporaryFileSingleTreeBuilder(IndexOutput output, List dimensionsSplitOrder, +// Map docValuesMap, int maxDoc, DocValuesConsumer consumer, +// SegmentWriteState state) +// throws IOException { +// super(output, dimensionsSplitOrder, docValuesMap, maxDoc, consumer, state); +// this.state = state; +// // TODO : how to set this dynammically +// // CodecUtil.writeIndexHeader( +// // starTreeRecordFileOutput, +// // "STARTreeCodec", +// // 0, +// // state.segmentInfo.getId(), +// // state.segmentSuffix); +// segmentRecordFileOutput = state.directory.createTempOutput("segmentrecord", "sort", state.context); +// +// _segmentRecordOffsets = new ArrayList<>(); +// _segmentRecordOffsets.add(0L); +// } +// +// @Override +// public void build(List aggrList) +// throws IOException { +// +// } +// +// private byte[] serializeStarTreeRecord(Record starTreeRecord) { +// int numBytes = _numDimensions * Integer.BYTES; +// for (int i = 0; i < _numMetrics; i++) { +// switch (_valueAggregators[i].getAggregatedValueType()) { +// case LONG: +// numBytes += Long.BYTES; +// break; +// case DOUBLE: +// case INT: +// case FLOAT: +// // numBytes += Double.BYTES; +// // break; +// default: +// throw new IllegalStateException(); +// } +// } +// byte[] bytes = new byte[numBytes]; +// ByteBuffer byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.nativeOrder()); +// for (int dimension : starTreeRecord._dimensions) { +// byteBuffer.putInt(dimension); +// } +// for (int i = 0; i < _numMetrics; i++) { +// switch 
(_valueAggregators[i].getAggregatedValueType()) { +// case LONG: +// byteBuffer.putLong((Long) starTreeRecord._metrics[i]); +// break; +// case DOUBLE: +// // byteBuffer.putDouble((Double) starTreeRecord._metrics[i]); +// // break; +// case INT: +// case FLOAT: +// default: +// throw new IllegalStateException(); +// } +// } +// return bytes; +// } +// +// private Record deserializeStarTreeRecord(RandomAccessInput buffer, long offset) +// throws IOException { +// int[] dimensions = new int[_numDimensions]; +// for (int i = 0; i < _numDimensions; i++) { +// dimensions[i] = buffer.readInt(offset); +// offset += Integer.BYTES; +// } +// Object[] metrics = new Object[_numMetrics]; +// for (int i = 0; i < _numMetrics; i++) { +// switch (_valueAggregators[i].getAggregatedValueType()) { +// case LONG: +// metrics[i] = buffer.readLong(offset); +// offset += Long.BYTES; +// break; +// case DOUBLE: +// // TODO : handle double +// // metrics[i] = buffer.getDouble((int) offset); +// // offset += Double.BYTES; +// case INT: +// case FLOAT: +// default: +// throw new IllegalStateException(); +// } +// } +// return new Record(dimensions, metrics); +// } +// +// @Override +// void appendRecord(Record record) +// throws IOException { +// _records.add(record); +// } +// +// @Override +// Record getStarTreeRecord(int docId) +// throws IOException { +// return _records.get(docId); +// } +// +// @Override +// long getDimensionValue(int docId, int dimensionId) { +// return _records.get(docId)._dimensions[dimensionId]; +// } +// +// @Override +// Iterator sortAndAggregateSegmentRecords(int numDocs) +// throws IOException { +// // Write all dimensions for segment records into the buffer, and sort all records using an int +// // array +// // PinotDataBuffer dataBuffer; +// // long bufferSize = (long) numDocs * _numDimensions * Integer.BYTES; +// long recordBytesLength = 0; +// Integer[] sortedDocIds = new Integer[numDocs]; +// for (int i = 0; i < numDocs; i++) { +// sortedDocIds[i] = i; +// } +// +// try { +// for (int i = 0; i < numDocs; i++) { +// Record record = getNextSegmentRecord(); +// byte[] bytes = serializeStarTreeRecord(record); +// recordBytesLength = bytes.length; +// segmentRecordFileOutput.writeBytes(bytes, bytes.length); +// } +// } finally { +// segmentRecordFileOutput.close(); +// } +// +// IndexInput segmentRecordFileInput = state.directory.openInput(segmentRecordFileOutput.getName(), state.context); +// final long recordBytes = recordBytesLength; +// segmentRandomInput = segmentRecordFileInput.randomAccessSlice(0, segmentRecordFileInput.length()); +// +// try { +// // ArrayUtil.introSort(sortedDocIds, comparator); +// // Arrays.sort(sortedDocIds, comparator); +// +// QuickSorter.quickSort(0, numDocs, (i1, i2) -> { +// long offset1 = (long) sortedDocIds[i1] * recordBytes; +// long offset2 = (long) sortedDocIds[i2] * recordBytes; +// for (int i = 0; i < _numDimensions; i++) { +// try { +// int dimension1 = segmentRandomInput.readInt(offset1 + i * Integer.BYTES); +// int dimension2 = segmentRandomInput.readInt(offset2 + i * Integer.BYTES); +// if (dimension1 != dimension2) { +// return dimension1 - dimension2; +// } +// } catch (IOException e) { +// throw new RuntimeException(e); +// } +// } +// return 0; +// }, (i1, i2) -> { +// int temp = sortedDocIds[i1]; +// sortedDocIds[i1] = sortedDocIds[i2]; +// sortedDocIds[i2] = temp; +// }); +// +// // System.out.println("Sorted doc ids : " + Arrays.toString(sortedDocIds)); +// } finally { +// segmentRecordFileInput.close(); +// } +// +// // Create an 
iterator for aggregated records +// return new Iterator() { +// boolean _hasNext = true; +// Record _currentRecord = getSegmentRecord(sortedDocIds[0], recordBytes); +// int _docId = 1; +// +// @Override +// public boolean hasNext() { +// return _hasNext; +// } +// +// @Override +// public Record next() { +// Record next = mergeSegmentRecord(null, _currentRecord); +// while (_docId < numDocs) { +// Record record = null; +// try { +// record = getSegmentRecord(sortedDocIds[_docId++], recordBytes); +// } catch (IOException e) { +// // TODO : handle this block better - how to handle exceptions ? +// throw new RuntimeException(e); +// } +// if (!Arrays.equals(record._dimensions, next._dimensions)) { +// _currentRecord = record; +// return next; +// } else { +// next = mergeSegmentRecord(next, record); +// } +// } +// _hasNext = false; +// +// IOUtils.closeWhileHandlingException(segmentRecordFileInput, segmentRecordFileOutput); +// IOUtils.deleteFilesIgnoringExceptions(state.directory, segmentRecordFileOutput.getName()); +// return next; +// } +// }; +// } +// +// public Record getSegmentRecord(int docID, long recordBytes) +// throws IOException { +// return deserializeStarTreeRecord(segmentRandomInput, docID * recordBytes); +// } +// +// @Override +// Iterator generateRecordsForStarNode(int startDocId, int endDocId, int dimensionId) +// throws IOException { +// +// int numDocs = endDocId - startDocId; +// Record[] records = new Record[numDocs]; +// for (int i = 0; i < numDocs; i++) { +// records[i] = getStarTreeRecord(startDocId + i); +// } +// Arrays.sort(records, (o1, o2) -> { +// for (int i = dimensionId + 1; i < _numDimensions; i++) { +// if (o1._dimensions[i] != o2._dimensions[i]) { +// return o1._dimensions[i] - o2._dimensions[i]; +// } +// } +// return 0; +// }); +// return new Iterator() { +// boolean _hasNext = true; +// Record _currentRecord = records[0]; +// int _docId = 1; +// +// private boolean hasSameDimensions(Record record1, Record record2) { +// for (int i = dimensionId + 1; i < _numDimensions; i++) { +// if (record1._dimensions[i] != record2._dimensions[i]) { +// return false; +// } +// } +// return true; +// } +// +// @Override +// public boolean hasNext() { +// return _hasNext; +// } +// +// @Override +// public Record next() { +// Record next = mergeStarTreeRecord(null, _currentRecord); +// next._dimensions[dimensionId] = STAR_IN_DOC_VALUES_INDEX; +// while (_docId < numDocs) { +// Record record = records[_docId++]; +// if (!hasSameDimensions(record, _currentRecord)) { +// _currentRecord = record; +// return next; +// } else { +// next = mergeStarTreeRecord(next, record); +// } +// } +// _hasNext = false; +// return next; +// } +// }; +// } +// +// @Override +// public void close() +// throws IOException { +// IOUtils.deleteFilesIgnoringExceptions(Path.of( +// IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEGMENT_RECORD_FILE_NAME))); +// super.close(); +// } +//} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OffHeapBufferedSingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OffHeapBufferedSingleTreeBuilder.java new file mode 100644 index 0000000000000..4b3226067b099 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OffHeapBufferedSingleTreeBuilder.java @@ -0,0 +1,572 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.opensearch.index.codec.freshstartree.builder;
+
+import java.io.BufferedOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.util.IOUtils;
+import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues;
+import org.opensearch.index.codec.freshstartree.util.QuickSorter;
+
+
+/**
+ * Off-heap implementation of the star tree builder. Segment records are sorted and
+ * aggregated completely off heap. Star tree records are spread across a series of
+ * record files that are rotated as the tree grows, since the star tree record file
+ * needs to be read and written at the same time and the latest changes are not
+ * always visible during reads.
+ */
+public class OffHeapBufferedSingleTreeBuilder extends BaseSingleTreeBuilder {
+    private static final String SEGMENT_RECORD_FILE_NAME = "segment.record";
+    private static final String STAR_TREE_RECORD_FILE_NAME = "star-tree.record";
+
+    private final List _starTreeRecordOffsets;
+
+    private int _numReadableStarTreeRecords;
+
+    IndexOutput segmentRecordFileOutput;
+    IndexOutput starTreeRecordFileOutput;
+    RandomAccessInput segmentRandomInput;
+    private RandomAccessInput starTreeRecordRandomInput;
+
+    SegmentWriteState state;
+
+    long currBytes = 0;
+    // star tree record file name -> cumulative number of records written when the file was rotated
+    Map fileToByteSizeMap;
+    int starTreeFileCount = 0;
+    int currentStarTreeFileIndex = 0;
+    int prevStartDocId = Integer.MAX_VALUE;
+
+    public OffHeapBufferedSingleTreeBuilder(IndexOutput output, List dimensionsSplitOrder,
+        Map docValuesMap, int maxDoc, DocValuesConsumer consumer,
+        SegmentWriteState state)
+        throws IOException {
+        super(output, dimensionsSplitOrder, docValuesMap, maxDoc, consumer, state);
+        this.state = state;
+        fileToByteSizeMap = new LinkedHashMap<>(); // maintain insertion order
+
+        // TODO : how to set this dynamically
+        String segmentRecordFileName =
+            IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEGMENT_RECORD_FILE_NAME);
+        String starTreeRecordFileName =
+            IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, STAR_TREE_RECORD_FILE_NAME)
+                + "_" + 
starTreeFileCount; + + // TODO : create temp output + starTreeRecordFileOutput = state.directory.createOutput(starTreeRecordFileName, state.context); + starTreeFileCount++; + // CodecUtil.writeIndexHeader( + // starTreeRecordFileOutput, + // "STARTreeCodec", + // 0, + // state.segmentInfo.getId(), + // state.segmentSuffix); + segmentRecordFileOutput = state.directory.createOutput(segmentRecordFileName, state.context); + + _starTreeRecordOffsets = new ArrayList<>(); + //_starTreeRecordOffsets.add(0L); + } + + @Override + public void build(List aggrList) + throws IOException { + build(mergeRecords(aggrList), true); + } + + private Iterator mergeRecords(List aggrList) + throws IOException { + int recordBytesLength = 0; + int numDocs = 0; + Integer[] sortedDocIds; + try { + for (StarTreeAggregatedValues starTree : aggrList) { + boolean endOfDoc = false; + while (!endOfDoc) { + long[] dims = new long[starTree.dimensionValues.size()]; + int i = 0; + for (Map.Entry dimValue : starTree.dimensionValues.entrySet()) { + endOfDoc = dimValue.getValue().nextDoc() == DocIdSetIterator.NO_MORE_DOCS + || dimValue.getValue().longValue() == -1; + if (endOfDoc) { + break; + } + long val = dimValue.getValue().longValue(); + dims[i] = val; + i++; + } + if (endOfDoc) { + break; + } + i = 0; + Object[] metrics = new Object[starTree.metricValues.size()]; + for (Map.Entry metricValue : starTree.metricValues.entrySet()) { + metricValue.getValue().nextDoc(); + metrics[i] = metricValue.getValue().longValue(); + i++; + } + Record record = new Record(dims, metrics); + byte[] bytes = serializeStarTreeRecord(record); + numDocs++; + recordBytesLength = bytes.length; + segmentRecordFileOutput.writeBytes(bytes, bytes.length); + } + } + sortedDocIds = new Integer[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = i; + } + } finally { + segmentRecordFileOutput.close(); + } + + if(numDocs == 0) return new ArrayList().iterator(); + + return sortRecords(sortedDocIds, numDocs, recordBytesLength); + } + + private byte[] serializeStarTreeRecord(Record starTreeRecord) { + int numBytes = _numDimensions * Long.BYTES; + for (int i = 0; i < _numMetrics; i++) { + switch (_valueAggregators[i].getAggregatedValueType()) { + case LONG: + numBytes += Long.BYTES; + break; + case DOUBLE: + numBytes += Double.BYTES; + break; + case FLOAT: + case INT: + default: + throw new IllegalStateException(); + } + } + byte[] bytes = new byte[numBytes]; + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.nativeOrder()); + for (long dimension : starTreeRecord._dimensions) { + byteBuffer.putLong(dimension); + } + for (int i = 0; i < _numMetrics; i++) { + switch (_valueAggregators[i].getAggregatedValueType()) { + case LONG: + if (starTreeRecord._metrics[i] != null) { + byteBuffer.putLong((Long) starTreeRecord._metrics[i]); + } + break; + case DOUBLE: + // byteBuffer.putDouble((Double) starTreeRecord._metrics[i]); + // break; + case INT: + case FLOAT: + default: + throw new IllegalStateException(); + } + } + return bytes; + } + + private Record deserializeStarTreeRecord(RandomAccessInput buffer, long offset) + throws IOException { + long[] dimensions = new long[_numDimensions]; + for (int i = 0; i < _numDimensions; i++) { + dimensions[i] = buffer.readLong(offset); + offset += Long.BYTES; + } + Object[] metrics = new Object[_numMetrics]; + for (int i = 0; i < _numMetrics; i++) { + switch (_valueAggregators[i].getAggregatedValueType()) { + case LONG: + metrics[i] = buffer.readLong(offset); + offset += Long.BYTES; + break; + case 
DOUBLE: + // TODO : handle double + // metrics[i] = buffer.getDouble((int) offset); + // offset += Double.BYTES; + break; + case FLOAT: + case INT: + default: + throw new IllegalStateException(); + } + } + return new Record(dimensions, metrics); + } + + // public void copyTo(ByteBuffer byteBuffer, long offset, byte[] buffer) { + // copyTo(offset, byteBuffer, 0, buffer.length); + // } + + @Override + void appendRecord(Record record) + throws IOException { + byte[] bytes = serializeStarTreeRecord(record); + // System.out.println("Appending record : " + record.toString()); + starTreeRecordFileOutput.writeBytes(bytes, bytes.length); + //System.out.println("Appending doc : " + _numDocs + "curr bytes : " + currBytes + " offset: " + _starTreeRecordOffsets.size()); + _starTreeRecordOffsets.add(currBytes); + currBytes += bytes.length; + } + + @Override + Record getStarTreeRecord(int docId) + throws IOException { + ensureBufferReadable(docId); + //System.out.println("Want star record of id : " + docId); + return deserializeStarTreeRecord(starTreeRecordRandomInput, _starTreeRecordOffsets.get(docId)); + } + + @Override + long getDimensionValue(int docId, int dimensionId) + throws IOException { + // System.out.println("doc id : " + docId + " _numReadableStarTreeRecords : " + + // _numReadableStarTreeRecords); + //System.out.println("Want dimension value record of id : " + docId); + ensureBufferReadable(docId, false, true); + // System.out.println("want offset : " + (_starTreeRecordOffsets.get(docId) + (dimensionId * + // Integer.BYTES))); + return starTreeRecordRandomInput.readLong( + (_starTreeRecordOffsets.get(docId) + (dimensionId * Long.BYTES))); + } + + @Override + Iterator sortAndAggregateSegmentRecords(int numDocs) + throws IOException { + // Write all dimensions for segment records into the buffer, and sort all records using an int + // array + // PinotDataBuffer dataBuffer; + // long bufferSize = (long) numDocs * _numDimensions * Integer.BYTES; + int recordBytesLength = 0; + Integer[] sortedDocIds = new Integer[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = i; + } + + try { + for (int i = 0; i < numDocs; i++) { + Record record = getNextSegmentRecord(); + byte[] bytes = serializeStarTreeRecord(record); + recordBytesLength = bytes.length; + segmentRecordFileOutput.writeBytes(bytes, bytes.length); + } + } finally { + segmentRecordFileOutput.close(); + } + + // Create an iterator for aggregated records + return sortRecords(sortedDocIds, numDocs, recordBytesLength); + } + + private Iterator sortRecords(Integer[] sortedDocIds, int numDocs, int recordBytesLength) + throws IOException { + IndexInput segmentRecordFileInput = state.directory.openInput( + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEGMENT_RECORD_FILE_NAME), + state.context); + final long recordBytes = recordBytesLength; + segmentRandomInput = segmentRecordFileInput.randomAccessSlice(0, segmentRecordFileInput.length()); + + try { + // ArrayUtil.introSort(sortedDocIds, comparator); + // Arrays.sort(sortedDocIds, comparator); + + QuickSorter.quickSort(0, numDocs, (i1, i2) -> { + long offset1 = (long) sortedDocIds[i1] * recordBytes; + long offset2 = (long) sortedDocIds[i2] * recordBytes; + for (int i = 0; i < _numDimensions; i++) { + try { + long dimension1 = segmentRandomInput.readLong(offset1 + i * Long.BYTES); + long dimension2 = segmentRandomInput.readLong(offset2 + i * Long.BYTES); + if (dimension1 != dimension2) { + return Long.compare(dimension1, dimension2); + } + } catch 
(IOException e) { + throw new RuntimeException(e); // TODO: handle this better + } + } + return 0; + }, (i1, i2) -> { + int temp = sortedDocIds[i1]; + sortedDocIds[i1] = sortedDocIds[i2]; + sortedDocIds[i2] = temp; + }); + + // System.out.println("Sorted doc ids : " + Arrays.toString(sortedDocIds)); + } finally { + // segmentRecordFileInput.close(); + // state.directory.deleteFile(IndexFileNames.segmentFileName(state.segmentInfo.name, + // state.segmentSuffix, + // SEGMENT_RECORD_FILE_NAME)); + // Files.deleteIfExists(new Path(IndexFileNames.segmentFileName(state.segmentInfo.name, + // state.segmentSuffix, + // SEGMENT_RECORD_FILE_NAME))); + } + if(sortedDocIds != null) + System.out.println("Sorted doc ids length" + sortedDocIds.length); + else + System.out.println("Sorted doc ids array is null"); + + // Create an iterator for aggregated records + return new Iterator() { + boolean _hasNext = true; + Record _currentRecord = getSegmentRecord(sortedDocIds[0], recordBytes); + int _docId = 1; + + @Override + public boolean hasNext() { + return _hasNext; + } + + @Override + public Record next() { + Record next = mergeSegmentRecord(null, _currentRecord); + while (_docId < numDocs) { + Record record = null; + try { + record = getSegmentRecord(sortedDocIds[_docId++], recordBytes); + } catch (IOException e) { + throw new RuntimeException(e); + // TODO : handle this block better - how to handle exceptions ? + } + if (!Arrays.equals(record._dimensions, next._dimensions)) { + _currentRecord = record; + return next; + } else { + next = mergeSegmentRecord(next, record); + } + } + _hasNext = false; + return next; + } + }; + } + + public Record getSegmentRecord(int docID, long recordBytes) + throws IOException { + return deserializeStarTreeRecord(segmentRandomInput, docID * recordBytes); + } + + @Override + Iterator generateRecordsForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException { + //System.out.println("End doc id " + endDocId); + ensureBufferReadable(endDocId, true); + + // Sort all records using an int array + int numDocs = endDocId - startDocId; + int[] sortedDocIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = startDocId + i; + } + QuickSorter.quickSort(0, numDocs, (i1, i2) -> { + + long offset1 = _starTreeRecordOffsets.get(sortedDocIds[i1]); + long offset2 = _starTreeRecordOffsets.get(sortedDocIds[i2]); + for (int i = dimensionId + 1; i < _numDimensions; i++) { + try { + long dimension1 = starTreeRecordRandomInput.readLong(offset1 + i * Long.BYTES); + long dimension2 = starTreeRecordRandomInput.readLong(offset2 + i * Long.BYTES); + if (dimension1 != dimension2) { + return Long.compare(dimension1, dimension2); + } + } catch (Exception e) { + throw new RuntimeException(e); // TODO : do better handling + } + } + + return 0; + }, (i1, i2) -> { + int temp = sortedDocIds[i1]; + sortedDocIds[i1] = sortedDocIds[i2]; + sortedDocIds[i2] = temp; + }); + + // Create an iterator for aggregated records + return new Iterator() { + boolean _hasNext = true; + Record _currentRecord = getStarTreeRecord(sortedDocIds[0]); + int _docId = 1; + + private boolean hasSameDimensions(Record record1, Record record2) { + for (int i = dimensionId + 1; i < _numDimensions; i++) { + if (record1._dimensions[i] != record2._dimensions[i]) { + return false; + } + } + return true; + } + + @Override + public boolean hasNext() { + return _hasNext; + } + + @Override + public Record next() { + Record next = mergeStarTreeRecord(null, _currentRecord); + next._dimensions[dimensionId] 
= STAR_IN_DOC_VALUES_INDEX; + while (_docId < numDocs) { + Record record; + try { + record = getStarTreeRecord(sortedDocIds[_docId++]); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (!hasSameDimensions(record, _currentRecord)) { + _currentRecord = record; + return next; + } else { + next = mergeStarTreeRecord(next, record); + } + } + _hasNext = false; + return next; + } + }; + } + + private void ensureBufferReadable(int docId) throws IOException { + ensureBufferReadable(docId, false); + } + + private void ensureBufferReadable(int docId, boolean endDoc) throws IOException { + ensureBufferReadable(docId, endDoc, false); + } + + private void ensureBufferReadable(int docId, boolean endDocCheck, boolean dimIdCheck) + throws IOException { + + if (docId >= prevStartDocId && (( endDocCheck && docId <= _numReadableStarTreeRecords ) + || (!endDocCheck && docId < _numReadableStarTreeRecords)) ) { + return; + } + //System.out.println("want doc : " + docId + " dim : " + dimIdCheck); + IndexInput in = null; + if(docId < _numDocs ) { + try { + int prevStartDocId = 0; + for(Map.Entry entry : fileToByteSizeMap.entrySet()) { + if(docId < entry.getValue() - 1) { + in = state.directory.openInput(entry.getKey(), state.context); + starTreeRecordRandomInput = + in.randomAccessSlice(in.getFilePointer(), in.length() - in.getFilePointer()); + _numReadableStarTreeRecords = entry.getValue(); + break; + } + prevStartDocId = entry.getValue(); + } + //System.out.println("First loop Current start : " + prevStartDocId + " - Current end : " + _numReadableStarTreeRecords); + this.prevStartDocId = prevStartDocId; + } finally { + // if (in != null) { + // in.close(); + // } + } + } + + if(in != null) return; + + + + //System.out.println("want doc 1 : " + docId + " num docs : " + _numDocs); + fileToByteSizeMap.put(starTreeRecordFileOutput.getName(), + _numDocs); + + //System.out.println("Star tree record file size : " + starTreeRecordFileOutput.getFilePointer()); + //System.out.println("Star tree record file name : " + starTreeRecordFileOutput.getName()); + + starTreeRecordFileOutput.close(); + + String starTreeRecordFileName = + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, STAR_TREE_RECORD_FILE_NAME) + + "_" + starTreeFileCount; + + // TODO : create temp output + starTreeRecordFileOutput = state.directory.createOutput(starTreeRecordFileName, state.context); + starTreeFileCount++; + + currBytes = 0; + // state.directory.sync(Collections.singleton(starTreeRecordFileOutput.getName())); + if (starTreeRecordRandomInput != null) { + starTreeRecordRandomInput = null; + } + + try { + int prevStartDocId = 0; + for(Map.Entry entry : fileToByteSizeMap.entrySet()) { + if(docId <= entry.getValue() - 1) { + in = state.directory.openInput(entry.getKey(), state.context); + starTreeRecordRandomInput = + in.randomAccessSlice(in.getFilePointer(), in.length() - in.getFilePointer()); + _numReadableStarTreeRecords = entry.getValue(); + break; + } + //System.out.println("Setting start value : " + entry.getValue()); + prevStartDocId = entry.getValue(); + } + //System.out.println("Current start : " + prevStartDocId + " - Current end : " + _numReadableStarTreeRecords); + this.prevStartDocId = prevStartDocId; + } finally { + // if (in != null) { + // in.close(); + // } + } + + } + + @Override + public void close() + throws IOException { + boolean success = false; + try { + if (starTreeRecordFileOutput != null) { + IOUtils.deleteFilesIgnoringExceptions(state.directory, 
starTreeRecordFileOutput.getName());
+            }
+            success = true;
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+            // System.out.println(e.getMessage());
+        } finally {
+            if (success) {
+                IOUtils.close(starTreeRecordFileOutput);
+            } else {
+                IOUtils.closeWhileHandlingException(starTreeRecordFileOutput);
+            }
+            // starTreeRecordFileOutput = null;
+        }
+        IOUtils.deleteFilesIgnoringExceptions(state.directory, segmentRecordFileOutput.getName());
+        IOUtils.deleteFilesIgnoringExceptions(state.directory, fileToByteSizeMap.keySet());
+        super.close();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OffHeapSingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OffHeapSingleTreeBuilder.java
new file mode 100644
index 0000000000000..e92ce473944a8
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OffHeapSingleTreeBuilder.java
@@ -0,0 +1,529 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.opensearch.index.codec.freshstartree.builder;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.util.IOUtils;
+import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues;
+import org.opensearch.index.codec.freshstartree.util.QuickSorter;
+
+
+/**
+ * Off-heap implementation of the star tree builder. Segment records are sorted and
+ * aggregated completely off heap. Star tree records use a mixed approach: a bounded
+ * in-heap map from doc ids to records plus a temp file, since the star tree record
+ * file needs to be read and written at the same time and the latest changes are not
+ * always visible during reads.
+ */
+public class OffHeapSingleTreeBuilder extends BaseSingleTreeBuilder {
+    private static final String SEGMENT_RECORD_FILE_NAME = "segment.record";
+    private static final String STAR_TREE_RECORD_FILE_NAME = "star-tree.record";
+
+    private final List _starTreeRecordOffsets;
+
+    private int _numReadableStarTreeRecords;
+
+    IndexOutput segmentRecordFileOutput;
+    IndexOutput 
starTreeRecordFileOutput;
+    RandomAccessInput segmentRandomInput;
+    private RandomAccessInput starTreeRecordRandomInput;
+
+    SegmentWriteState state;
+
+    long currBytes = 0;
+
+    private class MaxSizeHashMap<K, V> extends LinkedHashMap<K, V> {
+        private final int maxSize;
+
+        public MaxSizeHashMap(int maxSize) {
+            this.maxSize = maxSize;
+        }
+
+        @Override
+        protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
+            return size() > maxSize;
+        }
+    }
+
+    MaxSizeHashMap<Integer, Record> _maxSizeHashMap;
+
+    public OffHeapSingleTreeBuilder(IndexOutput output, List dimensionsSplitOrder,
+        Map docValuesMap, int maxDoc, DocValuesConsumer consumer,
+        SegmentWriteState state)
+        throws IOException {
+        super(output, dimensionsSplitOrder, docValuesMap, maxDoc, consumer, state);
+        this.state = state;
+        // TODO : how to set this dynamically
+        String segmentRecordFileName =
+            IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEGMENT_RECORD_FILE_NAME);
+        String starTreeRecordFileName =
+            IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, STAR_TREE_RECORD_FILE_NAME);
+
+        // TODO : create temp output
+        starTreeRecordFileOutput = state.directory.createOutput(starTreeRecordFileName, state.context);
+        _maxSizeHashMap = new MaxSizeHashMap<>(10000000);
+        // CodecUtil.writeIndexHeader(
+        //     starTreeRecordFileOutput,
+        //     "STARTreeCodec",
+        //     0,
+        //     state.segmentInfo.getId(),
+        //     state.segmentSuffix);
+        segmentRecordFileOutput = state.directory.createOutput(segmentRecordFileName, state.context);
+
+        _starTreeRecordOffsets = new ArrayList<>();
+        _starTreeRecordOffsets.add(0L);
+    }
+
+    @Override
+    public void build(List aggrList)
+        throws IOException {
+        build(mergeRecords(aggrList), true);
+    }
+
+    private Iterator mergeRecords(List aggrList)
+        throws IOException {
+        int recordBytesLength = 0;
+        int numDocs = 0;
+        Integer[] sortedDocIds;
+        try {
+            for (StarTreeAggregatedValues starTree : aggrList) {
+                boolean endOfDoc = false;
+                while (!endOfDoc) {
+                    long[] dims = new long[starTree.dimensionValues.size()];
+                    int i = 0;
+                    for (Map.Entry dimValue : starTree.dimensionValues.entrySet()) {
+                        endOfDoc = dimValue.getValue().nextDoc() == DocIdSetIterator.NO_MORE_DOCS
+                            || dimValue.getValue().longValue() == -1;
+                        if (endOfDoc) {
+                            break;
+                        }
+                        long val = dimValue.getValue().longValue();
+                        dims[i] = val;
+                        i++;
+                    }
+                    if (endOfDoc) {
+                        break;
+                    }
+                    i = 0;
+                    Object[] metrics = new Object[starTree.metricValues.size()];
+                    for (Map.Entry metricValue : starTree.metricValues.entrySet()) {
+                        metricValue.getValue().nextDoc();
+                        metrics[i] = metricValue.getValue().longValue();
+                        i++;
+                    }
+                    BaseSingleTreeBuilder.Record record = new BaseSingleTreeBuilder.Record(dims, metrics);
+                    byte[] bytes = serializeStarTreeRecord(record);
+                    numDocs++;
+                    recordBytesLength = bytes.length;
+                    segmentRecordFileOutput.writeBytes(bytes, bytes.length);
+                }
+            }
+            sortedDocIds = new Integer[numDocs];
+            for (int i = 0; i < numDocs; i++) {
+                sortedDocIds[i] = i;
+            }
+        } finally {
+            segmentRecordFileOutput.close();
+        }
+
+        if (numDocs == 0) return new ArrayList().iterator();
+
+        return sortRecords(sortedDocIds, numDocs, recordBytesLength);
+    }
+
+    private byte[] serializeStarTreeRecord(Record starTreeRecord) {
+        int numBytes = _numDimensions * Long.BYTES;
+        for (int i = 0; i < _numMetrics; i++) {
+            switch (_valueAggregators[i].getAggregatedValueType()) {
+                case LONG:
+                    numBytes += Long.BYTES;
+                    break;
+                case DOUBLE:
+                    numBytes += Double.BYTES;
+                    break;
+                case FLOAT:
+                case INT:
+                default:
+                    throw new IllegalStateException();
+            }
+        }
+        byte[] 
bytes = new byte[numBytes]; + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.nativeOrder()); + for (long dimension : starTreeRecord._dimensions) { + byteBuffer.putLong(dimension); + } + for (int i = 0; i < _numMetrics; i++) { + switch (_valueAggregators[i].getAggregatedValueType()) { + case LONG: + if (starTreeRecord._metrics[i] != null) { + byteBuffer.putLong((Long) starTreeRecord._metrics[i]); + } + break; + case DOUBLE: + // byteBuffer.putDouble((Double) starTreeRecord._metrics[i]); + // break; + case INT: + case FLOAT: + default: + throw new IllegalStateException(); + } + } + return bytes; + } + + private Record deserializeStarTreeRecord(RandomAccessInput buffer, long offset) + throws IOException { + long[] dimensions = new long[_numDimensions]; + for (int i = 0; i < _numDimensions; i++) { + dimensions[i] = buffer.readLong(offset); + offset += Long.BYTES; + } + Object[] metrics = new Object[_numMetrics]; + for (int i = 0; i < _numMetrics; i++) { + switch (_valueAggregators[i].getAggregatedValueType()) { + case LONG: + metrics[i] = buffer.readLong(offset); + offset += Long.BYTES; + break; + case DOUBLE: + // TODO : handle double + // metrics[i] = buffer.getDouble((int) offset); + // offset += Double.BYTES; + break; + case FLOAT: + case INT: + default: + throw new IllegalStateException(); + } + } + return new Record(dimensions, metrics); + } + + // public void copyTo(ByteBuffer byteBuffer, long offset, byte[] buffer) { + // copyTo(offset, byteBuffer, 0, buffer.length); + // } + + @Override + void appendRecord(Record record) + throws IOException { + byte[] bytes = serializeStarTreeRecord(record); + // System.out.println("Appending record : " + record.toString()); + _maxSizeHashMap.put(_numDocs, record); + currBytes += bytes.length; + starTreeRecordFileOutput.writeBytes(bytes, bytes.length); + _starTreeRecordOffsets.add(_starTreeRecordOffsets.get(_numDocs) + bytes.length); + } + + @Override + Record getStarTreeRecord(int docId) + throws IOException { + ensureBufferReadable(docId); + // System.out.println("Want star record of id : " + docId); + if (_maxSizeHashMap.containsKey(docId)) { + return _maxSizeHashMap.get(docId); + } + return deserializeStarTreeRecord(starTreeRecordRandomInput, _starTreeRecordOffsets.get(docId)); + } + + @Override + long getDimensionValue(int docId, int dimensionId) + throws IOException { + // System.out.println("doc id : " + docId + " _numReadableStarTreeRecords : " + + // _numReadableStarTreeRecords); + ensureBufferReadable(docId); + if (_maxSizeHashMap.containsKey(docId)) { + return _maxSizeHashMap.get(docId)._dimensions[dimensionId]; + } + // System.out.println("want offset : " + (_starTreeRecordOffsets.get(docId) + (dimensionId * + // Integer.BYTES))); + return starTreeRecordRandomInput.readLong( + (_starTreeRecordOffsets.get(docId) + (dimensionId * Long.BYTES))); + } + + @Override + Iterator sortAndAggregateSegmentRecords(int numDocs) + throws IOException { + // Write all dimensions for segment records into the buffer, and sort all records using an int + // array + // PinotDataBuffer dataBuffer; + // long bufferSize = (long) numDocs * _numDimensions * Integer.BYTES; + int recordBytesLength = 0; + Integer[] sortedDocIds = new Integer[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = i; + } + + try { + for (int i = 0; i < numDocs; i++) { + Record record = getNextSegmentRecord(); + byte[] bytes = serializeStarTreeRecord(record); + recordBytesLength = bytes.length; + segmentRecordFileOutput.writeBytes(bytes, bytes.length); + } + 
} finally { + segmentRecordFileOutput.close(); + } + + // Create an iterator for aggregated records + return sortRecords(sortedDocIds, numDocs, recordBytesLength); + } + + private Iterator sortRecords(Integer[] sortedDocIds, int numDocs, int recordBytesLength) + throws IOException { + IndexInput segmentRecordFileInput = state.directory.openInput( + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEGMENT_RECORD_FILE_NAME), + state.context); + final long recordBytes = recordBytesLength; + segmentRandomInput = segmentRecordFileInput.randomAccessSlice(0, segmentRecordFileInput.length()); + + try { + // ArrayUtil.introSort(sortedDocIds, comparator); + // Arrays.sort(sortedDocIds, comparator); + + QuickSorter.quickSort(0, numDocs, (i1, i2) -> { + long offset1 = (long) sortedDocIds[i1] * recordBytes; + long offset2 = (long) sortedDocIds[i2] * recordBytes; + for (int i = 0; i < _numDimensions; i++) { + try { + long dimension1 = segmentRandomInput.readLong(offset1 + i * Long.BYTES); + long dimension2 = segmentRandomInput.readLong(offset2 + i * Long.BYTES); + if (dimension1 != dimension2) { + return Long.compare(dimension1, dimension2); + } + } catch (IOException e) { + throw new RuntimeException(e); // TODO: handle this better + } + } + return 0; + }, (i1, i2) -> { + int temp = sortedDocIds[i1]; + sortedDocIds[i1] = sortedDocIds[i2]; + sortedDocIds[i2] = temp; + }); + + // System.out.println("Sorted doc ids : " + Arrays.toString(sortedDocIds)); + } finally { + // segmentRecordFileInput.close(); + // state.directory.deleteFile(IndexFileNames.segmentFileName(state.segmentInfo.name, + // state.segmentSuffix, + // SEGMENT_RECORD_FILE_NAME)); + // Files.deleteIfExists(new Path(IndexFileNames.segmentFileName(state.segmentInfo.name, + // state.segmentSuffix, + // SEGMENT_RECORD_FILE_NAME))); + } + if(sortedDocIds != null) + System.out.println("Sorted doc ids length" + sortedDocIds.length); + else + System.out.println("Sorted doc ids array is null"); + + // Create an iterator for aggregated records + return new Iterator() { + boolean _hasNext = true; + Record _currentRecord = getSegmentRecord(sortedDocIds[0], recordBytes); + int _docId = 1; + + @Override + public boolean hasNext() { + return _hasNext; + } + + @Override + public Record next() { + Record next = mergeSegmentRecord(null, _currentRecord); + while (_docId < numDocs) { + Record record = null; + try { + record = getSegmentRecord(sortedDocIds[_docId++], recordBytes); + } catch (IOException e) { + throw new RuntimeException(e); + // TODO : handle this block better - how to handle exceptions ? 
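+                        // Iterator#next() cannot declare checked exceptions, so the
+                        // IOException from reading the next segment record is wrapped
+                        // in a RuntimeException here.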
+ } + if (!Arrays.equals(record._dimensions, next._dimensions)) { + _currentRecord = record; + return next; + } else { + next = mergeSegmentRecord(next, record); + } + } + _hasNext = false; + return next; + } + }; + } + + public Record getSegmentRecord(int docID, long recordBytes) + throws IOException { + return deserializeStarTreeRecord(segmentRandomInput, docID * recordBytes); + } + + @Override + Iterator generateRecordsForStarNode(int startDocId, int endDocId, int dimensionId) + throws IOException { + ensureBufferReadable(endDocId); + + // Sort all records using an int array + int numDocs = endDocId - startDocId; + int[] sortedDocIds = new int[numDocs]; + for (int i = 0; i < numDocs; i++) { + sortedDocIds[i] = startDocId + i; + } + QuickSorter.quickSort(0, numDocs, (i1, i2) -> { + if (_maxSizeHashMap.containsKey(sortedDocIds[i1]) && _maxSizeHashMap.containsKey(sortedDocIds[i2])) { + for (int i = dimensionId + 1; i < _numDimensions; i++) { + long val1 = _maxSizeHashMap.get(sortedDocIds[i1])._dimensions[i]; + long val2 = _maxSizeHashMap.get(sortedDocIds[i2])._dimensions[i]; + if (val1 != val2) { + return Long.compare(val1, val2); + } + } + } else { + long offset1 = _starTreeRecordOffsets.get(sortedDocIds[i1]); + long offset2 = _starTreeRecordOffsets.get(sortedDocIds[i2]); + for (int i = dimensionId + 1; i < _numDimensions; i++) { + try { + long dimension1 = starTreeRecordRandomInput.readLong(offset1 + i * Long.BYTES); + long dimension2 = starTreeRecordRandomInput.readLong(offset2 + i * Long.BYTES); + if (dimension1 != dimension2) { + return Long.compare(dimension1, dimension2); + } + } catch (Exception e) { + throw new RuntimeException(e); // TODO : do better handling + } + } + } + return 0; + }, (i1, i2) -> { + int temp = sortedDocIds[i1]; + sortedDocIds[i1] = sortedDocIds[i2]; + sortedDocIds[i2] = temp; + }); + + // Create an iterator for aggregated records + return new Iterator() { + boolean _hasNext = true; + Record _currentRecord = getStarTreeRecord(sortedDocIds[0]); + int _docId = 1; + + private boolean hasSameDimensions(Record record1, Record record2) { + for (int i = dimensionId + 1; i < _numDimensions; i++) { + if (record1._dimensions[i] != record2._dimensions[i]) { + return false; + } + } + return true; + } + + @Override + public boolean hasNext() { + return _hasNext; + } + + @Override + public Record next() { + Record next = mergeStarTreeRecord(null, _currentRecord); + next._dimensions[dimensionId] = STAR_IN_DOC_VALUES_INDEX; + while (_docId < numDocs) { + Record record; + try { + record = getStarTreeRecord(sortedDocIds[_docId++]); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (!hasSameDimensions(record, _currentRecord)) { + _currentRecord = record; + return next; + } else { + next = mergeStarTreeRecord(next, record); + } + } + _hasNext = false; + return next; + } + }; + } + + private void ensureBufferReadable(int docId) + throws IOException { + if (_numReadableStarTreeRecords <= docId) { + // starTreeRecordFileOutput.close(); + // state.directory.sync(Collections.singleton(starTreeRecordFileOutput.getName())); + if (starTreeRecordRandomInput != null) { + starTreeRecordRandomInput = null; + } + IndexInput in = null; + try { + in = state.directory.openInput(starTreeRecordFileOutput.getName(), state.context); + // CodecUtil.checkIndexHeader(in, "STARTreeCodec", 0, + // Lucene90DocValuesFormat.VERSION_CURRENT, + // state.segmentInfo.getId(), state.segmentSuffix); + // System.out.println("Star tree expected : " + currBytes); + // System.out.println("Star 
Tree Record File Size : " + in.length()); + starTreeRecordRandomInput = + in.randomAccessSlice(in.getFilePointer(), in.length() - in.getFilePointer()); + } finally { + // if (in != null) { + // in.close(); + // } + } + _numReadableStarTreeRecords = _numDocs; + } + } + + @Override + public void close() + throws IOException { + boolean success = false; + try { + if (starTreeRecordFileOutput != null) { + starTreeRecordFileOutput.writeInt(-1); + CodecUtil.writeFooter(starTreeRecordFileOutput); // write checksum + } + success = true; + } catch (Exception e) { + throw new RuntimeException(e); + // System.out.println(e.getMessage()); + } finally { + if (success) { + IOUtils.close(starTreeRecordFileOutput); + } else { + IOUtils.closeWhileHandlingException(starTreeRecordFileOutput); + } + // starTreeRecordFileOutput = null; + } + IOUtils.deleteFilesIgnoringExceptions(state.directory, segmentRecordFileOutput.getName()); + IOUtils.deleteFilesIgnoringExceptions(state.directory, starTreeRecordFileOutput.getName()); + super.close(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OnHeapSingleTreeBuilder.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OnHeapSingleTreeBuilder.java new file mode 100644 index 0000000000000..699cf66fa3b30 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/OnHeapSingleTreeBuilder.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.builder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues; + + +/** On heap single tree builder */ +public class OnHeapSingleTreeBuilder extends BaseSingleTreeBuilder { + private final List _records = new ArrayList<>(); + + public OnHeapSingleTreeBuilder(IndexOutput output, List dimensionsSplitOrder, + Map docValuesMap, int maxDoc, DocValuesConsumer consumer, + SegmentWriteState state) + throws IOException { + super(output, dimensionsSplitOrder, docValuesMap, maxDoc, consumer, state); + } + + @Override + public void build(List aggrList) + throws IOException { + build(mergeRecords(aggrList), true); + } + + private Iterator mergeRecords(List aggrList) + throws IOException { + List records = new ArrayList<>(); + for (StarTreeAggregatedValues starTree : aggrList) { + boolean endOfDoc = false; + while (!endOfDoc) { + long[] dims = new long[starTree.dimensionValues.size()]; + int i = 0; + for (Map.Entry dimValue : starTree.dimensionValues.entrySet()) { + endOfDoc = dimValue.getValue().nextDoc() == DocIdSetIterator.NO_MORE_DOCS + || dimValue.getValue().longValue() == -1; + if (endOfDoc) { + break; + } + long val = dimValue.getValue().longValue(); + dims[i] = val; + i++; + } + if (endOfDoc) { + break; + } + i = 0; + Object[] metrics = new Object[starTree.metricValues.size()]; + for (Map.Entry metricValue : starTree.metricValues.entrySet()) { + metricValue.getValue().nextDoc(); + metrics[i] = metricValue.getValue().longValue(); + i++; + } + BaseSingleTreeBuilder.Record record = new BaseSingleTreeBuilder.Record(dims, metrics); + // System.out.println("Adding : " + record.toString()); + records.add(record); + } + } + BaseSingleTreeBuilder.Record[] recordsArr = new BaseSingleTreeBuilder.Record[records.size()]; + records.toArray(recordsArr); + records = null; + return sortRecords(recordsArr); + } + + @Override + void appendRecord(Record record) + throws IOException { + // System.out.println("Appending record : " + record.toString()); + _records.add(record); + } + + @Override + Record getStarTreeRecord(int docId) + throws IOException { + return _records.get(docId); + } + + @Override + long getDimensionValue(int docId, int dimensionId) + throws IOException { + // System.out.println("doc id : " + docId + " dim id : " + dimensionId + " size : " + + // _records.size()); + return _records.get(docId)._dimensions[dimensionId]; + } + + @Override + Iterator sortAndAggregateSegmentRecords(int numDocs) + throws IOException { + Record[] records = new Record[numDocs]; + for (int i = 0; i < numDocs; i++) { + records[i] = getNextSegmentRecord(); + // System.out.println("Step 3 : " + records[i]._dimensions[0] + " | " + + // records[i]._dimensions[1] + " | " + + // records[i]._metrics[0]); + } + return sortAndAggregateSegmentRecords(records); + } + + public Iterator sortAndAggregateSegmentRecords(Record[] records) + throws IOException { + Arrays.sort(records, (o1, o2) -> { + for (int i = 0; i < _numDimensions; i++) { + if (o1._dimensions[i] != o2._dimensions[i]) { + return 
Long.compare(o1._dimensions[i], o2._dimensions[i]);
+            }
+        }
+        return 0;
+    });
+    return sortRecords(records);
+  }
+
+  private Iterator sortRecords(Record[] records) {
+    return new Iterator() {
+      boolean _hasNext = true;
+      Record _currentRecord = records[0];
+      int _docId = 1;
+
+      @Override
+      public boolean hasNext() {
+        return _hasNext;
+      }
+
+      @Override
+      public Record next() {
+        Record next = mergeSegmentRecord(null, _currentRecord);
+        while (_docId < records.length) {
+          Record record = records[_docId++];
+          if (!Arrays.equals(record._dimensions, next._dimensions)) {
+            _currentRecord = record;
+            return next;
+          } else {
+            next = mergeSegmentRecord(next, record);
+          }
+        }
+        _hasNext = false;
+        return next;
+      }
+    };
+  }
+
+  @Override
+  Iterator generateRecordsForStarNode(int startDocId, int endDocId, int dimensionId)
+      throws IOException {
+    int numDocs = endDocId - startDocId;
+    Record[] records = new Record[numDocs];
+    for (int i = 0; i < numDocs; i++) {
+      records[i] = getStarTreeRecord(startDocId + i);
+    }
+    Arrays.sort(records, (o1, o2) -> {
+      for (int i = dimensionId + 1; i < _numDimensions; i++) {
+        if (o1._dimensions[i] != o2._dimensions[i]) {
+          return Long.compare(o1._dimensions[i], o2._dimensions[i]);
+        }
+      }
+      return 0;
+    });
+    return new Iterator() {
+      boolean _hasNext = true;
+      Record _currentRecord = records[0];
+      int _docId = 1;
+
+      private boolean hasSameDimensions(Record record1, Record record2) {
+        for (int i = dimensionId + 1; i < _numDimensions; i++) {
+          if (record1._dimensions[i] != record2._dimensions[i]) {
+            return false;
+          }
+        }
+        return true;
+      }
+
+      @Override
+      public boolean hasNext() {
+        return _hasNext;
+      }
+
+      @Override
+      public Record next() {
+        Record next = mergeStarTreeRecord(null, _currentRecord);
+        next._dimensions[dimensionId] = STAR_IN_DOC_VALUES_INDEX;
+        while (_docId < numDocs) {
+          Record record = records[_docId++];
+          if (!hasSameDimensions(record, _currentRecord)) {
+            _currentRecord = record;
+            return next;
+          } else {
+            next = mergeStarTreeRecord(next, record);
+          }
+        }
+        _hasNext = false;
+        return next;
+      }
+    };
+  }
+}
diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/Record.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/Record.java
new file mode 100644
index 0000000000000..43650d98c0871
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/Record.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.builder; + +/** Class representing a star tree record */ +public class Record { + // TODO : make it not specific to numeric tree + int[] dimensions; + Object[] metrics; + + public Record(int dims, int metrics) { + + this.dimensions = new int[dims]; + this.metrics = new Object[metrics]; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Dimensions : "); + for (int l : dimensions) { + sb.append(l + " "); + } + sb.append("Metrics : "); + for (Object o : metrics) { + sb.append(o + " "); + } + return sb.toString(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/StarTreeBuilderUtils.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/StarTreeBuilderUtils.java new file mode 100644 index 0000000000000..caa039e91a461 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/StarTreeBuilderUtils.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.builder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.opensearch.index.codec.freshstartree.node.OffHeapStarTreeNode.SERIALIZABLE_SIZE_IN_BYTES; + +// import static +// org.opensearch.index.codec.freshstartree.node.OffHeapStarTreeNode.SERIALIZABLE_SIZE_IN_BYTES; + +/** Util class for building star tree */ +public class StarTreeBuilderUtils { + + private static final Logger logger = LogManager.getLogger(StarTreeBuilderUtils.class); + + private StarTreeBuilderUtils() { + } + + public static final int INVALID_ID = -1; + public static final long MAGIC_MARKER = 0xBADDA55B00DAD00DL; + + /** Tree node representation */ + public static class TreeNode { + public int _dimensionId = INVALID_ID; + public long _dimensionValue = INVALID_ID; + public int _startDocId = INVALID_ID; + public int _endDocId = INVALID_ID; + public int _aggregatedDocId = INVALID_ID; + public int _childDimensionId = INVALID_ID; + public Map _children; + } + + public static void serializeTree(IndexOutput indexOutput, TreeNode rootNode, String[] dimensions, int numNodes) + throws IOException { + int headerSizeInBytes = computeHeaderByteSize(dimensions); + long totalSizeInBytes = headerSizeInBytes + (long) numNodes * SERIALIZABLE_SIZE_IN_BYTES; + + logger.info("Star tree size in bytes : {}", totalSizeInBytes); + + writeHeader(indexOutput, headerSizeInBytes, dimensions, numNodes); + writeNodes(indexOutput, rootNode); + } + + private static int computeHeaderByteSize(String[] dimensions) { + // Magic marker (8), version (4), size of header (4) and number of dimensions (4) + int headerSizeInBytes = 20; + + for (String dimension : dimensions) { + headerSizeInBytes += Integer.BYTES; // For dimension index + headerSizeInBytes += Integer.BYTES; // For length of dimension name + headerSizeInBytes += dimension.getBytes(UTF_8).length; // For dimension name + } + + headerSizeInBytes += Integer.BYTES; // For number of nodes. 
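+        // Worked example for a single dimension named "hour":
+        // 20 (fixed header) + 4 (dimension index) + 4 (name length) + 4 (name bytes) + 4 (node count) = 36 bytes.
+        // Note: writeHeader below uses IndexOutput#writeString, which prefixes the name
+        // with a VInt length rather than a fixed 4-byte int, so this computed size can
+        // slightly overestimate the bytes actually written for short dimension names.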
+ return headerSizeInBytes; + } + + private static void writeHeader(IndexOutput output, int headerSizeInBytes, String[] dimensions, int numNodes) + throws IOException { + output.writeLong(MAGIC_MARKER); + output.writeInt(1); + output.writeInt(headerSizeInBytes); + output.writeInt(dimensions.length); + for (int i = 0; i < dimensions.length; i++) { + output.writeInt(i); + output.writeString(dimensions[i]); + } + output.writeInt(numNodes); + } + + private static void writeNodes(IndexOutput output, TreeNode rootNode) + throws IOException { + Queue queue = new LinkedList<>(); + queue.add(rootNode); + + int currentNodeId = 0; + while (!queue.isEmpty()) { + TreeNode node = queue.remove(); + + if (node._children == null) { + writeNode(output, node, INVALID_ID, INVALID_ID); + } else { + // Sort all children nodes based on dimension value + List sortedChildren = new ArrayList<>(node._children.values()); + sortedChildren.sort((o1, o2) -> Long.compare(o1._dimensionValue, o2._dimensionValue)); + + int firstChildId = currentNodeId + queue.size() + 1; + int lastChildId = firstChildId + sortedChildren.size() - 1; + writeNode(output, node, firstChildId, lastChildId); + + queue.addAll(sortedChildren); + } + + currentNodeId++; + } + } + + private static void writeNode(IndexOutput output, TreeNode node, int firstChildId, int lastChildId) + throws IOException { + output.writeInt(node._dimensionId); + output.writeLong(node._dimensionValue); + output.writeInt(node._startDocId); + output.writeInt(node._endDocId); + output.writeInt(node._aggregatedDocId); + output.writeInt(firstChildId); + output.writeInt(lastChildId); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/package-info.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/package-info.java new file mode 100644 index 0000000000000..298febc36487b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/builder/package-info.java @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Builder classes for star tree */ +package org.opensearch.index.codec.freshstartree.builder; diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/Lucene90DocValuesConsumerCopy.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/Lucene90DocValuesConsumerCopy.java new file mode 100644 index 0000000000000..ad5eaf7f66a79 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/Lucene90DocValuesConsumerCopy.java @@ -0,0 +1,814 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.codec; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.IndexedDISI; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.EmptyDocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.store.ByteBuffersDataOutput; +import org.apache.lucene.store.ByteBuffersIndexOutput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.MathUtil; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.compress.LZ4; +import org.apache.lucene.util.packed.DirectMonotonicWriter; +import org.apache.lucene.util.packed.DirectWriter; + + +/** writer for {@link Lucene90DocValuesFormat} */ +public final class Lucene90DocValuesConsumerCopy extends DocValuesConsumer { + + IndexOutput data, meta; + final int maxDoc; + private byte[] termsDictBuffer; + static final int VERSION_START = 0; + public static final int VERSION_CURRENT = VERSION_START; + + // indicates docvalues type + static final byte NUMERIC = 0; + static final byte BINARY = 1; + static final byte SORTED = 2; + static final byte SORTED_SET = 3; + static final byte SORTED_NUMERIC = 4; + + static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + + static final int NUMERIC_BLOCK_SHIFT = 14; + static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT; + + static final int TERMS_DICT_BLOCK_LZ4_SHIFT = 6; + static final int TERMS_DICT_BLOCK_LZ4_SIZE = 1 << TERMS_DICT_BLOCK_LZ4_SHIFT; + static final int TERMS_DICT_BLOCK_LZ4_MASK = TERMS_DICT_BLOCK_LZ4_SIZE - 1; + + static final int TERMS_DICT_REVERSE_INDEX_SHIFT = 10; + static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT; + static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1; + + /** expert: Creates a new writer */ + 
public Lucene90DocValuesConsumerCopy(SegmentWriteState state, String dataCodec, String dataExtension, + String metaCodec, String metaExtension) + throws IOException { + this.termsDictBuffer = new byte[1 << 14]; + boolean success = false; + try { + String dataName = + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.createOutput(dataName, state.context); + CodecUtil.writeIndexHeader(data, dataCodec, 0, state.segmentInfo.getId(), state.segmentSuffix); + String metaName = + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + meta = state.directory.createOutput(metaName, state.context); + CodecUtil.writeIndexHeader(meta, metaCodec, 0, state.segmentInfo.getId(), state.segmentSuffix); + maxDoc = state.segmentInfo.maxDoc(); + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public void close() + throws IOException { + boolean success = false; + try { + if (meta != null) { + meta.writeInt(-1); // write EOF marker + CodecUtil.writeFooter(meta); // write checksum + } + if (data != null) { + CodecUtil.writeFooter(data); // write checksum + } + success = true; + } finally { + if (success) { + IOUtils.close(data, meta); + } else { + IOUtils.closeWhileHandlingException(data, meta); + } + meta = data = null; + } + } + + @Override + public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + meta.writeInt(field.number); + meta.writeByte(NUMERIC); + + writeValues(field, new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) + throws IOException { + return DocValues.singleton(valuesProducer.getNumeric(field)); + } + }, false); + } + + private static class MinMaxTracker { + long min, max, numValues, spaceInBits; + + MinMaxTracker() { + reset(); + spaceInBits = 0; + } + + private void reset() { + min = Long.MAX_VALUE; + max = Long.MIN_VALUE; + numValues = 0; + } + + /** Accumulate a new value. */ + void update(long v) { + min = Math.min(min, v); + max = Math.max(max, v); + ++numValues; + } + + /** Accumulate state from another tracker. */ + void update(MinMaxTracker other) { + min = Math.min(min, other.min); + max = Math.max(max, other.max); + numValues += other.numValues; + } + + /** Update the required space. */ + void finish() { + if (max > min) { + spaceInBits += DirectWriter.unsignedBitsRequired(max - min) * numValues; + } + } + + /** Update space usage and get ready for accumulating values for the next block. */ + void nextBlock() { + finish(); + reset(); + } + } + + private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) + throws IOException { + SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); + final long firstValue; + if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + firstValue = values.nextValue(); + } else { + firstValue = 0L; + } + values = valuesProducer.getSortedNumeric(field); + int numDocsWithValue = 0; + MinMaxTracker minMax = new MinMaxTracker(); + MinMaxTracker blockMinMax = new MinMaxTracker(); + long gcd = 0; + Set uniqueValues = ords ? 
null : new HashSet<>(); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + for (int i = 0, count = values.docValueCount(); i < count; ++i) { + long v = values.nextValue(); + + if (gcd != 1) { + if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { + // in that case v - minValue might overflow and make the GCD computation return + // wrong results. Since these extreme values are unlikely, we just discard + // GCD computation for them + gcd = 1; + } else { + gcd = MathUtil.gcd(gcd, v - firstValue); + } + } + + blockMinMax.update(v); + if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) { + minMax.update(blockMinMax); + blockMinMax.nextBlock(); + } + + if (uniqueValues != null && uniqueValues.add(v) && uniqueValues.size() > 256) { + uniqueValues = null; + } + } + + numDocsWithValue++; + } + + minMax.update(blockMinMax); + minMax.finish(); + blockMinMax.finish(); + + if (ords && minMax.numValues > 0) { + if (minMax.min != 0) { + throw new IllegalStateException("The min value for ordinals should always be 0, got " + minMax.min); + } + if (minMax.max != 0 && gcd != 1) { + throw new IllegalStateException("GCD compression should never be used on ordinals, found gcd=" + gcd); + } + } + + final long numValues = minMax.numValues; + long min = minMax.min; + final long max = minMax.max; + assert blockMinMax.spaceInBits <= minMax.spaceInBits; + + if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents has values + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + values = valuesProducer.getSortedNumeric(field); + final short jumpTableEntryCount = + IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + + meta.writeLong(numValues); + final int numBitsPerValue; + boolean doBlocks = false; + Map encode = null; + if (min >= max) { // meta[-1]: All values are 0 + numBitsPerValue = 0; + meta.writeInt(-1); // tablesize + } else { + if (uniqueValues != null && uniqueValues.size() > 1 + && DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1) < DirectWriter.unsignedBitsRequired( + (max - min) / gcd)) { + numBitsPerValue = DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1); + final Long[] sortedUniqueValues = uniqueValues.toArray(new Long[0]); + Arrays.sort(sortedUniqueValues); + meta.writeInt(sortedUniqueValues.length); // tablesize + for (Long v : sortedUniqueValues) { + meta.writeLong(v); // table[] entry + } + encode = new HashMap<>(); + for (int i = 0; i < sortedUniqueValues.length; ++i) { + encode.put(sortedUniqueValues[i], i); + } + min = 0; + gcd = 1; + } else { + uniqueValues = null; + // we do blocks if that appears to save 10+% storage + doBlocks = minMax.spaceInBits > 0 && (double) blockMinMax.spaceInBits / minMax.spaceInBits <= 0.9; + if (doBlocks) { + 
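+ // 0xFF is a sentinel: it tells the reader that values are stored in blocks with
+ // varying bits per value, and tablesize = -2 - NUMERIC_BLOCK_SHIFT lets
+ // readNumeric() in Lucene90DocValuesProducerCopy recover the block shift as
+ // -2 - tableSize.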
numBitsPerValue = 0xFF; + meta.writeInt(-2 - NUMERIC_BLOCK_SHIFT); // tablesize + } else { + numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd); + if (gcd == 1 && min > 0 + && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) { + min = 0; + } + meta.writeInt(-1); // tablesize + } + } + } + + meta.writeByte((byte) numBitsPerValue); + meta.writeLong(min); + meta.writeLong(gcd); + long startOffset = data.getFilePointer(); + meta.writeLong(startOffset); // valueOffset + long jumpTableOffset = -1; + if (doBlocks) { + jumpTableOffset = writeValuesMultipleBlocks(valuesProducer.getSortedNumeric(field), gcd); + } else if (numBitsPerValue != 0) { + writeValuesSingleBlock(valuesProducer.getSortedNumeric(field), numValues, numBitsPerValue, min, gcd, + encode); + } + meta.writeLong(data.getFilePointer() - startOffset); // valuesLength + meta.writeLong(jumpTableOffset); + return new long[]{numDocsWithValue, numValues}; + } + + private void writeValuesSingleBlock(SortedNumericDocValues values, long numValues, int numBitsPerValue, long min, + long gcd, Map encode) + throws IOException { + DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + for (int i = 0, count = values.docValueCount(); i < count; ++i) { + long v = values.nextValue(); + if (encode == null) { + writer.add((v - min) / gcd); + } else { + writer.add(encode.get(v)); + } + } + } + writer.finish(); + } + + // Returns the offset to the jump-table for vBPV + private long writeValuesMultipleBlocks(SortedNumericDocValues values, long gcd) + throws IOException { + long[] offsets = new long[ArrayUtil.oversize(1, Long.BYTES)]; + int offsetsIndex = 0; + final long[] buffer = new long[NUMERIC_BLOCK_SIZE]; + final ByteBuffersDataOutput encodeBuffer = ByteBuffersDataOutput.newResettableInstance(); + int upTo = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + for (int i = 0, count = values.docValueCount(); i < count; ++i) { + buffer[upTo++] = values.nextValue(); + if (upTo == NUMERIC_BLOCK_SIZE) { + offsets = ArrayUtil.grow(offsets, offsetsIndex + 1); + offsets[offsetsIndex++] = data.getFilePointer(); + writeBlock(buffer, NUMERIC_BLOCK_SIZE, gcd, encodeBuffer); + upTo = 0; + } + } + } + if (upTo > 0) { + offsets = ArrayUtil.grow(offsets, offsetsIndex + 1); + offsets[offsetsIndex++] = data.getFilePointer(); + writeBlock(buffer, upTo, gcd, encodeBuffer); + } + + // All blocks has been written. 
Flush the offset jump-table + final long offsetsOrigo = data.getFilePointer(); + for (int i = 0; i < offsetsIndex; i++) { + data.writeLong(offsets[i]); + } + data.writeLong(offsetsOrigo); + return offsetsOrigo; + } + + private void writeBlock(long[] values, int length, long gcd, ByteBuffersDataOutput buffer) + throws IOException { + assert length > 0; + long min = values[0]; + long max = values[0]; + for (int i = 1; i < length; ++i) { + final long v = values[i]; + assert Math.floorMod(values[i] - min, gcd) == 0; + min = Math.min(min, v); + max = Math.max(max, v); + } + if (min == max) { + data.writeByte((byte) 0); + data.writeLong(min); + } else { + final int bitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd); + buffer.reset(); + assert buffer.size() == 0; + final DirectWriter w = DirectWriter.getInstance(buffer, length, bitsPerValue); + for (int i = 0; i < length; ++i) { + w.add((values[i] - min) / gcd); + } + w.finish(); + data.writeByte((byte) bitsPerValue); + data.writeLong(min); + data.writeInt(Math.toIntExact(buffer.size())); + buffer.copyTo(data); + } + } + + @Override + public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + meta.writeInt(field.number); + meta.writeByte(BINARY); + + BinaryDocValues values = valuesProducer.getBinary(field); + long start = data.getFilePointer(); + meta.writeLong(start); // dataOffset + int numDocsWithField = 0; + int minLength = Integer.MAX_VALUE; + int maxLength = 0; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + numDocsWithField++; + BytesRef v = values.binaryValue(); + int length = v.length; + data.writeBytes(v.bytes, v.offset, v.length); + minLength = Math.min(length, minLength); + maxLength = Math.max(length, maxLength); + } + assert numDocsWithField <= maxDoc; + meta.writeLong(data.getFilePointer() - start); // dataLength + + if (numDocsWithField == 0) { + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithField == maxDoc) { + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + values = valuesProducer.getBinary(field); + final short jumpTableEntryCount = + IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + + meta.writeInt(numDocsWithField); + meta.writeInt(minLength); + meta.writeInt(maxLength); + if (maxLength > minLength) { + start = data.getFilePointer(); + meta.writeLong(start); + meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); + + final DirectMonotonicWriter writer = + DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT); + long addr = 0; + writer.add(addr); + values = valuesProducer.getBinary(field); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + addr += values.binaryValue().length; + writer.add(addr); + } + writer.finish(); + meta.writeLong(data.getFilePointer() - start); + } + } + + @Override + public void addSortedField(FieldInfo 
field, DocValuesProducer valuesProducer) + throws IOException { + meta.writeInt(field.number); + meta.writeByte(SORTED); + doAddSortedField(field, valuesProducer); + } + + private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + writeValues(field, new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) + throws IOException { + SortedDocValues sorted = valuesProducer.getSorted(field); + NumericDocValues sortedOrds = new NumericDocValues() { + @Override + public long longValue() + throws IOException { + return sorted.ordValue(); + } + + @Override + public boolean advanceExact(int target) + throws IOException { + return sorted.advanceExact(target); + } + + @Override + public int docID() { + return sorted.docID(); + } + + @Override + public int nextDoc() + throws IOException { + return sorted.nextDoc(); + } + + @Override + public int advance(int target) + throws IOException { + return sorted.advance(target); + } + + @Override + public long cost() { + return sorted.cost(); + } + }; + return DocValues.singleton(sortedOrds); + } + }, true); + addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); + } + + private void addTermsDict(SortedSetDocValues values) + throws IOException { + final long size = values.getValueCount(); + meta.writeVLong(size); + + int blockMask = TERMS_DICT_BLOCK_LZ4_MASK; + int shift = TERMS_DICT_BLOCK_LZ4_SHIFT; + + meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); + ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); + ByteBuffersIndexOutput addressOutput = new ByteBuffersIndexOutput(addressBuffer, "temp", "temp"); + long numBlocks = (size + blockMask) >>> shift; + DirectMonotonicWriter writer = + DirectMonotonicWriter.getInstance(meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); + + BytesRefBuilder previous = new BytesRefBuilder(); + long ord = 0; + long start = data.getFilePointer(); + int maxLength = 0, maxBlockLength = 0; + TermsEnum iterator = values.termsEnum(); + + LZ4.FastCompressionHashTable ht = new LZ4.FastCompressionHashTable(); + ByteArrayDataOutput bufferedOutput = new ByteArrayDataOutput(termsDictBuffer); + int dictLength = 0; + + for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { + if ((ord & blockMask) == 0) { + if (ord != 0) { + // flush the previous block + final int uncompressedLength = compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); + maxBlockLength = Math.max(maxBlockLength, uncompressedLength); + bufferedOutput.reset(termsDictBuffer); + } + + writer.add(data.getFilePointer() - start); + // Write the first term both to the index output, and to the buffer where we'll use it as a + // dictionary for compression + data.writeVInt(term.length); + data.writeBytes(term.bytes, term.offset, term.length); + bufferedOutput = maybeGrowBuffer(bufferedOutput, term.length); + bufferedOutput.writeBytes(term.bytes, term.offset, term.length); + dictLength = term.length; + } else { + final int prefixLength = StringHelper.bytesDifference(previous.get(), term); + final int suffixLength = term.length - prefixLength; + assert suffixLength > 0; // terms are unique + // Will write (suffixLength + 1 byte + 2 vint) bytes. Grow the buffer in need. 
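+ // Worked example: prefixLength = 20, suffixLength = 3 packs the header byte as
+ // min(20, 15) | (min(15, 3 - 1) << 4) = 0x2F; the saturated prefix nibble then
+ // forces writeVInt(20 - 15), while suffixLength - 1 < 15 means no second vint is
+ // written before the suffix bytes themselves.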
+ bufferedOutput = maybeGrowBuffer(bufferedOutput, suffixLength + 11); + bufferedOutput.writeByte((byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4))); + if (prefixLength >= 15) { + bufferedOutput.writeVInt(prefixLength - 15); + } + if (suffixLength >= 16) { + bufferedOutput.writeVInt(suffixLength - 16); + } + bufferedOutput.writeBytes(term.bytes, term.offset + prefixLength, suffixLength); + } + maxLength = Math.max(maxLength, term.length); + previous.copyBytes(term); + ++ord; + } + // Compress and write out the last block + if (bufferedOutput.getPosition() > dictLength) { + final int uncompressedLength = compressAndGetTermsDictBlockLength(bufferedOutput, dictLength, ht); + maxBlockLength = Math.max(maxBlockLength, uncompressedLength); + } + + writer.finish(); + meta.writeInt(maxLength); + // Write one more int for storing max block length. + meta.writeInt(maxBlockLength); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + start = data.getFilePointer(); + addressBuffer.copyTo(data); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + + // Now write the reverse terms index + writeTermsIndex(values); + } + + private int compressAndGetTermsDictBlockLength(ByteArrayDataOutput bufferedOutput, int dictLength, + LZ4.FastCompressionHashTable ht) + throws IOException { + int uncompressedLength = bufferedOutput.getPosition() - dictLength; + data.writeVInt(uncompressedLength); + LZ4.compressWithDictionary(termsDictBuffer, 0, dictLength, uncompressedLength, data, ht); + return uncompressedLength; + } + + private ByteArrayDataOutput maybeGrowBuffer(ByteArrayDataOutput bufferedOutput, int termLength) { + int pos = bufferedOutput.getPosition(), originalLength = termsDictBuffer.length; + if (pos + termLength >= originalLength - 1) { + termsDictBuffer = ArrayUtil.grow(termsDictBuffer, originalLength + termLength); + bufferedOutput = new ByteArrayDataOutput(termsDictBuffer, pos, termsDictBuffer.length - pos); + } + return bufferedOutput; + } + + private void writeTermsIndex(SortedSetDocValues values) + throws IOException { + final long size = values.getValueCount(); + meta.writeInt(TERMS_DICT_REVERSE_INDEX_SHIFT); + long start = data.getFilePointer(); + + long numBlocks = 1L + ((size + TERMS_DICT_REVERSE_INDEX_MASK) >>> TERMS_DICT_REVERSE_INDEX_SHIFT); + ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); + DirectMonotonicWriter writer; + try (ByteBuffersIndexOutput addressOutput = new ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) { + writer = DirectMonotonicWriter.getInstance(meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); + TermsEnum iterator = values.termsEnum(); + BytesRefBuilder previous = new BytesRefBuilder(); + long offset = 0; + long ord = 0; + for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { + if ((ord & TERMS_DICT_REVERSE_INDEX_MASK) == 0) { + writer.add(offset); + final int sortKeyLength; + if (ord == 0) { + // no previous term: no bytes to write + sortKeyLength = 0; + } else { + sortKeyLength = StringHelper.sortKeyLength(previous.get(), term); + } + offset += sortKeyLength; + data.writeBytes(term.bytes, term.offset, sortKeyLength); + } else if ((ord & TERMS_DICT_REVERSE_INDEX_MASK) == TERMS_DICT_REVERSE_INDEX_MASK) { + previous.copyBytes(term); + } + ++ord; + } + writer.add(offset); + writer.finish(); + meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + start = data.getFilePointer(); + addressBuffer.copyTo(data); + 
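+ // The loop above sampled one term in every 1 << TERMS_DICT_REVERSE_INDEX_SHIFT
+ // (1024) terms, storing only the shortest sort-key prefix that separates it from
+ // the previously sampled term; the meta entries written next record where that
+ // prefix data and its monotonic address table start, and their lengths.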
meta.writeLong(start); + meta.writeLong(data.getFilePointer() - start); + } + } + + @Override + public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + meta.writeInt(field.number); + meta.writeByte(SORTED_NUMERIC); + doAddSortedNumericField(field, valuesProducer, false); + } + + private void doAddSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer, boolean ords) + throws IOException { + long[] stats = writeValues(field, valuesProducer, ords); + int numDocsWithField = Math.toIntExact(stats[0]); + long numValues = stats[1]; + assert numValues >= numDocsWithField; + + meta.writeInt(numDocsWithField); + if (numValues > numDocsWithField) { + long start = data.getFilePointer(); + meta.writeLong(start); + meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); + + final DirectMonotonicWriter addressesWriter = + DirectMonotonicWriter.getInstance(meta, data, numDocsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT); + long addr = 0; + addressesWriter.add(addr); + SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + addr += values.docValueCount(); + addressesWriter.add(addr); + } + addressesWriter.finish(); + meta.writeLong(data.getFilePointer() - start); + } + } + + private static boolean isSingleValued(SortedSetDocValues values) + throws IOException { + if (DocValues.unwrapSingleton(values) != null) { + return true; + } + + assert values.docID() == -1; + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + int docValueCount = values.docValueCount(); + assert docValueCount > 0; + if (docValueCount > 1) { + return false; + } + } + return true; + } + + @Override + public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + meta.writeInt(field.number); + meta.writeByte(SORTED_SET); + + if (isSingleValued(valuesProducer.getSortedSet(field))) { + meta.writeByte((byte) 0); // multiValued (0 = singleValued) + doAddSortedField(field, new EmptyDocValuesProducer() { + @Override + public SortedDocValues getSorted(FieldInfo field) + throws IOException { + return SortedSetSelector.wrap(valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN); + } + }); + return; + } + meta.writeByte((byte) 1); // multiValued (1 = multiValued) + + doAddSortedNumericField(field, new EmptyDocValuesProducer() { + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) + throws IOException { + SortedSetDocValues values = valuesProducer.getSortedSet(field); + return new SortedNumericDocValues() { + + long[] ords = LongsRef.EMPTY_LONGS; + int i, docValueCount; + + @Override + public long nextValue() + throws IOException { + return ords[i++]; + } + + @Override + public int docValueCount() { + return docValueCount; + } + + @Override + public boolean advanceExact(int target) + throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int docID() { + return values.docID(); + } + + @Override + public int nextDoc() + throws IOException { + int doc = values.nextDoc(); + if (doc != NO_MORE_DOCS) { + docValueCount = values.docValueCount(); + ords = ArrayUtil.grow(ords, docValueCount); + for (int j = 0; j < docValueCount; j++) { + ords[j] = values.nextOrd(); + } + i = 0; + } + return doc; + } + + @Override + public int advance(int target) + throws IOException { + throw new UnsupportedOperationException(); + } + + 
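+ // This adapter flattens each document's set of ordinals into a
+ // SortedNumericDocValues stream: nextDoc() buffers the ords for the current doc
+ // and nextValue() replays them, so multi-valued SORTED_SET fields reuse the
+ // sorted-numeric writer (with ords = true, which disables GCD/table encoding).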
@Override + public long cost() { + return values.cost(); + } + }; + } + }, true); + + addTermsDict(valuesProducer.getSortedSet(field)); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/Lucene90DocValuesProducerCopy.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/Lucene90DocValuesProducerCopy.java new file mode 100644 index 0000000000000..48ad1edbc93ff --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/Lucene90DocValuesProducerCopy.java @@ -0,0 +1,862 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.codec; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.IndexedDISI; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.apache.lucene.util.packed.DirectReader; + + +/** + * Created a copy to initialize producer without field info stored in state which is the case for + * aggregated doc values fields + */ +public class Lucene90DocValuesProducerCopy extends DocValuesProducer { + private final Map numerics; + private final Map sortedNumerics; + private final IndexInput data; + private final int maxDoc; + private int version = -1; + private final boolean merging; + + private FieldInfo[] fieldInfoArr; + + private List dimensionSplitOrder; + + /** expert: instantiates a new reader */ + public Lucene90DocValuesProducerCopy(SegmentReadState state, String dataCodec, String dataExtension, + String metaCodec, String metaExtension, List dimensionSplitOrder) + throws IOException { + String metaName = 
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + this.maxDoc = state.segmentInfo.maxDoc(); + numerics = new HashMap<>(); + sortedNumerics = new HashMap<>(); + merging = false; + this.dimensionSplitOrder = dimensionSplitOrder; + fieldInfoArr = getFieldInfoArr(); + + // read in the entries from the metadata file. + try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) { + Throwable priorE = null; + + try { + version = CodecUtil.checkIndexHeader(in, metaCodec, 0,// TODO : don't hardcode + 0, // TODO : don't hardcode + state.segmentInfo.getId(), state.segmentSuffix); + + readFields(in, fieldInfoArr); + } catch (Throwable exception) { + priorE = exception; + } finally { + CodecUtil.checkFooter(in, priorE); + } + } + + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + this.data = state.directory.openInput(dataName, state.context); + boolean success = false; + try { + final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, 0, // TODO : don't hardcode + 0, // Todo : don't hardcode + state.segmentInfo.getId(), state.segmentSuffix); + if (version != version2) { + throw new CorruptIndexException("Format versions mismatch: meta=" + version + ", data=" + version2, + data); + } + + // NOTE: data file is too costly to verify checksum against all the bytes on open, + // but for now we at least verify proper structure of the checksum footer: which looks + // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption + // such as file truncation. + CodecUtil.retrieveChecksum(data); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this.data); + } + } + } + + // Used for cloning + private Lucene90DocValuesProducerCopy(Map numerics, + Map sortedNumerics, IndexInput data, int maxDoc, int version, boolean merging) { + this.numerics = numerics; + this.sortedNumerics = sortedNumerics; + this.data = data.clone(); + this.maxDoc = maxDoc; + this.version = version; + this.merging = merging; + } + + @Override + public DocValuesProducer getMergeInstance() { + return new Lucene90DocValuesProducerCopy(numerics, sortedNumerics, data, maxDoc, version, true); + } + + public FieldInfo[] getFieldInfoArr() { + List metrics = new ArrayList<>(); + // TODO : remove this + metrics.add("status_sum"); + //metrics.add("status_count"); + FieldInfo[] fArr = new FieldInfo[dimensionSplitOrder.size() + metrics.size()]; + int fieldNum = 0; + for (int i = 0; i < dimensionSplitOrder.size(); i++) { + fArr[fieldNum] = new FieldInfo(dimensionSplitOrder.get(i) + "_dim", fieldNum, false, false, true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, DocValuesType.NUMERIC, -1, + Collections.emptyMap(), 0, 0, 0, 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, false); + fieldNum++; + } + for (int i = 0; i < metrics.size(); i++) { + fArr[fieldNum] = new FieldInfo(metrics.get(i) + "_metric", fieldNum, false, false, true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, DocValuesType.NUMERIC, -1, + Collections.emptyMap(), 0, 0, 0, 0, VectorEncoding.FLOAT32, VectorSimilarityFunction.EUCLIDEAN, false); + fieldNum++; + } + return fArr; + } + + private void readFields(IndexInput meta, FieldInfo[] infos) + throws IOException { + for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { + // System.out.println("Field number : " + fieldNumber); + FieldInfo info = infos[fieldNumber]; + 
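+ // Field numbers index into the synthetic FieldInfo array built above; the
+ // enclosing loop stops at the -1 end-of-fields marker that
+ // Lucene90DocValuesConsumerCopy.close() writes before the meta footer.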
if (info == null) { + throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); + } + byte type = meta.readByte(); + if (type == 0) { // Lucene90DocValuesFormat.NUMERIC) { + numerics.put(info.name, readNumeric(meta)); + } else if (type == 4) { // Lucene90DocValuesFormat.SORTED_NUMERIC + sortedNumerics.put(info.name, readSortedNumeric(meta)); + } else { + throw new CorruptIndexException("invalid type: " + type, meta); + } + } + } + + private NumericEntry readNumeric(IndexInput meta) + throws IOException { + NumericEntry entry = new NumericEntry(); + readNumeric(meta, entry); + return entry; + } + + private void readNumeric(IndexInput meta, NumericEntry entry) + throws IOException { + entry.docsWithFieldOffset = meta.readLong(); + entry.docsWithFieldLength = meta.readLong(); + entry.jumpTableEntryCount = meta.readShort(); + entry.denseRankPower = meta.readByte(); + entry.numValues = meta.readLong(); + int tableSize = meta.readInt(); + if (tableSize > 256) { + throw new CorruptIndexException("invalid table size: " + tableSize, meta); + } + if (tableSize >= 0) { + entry.table = new long[tableSize]; + for (int i = 0; i < tableSize; ++i) { + entry.table[i] = meta.readLong(); + } + } + if (tableSize < -1) { + entry.blockShift = -2 - tableSize; + } else { + entry.blockShift = -1; + } + entry.bitsPerValue = meta.readByte(); + entry.minValue = meta.readLong(); + entry.gcd = meta.readLong(); + entry.valuesOffset = meta.readLong(); + entry.valuesLength = meta.readLong(); + entry.valueJumpTableOffset = meta.readLong(); + } + + private SortedNumericEntry readSortedNumeric(IndexInput meta) + throws IOException { + SortedNumericEntry entry = new SortedNumericEntry(); + readSortedNumeric(meta, entry); + return entry; + } + + private SortedNumericEntry readSortedNumeric(IndexInput meta, SortedNumericEntry entry) + throws IOException { + readNumeric(meta, entry); + entry.numDocsWithField = meta.readInt(); + if (entry.numDocsWithField != entry.numValues) { + entry.addressesOffset = meta.readLong(); + final int blockShift = meta.readVInt(); + entry.addressesMeta = DirectMonotonicReader.loadMeta(meta, entry.numDocsWithField + 1, blockShift); + entry.addressesLength = meta.readLong(); + } + return entry; + } + + @Override + public void close() + throws IOException { + data.close(); + } + + /** Numeric entry */ + public static class NumericEntry { + long[] table; + int blockShift; + byte bitsPerValue; + long docsWithFieldOffset; + long docsWithFieldLength; + short jumpTableEntryCount; + byte denseRankPower; + long numValues; + long minValue; + long gcd; + long valuesOffset; + long valuesLength; + long valueJumpTableOffset; // -1 if no jump-table + } + + private static class SortedNumericEntry extends NumericEntry { + int numDocsWithField; + DirectMonotonicReader.Meta addressesMeta; + long addressesOffset; + long addressesLength; + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) + throws IOException { + NumericEntry entry = numerics.get(field.name); + return getNumeric(entry); + } + + public NumericDocValues getNumeric(String fieldName) + throws IOException { + NumericEntry entry = numerics.get(fieldName); + return getNumeric(entry); + } + + private abstract static class DenseNumericDocValues extends NumericDocValues { + + final int maxDoc; + int doc = -1; + + DenseNumericDocValues(int maxDoc) { + this.maxDoc = maxDoc; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() + throws IOException { + return advance(doc + 1); + } + + 
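+ // Dense fields (docsWithFieldOffset == -1 in the meta) have a value for every
+ // doc id, so iteration is just a bounded counter; the sparse variant below
+ // delegates doc id navigation to an IndexedDISI bitset instead.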
@Override + public int advance(int target) + throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public boolean advanceExact(int target) { + doc = target; + return true; + } + + @Override + public long cost() { + return maxDoc; + } + } + + private abstract static class SparseNumericDocValues extends NumericDocValues { + + final IndexedDISI disi; + + SparseNumericDocValues(IndexedDISI disi) { + this.disi = disi; + } + + @Override + public int advance(int target) + throws IOException { + return disi.advance(target); + } + + @Override + public boolean advanceExact(int target) + throws IOException { + return disi.advanceExact(target); + } + + @Override + public int nextDoc() + throws IOException { + return disi.nextDoc(); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public long cost() { + return disi.cost(); + } + } + + private LongValues getDirectReaderInstance(RandomAccessInput slice, int bitsPerValue, long offset, long numValues) { + if (merging) { + return DirectReader.getMergeInstance(slice, bitsPerValue, offset, numValues); + } else { + return DirectReader.getInstance(slice, bitsPerValue, offset); + } + } + + private NumericDocValues getNumeric(NumericEntry entry) + throws IOException { + if (entry.docsWithFieldOffset == -2) { + // empty + return DocValues.emptyNumeric(); + } else if (entry.docsWithFieldOffset == -1) { + // dense + if (entry.bitsPerValue == 0) { + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() + throws IOException { + return entry.minValue; + } + }; + } else { + final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); + if (entry.blockShift >= 0) { + // dense but split into blocks of different bits per value + return new DenseNumericDocValues(maxDoc) { + final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry, slice); + + @Override + public long longValue() + throws IOException { + return vBPVReader.getLongValue(doc); + } + }; + } else { + final LongValues values = getDirectReaderInstance(slice, entry.bitsPerValue, 0L, entry.numValues); + if (entry.table != null) { + final long[] table = entry.table; + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() + throws IOException { + return table[(int) values.get(doc)]; + } + }; + } else if (entry.gcd == 1 && entry.minValue == 0) { + // Common case for ordinals, which are encoded as numerics + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() + throws IOException { + return values.get(doc); + } + }; + } else { + final long mul = entry.gcd; + final long delta = entry.minValue; + return new DenseNumericDocValues(maxDoc) { + @Override + public long longValue() + throws IOException { + return mul * values.get(doc) + delta; + } + }; + } + } + } + } else { + // sparse + final IndexedDISI disi = + new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.jumpTableEntryCount, + entry.denseRankPower, entry.numValues); + if (entry.bitsPerValue == 0) { + return new SparseNumericDocValues(disi) { + @Override + public long longValue() + throws IOException { + return entry.minValue; + } + }; + } else { + final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); + if (entry.blockShift >= 0) { + // sparse and split into blocks of different bits per value + return new SparseNumericDocValues(disi) { + final VaryingBPVReader vBPVReader = 
new VaryingBPVReader(entry, slice); + + @Override + public long longValue() + throws IOException { + final int index = disi.index(); + return vBPVReader.getLongValue(index); + } + }; + } else { + final LongValues values = getDirectReaderInstance(slice, entry.bitsPerValue, 0L, entry.numValues); + if (entry.table != null) { + final long[] table = entry.table; + return new SparseNumericDocValues(disi) { + @Override + public long longValue() + throws IOException { + return table[(int) values.get(disi.index())]; + } + }; + } else if (entry.gcd == 1 && entry.minValue == 0) { + return new SparseNumericDocValues(disi) { + @Override + public long longValue() + throws IOException { + return values.get(disi.index()); + } + }; + } else { + final long mul = entry.gcd; + final long delta = entry.minValue; + return new SparseNumericDocValues(disi) { + @Override + public long longValue() + throws IOException { + return mul * values.get(disi.index()) + delta; + } + }; + } + } + } + } + } + + private LongValues getNumericValues(NumericEntry entry) + throws IOException { + if (entry.bitsPerValue == 0) { + return new LongValues() { + @Override + public long get(long index) { + return entry.minValue; + } + }; + } else { + final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength); + if (entry.blockShift >= 0) { + return new LongValues() { + final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry, slice); + + @Override + public long get(long index) { + try { + return vBPVReader.getLongValue(index); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }; + } else { + final LongValues values = getDirectReaderInstance(slice, entry.bitsPerValue, 0L, entry.numValues); + if (entry.table != null) { + final long[] table = entry.table; + return new LongValues() { + @Override + public long get(long index) { + return table[(int) values.get(index)]; + } + }; + } else if (entry.gcd != 1) { + final long gcd = entry.gcd; + final long minValue = entry.minValue; + return new LongValues() { + @Override + public long get(long index) { + return values.get(index) * gcd + minValue; + } + }; + } else if (entry.minValue != 0) { + final long minValue = entry.minValue; + return new LongValues() { + @Override + public long get(long index) { + return values.get(index) + minValue; + } + }; + } else { + return values; + } + } + } + } + + private abstract static class DenseBinaryDocValues extends BinaryDocValues { + + final int maxDoc; + int doc = -1; + + DenseBinaryDocValues(int maxDoc) { + this.maxDoc = maxDoc; + } + + @Override + public int nextDoc() + throws IOException { + return advance(doc + 1); + } + + @Override + public int docID() { + return doc; + } + + @Override + public long cost() { + return maxDoc; + } + + @Override + public int advance(int target) + throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + return doc = target; + } + + @Override + public boolean advanceExact(int target) + throws IOException { + doc = target; + return true; + } + } + + private abstract static class SparseBinaryDocValues extends BinaryDocValues { + + final IndexedDISI disi; + + SparseBinaryDocValues(IndexedDISI disi) { + this.disi = disi; + } + + @Override + public int nextDoc() + throws IOException { + return disi.nextDoc(); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public long cost() { + return disi.cost(); + } + + @Override + public int advance(int target) + throws IOException { + return disi.advance(target); + } + + 
@Override + public boolean advanceExact(int target) + throws IOException { + return disi.advanceExact(target); + } + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) + throws IOException { + return null; // TODO + } + + @Override + public SortedDocValues getSorted(FieldInfo field) + throws IOException { + return null; // TODO + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) + throws IOException { + SortedNumericEntry entry = sortedNumerics.get(field.name); + return getSortedNumeric(entry); + } + + public SortedNumericDocValues getSortedNumeric(String fieldName) + throws IOException { + SortedNumericEntry entry = sortedNumerics.get(fieldName); + return getSortedNumeric(entry); + } + + private SortedNumericDocValues getSortedNumeric(SortedNumericEntry entry) + throws IOException { + if (entry.numValues == entry.numDocsWithField) { + return DocValues.singleton(getNumeric(entry)); + } + + final RandomAccessInput addressesInput = data.randomAccessSlice(entry.addressesOffset, entry.addressesLength); + final LongValues addresses = DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging); + + final LongValues values = getNumericValues(entry); + + if (entry.docsWithFieldOffset == -1) { + // dense + return new SortedNumericDocValues() { + + int doc = -1; + long start, end; + int count; + + @Override + public int nextDoc() + throws IOException { + return advance(doc + 1); + } + + @Override + public int docID() { + return doc; + } + + @Override + public long cost() { + return maxDoc; + } + + @Override + public int advance(int target) + throws IOException { + if (target >= maxDoc) { + return doc = NO_MORE_DOCS; + } + start = addresses.get(target); + end = addresses.get(target + 1L); + count = (int) (end - start); + return doc = target; + } + + @Override + public boolean advanceExact(int target) + throws IOException { + start = addresses.get(target); + end = addresses.get(target + 1L); + count = (int) (end - start); + doc = target; + return true; + } + + @Override + public long nextValue() + throws IOException { + return values.get(start++); + } + + @Override + public int docValueCount() { + return count; + } + }; + } else { + // sparse + final IndexedDISI disi = + new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.jumpTableEntryCount, + entry.denseRankPower, entry.numDocsWithField); + return new SortedNumericDocValues() { + + boolean set; + long start, end; + int count; + + @Override + public int nextDoc() + throws IOException { + set = false; + return disi.nextDoc(); + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public long cost() { + return disi.cost(); + } + + @Override + public int advance(int target) + throws IOException { + set = false; + return disi.advance(target); + } + + @Override + public boolean advanceExact(int target) + throws IOException { + set = false; + return disi.advanceExact(target); + } + + @Override + public long nextValue() + throws IOException { + set(); + return values.get(start++); + } + + @Override + public int docValueCount() { + set(); + return count; + } + + private void set() { + if (set == false) { + final int index = disi.index(); + start = addresses.get(index); + end = addresses.get(index + 1L); + count = (int) (end - start); + set = true; + } + } + }; + } + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) + throws IOException { + return null; + } + + @Override + public void checkIntegrity() + throws 
IOException { + CodecUtil.checksumEntireFile(data); + } + + /** + * Reader for longs split into blocks of different bits per values. The longs are requested by + * index and must be accessed in monotonically increasing order. + */ + // Note: The order requirement could be removed as the jump-tables allow for backwards iteration + // Note 2: The rankSlice is only used if an advance of > 1 block is called. Its construction could + // be lazy + private class VaryingBPVReader { + final RandomAccessInput slice; // 2 slices to avoid cache thrashing when using rank + final RandomAccessInput rankSlice; + final NumericEntry entry; + final int shift; + final long mul; + final int mask; + + long block = -1; + long delta; + long offset; + long blockEndOffset; + LongValues values; + + VaryingBPVReader(NumericEntry entry, RandomAccessInput slice) + throws IOException { + this.entry = entry; + this.slice = slice; + this.rankSlice = entry.valueJumpTableOffset == -1 ? null + : data.randomAccessSlice(entry.valueJumpTableOffset, data.length() - entry.valueJumpTableOffset); + shift = entry.blockShift; + mul = entry.gcd; + mask = (1 << shift) - 1; + } + + long getLongValue(long index) + throws IOException { + final long block = index >>> shift; + if (this.block != block) { + int bitsPerValue; + do { + // If the needed block is the one directly following the current block, it is cheaper to + // avoid the cache + if (rankSlice != null && block != this.block + 1) { + blockEndOffset = rankSlice.readLong(block * Long.BYTES) - entry.valuesOffset; + this.block = block - 1; + } + offset = blockEndOffset; + bitsPerValue = slice.readByte(offset++); + delta = slice.readLong(offset); + offset += Long.BYTES; + if (bitsPerValue == 0) { + blockEndOffset = offset; + } else { + final int length = slice.readInt(offset); + offset += Integer.BYTES; + blockEndOffset = offset + length; + } + this.block++; + } while (this.block != block); + final int numValues = Math.toIntExact(Math.min(1 << shift, entry.numValues - (block << shift))); + values = bitsPerValue == 0 ? LongValues.ZEROES + : getDirectReaderInstance(slice, bitsPerValue, offset, numValues); + } + return mul * values.get(index & mask) + delta; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeAggregatedValues.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeAggregatedValues.java new file mode 100644 index 0000000000000..1e129295c7a58 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeAggregatedValues.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.codec; + +import java.util.Map; +import org.apache.lucene.index.NumericDocValues; +import org.opensearch.index.codec.freshstartree.node.StarTree; + +// TODO : this is tightly coupled to star tree + +/** Star tree aggregated values holder for reader / query */ +public class StarTreeAggregatedValues { + public StarTree _starTree; + public Map<String, NumericDocValues> dimensionValues; + + public Map<String, NumericDocValues> metricValues; + + public StarTreeAggregatedValues(StarTree starTree, Map<String, NumericDocValues> dimensionValues, + Map<String, NumericDocValues> metricValues) { + this._starTree = starTree; + this.dimensionValues = dimensionValues; + this.metricValues = metricValues; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeCodec.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeCodec.java new file mode 100644 index 0000000000000..8901319d5b1b7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeCodec.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.opensearch.index.codec.freshstartree.codec; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.NormsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.TermVectorsFormat; + + +/** Codec for performing aggregation during indexing */ +public class StarTreeCodec extends Codec { + private Codec lucene95Codec; + public static final String LUCENE_95 = "Lucene95"; // name of the delegate Lucene codec + + public static final String STAR_TREE_CODEC_NAME = "StarTreeCodec"; + + private final DocValuesFormat dvFormat = new StarTreeDocValuesFormat(); + + public StarTreeCodec() { + super(STAR_TREE_CODEC_NAME); + } + + public Codec getDelegate() { + if (lucene95Codec == null) { + lucene95Codec = Codec.forName(LUCENE_95); + } + return lucene95Codec; + } + + @Override + public PostingsFormat postingsFormat() { + return getDelegate().postingsFormat(); + } + + @Override + public DocValuesFormat docValuesFormat() { + return dvFormat; + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return getDelegate().storedFieldsFormat(); + } + + @Override + public TermVectorsFormat termVectorsFormat() { + return getDelegate().termVectorsFormat(); + } + + @Override + public FieldInfosFormat fieldInfosFormat() { + return getDelegate().fieldInfosFormat(); + } + + @Override + public SegmentInfoFormat segmentInfoFormat() { + return getDelegate().segmentInfoFormat(); + } + + @Override + public NormsFormat normsFormat() { + return getDelegate().normsFormat(); + } + + @Override + public LiveDocsFormat liveDocsFormat() { + return getDelegate().liveDocsFormat(); + } + + @Override + public CompoundFormat compoundFormat() { + return getDelegate().compoundFormat(); + } + + @Override + public PointsFormat pointsFormat() { + return getDelegate().pointsFormat(); + } + + @Override + public KnnVectorsFormat knnVectorsFormat() { + return getDelegate().knnVectorsFormat(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesFormat.java new file mode 100644 index 0000000000000..89854e73fd11a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesFormat.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.codec; + +import java.io.IOException; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + + +/** Custom doc values format for star tree codec */ +public class StarTreeDocValuesFormat extends DocValuesFormat { + /** + * Creates a new docvalues format. + * + *
<p>
The provided name will be written into the index segment in some configurations (such as + * when using {@code PerFieldDocValuesFormat}): in such configurations, for the segment to be read + * this class should be registered with Java's SPI mechanism (registered in META-INF/ of your jar + * file, etc). + */ + private final DocValuesFormat delegate; + + public StarTreeDocValuesFormat() { + this(new Lucene90DocValuesFormat()); + } + + public StarTreeDocValuesFormat(DocValuesFormat delegate) { + super(delegate.getName()); + this.delegate = delegate; + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) + throws IOException { + return new StarTreeDocValuesWriter(delegate.fieldsConsumer(state), state); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) + throws IOException { + return new StarTreeDocValuesReader(delegate.fieldsProducer(state), state); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesReader.java new file mode 100644 index 0000000000000..28f584b31c869 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesReader.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.codec; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; +import org.opensearch.index.codec.freshstartree.node.OffHeapStarTree; +import org.opensearch.index.codec.freshstartree.node.StarTree; + + +/** Custom star tree doc values reader */ +public class StarTreeDocValuesReader extends DocValuesProducer { + private DocValuesProducer delegate; + + private IndexInput data; + + private Lucene90DocValuesProducerCopy valuesProducer; + + StarTree starTree; + + Map<String, NumericDocValues> dimensionValues; + + Map<String, NumericDocValues> metricValues; + public static final String DATA_CODEC = "Lucene90DocValuesData"; + public static final String META_CODEC = "Lucene90DocValuesMetadata"; + + public StarTreeDocValuesReader(DocValuesProducer producer, SegmentReadState state) + throws IOException { + this.delegate = producer; + + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, "stttree"); + this.data = state.directory.openInput(dataName, state.context); + CodecUtil.checkIndexHeader(data, "STARTreeCodec", 0, 0, state.segmentInfo.getId(), state.segmentSuffix); + starTree = new OffHeapStarTree(data); + valuesProducer = new Lucene90DocValuesProducerCopy(state, DATA_CODEC, "sttd", META_CODEC, "sttm", + starTree.getDimensionNames()); + dimensionValues = new HashMap<>(); + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) + throws IOException { + return delegate.getNumeric(field); + } + + @Override + public StarTreeAggregatedValues getAggregatedDocValues() + throws IOException { + // starTree.printTree(new HashMap<>()); + // System.out.println(starTree); + List<String> dimensionsSplitOrder = starTree.getDimensionNames(); + for (int i = 0; i < dimensionsSplitOrder.size(); i++) { + System.out.println("Reading dimension : " + dimensionsSplitOrder.get(i)); + dimensionValues.put(dimensionsSplitOrder.get(i), + valuesProducer.getNumeric(dimensionsSplitOrder.get(i) + "_dim")); + } + metricValues = new HashMap<>(); + metricValues.put("status_sum", valuesProducer.getNumeric("status_sum_metric")); + //metricValues.put("status_count", valuesProducer.getNumeric("status_count_metric")); + return new StarTreeAggregatedValues(starTree, dimensionValues, metricValues); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) + throws IOException { + return delegate.getBinary(field); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) + throws IOException { + return delegate.getSorted(field); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) + throws IOException { + return delegate.getSortedNumeric(field); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) + throws IOException { + return delegate.getSortedSet(field); + } + + @Override + public void checkIntegrity() + throws IOException { + delegate.checkIntegrity(); + valuesProducer.checkIntegrity(); + CodecUtil.checksumEntireFile(data); + } + + @Override + public void close() + throws IOException { + // release the delegate producer, the copied doc values producer and the star tree input + IOUtils.close(delegate, valuesProducer, data); + } +} diff --git 
a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesWriter.java new file mode 100644 index 0000000000000..d6b2ef3a74c77 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/StarTreeDocValuesWriter.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.codec; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.codec.freshstartree.builder.BaseSingleTreeBuilder; +import org.opensearch.index.codec.freshstartree.builder.OffHeapBufferedSingleTreeBuilder; +import org.opensearch.index.codec.freshstartree.builder.OffHeapSingleTreeBuilder; + + +/** Custom star tree doc values writer */ +public class StarTreeDocValuesWriter extends DocValuesConsumer { + + private DocValuesConsumer delegate; + private final SegmentWriteState state; + + // TODO : should we make all of this final ? 
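+ // All standard doc-value fields are forwarded to the delegate consumer; in
+ // addition, addSortedNumericField() captures per-dimension and per-metric
+ // readers in dimensionReaders so a star tree can be built on flush
+ // (aggregate()) and on merge (mergeAggregatedValues()).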
+ + List dimensionsSplitOrder; + + Map dimensionReaders; + + BaseSingleTreeBuilder builder; + IndexOutput data; + IndexOutput meta; + + DocValuesConsumer docValuesConsumer; + public static final String DATA_CODEC = "Lucene90DocValuesData"; + public static final String META_CODEC = "Lucene90DocValuesMetadata"; + private static final Logger logger = LogManager.getLogger(StarTreeDocValuesWriter.class); + + + public StarTreeDocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState) + throws IOException { + this.delegate = delegate; + this.state = segmentWriteState; + dimensionReaders = new HashMap<>(); + dimensionsSplitOrder = new ArrayList<>(); + + docValuesConsumer = new Lucene90DocValuesConsumerCopy(state, DATA_CODEC, "sttd", META_CODEC, "sttm"); + } + + @Override + public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + // TODO : check for attributes + // if(field.attributes().containsKey("dimensions") || + // field.attributes().containsKey("metric") ) { + // dimensionReaders.put(field.name, valuesProducer.getNumeric(field)); + // } + delegate.addNumericField(field, valuesProducer); + } + + @Override + public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + delegate.addBinaryField(field, valuesProducer); + } + + @Override + public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + delegate.addSortedField(field, valuesProducer); + } + + @Override + public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + delegate.addSortedNumericField(field, valuesProducer); + if (field.name.equalsIgnoreCase("size")) { + return; + } + if (field.name.equalsIgnoreCase("@timestamp")) { + //logger.info("Adding timestamp fields"); + dimensionReaders.put("minute_dim", valuesProducer.getSortedNumeric(field)); + dimensionReaders.put("hour_dim", valuesProducer.getSortedNumeric(field)); + dimensionReaders.put("day_dim", valuesProducer.getSortedNumeric(field)); + dimensionReaders.put("month_dim", valuesProducer.getSortedNumeric(field)); + //dimensionReaders.put("year_dim", valuesProducer.getSortedNumeric(field)); + //dimensionsSplitOrder.add("minute"); + + } else { + //logger.info("Adding field : " + field.name); + dimensionReaders.put(field.name + "_dim", valuesProducer.getSortedNumeric(field)); + dimensionsSplitOrder.add(field.name); + } + if (field.name.contains("status")) { + // TODO : change this metric type + dimensionReaders.put(field.name + "_sum_metric", valuesProducer.getSortedNumeric(field)); + } + } + + @Override + public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) + throws IOException { + delegate.addSortedSetField(field, valuesProducer); + } + + @Override + public void merge(MergeState mergeState) + throws IOException { + super.merge(mergeState); + mergeAggregatedValues(mergeState); + } + + public void mergeAggregatedValues(MergeState mergeState) + throws IOException { + List aggrList = new ArrayList<>(); + for (int i = 0; i < mergeState.docValuesProducers.length; i++) { + DocValuesProducer producer = mergeState.docValuesProducers[i]; + Object obj = producer.getAggregatedDocValues(); + StarTreeAggregatedValues starTree = (StarTreeAggregatedValues) obj; + aggrList.add(starTree); + } + long startTime = System.currentTimeMillis(); + // BaseSingleTreeBuilder.Record[] recordsArr = mergeRecords(aggrList); + builder = new OffHeapBufferedSingleTreeBuilder(data, 
dimensionsSplitOrder, dimensionReaders, state.segmentInfo.maxDoc(), + docValuesConsumer, state); + builder.build(aggrList); + logger.info("Finished merging star-tree in ms : {}" , (System.currentTimeMillis() - startTime)); + // long startTime = System.currentTimeMillis(); + // System.out.println(recordsArr); + // TODO : remove this + // todo: do this off heap + // builder.build(builder.sortAndAggregateSegmentRecords(recordsArr), true); + // System.out.println("Finished merging star-tree in ms : " + (System.currentTimeMillis() - + // startTime)); + } + + @Override + public void aggregate() + throws IOException { + long startTime = System.currentTimeMillis(); + builder = new OffHeapBufferedSingleTreeBuilder(data, dimensionsSplitOrder, dimensionReaders, state.segmentInfo.maxDoc(), + docValuesConsumer, state); + builder.build(); + logger.info("Finished building star-tree in ms : {}" , (System.currentTimeMillis() - startTime)); + } + + @Override + public void close() + throws IOException { + if (delegate != null) { + delegate.close(); + } + if (docValuesConsumer != null) { + docValuesConsumer.close(); + } + if (builder != null) { + builder.close(); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/package-info.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/package-info.java new file mode 100644 index 0000000000000..beea4beb3ddff --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/codec/package-info.java @@ -0,0 +1,18 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Codec for performing aggregation during indexing */ +package org.opensearch.index.codec.freshstartree.codec; diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/node/OffHeapStarTree.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/OffHeapStarTree.java new file mode 100644 index 0000000000000..715711c3937ce --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/OffHeapStarTree.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.node; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; + + +/** Off heap implementation of star tree. */ +public class OffHeapStarTree implements StarTree { + public static final long MAGIC_MARKER = 0xBADDA55B00DAD00DL; + public static final int VERSION = 1; + private final OffHeapStarTreeNode _root; + private final List _dimensionNames = new ArrayList<>(); + + public OffHeapStarTree(IndexInput data) + throws IOException { + long magicmarker = data.readLong(); + if (MAGIC_MARKER != magicmarker) { + throw new IOException("Invalid magic marker"); + } + int ver = data.readInt(); + if (VERSION != ver) { + throw new IOException("Invalid version"); + } + data.readInt(); // header size + + int dimLength = data.readInt(); + String[] dimensionNames = new String[dimLength]; + + for (int i = 0; i < dimLength; i++) { + int dimensionId = data.readInt(); + dimensionNames[dimensionId] = data.readString(); + } + _dimensionNames.addAll(Arrays.asList(dimensionNames)); + data.readInt(); // num nodes + // System.out.println("Number of nodes : " + numNodes); + // System.out.println(data.length()); + RandomAccessInput in = data.randomAccessSlice(data.getFilePointer(), data.length() - data.getFilePointer()); + _root = new OffHeapStarTreeNode(in, 0); + } + + @Override + public StarTreeNode getRoot() { + return _root; + } + + @Override + public List getDimensionNames() { + return _dimensionNames; + } + + @Override + public void printTree(Map> dictionaryMap) + throws IOException { + printTreeHelper(dictionaryMap, _root, 0); + } + + /** Helper method to print the tree. 
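+ * Walks the tree depth-first and formats one line per node: level, dimension
+ * name/value, child dimension, start/end/aggregated doc ids and child count.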
*/ + private void printTreeHelper(Map> dictionaryMap, OffHeapStarTreeNode node, int level) + throws IOException { + StringBuilder stringBuilder = new StringBuilder(); + for (int i = 0; i < level; i++) { + stringBuilder.append(" "); + } + String dimensionName = "ALL"; + int dimensionId = node.getDimensionId(); + if (dimensionId != StarTreeNode.ALL) { + dimensionName = _dimensionNames.get(dimensionId); + } + String dimensionValueString = "ALL"; + long dimensionValue = node.getDimensionValue(); + if (dimensionValue != StarTreeNode.ALL) { + // dimensionValueString = dictionaryMap.get(dimensionName).get(dimensionValue).toString(); + } + + // For leaf node, child dimension id is -1 + String childDimensionName = "null"; + int childDimensionId = node.getChildDimensionId(); + if (childDimensionId != -1) { + childDimensionName = _dimensionNames.get(childDimensionId); + } + + String formattedOutput = new StringJoiner(" - ").add("level : " + level).add("dimensionName : " + dimensionName) + .add("dimensionValue : " + dimensionValueString).add("childDimensionName : " + childDimensionName) + .add("startDocId : " + node.getStartDocId()).add("endDocId : " + node.getEndDocId()) + .add("aggregatedDocId : " + node.getAggregatedDocId()).add("numChildren : " + node.getNumChildren()) + .toString(); + + stringBuilder.append(formattedOutput); + // System.out.println(stringBuilder.toString()); + + if (!node.isLeaf()) { + Iterator childrenIterator = node.getChildrenIterator(); + while (childrenIterator.hasNext()) { + printTreeHelper(dictionaryMap, childrenIterator.next(), level + 1); + } + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/node/OffHeapStarTreeNode.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/OffHeapStarTreeNode.java new file mode 100644 index 0000000000000..c54d9e3d738ff --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/OffHeapStarTreeNode.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.node; + +import java.io.IOException; +import java.util.Iterator; +import org.apache.lucene.store.RandomAccessInput; + + +/** Off heap implementation of {@link StarTreeNode} */ +public class OffHeapStarTreeNode implements StarTreeNode { + public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + public static final long SERIALIZABLE_SIZE_IN_BYTES = + ( Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS ) + ( Long.BYTES * NUM_LONG_SERIALIZABLE_FIELDS ); + private static final int DIMENSION_ID_OFFSET = 0; + private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; + private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; + private static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; + private static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; + private static final int FIRST_CHILD_ID_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; + private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + + public static final int INVALID_ID = -1; + + private final int _nodeId; + private final int _firstChildId; + + RandomAccessInput in; + + public OffHeapStarTreeNode(RandomAccessInput in, int nodeId) + throws IOException { + this.in = in; + _nodeId = nodeId; + _firstChildId = getInt(FIRST_CHILD_ID_OFFSET); + } + + private int getInt(int fieldOffset) + throws IOException { + return in.readInt(_nodeId * SERIALIZABLE_SIZE_IN_BYTES + fieldOffset); + } + + private long getLong(int fieldOffset) + throws IOException { + return in.readLong(_nodeId * SERIALIZABLE_SIZE_IN_BYTES + fieldOffset); + } + + @Override + public int getDimensionId() + throws IOException { + return getInt(DIMENSION_ID_OFFSET); + } + + @Override + public long getDimensionValue() + throws IOException { + return getLong(DIMENSION_VALUE_OFFSET); + } + + @Override + public int getChildDimensionId() + throws IOException { + if (_firstChildId == INVALID_ID) { + return INVALID_ID; + } else { + return in.readInt(_firstChildId * SERIALIZABLE_SIZE_IN_BYTES); + } + } + + @Override + public int getStartDocId() + throws IOException { + return getInt(START_DOC_ID_OFFSET); + } + + @Override + public int getEndDocId() + throws IOException { + return getInt(END_DOC_ID_OFFSET); + } + + @Override + public int getAggregatedDocId() + throws IOException { + return getInt(AGGREGATE_DOC_ID_OFFSET); + } + + @Override + public int getNumChildren() + throws IOException { + if (_firstChildId == INVALID_ID) { + return 0; + } else { + return getInt(LAST_CHILD_ID_OFFSET) - _firstChildId + 1; + } + } + + @Override + public boolean isLeaf() { + return _firstChildId == INVALID_ID; + } + + @Override + public StarTreeNode getChildForDimensionValue(long dimensionValue) + throws IOException { + if (isLeaf()) { + return null; + } + + // Specialize star node for performance + if (dimensionValue == StarTreeNode.ALL) { + OffHeapStarTreeNode firstNode = new OffHeapStarTreeNode(in, _firstChildId); + if (firstNode.getDimensionValue() == StarTreeNode.ALL) { + return firstNode; + } else { + return null; + } + } + + // Binary search + int low = _firstChildId; + int high = getInt(LAST_CHILD_ID_OFFSET); + + while (low <= high) { + int mid = (low + high) / 2; + OffHeapStarTreeNode midNode = new OffHeapStarTreeNode(in, mid); + long midValue = midNode.getDimensionValue(); + + if (midValue == dimensionValue) { + return midNode; + } else if (midValue < 
dimensionValue) { + low = mid + 1; + } else { + high = mid - 1; + } + } + return null; + } + + @Override + public Iterator getChildrenIterator() + throws IOException { + return new Iterator() { + private int _currentChildId = _firstChildId; + private final int _lastChildId = getInt(LAST_CHILD_ID_OFFSET); + + @Override + public boolean hasNext() { + return _currentChildId <= _lastChildId; + } + + @Override + public OffHeapStarTreeNode next() { + try { + return new OffHeapStarTreeNode(in, _currentChildId++); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/node/StarTree.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/StarTree.java new file mode 100644 index 0000000000000..db9cc31f3fd59 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/StarTree.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.node; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + + +/** Interface for star tree */ +public interface StarTree { + + /** Get the root node of the star tree. */ + StarTreeNode getRoot(); + + /** + * Get a list of all dimension names. The node dimension id is the index of the dimension name in + * this list. + */ + List getDimensionNames(); + + void printTree(Map> dictionaryMap) + throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/StarTreeNode.java new file mode 100644 index 0000000000000..de34ceffe1f1a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/StarTreeNode.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.node; + +import java.io.IOException; +import java.util.Iterator; + + +/** Class representing each node in star tree */ +public interface StarTreeNode { + long ALL = -1l; + + /** Get the index of the dimension. */ + int getDimensionId() + throws IOException; + + /** Get the value (dictionary id) of the dimension. */ + long getDimensionValue() + throws IOException; + + /** Get the child dimension id. */ + int getChildDimensionId() + throws IOException; + + /** Get the index of the start document. */ + int getStartDocId() + throws IOException; + + /** Get the index of the end document (exclusive). */ + int getEndDocId() + throws IOException; + + /** Get the index of the aggregated document. */ + int getAggregatedDocId() + throws IOException; + + /** Get the number of children nodes. */ + int getNumChildren() + throws IOException; + + /** Return true if the node is a leaf node, false otherwise. */ + boolean isLeaf(); + + /** + * Get the child node corresponding to the given dimension value (dictionary id), or null if such + * child does not exist. + */ + StarTreeNode getChildForDimensionValue(long dimensionValue) + throws IOException; + + /** Get the iterator over all children nodes. */ + Iterator getChildrenIterator() + throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/node/package-info.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/package-info.java new file mode 100644 index 0000000000000..dbd8287650abe --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/node/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** node package */ +package org.opensearch.index.codec.freshstartree.node; diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeCollector.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeCollector.java new file mode 100644 index 0000000000000..b92d00843822a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeCollector.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.query; + +import java.io.IOException; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + + +/** TODO: remove this ? */ +public class StarTreeCollector extends SimpleCollector { + @Override + public ScoreMode scoreMode() { + return null; + } + + @Override + public void collect(int doc) + throws IOException { + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeCollectorManager.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeCollectorManager.java new file mode 100644 index 0000000000000..a3f8685008ae2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeCollectorManager.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.query; + +import java.io.IOException; +import java.util.Collection; +import org.apache.lucene.search.CollectorManager; + + +/** TODO : remove this ? 
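+ * Placeholder manager: newCollector() hands out empty collectors and reduce()
+ * performs no real merging yet (the merge body is commented out below).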
*/ +public class StarTreeCollectorManager implements CollectorManager { + @Override + public StarTreeCollector newCollector() + throws IOException { + return new StarTreeCollector(); + } + + @Override + public StarTreeCollector reduce(Collection collectors) + throws IOException { + if (collectors == null || collectors.size() == 0) { + return new StarTreeCollector(); + } + if (collectors.size() == 1) { + return collectors.iterator().next(); + } + return new ReducedStarTreeCollector(collectors); + } + + private static class ReducedStarTreeCollector extends StarTreeCollector { + + public ReducedStarTreeCollector(final Collection facetsCollectors) { + // final List matchingDocs = this.getMatchingDocs(); + // facetsCollectors.forEach( + // facetsCollector -> matchingDocs.addAll(facetsCollector.getMatchingDocs())); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeFilter.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeFilter.java new file mode 100644 index 0000000000000..f6e5e5cabae01 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeFilter.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.query; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.function.Predicate; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.ConjunctionUtils; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.DocIdSetBuilder; +import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues; +import org.opensearch.index.codec.freshstartree.node.StarTree; +import org.opensearch.index.codec.freshstartree.node.StarTreeNode; + +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + + +/** Filter operator for star tree data structure. */ +public class StarTreeFilter { + + /** Helper class to wrap the result from traversing the star tree. 
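+ * Pairs the doc ids matched during traversal with the predicate columns the
+ * traversal could not resolve, which must then be filtered via raw doc values.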
*/ + static class StarTreeResult { + final DocIdSetBuilder _matchedDocIds; + final Set _remainingPredicateColumns; + + StarTreeResult(DocIdSetBuilder matchedDocIds, Set remainingPredicateColumns) { + _matchedDocIds = matchedDocIds; + _remainingPredicateColumns = remainingPredicateColumns; + } + } + + private final StarTree _starTree; + + Map>> _predicateEvaluators; + private final Set _groupByColumns; + + DocIdSetBuilder docsWithField; + DocIdSetBuilder.BulkAdder adder; + Map dimValueMap; + + public StarTreeFilter(StarTreeAggregatedValues starTreeAggrStructure, + Map>> predicateEvaluators, Set groupByColumns) + throws IOException { + // This filter operator does not support AND/OR/NOT operations. + _starTree = starTreeAggrStructure._starTree; + dimValueMap = starTreeAggrStructure.dimensionValues; + _predicateEvaluators = predicateEvaluators != null ? predicateEvaluators : Collections.emptyMap(); + _groupByColumns = groupByColumns != null ? groupByColumns : Collections.emptySet(); + + // TODO : this should be the maximum number of doc values + docsWithField = new DocIdSetBuilder(Integer.MAX_VALUE); + } + + /** + * Helper method to get a filter operator that match the matchingDictIdsMap. + * + *
+ * <ul>
+ *   <li>First go over the star tree and try to match as many columns as possible
+ *   <li>For the remaining columns, use other indexes to match them
+ * </ul>
+ */ + public DocIdSetIterator getStarTreeResult() + throws IOException { + StarTreeResult starTreeResult = traverseStarTree(); + List andIterators = new ArrayList<>(); + andIterators.add(starTreeResult._matchedDocIds.build().iterator()); + + // System.out.println("Remaining predicate columns : " + + // starTreeResult._remainingPredicateColumns.toString()); + for (String remainingPredicateColumn : starTreeResult._remainingPredicateColumns) { + // TODO : set to max value of doc values + DocIdSetBuilder builder = new DocIdSetBuilder(Integer.MAX_VALUE); + List> compositePredicateEvaluators = _predicateEvaluators.get(remainingPredicateColumn); + NumericDocValues ndv = this.dimValueMap.get(remainingPredicateColumn); + for (int docID = ndv.nextDoc(); docID != NO_MORE_DOCS; docID = ndv.nextDoc()) { + for (Predicate compositePredicateEvaluator : compositePredicateEvaluators) { + // TODO : this might be expensive as its done against all doc values docs + if (compositePredicateEvaluator.test(ndv.longValue())) { + // System.out.println("Adding doc id : " + docID + " for status " + ndv.longValue()); + builder.grow(1).add(docID); + break; + } + } + } + andIterators.add(builder.build().iterator()); + } + if (andIterators.size() > 1) { + return ConjunctionUtils.intersectIterators(andIterators); + } + return andIterators.get(0); + } + + /** + * Helper method to traverse the star tree, get matching documents and keep track of all the + * predicate columns that are not matched. Returns {@code null} if no matching dictionary id found + * for a column (i.e. the result for the filter operator is empty). + */ + private StarTreeResult traverseStarTree() + throws IOException { + Set globalRemainingPredicateColumns = null; + + StarTree starTree = _starTree; + List dimensionNames = starTree.getDimensionNames(); + StarTreeNode starTreeRootNode = starTree.getRoot(); + + // Track whether we have found a leaf node added to the queue. If we have found a leaf node, and + // traversed to the + // level of the leave node, we can set globalRemainingPredicateColumns if not already set + // because we know the leaf + // node won't split further on other predicate columns. 
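+ // Columns still present in globalRemainingPredicateColumns at the end are
+ // evaluated against raw doc values later, in getStarTreeResult().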
+ boolean foundLeafNode = starTreeRootNode.isLeaf(); + + // Use BFS to traverse the star tree + Queue queue = new ArrayDeque<>(); + queue.add(starTreeRootNode); + int currentDimensionId = -1; + Set remainingPredicateColumns = new HashSet<>(_predicateEvaluators.keySet()); + Set remainingGroupByColumns = new HashSet<>(_groupByColumns); + if (foundLeafNode) { + globalRemainingPredicateColumns = new HashSet<>(remainingPredicateColumns); + } + + StarTreeNode starTreeNode; + while ((starTreeNode = queue.poll()) != null) { + int dimensionId = starTreeNode.getDimensionId(); + if (dimensionId > currentDimensionId) { + // Previous level finished + String dimension = dimensionNames.get(dimensionId); + remainingPredicateColumns.remove(dimension); + remainingGroupByColumns.remove(dimension); + if (foundLeafNode && globalRemainingPredicateColumns == null) { + globalRemainingPredicateColumns = new HashSet<>(remainingPredicateColumns); + } + currentDimensionId = dimensionId; + } + + // If all predicate columns and group-by columns are matched, we can use aggregated document + if (remainingPredicateColumns.isEmpty() && remainingGroupByColumns.isEmpty()) { + // System.out.println("Adding doc id for : " + dimensionNames.get(dimensionId) + " = " + + // starTreeNode + // .getAggregatedDocId()); + adder = docsWithField.grow(1); + adder.add(starTreeNode.getAggregatedDocId()); + continue; + } + + // For leaf node, because we haven't exhausted all predicate columns and group-by columns, we + // cannot use + // the aggregated document. Add the range of documents for this node to the bitmap, and keep + // track of the + // remaining predicate columns for this node + if (starTreeNode.isLeaf()) { + for (long i = starTreeNode.getStartDocId(); i < starTreeNode.getEndDocId(); i++) { + adder = docsWithField.grow(1); + // System.out.println("Adding doc id for : " + dimensionNames.get(dimensionId) + " = " + + // i); + adder.add((int) i); + } + continue; + } + + // For non-leaf node, proceed to next level + String childDimension = dimensionNames.get(dimensionId + 1); + + // Only read star-node when the dimension is not in the global remaining predicate columns or + // group-by columns + // because we cannot use star-node in such cases + StarTreeNode starNode = null; + if ((globalRemainingPredicateColumns == null || !globalRemainingPredicateColumns.contains(childDimension)) + && !remainingGroupByColumns.contains(childDimension)) { + starNode = starTreeNode.getChildForDimensionValue(StarTreeNode.ALL); + } + + if (remainingPredicateColumns.contains(childDimension)) { + // Have predicates on the next level, add matching nodes to the queue + + // Calculate the matching dictionary ids for the child dimension + int numChildren = starTreeNode.getNumChildren(); + + // If number of matching dictionary ids is large, use scan instead of binary search + + Iterator childrenIterator = starTreeNode.getChildrenIterator(); + + // When the star-node exists, and the number of matching doc ids is more than or equal to + // the + // number of non-star child nodes, check if all the child nodes match the predicate, and use + // the + // star-node if so + if (starNode != null) { + List matchingChildNodes = new ArrayList<>(); + boolean findLeafChildNode = false; + while (childrenIterator.hasNext()) { + StarTreeNode childNode = childrenIterator.next(); + List> predicates = _predicateEvaluators.get(childDimension); + for (Predicate predicate : predicates) { + long val = childNode.getDimensionValue(); + if (predicate.test(val)) { + 
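+ // this child's dimension value satisfies at least one predicate for the dimension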
matchingChildNodes.add(childNode); + findLeafChildNode |= childNode.isLeaf(); + break; + } + } + } + if (matchingChildNodes.size() == numChildren - 1) { + // All the child nodes (except for the star-node) match the predicate, use the star-node + queue.add(starNode); + foundLeafNode |= starNode.isLeaf(); + } else { + // Some child nodes do not match the predicate, use the matching child nodes + queue.addAll(matchingChildNodes); + foundLeafNode |= findLeafChildNode; + } + } else { + // Cannot use the star-node, use the matching child nodes + while (childrenIterator.hasNext()) { + StarTreeNode childNode = childrenIterator.next(); + List> predicates = _predicateEvaluators.get(childDimension); + for (Predicate predicate : predicates) { + if (predicate.test(childNode.getDimensionValue())) { + queue.add(childNode); + foundLeafNode |= childNode.isLeaf(); + break; + } + } + } + } + } else { + // No predicate on the next level + + if (starNode != null) { + // Star-node exists, use it + queue.add(starNode); + foundLeafNode |= starNode.isLeaf(); + } else { + // Star-node does not exist or cannot be used, add all non-star nodes to the queue + Iterator childrenIterator = starTreeNode.getChildrenIterator(); + while (childrenIterator.hasNext()) { + StarTreeNode childNode = childrenIterator.next(); + if (childNode.getDimensionValue() != StarTreeNode.ALL) { + queue.add(childNode); + foundLeafNode |= childNode.isLeaf(); + } + } + } + } + } + + return new StarTreeResult(docsWithField, + globalRemainingPredicateColumns != null ? globalRemainingPredicateColumns : Collections.emptySet()); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeQuery.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeQuery.java new file mode 100644 index 0000000000000..2e0376cbe95c5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/StarTreeQuery.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.query; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Accountable; +import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues; + + +/** Query class for querying star tree data structure */ +public class StarTreeQuery extends Query implements Accountable { + + Map>> compositePredicateMap; + Set groupByColumns; + + public StarTreeQuery(Map>> compositePredicateMap, Set groupByColumns) { + this.compositePredicateMap = compositePredicateMap; + this.groupByColumns = groupByColumns; + } + + @Override + public String toString(String field) { + return null; + } + + @Override + public void visit(QueryVisitor visitor) { + visitor.visitLeaf(this); + } + + @Override + public boolean equals(Object obj) { + return sameClassAs(obj); + } + + @Override + public int hashCode() { + return classHash(); + } + + @Override + public long ramBytesUsed() { + return 0; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) + throws IOException { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) + throws IOException { + Object obj = context.reader().getAggregatedDocValues(); + DocIdSetIterator result = null; + if (obj != null) { + StarTreeAggregatedValues val = (StarTreeAggregatedValues) obj; + StarTreeFilter filter = new StarTreeFilter(val, compositePredicateMap, groupByColumns); + result = filter.getStarTreeResult(); + } + return new ConstantScoreScorer(this, score(), scoreMode, result); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/query/package-info.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/package-info.java new file mode 100644 index 0000000000000..8b04706698952 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/query/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** query package - Todo : move it under main dir - keeping it here for readability */ +package org.opensearch.index.codec.freshstartree.query; diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/util/BufferedAggregatedDocValues.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/BufferedAggregatedDocValues.java new file mode 100644 index 0000000000000..565ed66cb8763 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/BufferedAggregatedDocValues.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.util; + +import java.io.IOException; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.packed.PackedLongValues; + + +/** Buffered aggregated doc values - in memory */ +public class BufferedAggregatedDocValues extends NumericDocValues { + final PackedLongValues.Iterator iter; + final DocIdSetIterator docsWithField; + private long value; + + public BufferedAggregatedDocValues(PackedLongValues values, DocIdSetIterator docsWithFields) { + this.iter = values.iterator(); + this.docsWithField = docsWithFields; + } + + @Override + public int docID() { + return docsWithField.docID(); + } + + @Override + public int nextDoc() + throws IOException { + int docID = docsWithField.nextDoc(); + if (docID != NO_MORE_DOCS) { + value = iter.next(); + } + return docID; + } + + @Override + public int advance(int target) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean advanceExact(int target) + throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + return docsWithField.cost(); + } + + @Override + public long longValue() { + return value; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/util/IntComparator.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/IntComparator.java new file mode 100644 index 0000000000000..dcd3bf947c4bd --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/IntComparator.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.opensearch.index.codec.freshstartree.util; + +import java.util.Comparator; + + +/** Int comparator */ +public interface IntComparator extends Comparator { + int compare(int var1, int var2); + + @Override + default int compare(Integer ok1, Integer ok2) { + return this.compare(ok1, ok2); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/util/QuickSorter.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/QuickSorter.java new file mode 100644 index 0000000000000..2e5e33adc06b3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/QuickSorter.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.util; + +/** copy of it.unimi.dsi.fastutil.Arrays.quickSort */ +public class QuickSorter { + + /** it.unimi.dsi.fastutil.Arrays.quickSort - copy */ + public static void quickSort(int from, int to, IntComparator comp, Swapper swapper) { + int len = to - from; + int m; + int j; + if (len < 16) { + for (m = from; m < to; ++m) { + for (j = m; j > from && comp.compare(j - 1, j) > 0; --j) { + swapper.swap(j, j - 1); + } + } + } else { + m = from + len / 2; + j = from; + int n = to - 1; + int a; + if (len > 128) { + a = len / 8; + j = med3(from, from + a, from + 2 * a, comp); + m = med3(m - a, m, m + a, comp); + n = med3(n - 2 * a, n - a, n, comp); + } + + m = med3(j, m, n, comp); + a = from; + int b = from; + int c = to - 1; + int d = c; + + while (true) { + int s; + for (; b > c || (s = comp.compare(b, m)) > 0; swapper.swap(b++, c--)) { + for (; c >= b && (s = comp.compare(c, m)) >= 0; --c) { + if (s == 0) { + if (c == m) { + m = d; + } else if (d == m) { + m = c; + } + + swapper.swap(c, d--); + } + } + + if (b > c) { + s = Math.min(a - from, b - a); + swap(swapper, from, b - s, s); + s = Math.min(d - c, to - d - 1); + swap(swapper, b, to - s, s); + if ((s = b - a) > 1) { + quickSort(from, from + s, comp, swapper); + } + + if ((s = d - c) > 1) { + quickSort(to - s, to, comp, swapper); + } + + return; + } + + if (b == m) { + m = d; + } else if (c == m) { + m = c; + } + } + + if (s == 0) { + if (a == m) { + m = b; + } else if (b == m) { + m = a; + } + + swapper.swap(a++, b); + } + + ++b; + } + } + } + + protected static void swap(Swapper swapper, int a, int b, int n) { + for (int i = 0; i < n; ++b) { + swapper.swap(a, b); + ++i; + ++a; + } + } + + private static int med3(int a, int b, int c, IntComparator comp) { + int ab = comp.compare(a, b); + int ac = comp.compare(a, c); + int bc = comp.compare(b, c); + return ab < 0 ? (bc < 0 ? b : (ac < 0 ? c : a)) : (bc > 0 ? b : (ac > 0 ? c : a)); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/util/Swapper.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/Swapper.java new file mode 100644 index 0000000000000..1df6a7e270698 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/Swapper.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.opensearch.index.codec.freshstartree.util; + +/** Functional interface for swapper */ +@FunctionalInterface +public interface Swapper { + void swap(int var1, int var2); +} diff --git a/server/src/main/java/org/opensearch/index/codec/freshstartree/util/package-info.java b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/package-info.java new file mode 100644 index 0000000000000..8ad0be6ce1848 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/freshstartree/util/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Util package todo: remove this, kept it temporarily for readibility */ +package org.opensearch.index.codec.freshstartree.util; diff --git a/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java b/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java index f4fd2490c7abe..c1b1c8c01a03d 100644 --- a/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java +++ b/server/src/main/java/org/opensearch/index/engine/SegmentsStats.java @@ -93,8 +93,11 @@ public class SegmentsStats implements Writeable, ToXContentFragment { Map.entry("tvx", "Term Vector Index"), Map.entry("tvd", "Term Vector Documents"), Map.entry("tvf", "Term Vector Fields"), - Map.entry("liv", "Live Documents") - ); + Map.entry("liv", "Live Documents"), + Map.entry("stttree", "Star tree"), + Map.entry("sttd", "Star tree doc val data"), + Map.entry("sttm", "Star tree doc val meta") + ); public SegmentsStats() { fileSizes = new HashMap<>(); diff --git a/server/src/main/java/org/opensearch/index/engine/TranslogLeafReader.java b/server/src/main/java/org/opensearch/index/engine/TranslogLeafReader.java index 6aacb6c1cbedf..753ec7c7d7c53 100644 --- a/server/src/main/java/org/opensearch/index/engine/TranslogLeafReader.java +++ b/server/src/main/java/org/opensearch/index/engine/TranslogLeafReader.java @@ -159,6 +159,12 @@ public SortedDocValues getSortedDocValues(String field) { throw new UnsupportedOperationException(); } + @Override + public Object getAggregatedDocValues() + throws IOException { + throw new UnsupportedOperationException(); + } + @Override public SortedNumericDocValues getSortedNumericDocValues(String field) { throw new UnsupportedOperationException(); diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 204e7bc4c16ab..0116147a9c460 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -89,6 +89,9 @@ public class NumberFieldMapper extends ParametrizedFieldMapper { public static final Setting COERCE_SETTING = 
Setting.boolSetting("index.mapping.coerce", true, Property.IndexScope); + private Boolean isDimension; + private Boolean isSumMetric; + private Boolean isCountMetric; private static NumberFieldMapper toType(FieldMapper in) { return (NumberFieldMapper) in; @@ -103,6 +106,12 @@ public static class Builder extends ParametrizedFieldMapper.Builder { private final Parameter indexed = Parameter.indexParam(m -> toType(m).indexed, true); private final Parameter hasDocValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true); + + protected final Parameter isDimension = Parameter.boolParam("isDimension", false, m -> toType(m).isDimension, false); + protected final Parameter isSumMetric = Parameter.boolParam("isSumMetric", false, m -> toType(m).isSumMetric, false); + protected final Parameter isCountMetric = Parameter.boolParam("isCountMetric", false, m -> toType(m).isCountMetric, false); + + private final Parameter stored = Parameter.storeParam(m -> toType(m).stored, false); private final Parameter> ignoreMalformed; @@ -155,7 +164,8 @@ public Builder docValues(boolean hasDocValues) { @Override protected List> getParameters() { - return Arrays.asList(indexed, hasDocValues, stored, ignoreMalformed, coerce, nullValue, meta); + return Arrays.asList(indexed, hasDocValues, stored, ignoreMalformed, coerce, nullValue, meta, isDimension, + isSumMetric, isCountMetric); } @Override @@ -1343,6 +1353,9 @@ private NumberFieldMapper(String simpleName, MappedFieldType mappedFieldType, Mu this.nullValue = builder.nullValue.getValue(); this.ignoreMalformedByDefault = builder.ignoreMalformed.getDefaultValue().value(); this.coerceByDefault = builder.coerce.getDefaultValue().value(); + this.isCountMetric = builder.isCountMetric.getValue(); + this.isSumMetric = builder.isSumMetric.getValue(); + this.isDimension = builder.isDimension.getValue(); } boolean coerce() { diff --git a/server/src/main/java/org/opensearch/rest/action/search/RestSearchStarTreeAction.java b/server/src/main/java/org/opensearch/rest/action/search/RestSearchStarTreeAction.java new file mode 100644 index 0000000000000..c8fa0e08a9941 --- /dev/null +++ b/server/src/main/java/org/opensearch/rest/action/search/RestSearchStarTreeAction.java @@ -0,0 +1,2 @@ +package org.opensearch.rest.action.search;public class RestSearchStarTreeAction { +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java index 4b8e882cd69bc..6bbf2a6e1569f 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java @@ -31,11 +31,16 @@ package org.opensearch.search.aggregations.metrics; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.search.ScoreMode; import org.opensearch.common.lease.Releasables; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.DoubleArray; +import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues; import org.opensearch.index.fielddata.SortedNumericDoubleValues; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; @@ -48,6 +53,8 @@ import java.io.IOException; import java.util.Map; +import 
diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java
index 4b8e882cd69bc..6bbf2a6e1569f 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java
@@ -31,11 +31,15 @@
 
 package org.opensearch.search.aggregations.metrics;
 
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.search.ScoreMode;
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.common.util.BigArrays;
 import org.opensearch.common.util.DoubleArray;
+import org.opensearch.index.codec.freshstartree.codec.StarTreeAggregatedValues;
 import org.opensearch.index.fielddata.SortedNumericDoubleValues;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.aggregations.Aggregator;
 
 /**
  * Aggregate all docs into a single sum value
@@ -62,6 +66,9 @@ public class SumAggregator extends NumericMetricsAggregator.SingleValue {
 
     private DoubleArray sums;
     private DoubleArray compensations;
+
+    private static final Logger logger = LogManager.getLogger(SumAggregator.class);
+
     SumAggregator(
         String name,
         ValuesSourceConfig valuesSourceConfig,
@@ -91,14 +98,44 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc
         }
         final BigArrays bigArrays = context.bigArrays();
         final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
+        // Prototype: read the star tree's pre-aggregated doc values for this segment.
+        StarTreeAggregatedValues aggrVals = (StarTreeAggregatedValues) ctx.reader().getAggregatedDocValues();
+        // Metric and dimension names are hard-coded for the prototype; resolve them
+        // once per segment instead of once per collected document.
+        NumericDocValues dv = aggrVals.metricValues.get("status_sum");
+        NumericDocValues hourValueDim = aggrVals.dimensionValues.get("hour");
+        NumericDocValues dayValueDim = aggrVals.dimensionValues.get("day");
+        NumericDocValues statusValueDim = aggrVals.dimensionValues.get("status");
+        NumericDocValues minuteValueDim = aggrVals.dimensionValues.get("minute");
+        NumericDocValues monthValueDim = aggrVals.dimensionValues.get("month");
         final CompensatedSum kahanSummation = new CompensatedSum(0, 0);
         return new LeafBucketCollectorBase(sub, values) {
             @Override
             public void collect(int doc, long bucket) throws IOException {
                 sums = bigArrays.grow(sums, bucket + 1);
                 compensations = bigArrays.grow(compensations, bucket + 1);
 
-                if (values.advanceExact(doc)) {
+                if (dv.advanceExact(doc)) {
+                    // Kahan-sum the pre-aggregated metric, mirroring the regular path below.
+                    double sum = sums.get(bucket);
+                    double compensation = compensations.get(bucket);
+                    kahanSummation.reset(sum, compensation);
+                    double value = dv.longValue();
+                    hourValueDim.advanceExact(doc);
+                    long hour = hourValueDim.longValue();
+                    dayValueDim.advanceExact(doc);
+                    long day = dayValueDim.longValue();
+                    statusValueDim.advanceExact(doc);
+                    long status = statusValueDim.longValue();
+                    minuteValueDim.advanceExact(doc);
+                    long minute = minuteValueDim.longValue();
+                    monthValueDim.advanceExact(doc);
+                    long month = monthValueDim.longValue();
+                    logger.trace("day: {}, hour: {}, status: {}, month: {}, minute: {}", day, hour, status, month, minute);
+                    kahanSummation.add(value);
+                    compensations.set(bucket, kahanSummation.delta());
+                    sums.set(bucket, kahanSummation.value());
+                } else if (values.advanceExact(doc)) {
                     final int valuesCount = values.docValueCount();
                     // Compute the sum of double values with Kahan summation algorithm which is more
                     // accurate than naive summation.
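Both branches of the collector above funnel values through CompensatedSum, OpenSearch's Kahan summation helper. A self-contained illustration of the algorithm it implements (the demo class and the 0.1 workload are illustrative only, not part of the change):

    /** Kahan (compensated) summation: recover the low-order bits that plain
     *  double addition drops when accumulating many small values. */
    public final class KahanSumDemo {
        public static void main(String[] args) {
            double naive = 0.0;
            double sum = 0.0;
            double compensation = 0.0;
            for (int i = 0; i < 10_000_000; i++) {
                double value = 0.1;
                naive += value;

                double corrected = value - compensation; // re-apply bits lost last round
                double t = sum + corrected;              // may round away low-order bits
                compensation = (t - sum) - corrected;    // measure exactly what was lost
                sum = t;
            }
            // Both target 1,000,000: the naive sum is visibly off, while the
            // compensated sum stays within a couple of ulps of the exact value.
            System.out.println("naive: " + naive);
            System.out.println("kahan: " + sum);
        }
    }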
diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhase.java b/server/src/main/java/org/opensearch/search/query/QueryPhase.java
index fa767f69d1ac6..4eaeded73fdf1 100644
--- a/server/src/main/java/org/opensearch/search/query/QueryPhase.java
+++ b/server/src/main/java/org/opensearch/search/query/QueryPhase.java
@@ -32,6 +32,11 @@
 
 package org.opensearch.search.query;
 
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Predicate;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.index.IndexReader;
@@ -52,6 +57,7 @@
 import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
 import org.opensearch.common.util.concurrent.QueueResizingOpenSearchThreadPoolExecutor;
 import org.opensearch.core.tasks.TaskCancelledException;
+import org.opensearch.index.codec.freshstartree.query.StarTreeQuery;
 import org.opensearch.lucene.queries.SearchAfterSortedDocQuery;
 import org.opensearch.search.DocValueFormat;
 import org.opensearch.search.SearchContextSourcePrinter;
@@ -190,6 +196,16 @@ static boolean executeInternal(SearchContext searchContext, QueryPhaseSearcher q
         final ContextIndexSearcher searcher = searchContext.searcher();
         final IndexReader reader = searcher.getIndexReader();
         QuerySearchResult queryResult = searchContext.queryResult();
+        // Prototype: ignore the parsed query and force a star tree query grouped on
+        // "status". The predicate map below shows the intended dimension filtering
+        // (status == 200) but is not yet wired into the query.
+        Set<String> groupByCols = new HashSet<>();
+        groupByCols.add("status");
+        Map<String, List<Predicate<Long>>> predicateMap = new HashMap<>();
+        List<Predicate<Long>> predicates = new ArrayList<>();
+        predicates.add(status -> status == 200);
+        predicateMap.put("status", predicates);
+        Query starTreeQuery = new StarTreeQuery(new HashMap<>(), groupByCols);
         queryResult.searchTimedOut(false);
         try {
             queryResult.from(searchContext.from());
@@ -280,10 +296,10 @@ static boolean executeInternal(SearchContext searchContext, QueryPhaseSearcher q
             boolean shouldRescore = queryPhaseSearcher.searchWith(
                 searchContext,
                 searcher,
-                query,
+                starTreeQuery,
                 collectors,
                 hasFilterCollector,
                 timeoutSet
             );
 
             ExecutorService executor = searchContext.indexShard().getThreadPool().executor(ThreadPool.Names.SEARCH);
diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
new file mode 100644
index 0000000000000..25df72954122b
--- /dev/null
+++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -0,0 +1,9 @@
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+
+org.opensearch.index.codec.freshstartree.codec.StarTreeCodec
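The service registration file above is what makes the new codec discoverable: Lucene's SPI loader scans META-INF/services/org.apache.lucene.codecs.Codec on the classpath and indexes each listed class by its declared codec name. A lookup sketch, assuming StarTreeCodec registers itself under the name "StarTreeCodec" (the name comes from the codec class itself, not from this file):

    import org.apache.lucene.codecs.Codec;

    public final class StarTreeCodecLookup {
        public static void main(String[] args) {
            // Resolves the codec by name through Lucene's SPI registry.
            Codec codec = Codec.forName("StarTreeCodec"); // assumed codec name
            System.out.println(codec.getName());
        }
    }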