Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All star tree indexing commits as of 24aug #35

Merged
merged 33 commits into from
Aug 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
e09106d
Changes to handle count and avg metrics as part of star tree mapping
bharath-techie Aug 7, 2024
2e59751
addressing review comments
bharath-techie Aug 8, 2024
3f82b5a
Adding timestamp rounding support in star tree
bharath-techie Aug 14, 2024
123097f
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 18, 2024
8806093
removing count as required metric
bharath-techie Aug 18, 2024
0777a28
addressing comments
bharath-techie Aug 19, 2024
c20f12c
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 19, 2024
1052ead
addressing comments
bharath-techie Aug 19, 2024
758c9e2
file formats rebase
sarthakaggarwal97 Aug 7, 2024
1f574f6
nit fixes
sarthakaggarwal97 Aug 7, 2024
de8ad0a
added file format tests
sarthakaggarwal97 Aug 11, 2024
711a5c1
writer versioning and addressing comments
sarthakaggarwal97 Aug 12, 2024
0d914ee
fixes in merge, aggregators, added tests, addressed comments
sarthakaggarwal97 Aug 13, 2024
c1c2e75
doc values assertions
sarthakaggarwal97 Aug 15, 2024
2a7db63
removing additional sorted numeric method
sarthakaggarwal97 Aug 20, 2024
0822060
rebase fixes
sarthakaggarwal97 Aug 20, 2024
9a541e6
max doc fixes
sarthakaggarwal97 Aug 20, 2024
f5336aa
metadata to have total star tree docs
sarthakaggarwal97 Aug 20, 2024
fc9614f
adding half hour and quarter hour calendar intervals
bharath-techie Aug 19, 2024
da9b146
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 20, 2024
136ab7c
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 21, 2024
014f123
adding tests
bharath-techie Aug 21, 2024
0b4bfb2
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 21, 2024
0d7768e
separating child star node from children
sarthakaggarwal97 Aug 22, 2024
35ab173
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 23, 2024
221aee6
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 23, 2024
f376b96
Merge branch 'count_avg_mapper' of github.com:bharath-techie/OpenSear…
bharath-techie Aug 23, 2024
9a5c434
Merge branch 'startree-fileformat-childstarnode' of https://github.co…
bharath-techie Aug 23, 2024
f7124ec
merge conflicts for file formats
bharath-techie Aug 23, 2024
e77f6d0
Doc count field changes in star tree
bharath-techie Aug 16, 2024
6674064
refactoring and addressing comments
bharath-techie Aug 17, 2024
d4ac7fc
Merge branch 'main' of github.com:opensearch-project/OpenSearch into …
bharath-techie Aug 24, 2024
d339434
refactoring and fixing bugs
bharath-techie Aug 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.ExtendedDateTimeUnit;
import org.opensearch.indices.IndicesService;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.junit.After;
Expand Down Expand Up @@ -47,10 +50,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.startObject("startree-1")
.field("type", "star_tree")
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.startObject("date_dimension")
.field("name", "timestamp")
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.endObject()
Expand Down Expand Up @@ -90,21 +93,77 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
}
}

private static XContentBuilder createMaxDimTestMapping() {
/**
 * Builds an index mapping containing a star tree field named "startree-1" whose date dimension
 * "timestamp" declares three calendar intervals: "day", "quarter-hour" and a third one chosen by
 * {@code duplicate}. The star tree uses "numeric_dv" both as its only ordered dimension and as its
 * only metric field.
 *
 * @param duplicate when {@code true} the third calendar interval repeats "quarter-hour";
 *                  when {@code false} it is "half-hour"
 * @return the builder holding the complete mapping
 * @throws IllegalStateException wrapping any {@link IOException} raised while building the mapping
 */
private static XContentBuilder createDateTestMapping(boolean duplicate) {
    // The only part of the mapping that depends on the argument.
    final String thirdInterval = duplicate ? "quarter-hour" : "half-hour";
    try {
        XContentBuilder mapping = jsonBuilder().startObject();

        // "composite" section: star tree definition header.
        mapping = mapping.startObject("composite")
            .startObject("startree-1")
            .field("type", "star_tree")
            .startObject("config");

        // Date dimension with its calendar intervals.
        mapping = mapping.startObject("date_dimension")
            .field("name", "timestamp")
            .startArray("calendar_intervals")
            .value("day")
            .value("quarter-hour")
            .value(thirdInterval)
            .endArray()
            .endObject();

        // Single ordered dimension.
        mapping = mapping.startArray("ordered_dimensions")
            .startObject()
            .field("name", "numeric_dv")
            .endObject()
            .endArray();

        // Single metric field.
        mapping = mapping.startArray("metrics")
            .startObject()
            .field("name", "numeric_dv")
            .endObject()
            .endArray();

        // Close "config", "startree-1" and "composite".
        mapping = mapping.endObject().endObject().endObject();

        // "properties" section: the concrete fields of the index.
        mapping = mapping.startObject("properties");
        mapping = mapping.startObject("timestamp").field("type", "date").endObject();
        mapping = mapping.startObject("numeric_dv").field("type", "integer").field("doc_values", true).endObject();
        mapping = mapping.startObject("numeric").field("type", "integer").field("doc_values", false).endObject();
        mapping = mapping.startObject("keyword_dv").field("type", "keyword").field("doc_values", true).endObject();
        mapping = mapping.startObject("keyword").field("type", "keyword").field("doc_values", false).endObject();

        // Close "properties" and the root object.
        return mapping.endObject().endObject();
    } catch (IOException ioException) {
        throw new IllegalStateException(ioException);
    }
}

private static XContentBuilder createMaxDimTestMapping() {
try {
return jsonBuilder().startObject()
.startObject("composite")
.startObject("startree-1")
.field("type", "star_tree")
.startObject("config")
.startObject("date_dimension")
.field("name", "timestamp")
.startArray("calendar_intervals")
.value("day")
.value("month")
.value("half-hour")
.endArray()
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", "dim2")
.endObject()
Expand Down Expand Up @@ -139,7 +198,7 @@ private static XContentBuilder createMaxDimTestMapping() {
}
}

private static XContentBuilder createTestMappingWithoutStarTree(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createTestMappingWithoutStarTree() {
try {
return jsonBuilder().startObject()
.startObject("properties")
Expand Down Expand Up @@ -176,10 +235,10 @@ private static XContentBuilder createUpdateTestMapping(boolean changeDim, boolea
.startObject(sameStarTree ? "startree-1" : "startree-2")
.field("type", "star_tree")
.startObject("config")
.startArray("ordered_dimensions")
.startObject()
.startObject("date_dimension")
.field("name", "timestamp")
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", changeDim ? "numeric_new" : getDim(false, false))
.endObject()
Expand Down Expand Up @@ -258,11 +317,101 @@ public void testValidCompositeIndex() {
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
new DateTimeUnitAdapter(Rounding.DateTimeUnit.MINUTES_OF_HOUR),
ExtendedDateTimeUnit.HALF_HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
for (int i = 0; i < dateDim.getSortedCalendarIntervals().size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals(2, starTreeFieldType.getMetrics().size());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(
MetricStat.VALUE_COUNT,
MetricStat.SUM,
MetricStat.AVG
);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());

assertEquals("_doc_count", starTreeFieldType.getMetrics().get(1).getField());
assertEquals(List.of(MetricStat.DOC_COUNT), starTreeFieldType.getMetrics().get(1).getMetrics());

assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs());
assertEquals(
StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP,
starTreeFieldType.getStarTreeConfig().getBuildMode()
);
assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims());
}
}
}
}

/**
 * Creates the test index from {@code createDateTestMapping(false)} and, on every data node that
 * hosts the index, verifies the resulting star tree field type:
 * the first dimension is the date dimension "timestamp" whose sorted calendar intervals are
 * quarter-hour, half-hour and day (in that order), the second dimension and the first metric are
 * "numeric_dv", and the star tree configuration carries the values asserted below
 * (10000 max leaf docs, OFF_HEAP build mode, no skipped star-node dimensions).
 */
public void testValidCompositeIndexWithDates() {
    prepareCreate(TEST_INDEX).setMapping(createDateTestMapping(false)).get();
    Iterable<IndicesService> dataNodeInstances = internalCluster().getDataNodeInstances(IndicesService.class);
    for (IndicesService service : dataNodeInstances) {
        final Index index = resolveIndex("test");
        if (service.hasIndex(index)) {
            IndexService indexService = service.indexService(index);
            Set<CompositeMappedFieldType> fts = indexService.mapperService().getCompositeFieldTypes();

            for (CompositeMappedFieldType ft : fts) {
                assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
                StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
                assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
                assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
                DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
                List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
                    ExtendedDateTimeUnit.QUARTER_HOUR_OF_DAY,
                    ExtendedDateTimeUnit.HALF_HOUR_OF_DAY,
                    new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH)
                );
                // Pin the size first: without it the loop below could silently verify nothing
                // (empty list), skip trailing entries, or fail with an index error instead of
                // a readable assertion message.
                assertEquals(expectedTimeUnits.size(), dateDim.getSortedCalendarIntervals().size());
                // Drive the loop from the expected list so every expected interval is checked.
                for (int i = 0; i < expectedTimeUnits.size(); i++) {
                    assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
                }
                assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
                assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
                List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
                assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
                assertEquals(10000, starTreeFieldType.getStarTreeConfig().maxLeafDocs());
                assertEquals(
                    StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP,
                    starTreeFieldType.getStarTreeConfig().getBuildMode()
                );
                assertEquals(Collections.emptySet(), starTreeFieldType.getStarTreeConfig().getSkipStarNodeCreationInDims());
            }
        }
    }
}

public void testValidCompositeIndexWithDuplicateDates() {
prepareCreate(TEST_INDEX).setMapping(createDateTestMapping(true)).get();
Iterable<IndicesService> dataNodeInstances = internalCluster().getDataNodeInstances(IndicesService.class);
for (IndicesService service : dataNodeInstances) {
final Index index = resolveIndex("test");
if (service.hasIndex(index)) {
IndexService indexService = service.indexService(index);
Set<CompositeMappedFieldType> fts = indexService.mapperService().getCompositeFieldTypes();

for (CompositeMappedFieldType ft : fts) {
assertTrue(ft instanceof StarTreeMapper.StarTreeFieldType);
StarTreeMapper.StarTreeFieldType starTreeFieldType = (StarTreeMapper.StarTreeFieldType) ft;
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
ExtendedDateTimeUnit.QUARTER_HOUR_OF_DAY,
new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH)
);
for (int i = 0; i < dateDim.getIntervals().size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -291,7 +440,7 @@ public void testUpdateIndexWithAdditionOfStarTree() {
}

public void testUpdateIndexWithNewerStarTree() {
prepareCreate(TEST_INDEX).setMapping(createTestMappingWithoutStarTree(false, false, false)).get();
prepareCreate(TEST_INDEX).setMapping(createTestMappingWithoutStarTree()).get();

IllegalArgumentException ex = expectThrows(
IllegalArgumentException.class,
Expand Down Expand Up @@ -338,11 +487,14 @@ public void testUpdateIndexWhenMappingIsSame() {
assertEquals("timestamp", starTreeFieldType.getDimensions().get(0).getField());
assertTrue(starTreeFieldType.getDimensions().get(0) instanceof DateDimension);
DateDimension dateDim = (DateDimension) starTreeFieldType.getDimensions().get(0);
List<Rounding.DateTimeUnit> expectedTimeUnits = Arrays.asList(
Rounding.DateTimeUnit.MINUTES_OF_HOUR,
Rounding.DateTimeUnit.HOUR_OF_DAY
List<DateTimeUnitRounding> expectedTimeUnits = Arrays.asList(
new DateTimeUnitAdapter(Rounding.DateTimeUnit.MINUTES_OF_HOUR),
ExtendedDateTimeUnit.HALF_HOUR_OF_DAY
);
assertEquals(expectedTimeUnits, dateDim.getIntervals());
for (int i = 0; i < expectedTimeUnits.size(); i++) {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getIntervals().get(i).shortName());
}

assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());

Expand Down Expand Up @@ -375,6 +527,7 @@ public void testMaxDimsCompositeIndex() {
MapperParsingException ex = expectThrows(
MapperParsingException.class,
() -> prepareCreate(TEST_INDEX).setMapping(createMaxDimTestMapping())
// Date dimension is considered as one dimension regardless of number of actual calendar intervals
.setSettings(Settings.builder().put(StarTreeIndexSettings.STAR_TREE_MAX_DIMENSIONS_SETTING.getKey(), 2))
.get()
);
Expand Down
18 changes: 9 additions & 9 deletions server/src/main/java/org/opensearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public enum DateTimeUnit {
WEEK_OF_WEEKYEAR((byte) 1, "week", IsoFields.WEEK_OF_WEEK_BASED_YEAR, true, TimeUnit.DAYS.toMillis(7)) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(7);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundWeekOfWeekYear(utcMillis);
}

Expand All @@ -107,7 +107,7 @@ long extraLocalOffsetLookup() {
YEAR_OF_CENTURY((byte) 2, "year", ChronoField.YEAR_OF_ERA, false, 12) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(366);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundYear(utcMillis);
}

Expand All @@ -118,7 +118,7 @@ long extraLocalOffsetLookup() {
QUARTER_OF_YEAR((byte) 3, "quarter", IsoFields.QUARTER_OF_YEAR, false, 3) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(92);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundQuarterOfYear(utcMillis);
}

Expand All @@ -129,7 +129,7 @@ long extraLocalOffsetLookup() {
MONTH_OF_YEAR((byte) 4, "month", ChronoField.MONTH_OF_YEAR, false, 1) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(31);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundMonthOfYear(utcMillis);
}

Expand All @@ -138,7 +138,7 @@ long extraLocalOffsetLookup() {
}
},
DAY_OF_MONTH((byte) 5, "day", ChronoField.DAY_OF_MONTH, true, ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, this.ratio);
}

Expand All @@ -147,7 +147,7 @@ long extraLocalOffsetLookup() {
}
},
HOUR_OF_DAY((byte) 6, "hour", ChronoField.HOUR_OF_DAY, true, ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -162,7 +162,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -177,7 +177,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.SECOND_OF_MINUTE.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand Down Expand Up @@ -210,7 +210,7 @@ public long extraLocalOffsetLookup() {
* @param utcMillis the milliseconds since the epoch
* @return the rounded down milliseconds since the epoch
*/
abstract long roundFloor(long utcMillis);
public abstract long roundFloor(long utcMillis);

/**
* When looking up {@link LocalTimeOffset} go this many milliseconds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.index.codec.composite;

import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;

import java.io.IOException;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesProducer;

/**
 * Gives callers access to a {@link DocValuesProducer}.
 * <p>
 * The interface declares a single accessor and no per-field lookup methods; callers obtain the
 * producer and read doc values through it.
 *
 * @opensearch.experimental
 */
public interface DocValuesProvider {

    /**
     * Returns the {@link DocValuesProducer} held by this provider.
     *
     * @return the DocValuesProducer instance
     */
    DocValuesProducer getDocValuesProducer();
}
Loading
Loading