Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Fix listIndexes bug with double quoted columnName in the index target from indexMetadata" #1823

Merged
merged 3 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,9 @@ private WithWarnings<OrderByCqlClause> resolveVectorSort(
}));
}

// see if Table has vector index on the target sort vector column
var optionalVectorIndex = apiTableDef.supportedIndexes().firstIndexFor(vectorSortIdentifier);

if (optionalVectorIndex.isEmpty()) {
// HACK - waiting for index support on the APiTableDef
var optionalIndexMetadata = findIndexMetadata(commandContext.schemaObject(), vectorSortColumn);
if (optionalIndexMetadata.isEmpty()) {
throw SortException.Code.CANNOT_VECTOR_SORT_NON_INDEXED_VECTOR_COLUMNS.get(
errVars(
commandContext.schemaObject(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
public enum ApiIndexType {
// map, set, list
COLLECTION("collection"),
// Something on a scalar column, and non analysed text index
// Something on a scalar column, and non analsed text index
REGULAR("regular"),
// Not available yet
TEXT_ANALYSED("text-analysed"),
Expand Down Expand Up @@ -53,29 +53,31 @@ public static ApiIndexType fromTypeName(String typeName) {
static ApiIndexType fromCql(
ApiColumnDef apiColumnDef, CQLSAIIndex.IndexTarget indexTarget, IndexMetadata indexMetadata)
throws UnsupportedCqlIndexException {
// If cqlIndexType is values, and the column is a scalar
// then it is a regular index on primitive types
if (indexTarget.cqlIndexType() == CQLSAIIndex.CqlIndexType.VALUES
&& apiColumnDef.type().isPrimitive()) {

// If there is no function on the indexTarget, and the column is a scalar, then it is a regular
// index on
// an int, text, etc
if (indexTarget.indexFunction() == null && apiColumnDef.type().isPrimitive()) {
return REGULAR;
}

// if the target column is a vector, it can only be a vector index
// we will let building the index check the options.
// if the target column is a vector, it can only be a vector index, we will let building the
// index check the options.
// NOTE: check this before the container check, as a vector is a container
if (apiColumnDef.type().typeName() == ApiTypeName.VECTOR) {
return VECTOR;
}

// if the target column is a collection, it can only be a collection index
// we will let building collection index to check if the function is supported.
// if the target column is a collection, it can only be a collection index, we will let building
// the
// collection index to check if the function is supported.
if (apiColumnDef.type().isContainer()) {
return COLLECTION;
}

// we do not support text analysed indexes yet
throw new UnsupportedCqlIndexException(
"Unsupported index:%s on field: %s".formatted(indexMetadata.getName(), apiColumnDef.name()),
"Unsuported index:%s on field: %s".formatted(indexMetadata.getName(), apiColumnDef.name()),
indexMetadata);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,13 @@ protected ApiIndexDef create(
.formatted(ApiIndexType.REGULAR, apiIndexType));
}

// also, we should not have an index function
if (indexTarget.indexFunction() != null) {
throw new IllegalStateException(
"ApiRegularIndex factory does not support index functions, indexTarget.indexFunction: "
+ indexTarget.indexFunction());
}

return new ApiRegularIndex(
indexMetadata.getName(), indexTarget.targetColumn(), indexMetadata.getOptions());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class ApiTableDef {
private final ApiColumnDefContainer nonPKColumns;
private final ApiColumnDefContainer unsupportedColumns;

// split into two, so we do not accidentally reference an index we cannot use.
// split into two so we do not accidentally reference an index we cannot use.
private final ApiIndexDefContainer supportedIndexes;
private final ApiIndexDefContainer indexesIncludingUnsupported;

Expand Down Expand Up @@ -152,7 +152,7 @@ public ApiColumnDefContainer unsupportedColumns() {
}

/** Get all the indexes on this table that are supported by the API */
public ApiIndexDefContainer supportedIndexes() {
public ApiIndexDefContainer indexes() {
return supportedIndexes;
}

Expand All @@ -162,7 +162,7 @@ public ApiIndexDefContainer indexesIncludingUnsupported() {
}

/**
* Factory for creating a {@link ApiTableDef} from a users description sent in a command {@link
* Factory for creating a {@link ApiTableDef} from a users decription sent in a command {@link
* TableDefinitionDesc}.
*
* <p>Use the singleton {@link #FROM_TABLE_DESC_FACTORY} to create an instance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,13 @@ protected ApiIndexDef create(
.formatted(ApiIndexType.VECTOR, apiIndexType));
}

// also, we must not have an index function
if (indexTarget.indexFunction() != null) {
throw new IllegalStateException(
"ApiVectorIndex factory must not have index function, indexMetadata.name: "
+ indexMetadata.getName());
}

var indexModelName = indexMetadata.getOptions().get(VectorConstants.CQLAnnIndex.SOURCE_MODEL);
var indexModelToUse = sourceModelFromName(indexModelName, indexMetadata);
if (LOGGER.isDebugEnabled()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/** Shared methods / constants for CQL SAI indexes. */
public abstract class CQLSAIIndex {
Expand All @@ -37,7 +36,7 @@ interface Options {
private static final String SAI_CLASS_NAME_ENDS_WITH = "." + SAI_CLASS_NAME;

/**
* Checks if the indexMetadata describes an SAI index.
* Checks if the indexMetadata deacribes an SAI index.
*
* <p>We only support {@link IndexKind#CUSTOM} , these are from using CQL <code>
* CREATE CUSTOM INDEX
Expand Down Expand Up @@ -83,44 +82,38 @@ static boolean indexClassIsSai(String className) {
* options
* ------------------------------------------------
* {'class_name': 'StorageAttachedIndex', 'target': 'age'}
* {'class_name': 'StorageAttachedIndex', 'target': '"myVectorColumn"'}
* {'class_name': 'StorageAttachedIndex', 'target': 'full(frozen_list_column)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values(listcolumn)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'entries(mapcolumn)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'keys(mapcolumn)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values(mapcolumn)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values(setcolumn)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'country'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values(array_contains)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'entries(array_size)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values(exist_keys)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_bool_values)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_dbl_values)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values(query_null_values)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_text_values)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'entries(query_timestamp_values)'}
* {'class_name': 'StorageAttachedIndex', 'target': 'comment_vector'}
* {'class_name': 'StorageAttachedIndex', 'similarity_function': 'cosine', 'target': 'my_vector'}
* </pre>
*
* ------------------------------------------------
* Then it is clear for Regex TARGET_REGEX. Valid Matches are:
* keys(foo) -> keys(group1), foo(group2)
* entries(bar) -> entries(group1), bar(group2)
* values(some_column) -> values(group1), some_column(group2)
* full(indexed_field) -> full(group1), index_field(group2)
* values("capitalColumn") -> values(group1), "capitalColumn"(group2)
* age -> not match, so resolve as simple columnName
* "age" -> not match, so resolve as simple columnName
* The target can just be the name of the column, or the name of the column in parentheses
* prefixed by the index type for a map type: values(column), keys(column), and entries(column)
* see https://docs.datastax.com/en/cql/hcd-1.0/develop/indexing/sai/collections.html
*
* ------------------------------------------------
* If column is doubleQuoted in the original CQL index
* we need to unquote to get the real column name
* {'class_name': 'StorageAttachedIndex', 'target': '"myVectorColumn"'}
* {'class_name': 'StorageAttachedIndex', 'target': 'values("listWithCapitalLetters")'}
* <p>The Reg Exp below will match:
*
* </pre>
* <ul>
* <li>"monkeys": group 1 - "monkeys", group 2 - null
* <li>"values(monkeys)": group 1 - "values" group 2 - "monkeys"
* </ul>
*/
private static final Pattern TARGET_REGEX =
Pattern.compile("^(keys|entries|values|full)\\((.+)\\)$");

private static final Pattern TWO_QUOTES = Pattern.compile("\"\"");
private static final String QUOTE = "\"";
private static Pattern INDEX_TARGET_PATTERN = Pattern.compile("^(\\w+)?(?:\\((\\w+)\\))?$");

/**
* Parses the target from the IndexMetadata to extract the column name and the function if there
* is one.
*
* <p>Index functions are used for indexing collections, see {@link #TARGET_REGEX} for the format
* of the target
* <p>Index functions are used for indexing collections, see {@link #INDEX_TARGET_PATTERN} for the
* format of the target
*
* <p>
*
Expand All @@ -135,68 +128,29 @@ static IndexTarget indexTarget(IndexMetadata indexMetadata)
throws UnknownCqlIndexFunctionException, UnsupportedCqlIndexException {
Objects.requireNonNull(indexMetadata, "indexMetadata must not be null");

// if the regex matches then the target is in the form "keys(foo)", "entries(bar)" etc
// if not, then it must be a simple column name and implicitly its type is VALUES
var target = indexMetadata.getTarget();
Matcher matcher = TARGET_REGEX.matcher(target);
String columnName;
CqlIndexType cqlIndexType;
if (matcher.matches()) {
cqlIndexType = CqlIndexType.fromString(matcher.group(1));
columnName = matcher.group(2);
} else {
columnName = target;
cqlIndexType = CqlIndexType.VALUES;
var target = CQLSAIIndex.Options.TARGET.getWithDefault(indexMetadata.getOptions());
Matcher matcher = INDEX_TARGET_PATTERN.matcher(target);
if (!matcher.matches()) {
throw new UnsupportedCqlIndexException(
"Could not parse index target: '" + target + "'", indexMetadata);
}

// 1. In the case of a quoted column name the name in the target string
// will be enclosed in quotes, which we need to unwrap.
// 2. It may also include quote characters internally, escaped like so:
// abc"def -> abc""def, then we need to un-escape as abc"def -> abc"def
// to get the actual column name
if (columnName.startsWith(QUOTE)) {
columnName = StringUtils.substring(StringUtils.substring(columnName, 1), 0, -1);
columnName = TWO_QUOTES.matcher(columnName).replaceAll(QUOTE);
String columnName;
String functionName;
if (matcher.group(2) == null) {
columnName = matcher.group(1);
functionName = null;
} else {
columnName = matcher.group(2);
functionName = matcher.group(1);
}

return new IndexTarget(CqlIdentifier.fromInternal(columnName), cqlIndexType);
return functionName == null
? new IndexTarget(CqlIdentifier.fromInternal(columnName), null)
: new IndexTarget(
CqlIdentifier.fromInternal(columnName), ApiIndexFunction.fromCql(functionName));
}

/** For internal to this package use only */
public record IndexTarget(CqlIdentifier targetColumn, CqlIndexType cqlIndexType) {}

public enum CqlIndexType {
VALUES,
KEYS,
KEYS_AND_VALUES,
FULL,
SIMPLE;

public String toString() {
switch (this) {
case KEYS:
return "keys";
case KEYS_AND_VALUES:
return "entries";
case FULL:
return "full";
case VALUES:
return "values";
case SIMPLE:
return "";
default:
return "";
}
}

public static CqlIndexType fromString(String s) {
if ("".equals(s)) return SIMPLE;
else if ("values".equals(s)) return VALUES;
else if ("keys".equals(s)) return KEYS;
else if ("entries".equals(s)) return KEYS_AND_VALUES;
else if ("full".equals(s)) return FULL;

throw new AssertionError("Unrecognized index target type " + s);
}
}
protected record IndexTarget(CqlIdentifier targetColumn, ApiIndexFunction indexFunction) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ public abstract class IndexFactoryFromCql extends FactoryFromCql {
public ApiIndexDef create(ApiColumnDefContainer allColumns, IndexMetadata indexMetadata)
throws UnsupportedCqlIndexException {

// This first check is to see if there is anyway we can support this index, because we are only
// This first check is to see if there is anyway we can support this index, becase we are only
// supporting
// SAI indexes, later on we may find something that we could support but don't like a new type
// of
// SAI indexes, later on we may find something that we could support but dont like a new type of
// sai
if (!isSupported(indexMetadata)) {
return createUnsupported(indexMetadata);
Expand All @@ -34,7 +33,7 @@ public ApiIndexDef create(ApiColumnDefContainer allColumns, IndexMetadata indexM
if (apiColumnDef == null) {
// Cassandra should not let there be an index on a column that is not on the table
throw new IllegalStateException(
"Could not find target column index.name:%s target:%s"
"Could not find target column index.name:%s target: "
.formatted(indexMetadata.getName(), indexTarget.targetColumn()));
}
// will throw if we cannot work it out
Expand Down
Loading
Loading