Skip to content

Commit

Permalink
Fix enumerations for WKB
Browse files Browse the repository at this point in the history
  • Loading branch information
XanthosXanthopoulos committed Oct 18, 2024
1 parent 74ad17c commit 1e4d07e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
4 changes: 4 additions & 0 deletions libtiledbsoma/src/soma/soma_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ bool SOMAArray::_cast_column(
case TILEDB_CHAR:
case TILEDB_GEOM_WKB:
case TILEDB_GEOM_WKT:
case TILEDB_BLOB:
return _cast_column_aux<std::string>(schema, array, se);
case TILEDB_BOOL:
return _cast_column_aux<bool>(schema, array, se);
Expand Down Expand Up @@ -786,6 +787,9 @@ bool SOMAArray::_extend_enumeration(
case TILEDB_STRING_ASCII:
case TILEDB_STRING_UTF8:
case TILEDB_CHAR:
case TILEDB_BLOB:
case TILEDB_GEOM_WKB:
case TILEDB_GEOM_WKT:
return _extend_and_evolve_schema<std::string>(
value_schema, value_array, index_schema, index_array, se);
case TILEDB_INT8:
Expand Down
3 changes: 2 additions & 1 deletion libtiledbsoma/src/soma/soma_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ void SOMADataFrame::update_dataframe_schema(
attr_name,
ArrowAdapter::to_tiledb_format(attr_type));

if (ArrowAdapter::arrow_is_string_type(attr_type.c_str())) {
if (ArrowAdapter::arrow_is_string_type(attr_type.c_str()) ||
ArrowAdapter::arrow_is_binary_type(attr_type.c_str())) {
attr.set_cell_val_num(TILEDB_VAR_NUM);
}

Expand Down
18 changes: 14 additions & 4 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
auto schild = index_column_schema->children[i];
auto col_name = schild->name;
if (strcmp(child->name, col_name) == 0) {
if (ArrowAdapter::arrow_is_string_type(child->format)) {
if (ArrowAdapter::arrow_is_string_type(child->format) ||
ArrowAdapter::arrow_is_binary_type(child->format)) {
type = TILEDB_STRING_ASCII;
}

Expand Down Expand Up @@ -1010,7 +1011,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
attr.set_nullable(true);
}

if (ArrowAdapter::arrow_is_string_type(child->format)) {
if (ArrowAdapter::arrow_is_string_type(child->format) ||
ArrowAdapter::arrow_is_binary_type(child->format)) {
attr.set_cell_val_num(TILEDB_VAR_NUM);
}

Expand All @@ -1021,7 +1023,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
*ctx,
child->name,
enmr_type,
ArrowAdapter::arrow_is_string_type(enmr_format) ?
ArrowAdapter::arrow_is_string_type(enmr_format) ||
ArrowAdapter::arrow_is_binary_type(enmr_format) ?
TILEDB_VAR_NUM :
1,
child->flags & ARROW_FLAG_DICTIONARY_ORDERED);
Expand Down Expand Up @@ -1297,7 +1300,8 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
*ctx,
child->name,
enmr_type,
ArrowAdapter::arrow_is_string_type(enmr_format) ?
ArrowAdapter::arrow_is_string_type(enmr_format) ||
ArrowAdapter::arrow_is_binary_type(enmr_format) ?
TILEDB_VAR_NUM :
1,
child->flags & ARROW_FLAG_DICTIONARY_ORDERED);
Expand Down Expand Up @@ -1700,6 +1704,12 @@ ArrowAdapter::to_arrow(std::shared_ptr<ColumnBuffer> column) {
dict_arr->buffers[1] = column->enum_offsets().data();
dict_arr->buffers[2] = column->enum_string().data();
dict_arr->length = dict_vec.size();
} else if (enmr->type() == TILEDB_GEOM_WKB) {
auto dict_vec = enmr->as_vector<uint8_t>();
column->convert_enumeration();
dict_arr->buffers[1] = column->enum_offsets().data();
dict_arr->buffers[2] = column->enum_string().data();
dict_arr->length = dict_vec.size();
} else {
auto [dict_data, dict_length] = _get_data_and_length(
*enmr, dict_arr->buffers[1]);
Expand Down

0 comments on commit 1e4d07e

Please sign in to comment.