Skip to content

Commit

Permalink
implement ENUM
Browse files Browse the repository at this point in the history
  • Loading branch information
jraymakers committed Mar 24, 2024
1 parent 583358d commit b1e03ab
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 12 deletions.
2 changes: 1 addition & 1 deletion api/src/DuckDBLogicalType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ export class DuckDBEnumLogicalType extends DuckDBLogicalType {
return ddb.duckdb_enum_internal_type(this.logical_type) as unknown as DuckDBTypeId;
}
/**
 * Builds the DuckDBEnumType that corresponds to this logical type.
 *
 * The resulting type carries both the enum's values (from `this.values()`) and
 * `this.internalTypeId`, so consumers can tell which integer width the enum's
 * vector data uses.
 *
 * @returns A new DuckDBEnumType for this logical type.
 */
public override asType(): DuckDBEnumType {
  return new DuckDBEnumType(this.values(), this.internalTypeId);
}
}

Expand Down
4 changes: 3 additions & 1 deletion api/src/DuckDBType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,11 @@ export class DuckDBTimestampNanosecondsType extends BaseDuckDBType {

/**
 * Type descriptor for a DuckDB ENUM.
 *
 * Its type id is always `DuckDBTypeId.ENUM`; the descriptor additionally records the
 * enum's values and the type id of the integer type used internally for the enum.
 */
export class DuckDBEnumType extends BaseDuckDBType {
  /** The enum's string values. */
  public readonly values: readonly string[];
  /** Type id of the enum's internal (integer) type. */
  public readonly internalTypeId: DuckDBTypeId;

  /**
   * @param values The enum's string values.
   * @param internalTypeId Type id of the enum's internal (integer) type.
   */
  public constructor(values: readonly string[], internalTypeId: DuckDBTypeId) {
    super(DuckDBTypeId.ENUM);
    this.values = values;
    this.internalTypeId = internalTypeId;
  }
}

Expand Down
108 changes: 105 additions & 3 deletions api/src/DuckDBVector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
DuckDBDateType,
DuckDBDecimalType,
DuckDBDoubleType,
DuckDBEnumType,
DuckDBFloatType,
DuckDBHugeIntType,
DuckDBIntegerType,
Expand Down Expand Up @@ -318,8 +319,21 @@ export abstract class DuckDBVector<T> {
return DuckDBTimestampMillisecondsVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.TIMESTAMP_NS:
return DuckDBTimestampNanosecondsVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.ENUM: // variable: Uint8, Uint16, Uint32, Uint64
throw new Error('not yet implemented');
case DuckDBTypeId.ENUM:
if (vectorType instanceof DuckDBEnumType) {
const { internalTypeId } = vectorType;
switch (internalTypeId) {
case DuckDBTypeId.UTINYINT:
return DuckDBEnum1Vector.fromRawVector(vectorType, vector, itemCount);
case DuckDBTypeId.USMALLINT:
return DuckDBEnum2Vector.fromRawVector(vectorType, vector, itemCount);
case DuckDBTypeId.UINTEGER:
return DuckDBEnum4Vector.fromRawVector(vectorType, vector, itemCount);
default:
throw new Error(`unsupported ENUM internal type: ${internalTypeId}`);
}
}
throw new Error('DuckDBType has ENUM type id but is not an instance of DuckDBEnumType');
case DuckDBTypeId.LIST:
if (vectorType instanceof DuckDBListType) {
return DuckDBListVector.fromRawVector(vectorType, vector, itemCount);
Expand Down Expand Up @@ -1189,7 +1203,95 @@ export class DuckDBTimestampNanosecondsVector extends DuckDBVector<bigint> {
}
}

// TODO: ENUM
/**
 * ENUM vector whose per-row entries are kept in a `Uint8Array`.
 *
 * Each stored entry is an index into `enumType.values`; `getItem` resolves that index
 * to the corresponding string.
 */
export class DuckDBEnum1Vector extends DuckDBVector<string> {
  /**
   * @param enumType ENUM type whose `values` array the stored entries index into.
   * @param items One entry per row.
   * @param validity Consulted by `getItem` to decide whether a row is null.
   */
  constructor(
    private readonly enumType: DuckDBEnumType,
    private readonly items: Uint8Array,
    private readonly validity: DuckDBValidity,
  ) {
    super();
  }

  /**
   * Creates a vector from a raw `duckdb_vector`.
   *
   * @param enumType ENUM type describing the vector.
   * @param vector Raw vector to read data and validity from.
   * @param itemCount Number of rows; also the number of bytes requested, one per row.
   */
  static fromRawVector(enumType: DuckDBEnumType, vector: ddb.duckdb_vector, itemCount: number): DuckDBEnum1Vector {
    const bytes = vectorData(vector, itemCount);
    return new DuckDBEnum1Vector(
      enumType,
      new Uint8Array(bytes.buffer, bytes.byteOffset, itemCount),
      DuckDBValidity.fromVector(vector, itemCount),
    );
  }

  /** The ENUM type this vector was created with. */
  public override get type(): DuckDBEnumType {
    return this.enumType;
  }

  /** Number of rows, taken from the length of the backing typed array. */
  public override get itemCount(): number {
    return this.items.length;
  }

  /**
   * @param itemIndex Row to read.
   * @returns `null` when the validity reports the row as not valid; otherwise the entry
   *   of `enumType.values` selected by the stored index.
   */
  public override getItem(itemIndex: number): string | null {
    if (!this.validity.itemValid(itemIndex)) {
      return null;
    }
    const dictionaryIndex = this.items[itemIndex];
    return this.enumType.values[dictionaryIndex];
  }

  /**
   * Returns a new vector over rows `[offset, offset + length)`.
   * The items are copied (`TypedArray.prototype.slice`); the validity is sliced from `offset`.
   */
  public override slice(offset: number, length: number): DuckDBEnum1Vector {
    const end = offset + length;
    const slicedItems = this.items.slice(offset, end);
    const slicedValidity = this.validity.slice(offset);
    return new DuckDBEnum1Vector(this.enumType, slicedItems, slicedValidity);
  }
}

/**
 * ENUM vector whose per-row entries are kept in a `Uint16Array`.
 *
 * Each stored entry is an index into `enumType.values`; `getItem` resolves that index
 * to the corresponding string.
 */
export class DuckDBEnum2Vector extends DuckDBVector<string> {
  /**
   * @param enumType ENUM type whose `values` array the stored entries index into.
   * @param items One entry per row.
   * @param validity Consulted by `getItem` to decide whether a row is null.
   */
  constructor(
    private readonly enumType: DuckDBEnumType,
    private readonly items: Uint16Array,
    private readonly validity: DuckDBValidity,
  ) {
    super();
  }

  /**
   * Creates a vector from a raw `duckdb_vector`.
   *
   * @param enumType ENUM type describing the vector.
   * @param vector Raw vector to read data and validity from.
   * @param itemCount Number of rows; two bytes are requested per row.
   */
  static fromRawVector(enumType: DuckDBEnumType, vector: ddb.duckdb_vector, itemCount: number): DuckDBEnum2Vector {
    const bytes = vectorData(vector, itemCount * Uint16Array.BYTES_PER_ELEMENT);
    // NOTE(review): the Uint16Array constructor throws a RangeError if byteOffset is not a
    // multiple of 2 — presumably vectorData returns suitably aligned data; confirm.
    return new DuckDBEnum2Vector(
      enumType,
      new Uint16Array(bytes.buffer, bytes.byteOffset, itemCount),
      DuckDBValidity.fromVector(vector, itemCount),
    );
  }

  /** The ENUM type this vector was created with. */
  public override get type(): DuckDBEnumType {
    return this.enumType;
  }

  /** Number of rows, taken from the length of the backing typed array. */
  public override get itemCount(): number {
    return this.items.length;
  }

  /**
   * @param itemIndex Row to read.
   * @returns `null` when the validity reports the row as not valid; otherwise the entry
   *   of `enumType.values` selected by the stored index.
   */
  public override getItem(itemIndex: number): string | null {
    if (!this.validity.itemValid(itemIndex)) {
      return null;
    }
    const dictionaryIndex = this.items[itemIndex];
    return this.enumType.values[dictionaryIndex];
  }

  /**
   * Returns a new vector over rows `[offset, offset + length)`.
   * The items are copied (`TypedArray.prototype.slice`); the validity is sliced from `offset`.
   */
  public override slice(offset: number, length: number): DuckDBEnum2Vector {
    const end = offset + length;
    const slicedItems = this.items.slice(offset, end);
    const slicedValidity = this.validity.slice(offset);
    return new DuckDBEnum2Vector(this.enumType, slicedItems, slicedValidity);
  }
}

/**
 * ENUM vector whose per-row entries are kept in a `Uint32Array`.
 *
 * Each stored entry is an index into `enumType.values`; `getItem` resolves that index
 * to the corresponding string.
 */
export class DuckDBEnum4Vector extends DuckDBVector<string> {
  /**
   * @param enumType ENUM type whose `values` array the stored entries index into.
   * @param items One entry per row.
   * @param validity Consulted by `getItem` to decide whether a row is null.
   */
  constructor(
    private readonly enumType: DuckDBEnumType,
    private readonly items: Uint32Array,
    private readonly validity: DuckDBValidity,
  ) {
    super();
  }

  /**
   * Creates a vector from a raw `duckdb_vector`.
   *
   * @param enumType ENUM type describing the vector.
   * @param vector Raw vector to read data and validity from.
   * @param itemCount Number of rows; four bytes are requested per row.
   */
  static fromRawVector(enumType: DuckDBEnumType, vector: ddb.duckdb_vector, itemCount: number): DuckDBEnum4Vector {
    const bytes = vectorData(vector, itemCount * Uint32Array.BYTES_PER_ELEMENT);
    // NOTE(review): the Uint32Array constructor throws a RangeError if byteOffset is not a
    // multiple of 4 — presumably vectorData returns suitably aligned data; confirm.
    return new DuckDBEnum4Vector(
      enumType,
      new Uint32Array(bytes.buffer, bytes.byteOffset, itemCount),
      DuckDBValidity.fromVector(vector, itemCount),
    );
  }

  /** The ENUM type this vector was created with. */
  public override get type(): DuckDBEnumType {
    return this.enumType;
  }

  /** Number of rows, taken from the length of the backing typed array. */
  public override get itemCount(): number {
    return this.items.length;
  }

  /**
   * @param itemIndex Row to read.
   * @returns `null` when the validity reports the row as not valid; otherwise the entry
   *   of `enumType.values` selected by the stored index.
   */
  public override getItem(itemIndex: number): string | null {
    if (!this.validity.itemValid(itemIndex)) {
      return null;
    }
    const dictionaryIndex = this.items[itemIndex];
    return this.enumType.values[dictionaryIndex];
  }

  /**
   * Returns a new vector over rows `[offset, offset + length)`.
   * The items are copied (`TypedArray.prototype.slice`); the validity is sliced from `offset`.
   */
  public override slice(offset: number, length: number): DuckDBEnum4Vector {
    const end = offset + length;
    const slicedItems = this.items.slice(offset, end);
    const slicedValidity = this.validity.slice(offset);
    return new DuckDBEnum4Vector(this.enumType, slicedItems, slicedValidity);
  }
}

export class DuckDBListVector<TValue = any> extends DuckDBVector<DuckDBVector<TValue>> {
private readonly listType: DuckDBListType;
Expand Down
33 changes: 26 additions & 7 deletions api/test/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import {
DuckDBDecimalType,
DuckDBDoubleType,
DuckDBDoubleVector,
DuckDBEnum1Vector,
DuckDBEnum2Vector,
DuckDBEnum4Vector,
DuckDBEnumType,
DuckDBFloatType,
DuckDBFloatVector,
Expand Down Expand Up @@ -62,6 +65,7 @@ import {
DuckDBTinyIntType,
DuckDBTinyIntVector,
DuckDBType,
DuckDBTypeId,
DuckDBUBigIntType,
DuckDBUBigIntVector,
DuckDBUHugeIntType,
Expand Down Expand Up @@ -429,8 +433,11 @@ describe('api', () => {
});
it('should support all data types', async () => {
await withConnection(async (connection) => {
const result = await connection.run('from test_all_types()');
const result = await connection.run('from test_all_types(use_large_enum=true)');
try {
const smallEnumValues = ['DUCK_DUCK_ENUM', 'GOOSE'];
const mediumEnumValues = Array.from({ length: 300 }).map((_, i) => `enum_${i}`);
const largeEnumValues = Array.from({ length: 70000 }).map((_, i) => `enum_${i}`);
assertColumns(result, [
{ name: 'bool', type: DuckDBBooleanType.instance },
{ name: 'tinyint', type: DuckDBTinyIntType.instance },
Expand Down Expand Up @@ -462,9 +469,9 @@ describe('api', () => {
{ name: 'varchar', type: DuckDBVarCharType.instance },
{ name: 'blob', type: DuckDBBlobType.instance },
{ name: 'bit', type: DuckDBBitType.instance },
{ name: 'small_enum', type: new DuckDBEnumType(['DUCK_DUCK_ENUM', 'GOOSE']) },
{ name: 'medium_enum', type: new DuckDBEnumType(Array.from({ length: 300 }).map((_, i) => `enum_${i}`)) },
{ name: 'large_enum', type: new DuckDBEnumType(['enum_0', 'enum_69999']) },
{ name: 'small_enum', type: new DuckDBEnumType(smallEnumValues, DuckDBTypeId.UTINYINT) },
{ name: 'medium_enum', type: new DuckDBEnumType(mediumEnumValues, DuckDBTypeId.USMALLINT) },
{ name: 'large_enum', type: new DuckDBEnumType(largeEnumValues, DuckDBTypeId.UINTEGER) },
{ name: 'int_array', type: new DuckDBListType(DuckDBIntegerType.instance) },
{ name: 'double_array', type: new DuckDBListType(DuckDBDoubleType.instance) },
{ name: 'date_array', type: new DuckDBListType(DuckDBDateType.instance) },
Expand Down Expand Up @@ -556,9 +563,21 @@ describe('api', () => {
DuckDBBitValue.fromString('10101'),
null,
]);
// TODO: ENUM (small)
// TODO: ENUM (medium)
// TODO: ENUM (large)
assertValues(chunk, 30, DuckDBEnum1Vector, [
smallEnumValues[0],
smallEnumValues[smallEnumValues.length - 1],
null,
]);
assertValues(chunk, 31, DuckDBEnum2Vector, [
mediumEnumValues[0],
mediumEnumValues[mediumEnumValues.length - 1],
null,
]);
assertValues(chunk, 32, DuckDBEnum4Vector, [
largeEnumValues[0],
largeEnumValues[largeEnumValues.length - 1],
null,
]);
assertNestedValues<DuckDBVector<number>, DuckDBListVector<number>>(chunk, 33, DuckDBListVector, [
(v, n) => assertVectorValues(v, [], n),
(v, n) => assertVectorValues(v, [42, 999, null, null, -42], n),
Expand Down
79 changes: 79 additions & 0 deletions test_enum.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import * as ddb from '.';

/**
 * Scratch script that exercises the low-level `ddb` bindings against an ENUM column.
 *
 * It opens an in-memory database, connects, selects one ENUM column from
 * `test_all_types()`, and logs what the bindings report: column/row counts, the
 * column's type id, the enum's internal type id and dictionary, and the copied
 * vector data for the first chunk.
 *
 * Any error thrown along the way is caught and written with `console.error`;
 * nothing is rethrown, so the returned promise always resolves.
 */
async function test() {
  try {
    const db = new ddb.duckdb_database();
    const open_state = await ddb.duckdb_open(':memory:', db);
    if (open_state !== ddb.duckdb_state.DuckDBSuccess) {
      throw new Error('Failed to open');
    }
    console.log('open successful');

    const con = new ddb.duckdb_connection();
    const connect_state = await ddb.duckdb_connect(db, con);
    if (connect_state !== ddb.duckdb_state.DuckDBSuccess) {
      throw new Error('Failed to connect');
    }
    console.log('connect successful');

    // Swap the active query to inspect a different enum column.
    // const sql = `from test_all_types() select small_enum`;
    const sql = `from test_all_types() select medium_enum`;
    // const sql = `from test_all_types() select large_enum`;

    const select_result = new ddb.duckdb_result();
    const select_query_state = await ddb.duckdb_query(con, sql, select_result);
    if (select_query_state !== ddb.duckdb_state.DuckDBSuccess) {
      throw new Error('Failed to query: ' + ddb.duckdb_result_error(select_result));
    }
    console.log('select successful');

    console.log('column count:', ddb.duckdb_column_count(select_result));
    console.log('column 0 name:', ddb.duckdb_column_name(select_result, 0));
    // Row count of the whole result, as opposed to the per-chunk size read below.
    const result_row_count = ddb.duckdb_row_count(select_result);
    console.log('row count:', result_row_count);

    const chunk = ddb.duckdb_result_get_chunk(select_result, 0);
    console.log('chunk column count:', ddb.duckdb_data_chunk_get_column_count(chunk));
    const chunk_row_count = ddb.duckdb_data_chunk_get_size(chunk);
    console.log('chunk size (row count):', chunk_row_count);

    const col0_vec = ddb.duckdb_data_chunk_get_vector(chunk, 0);

    const col0_log_type = ddb.duckdb_vector_get_column_type(col0_vec);
    // The DuckDB C API defines DUCKDB_TYPE_ENUM = 23 (24 is DUCKDB_TYPE_LIST).
    console.log('col 0 type id:', ddb.duckdb_get_type_id(col0_log_type)); // 23 = ENUM

    const enum_int_type = ddb.duckdb_enum_internal_type(col0_log_type);
    console.log('enum internal type id:', enum_int_type); // 6 = UTINYINT, 7 = USMALLINT, 8 = UINTEGER
    // Width of one stored enum entry, derived from the internal type id.
    let bytes_per_value: number | undefined;
    switch (enum_int_type) {
      case 6: // UTINYINT
        bytes_per_value = 1;
        break;
      case 7: // USMALLINT
        bytes_per_value = 2;
        break;
      case 8: // UINTEGER
        bytes_per_value = 4;
        break;
    }

    const enum_dict_size = ddb.duckdb_enum_dictionary_size(col0_log_type); // number of possible values of enum
    console.log('enum dict size:', enum_dict_size);
    const enum_dict_value_0 = ddb.duckdb_enum_dictionary_value(col0_log_type, 0);
    console.log('enum dict value 0:', enum_dict_value_0);

    const col0_vec_data = ddb.duckdb_vector_get_data(col0_vec);
    if (bytes_per_value !== undefined) {
      const col0_vec_buffer = ddb.copy_buffer(col0_vec_data, chunk_row_count * bytes_per_value);
      console.log('buffer:', col0_vec_buffer);
    } else {
      // Report the skip instead of silently printing nothing for an unrecognised width.
      console.warn('unrecognised enum internal type id; skipping buffer dump:', enum_int_type);
    }
  } catch (e) {
    console.error(e);
  }
}

test();

0 comments on commit b1e03ab

Please sign in to comment.