Skip to content

Commit

Permalink
Merge pull request #66 from jraymakers/jray/refactor-3
Browse files Browse the repository at this point in the history
additional refactor work (3rd)
  • Loading branch information
hannes authored Mar 25, 2024
2 parents bbf8431 + 137583f commit a9b167f
Show file tree
Hide file tree
Showing 9 changed files with 523 additions and 26 deletions.
2 changes: 1 addition & 1 deletion api/src/DuckDBLogicalType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ export class DuckDBEnumLogicalType extends DuckDBLogicalType {
return ddb.duckdb_enum_internal_type(this.logical_type) as unknown as DuckDBTypeId;
}
public override asType(): DuckDBEnumType {
return new DuckDBEnumType(this.values());
return new DuckDBEnumType(this.values(), this.internalTypeId);
}
}

Expand Down
4 changes: 3 additions & 1 deletion api/src/DuckDBType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,11 @@ export class DuckDBTimestampNanosecondsType extends BaseDuckDBType {

export class DuckDBEnumType extends BaseDuckDBType {
public readonly values: readonly string[];
public constructor(values: readonly string[]) {
public readonly internalTypeId: DuckDBTypeId;
public constructor(values: readonly string[], internalTypeId: DuckDBTypeId) {
super(DuckDBTypeId.ENUM);
this.values = values;
this.internalTypeId = internalTypeId;
}
}

Expand Down
280 changes: 272 additions & 8 deletions api/src/DuckDBVector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ import * as ddb from '../..';
import { DuckDBLogicalType } from './DuckDBLogicalType';
import {
DuckDBBigIntType,
DuckDBBitType,
DuckDBBlobType,
DuckDBBooleanType,
DuckDBDateType,
DuckDBDecimalType,
DuckDBDoubleType,
DuckDBEnumType,
DuckDBFloatType,
DuckDBHugeIntType,
DuckDBIntegerType,
Expand All @@ -17,6 +19,7 @@ import {
DuckDBSmallIntType,
DuckDBStructEntryType,
DuckDBStructType,
DuckDBTimeTZType,
DuckDBTimeType,
DuckDBTimestampMillisecondsType,
DuckDBTimestampNanosecondsType,
Expand Down Expand Up @@ -316,8 +319,21 @@ export abstract class DuckDBVector<T> {
return DuckDBTimestampMillisecondsVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.TIMESTAMP_NS:
return DuckDBTimestampNanosecondsVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.ENUM: // variable: Uint8, Uint16, Uint32, Uint64
throw new Error('not yet implemented');
case DuckDBTypeId.ENUM:
if (vectorType instanceof DuckDBEnumType) {
const { internalTypeId } = vectorType;
switch (internalTypeId) {
case DuckDBTypeId.UTINYINT:
return DuckDBEnum1Vector.fromRawVector(vectorType, vector, itemCount);
case DuckDBTypeId.USMALLINT:
return DuckDBEnum2Vector.fromRawVector(vectorType, vector, itemCount);
case DuckDBTypeId.UINTEGER:
return DuckDBEnum4Vector.fromRawVector(vectorType, vector, itemCount);
default:
throw new Error(`unsupported ENUM internal type: ${internalTypeId}`);
}
}
throw new Error('DuckDBType has ENUM type id but is not an instance of DuckDBEnumType');
case DuckDBTypeId.LIST:
if (vectorType instanceof DuckDBListType) {
return DuckDBListVector.fromRawVector(vectorType, vector, itemCount);
Expand All @@ -340,10 +356,10 @@ export abstract class DuckDBVector<T> {
return DuckDBUnionVector.fromRawVector(vectorType, vector, itemCount);
}
throw new Error('DuckDBType has UNION type id but is not an instance of DuckDBUnionType');
case DuckDBTypeId.BIT: // binary
throw new Error('not yet implemented');
case DuckDBTypeId.BIT:
return DuckDBBitVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.TIME_TZ:
throw new Error('not yet implemented');
return DuckDBTimeTZVector.fromRawVector(vector, itemCount);
case DuckDBTypeId.TIMESTAMP_TZ:
return DuckDBTimestampVector.fromRawVector(vector, itemCount);
default:
Expand Down Expand Up @@ -1187,7 +1203,95 @@ export class DuckDBTimestampNanosecondsVector extends DuckDBVector<bigint> {
}
}

// TODO: ENUM
export class DuckDBEnum1Vector extends DuckDBVector<string> {
private readonly enumType: DuckDBEnumType;
private readonly items: Uint8Array;
private readonly validity: DuckDBValidity;
constructor(enumType: DuckDBEnumType, items: Uint8Array, validity: DuckDBValidity) {
super();
this.enumType = enumType;
this.items = items
this.validity = validity;
}
static fromRawVector(enumType: DuckDBEnumType, vector: ddb.duckdb_vector, itemCount: number): DuckDBEnum1Vector {
const data = vectorData(vector, itemCount);
const items = new Uint8Array(data.buffer, data.byteOffset, itemCount);
const validity = DuckDBValidity.fromVector(vector, itemCount);
return new DuckDBEnum1Vector(enumType, items, validity);
}
public override get type(): DuckDBEnumType {
return this.enumType;
}
public override get itemCount(): number {
return this.items.length;
}
public override getItem(itemIndex: number): string | null {
return this.validity.itemValid(itemIndex) ? this.enumType.values[this.items[itemIndex]] : null;
}
public override slice(offset: number, length: number): DuckDBEnum1Vector {
return new DuckDBEnum1Vector(this.enumType, this.items.slice(offset, offset + length), this.validity.slice(offset));
}
}

export class DuckDBEnum2Vector extends DuckDBVector<string> {
private readonly enumType: DuckDBEnumType;
private readonly items: Uint16Array;
private readonly validity: DuckDBValidity;
constructor(enumType: DuckDBEnumType, items: Uint16Array, validity: DuckDBValidity) {
super();
this.enumType = enumType;
this.items = items
this.validity = validity;
}
static fromRawVector(enumType: DuckDBEnumType, vector: ddb.duckdb_vector, itemCount: number): DuckDBEnum2Vector {
const data = vectorData(vector, itemCount * 2);
const items = new Uint16Array(data.buffer, data.byteOffset, itemCount);
const validity = DuckDBValidity.fromVector(vector, itemCount);
return new DuckDBEnum2Vector(enumType, items, validity);
}
public override get type(): DuckDBEnumType {
return this.enumType;
}
public override get itemCount(): number {
return this.items.length;
}
public override getItem(itemIndex: number): string | null {
return this.validity.itemValid(itemIndex) ? this.enumType.values[this.items[itemIndex]] : null;
}
public override slice(offset: number, length: number): DuckDBEnum2Vector {
return new DuckDBEnum2Vector(this.enumType, this.items.slice(offset, offset + length), this.validity.slice(offset));
}
}

export class DuckDBEnum4Vector extends DuckDBVector<string> {
private readonly enumType: DuckDBEnumType;
private readonly items: Uint32Array;
private readonly validity: DuckDBValidity;
constructor(enumType: DuckDBEnumType, items: Uint32Array, validity: DuckDBValidity) {
super();
this.enumType = enumType;
this.items = items
this.validity = validity;
}
static fromRawVector(enumType: DuckDBEnumType, vector: ddb.duckdb_vector, itemCount: number): DuckDBEnum4Vector {
const data = vectorData(vector, itemCount * 4);
const items = new Uint32Array(data.buffer, data.byteOffset, itemCount);
const validity = DuckDBValidity.fromVector(vector, itemCount);
return new DuckDBEnum4Vector(enumType, items, validity);
}
public override get type(): DuckDBEnumType {
return this.enumType;
}
public override get itemCount(): number {
return this.items.length;
}
public override getItem(itemIndex: number): string | null {
return this.validity.itemValid(itemIndex) ? this.enumType.values[this.items[itemIndex]] : null;
}
public override slice(offset: number, length: number): DuckDBEnum4Vector {
return new DuckDBEnum4Vector(this.enumType, this.items.slice(offset, offset + length), this.validity.slice(offset));
}
}

export class DuckDBListVector<TValue = any> extends DuckDBVector<DuckDBVector<TValue>> {
private readonly listType: DuckDBListType;
Expand Down Expand Up @@ -1449,6 +1553,166 @@ export class DuckDBUnionVector extends DuckDBVector<DuckDBUnionAlternative> {
}
}

// TODO: BIT
export class DuckDBBitValue {
private readonly data: Uint8Array;

constructor(data: Uint8Array) {
this.data = data;
}

public static fromString(str: string): DuckDBBitValue {
if (!/^[01]*$/.test(str)) {
throw new Error(`input string must only contain '0's and '1's`);
}

const byteCount = Math.ceil(str.length / 8) + 1;
const paddingBitCount = (8 - (str.length % 8)) % 8;

const data = new Uint8Array(byteCount);
let byteIndex = 0;

// first byte contains count of padding bits
data[byteIndex++] = paddingBitCount;

let byte = 0;
let bitIndex = 0;

// padding consists of 1s in MSB of second byte
while (bitIndex < paddingBitCount) {
byte <<= 1;
byte |= 1;
bitIndex++;
}

let charIndex = 0;

while (byteIndex < byteCount) {
while (bitIndex < 8) {
byte <<= 1;
if (str[charIndex++] === '1') {
byte |= 1;
}
bitIndex++;
}
data[byteIndex++] = byte;
byte = 0;
bitIndex = 0;
}

return new DuckDBBitValue(data);
}

private padding(): number {
return this.data[0];
}

public length(): number {
return (this.data.length - 1) * 8 - this.padding();
}

public getBool(index: number): boolean {
const dataIndex = Math.floor(index / 8) + 1;
return (this.data[dataIndex] & (1 << (index % 8))) !== 0;
}

public getBit(index: number): 0 | 1 {
return this.getBool(index) ? 1 : 0;
}

public toString(): string {
const chars = Array.from<string>({ length: this.length() });
for (let i = 0; i < this.length(); i++) {
chars[i] = this.getBool(i) ? '1' : '0';
}
return chars.join('');
}
}

export class DuckDBBitVector extends DuckDBVector<DuckDBBitValue> {
private readonly dataView: DataView;
private readonly validity: DuckDBValidity;
private readonly _itemCount: number;
constructor(dataView: DataView, validity: DuckDBValidity, itemCount: number) {
super();
this.dataView = dataView;
this.validity = validity;
this._itemCount = itemCount;
}
static fromRawVector(vector: ddb.duckdb_vector, itemCount: number): DuckDBBitVector {
const data = vectorData(vector, itemCount * 16);
const dataView = new DataView(data.buffer, data.byteOffset, data.byteLength);
const validity = DuckDBValidity.fromVector(vector, itemCount);
return new DuckDBBitVector(dataView, validity, itemCount);
}
public override get type(): DuckDBBitType {
return DuckDBBitType.instance;
}
public override get itemCount(): number {
return this._itemCount;
}
public override getItem(itemIndex: number): DuckDBBitValue | null {
if (!this.validity.itemValid(itemIndex)) {
return null;
}
const bytes = getStringBytes(this.dataView, itemIndex * 16);
return bytes ? new DuckDBBitValue(bytes) : null;
}
public override slice(offset: number, length: number): DuckDBBitVector {
return new DuckDBBitVector(
new DataView(this.dataView.buffer, offset * 16, length * 16),
this.validity.slice(offset),
length,
);
}
}

export class DuckDBTimeTZValue {
/** Ranges from 0 to 86400000000 (= 24 * 60 * 60 * 1000 * 1000) */
public readonly microseconds: number;

// TODO: TIME_TZ
/** In seconds, ranges from -57599 to 57599 (= 16 * 60 * 60 - 1) */
public readonly offset: number;

public constructor(microseconds: number, offset: number) {
this.microseconds = microseconds;
this.offset = offset;
}

public static TIME_BITS = 40;
public static OFFSET_BITS = 24;
public static MAX_OFFSET = 16 * 60 * 60 - 1; // ±15:59:59 = 57599 seconds

public static fromBits(bits: bigint): DuckDBTimeTZValue {
const microseconds = Number(BigInt.asUintN(DuckDBTimeTZValue.TIME_BITS, bits >> BigInt(DuckDBTimeTZValue.OFFSET_BITS)));
const offset = DuckDBTimeTZValue.MAX_OFFSET - Number(BigInt.asUintN(DuckDBTimeTZValue.OFFSET_BITS, bits));
return new DuckDBTimeTZValue(microseconds, offset);
}
}

export class DuckDBTimeTZVector extends DuckDBVector<DuckDBTimeTZValue> {
private readonly items: BigUint64Array;
private readonly validity: DuckDBValidity;
constructor(items: BigUint64Array, validity: DuckDBValidity) {
super();
this.items = items;
this.validity = validity;
}
static fromRawVector(vector: ddb.duckdb_vector, itemCount: number): DuckDBTimeTZVector {
const data = vectorData(vector, itemCount * BigUint64Array.BYTES_PER_ELEMENT);
const items = new BigUint64Array(data.buffer, data.byteOffset, itemCount);
const validity = DuckDBValidity.fromVector(vector, itemCount);
return new DuckDBTimeTZVector(items, validity);
}
public override get type(): DuckDBTimeTZType {
return DuckDBTimeTZType.instance;
}
public override get itemCount(): number {
return this.items.length;
}
public override getItem(itemIndex: number): DuckDBTimeTZValue | null {
return this.validity.itemValid(itemIndex) ? DuckDBTimeTZValue.fromBits(this.items[itemIndex]) : null;
}
public override slice(offset: number, length: number): DuckDBTimeTZVector {
return new DuckDBTimeTZVector(this.items.slice(offset, offset + length), this.validity.slice(offset));
}
}
Loading

0 comments on commit a9b167f

Please sign in to comment.