Skip to content

Commit

Permalink
[arrow] Add static element field vector converter (#4033)
Browse files Browse the repository at this point in the history
  • Loading branch information
leaves12138 authored Aug 22, 2024
1 parent 88f7c71 commit 618b5f9
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.paimon.arrow.writer.ArrowFieldWriterFactoryVisitor;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
Expand Down Expand Up @@ -73,6 +74,14 @@ public static VectorSchemaRoot createVectorSchemaRoot(
return VectorSchemaRoot.create(new Schema(fields), allocator);
}

/**
 * Creates an empty Arrow {@link FieldVector} for a single Paimon {@link DataField}.
 *
 * <p>NOTE(review): {@code toLowerCase()} uses the JVM default locale here; confirm whether a
 * fixed locale is wanted for field names.
 *
 * @param dataField the Paimon field whose name and type describe the vector
 * @param allocator the allocator handed to the created vector
 * @param allowUpperCase when {@code false}, the field name is lower-cased before it is used as
 *     the Arrow field name; when {@code true}, the name is used unchanged
 * @return a new vector built from the converted Arrow field
 */
public static FieldVector createVector(
        DataField dataField, BufferAllocator allocator, boolean allowUpperCase) {
    String fieldName = dataField.name();
    if (!allowUpperCase) {
        fieldName = fieldName.toLowerCase();
    }
    Field arrowField = toArrowField(fieldName, dataField.type());
    return arrowField.createVector(allocator);
}

private static Field toArrowField(String fieldName, DataType dataType) {
FieldType fieldType = dataType.accept(ArrowFieldTypeConversion.ARROW_FIELD_TYPE_VISITOR);
List<Field> children = null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.arrow.vector;

import org.apache.paimon.arrow.writer.ArrowFieldWriter;
import org.apache.paimon.arrow.writer.ArrowFieldWriterFactoryVisitor;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.types.DataField;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.FieldVector;

import static org.apache.paimon.arrow.ArrowUtils.createVector;

/**
 * Convert a static value to a FieldVector.
 *
 * <p>The generator owns one {@link FieldVector} and fills it with the same value in every slot.
 * Slots are written lazily: {@link #get(int)} only writes the slots that have not been written
 * by an earlier call, so repeated calls with the same or a smaller row count do no extra writes.
 *
 * <p>The vector returned by {@link #get(int)} is always the same instance and is released by
 * {@link #close()}.
 */
public class OneElementFieldVectorGenerator implements AutoCloseable {

    /** Single-field row holding the value that is repeated in every slot. */
    private final GenericRow row;

    private final FieldVector fieldVector;
    private final ArrowFieldWriter writer;

    /** Number of leading slots of {@link #fieldVector} that have already been written. */
    private int pos = 0;

    /**
     * Creates the backing vector and the writer matching the field's type.
     *
     * @param bufferAllocator allocator used to create the backing vector
     * @param dataField field describing the name and type of the vector; the name is lower-cased
     *     because the vector is created with {@code allowUpperCase = false}
     * @param value the value to repeat in every slot
     */
    public OneElementFieldVectorGenerator(
            BufferAllocator bufferAllocator, DataField dataField, Object value) {
        fieldVector = createVector(dataField, bufferAllocator, false);
        writer =
                dataField
                        .type()
                        .accept(ArrowFieldWriterFactoryVisitor.INSTANCE)
                        .create(fieldVector);
        this.row = new GenericRow(1);
        row.setField(0, value);
    }

    /**
     * Returns the backing vector with its value count set to {@code rowCount}.
     *
     * <p>NOTE(review): slots are never rewritten after a smaller {@code rowCount} was requested;
     * confirm that the underlying Arrow vector keeps previously written slots valid when its
     * value count shrinks and later grows again.
     *
     * @param rowCount number of rows the returned vector must expose; must not be negative
     * @return the shared vector instance owned by this generator
     * @throws IllegalArgumentException if {@code rowCount} is negative
     */
    FieldVector get(int rowCount) {
        // Reject invalid counts before touching the shared vector so it is never left with a
        // negative value count.
        if (rowCount < 0) {
            throw new IllegalArgumentException(
                    "rowCount must not be negative, but was " + rowCount);
        }
        if (rowCount > pos) {
            // Only the slots beyond the already written prefix need to be filled.
            for (int i = pos; i < rowCount; i++) {
                writer.write(i, row, 0);
            }
            pos = rowCount;
        }
        fieldVector.setValueCount(rowCount);
        return fieldVector;
    }

    /** Releases the backing vector. */
    @Override
    public void close() {
        fieldVector.close();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.arrow.vector;

import org.apache.paimon.arrow.reader.ArrowBatchReader;
import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.util.function.Function;

/** Test for {@link OneElementFieldVectorGenerator}. */
public class OneElementFieldVectorGeneratorTest {

    @Test
    public void testFunction() {
        assertEveryRowHoldsValue(
                new DataField(0, "id", DataTypes.STRING()),
                BinaryString.fromString("aklsdfjaklfjasklfd"),
                10000,
                r -> r.getString(0));

        assertEveryRowHoldsValue(
                new DataField(0, "id", DataTypes.INT()), 10086, 10000, r -> r.getInt(0));

        assertEveryRowHoldsValue(
                new DataField(0, "id", DataTypes.TIMESTAMP(6)),
                Timestamp.fromEpochMillis(10086),
                100000,
                r -> r.getTimestamp(0, 6));
    }

    /**
     * Generates a vector of {@code rowCount} copies of {@code value}, reads it back through
     * {@link ArrowBatchReader} and checks both the number of rows and the content of every row.
     *
     * @param dataField field describing the generated vector
     * @param value the static value the generator repeats
     * @param rowCount number of rows to generate and expect back
     * @param getter reads field 0 from a row; applied to both the expected and the actual rows
     */
    private static void assertEveryRowHoldsValue(
            DataField dataField,
            Object value,
            int rowCount,
            Function<InternalRow, Object> getter) {
        GenericRow expectedRow = new GenericRow(1);
        expectedRow.setField(0, value);
        Object expected = getter.apply(expectedRow);

        // Closing the generator releases its vector before the allocator is closed.
        try (RootAllocator rootAllocator = new RootAllocator();
                OneElementFieldVectorGenerator generator =
                        new OneElementFieldVectorGenerator(rootAllocator, dataField, value)) {
            FieldVector fieldVector = generator.get(rowCount);
            Assertions.assertThat(fieldVector.getValueCount()).isEqualTo(rowCount);

            ArrowBatchReader reader =
                    new ArrowBatchReader(new RowType(Arrays.asList(dataField)));
            Iterable<InternalRow> rows =
                    reader.readBatch(new VectorSchemaRoot(Arrays.asList(fieldVector)));

            // Count the rows so an empty result cannot make the per-row checks pass vacuously.
            int rowsRead = 0;
            for (InternalRow actualRow : rows) {
                Assertions.assertThat(getter.apply(actualRow)).isEqualTo(expected);
                rowsRead++;
            }
            Assertions.assertThat(rowsRead).isEqualTo(rowCount);
        }
    }
}

0 comments on commit 618b5f9

Please sign in to comment.