Skip to content

Commit

Permalink
1
Browse files Browse the repository at this point in the history
  • Loading branch information
xinyiZzz committed Dec 11, 2024
1 parent e8adcd7 commit 02905c8
Show file tree
Hide file tree
Showing 11 changed files with 911 additions and 0 deletions.
35 changes: 35 additions & 0 deletions samples/arrow-flight-sql/java/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# How to use:

1. mvn clean install -U
2. mvn package
3. java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED -cp java-0.1.jar doris.arrowflight.demo.Main "sql" "fe_ip" "fe_arrow_flight_port" "fe_query_port"

# What can this demo do:

This is a java demo for doris arrow flight sql, you can use this to test various connection
methods for sending queries to the doris arrow flight server, help you understand how to use arrow flight sql
and test performance. You should install maven prior to run this demo.

# Performance test

Section 6.2 of https://github.com/apache/doris/issues/25514 is the performance test
results of the Java Arrow Flight SQL query Doris.
168 changes: 168 additions & 0 deletions samples/arrow-flight-sql/java/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>doris.arrowflight.demo</groupId>
<artifactId>java</artifactId>
<version>0.1</version>

<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!--
Notice !!! Arrow and ADBC versions cannot be combined arbitrarily.
1. For ADBC 0.15.0, please use Arrow 18.0.0 and after, not compatible with previous versions of Arrow.
2. Try not to use Arrow 17.0.0, may get the following error:
```
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 2 out of bounds for length 2
at com.google.protobuf.GeneratedMessageV3$FieldAccessorTable.ensureFieldAccessorsInitialized(GeneratedMessageV3.java:2094)
at org.apache.arrow.flight.sql.impl.FlightSql$ActionCreatePreparedStatementRequest.internalGetFieldAccessorTable(FlightSql.java:16332)
at com.google.protobuf.GeneratedMessageV3.getDescriptorForType(GeneratedMessageV3.java:139)
at com.google.protobuf.Any.pack(Any.java:61)
at org.apache.arrow.flight.sql.FlightSqlClient.prepare(FlightSqlClient.java:767)
at org.apache.arrow.flight.sql.FlightSqlClient.prepare(FlightSqlClient.java:746)
at org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler.prepare(ArrowFlightSqlClientHandler.java:310)
```
similar issue: https://github.com/protocolbuffers/protobuf/issues/15762
3. A more stable version is Arrow 15.0.2 and ADBC 0.12.0, but we always hope to embrace the future with new versions!
-->
<arrow.version>18.1.0</arrow.version>
<adbc.version>0.15.0</adbc.version>
<log4j.version>2.17.1</log4j.version>
</properties>
<dependencies>
<!-- If Maven cannot find the Arrow or Adbc version of the dependency, update the Maven central repository.
Arrow and Adbc are updated frequently, and we always try to use the latest version as much as possible. -->
<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-driver-jdbc</artifactId>
<version>${adbc.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-driver-flight-sql</artifactId>
<version>${adbc.version}</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.33</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-core</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-core</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-sql</artifactId>
<version>${arrow.version}</version>
</dependency>

<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-core</artifactId>
<version>${adbc.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-driver-manager</artifactId>
<version>${adbc.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow.adbc</groupId>
<artifactId>adbc-sql</artifactId>
<version>${adbc.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-sql-jdbc-core</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>RELEASE</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Java Compiler -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.10.1</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
</transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/

package doris.arrowflight.demo;

import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowReader;
import org.apache.arrow.vector.types.pojo.Field;

import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;


/**
* Iterate over each batch in ArrowReader.
* ArrowReader is the iterator returned by ADBC Client when executing a query.
*/
public class ArrowBatchReader {

@FunctionalInterface
public interface LoadArrowBatchFunc {
void load(ArrowReader reader) throws IOException;
}

/**
* Print one row in VectorSchemaRoot, if the output format is incorrect, may need to modify
* the output method of different types of ValueVector.
*/
public static void printRow(VectorSchemaRoot root, int rowIndex) {
if (root == null || rowIndex < 0 || rowIndex >= root.getRowCount()) {
System.out.println("Invalid row index: " + rowIndex);
return;
}

System.out.print("> ");
for (Field field : root.getSchema().getFields()) {
ValueVector vector = root.getVector(field.getName());
if (vector != null) {
if (vector instanceof org.apache.arrow.vector.DateDayVector) {
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
int dayOffset = ((org.apache.arrow.vector.DateDayVector) vector).get(rowIndex);
LocalDate date = LocalDate.ofEpochDay(dayOffset);
System.out.print(date.format(formatter));
} else if (vector instanceof org.apache.arrow.vector.BitVector) {
System.out.print(((org.apache.arrow.vector.BitVector) vector).get(rowIndex) == 1);
} else {
// other types field
System.out.print(vector.getObject(rowIndex).toString());
}
System.out.print(", ");
}
}
System.out.println();
}

/**
* Iterate over each batch in ArrowReader with the least cost, only record the number of rows and batches,
* usually used to test performance.
*/
public static LoadArrowBatchFunc loadArrowBatch = reader -> {
int rowCount = 0;
int batchCount = 0;
while (reader.loadNextBatch()) {
VectorSchemaRoot root = reader.getVectorSchemaRoot();
if (batchCount == 0) {
System.out.println("> " + root.getSchema().toString());
printRow(root, 1); // only print first line
}
rowCount += root.getRowCount();
batchCount += 1;
}
System.out.println("> batchCount: " + batchCount + ", rowCount: " + rowCount);
};

/**
* Iterate over each batch in ArrowReader and convert the batch to String, this will take more time.
*/
public static LoadArrowBatchFunc loadArrowBatchToString = reader -> {
int rowCount = 0;
List<String> result = new ArrayList<>();
while (reader.loadNextBatch()) {
VectorSchemaRoot root = reader.getVectorSchemaRoot();
if (result.size() == 0) {
System.out.println("> " + root.getSchema().toString());
printRow(root, 0); // only print first line
}
result.add(root.contentToTSVString());
rowCount += root.getRowCount();
}
System.out.println("> batchCount: " + result.size() + ", rowCount: " + rowCount);
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/

package doris.arrowflight.demo;

public class Configuration {
public String sql = ""; // require
public String ip = "127.0.0.1"; // require
public String arrowFlightPort = "9090"; // require
public String mysqlPort = "9030";
public int retryTimes = 2; // The first execution is cold run
public String user = "root";
public String password = "";

Configuration(String[] args) {
for (int i = 0; i < args.length; i++) {
switch (i) {
case 0 -> sql = args[i];
case 1 -> ip = args[i];
case 2 -> arrowFlightPort = args[i];
case 3 -> mysqlPort = args[i];
case 4 -> retryTimes = Integer.parseInt(args[i]);
case 5 -> user = args[i];
case 6 -> password = args[i];
}
}
}
}
Loading

0 comments on commit 02905c8

Please sign in to comment.