Skip to content

Commit

Permalink
Distributed execution support of Hive with vineyard (#1624)
Browse files Browse the repository at this point in the history
- Support distributed execution for hive with vineyard.
- Build the docker cluster with scripts.
- The hdfs related stuffs have been removed.

Currently, the vineyard jar cannot run directly on hive because of
dependency conflicts. You can run it temporarily by reverting to an
older version of guava dependent by vineyard.

This problem will be fixed in #1682

Signed-off-by: vegetableysm <[email protected]>
  • Loading branch information
vegetableysm authored Dec 28, 2023
1 parent 5e3ba47 commit 2955b60
Show file tree
Hide file tree
Showing 34 changed files with 1,461 additions and 430 deletions.
9 changes: 3 additions & 6 deletions .github/workflows/java-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -158,23 +158,20 @@ jobs:
- name: Start vineyard server for hive test
run: |
./build/bin/vineyardd --socket=./build/metastore/vineyard.sock -rpc_socket_port=18888 --etcd_endpoint="0.0.0.0:2382" &
./build/bin/vineyardd --socket=./build/hiveserver/vineyard.sock -rpc_socket_port=18889 --etcd_endpoint="0.0.0.0:2382" &
./build/bin/vineyardd --socket=./build/vineyard/vineyard.sock -rpc_socket_port=18888 --etcd_endpoint="0.0.0.0:2382" &
- name: Builder hive docker
run: |
# build hive docker
pushd java/hive/docker
./build.sh
popd
# start hive docker
pushd java/hive
docker-compose up -d --force-recreate --remove-orphans
docker-compose -f docker-compose.yaml up -d --force-recreate --remove-orphans
popd
# wait for hive docker ready
sleep 120
sleep 60
- name: Setup tmate session
if: false
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ cmake-build-debug
*.bin
*.whl

# hive sql work directory
/java/hive/distributed/docker/mysql/conf/
/java/hive/distributed/docker/mysql/data/

# coredump
core.*.*

Expand Down
18 changes: 13 additions & 5 deletions java/core/src/main/java/io/v6d/core/client/Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.Map;
import lombok.val;

/** Vineyard IPC client. */
public abstract class Client {
Expand All @@ -41,15 +42,20 @@ public abstract class Client {
public abstract ObjectMeta createMetaData(ObjectMeta metadata) throws VineyardException;

public ObjectMeta getMetaData(ObjectID id) throws VineyardException {
return this.getMetaData(id, false);
return this.getMetaData(id, false, false, false);
}

public ObjectMeta getMetaData(ObjectID id, boolean sync_remote) throws VineyardException {
return this.getMetaData(id, sync_remote, false);
public ObjectMeta getMetaData(ObjectID id, boolean fetch) throws VineyardException {
return this.getMetaData(id, fetch, false);
}

public abstract ObjectMeta getMetaData(ObjectID id, boolean sync_remote, boolean wait)
throws VineyardException;
public ObjectMeta getMetaData(ObjectID id, boolean fetch, boolean sync_remote)
throws VineyardException {
return this.getMetaData(id, fetch, sync_remote, false);
}

public abstract ObjectMeta getMetaData(
ObjectID id, boolean fetch, boolean sync_remote, boolean wait) throws VineyardException;

public abstract Collection<ObjectMeta> listMetaData(String pattern) throws VineyardException;

Expand Down Expand Up @@ -79,6 +85,8 @@ public void delete(Collection<ObjectID> ids) throws VineyardException {
this.delete(ids, false, true);
}

public abstract ObjectID migrateObject(ObjectID id) throws VineyardException;

public abstract void putName(ObjectID id, String name) throws VineyardException;

public abstract ObjectID getName(String name, boolean wait) throws VineyardException;
Expand Down
8 changes: 8 additions & 0 deletions java/core/src/main/java/io/v6d/core/client/Context.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import com.google.common.base.Stopwatch;
import com.google.common.base.StopwatchContext;
import io.v6d.core.common.util.InstanceID;
import io.v6d.core.common.util.VineyardException;
import java.text.SimpleDateFormat;

Expand All @@ -34,6 +35,13 @@ public static synchronized IPCClient getClient() throws VineyardException {
return client;
}

public static synchronized InstanceID getInstanceID() {
if (client == null) {
return null;
}
return client.getInstanceId();
}

public static void println(String message) {
System.err.printf("[%s] %s\n", formatter.get().format(System.currentTimeMillis()), message);
}
Expand Down
24 changes: 22 additions & 2 deletions java/core/src/main/java/io/v6d/core/client/IPCClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,18 @@ public boolean connected() {
}

@Override
public synchronized ObjectMeta getMetaData(ObjectID id, boolean sync_remote, boolean wait)
public synchronized ObjectMeta getMetaData(
ObjectID id, boolean migrate, boolean sync_remote, boolean wait)
throws VineyardException {
ObjectMeta meta = getMetaDataInternal(id, sync_remote, wait);
if (meta.getInstanceId().compareTo(this.instanceId) != 0 && migrate && (!meta.isGlobal())) {
return getMetaDataInternal(this.migrateObject(id), sync_remote, wait);
}
return meta;
}

private synchronized ObjectMeta getMetaDataInternal(
ObjectID id, boolean sync_remote, boolean wait) throws VineyardException {
val root = mapper.createObjectNode();
GetDataRequest.put(root, id, sync_remote, wait);
this.doWrite(root);
Expand All @@ -140,6 +150,16 @@ public synchronized ObjectMeta getMetaData(ObjectID id, boolean sync_remote, boo
return meta;
}

@Override
public synchronized ObjectID migrateObject(ObjectID id) throws VineyardException {
val root = mapper.createObjectNode();
MigrateObjectRequest.put(root, id);
this.doWrite(root);
val reply = new MigrateObjectReply();
reply.get(this.doReadJson());
return reply.getObjectID();
}

@Override
public Collection<ObjectMeta> listMetaData(String pattern) throws VineyardException {
return listMetaData(pattern, false);
Expand Down Expand Up @@ -289,7 +309,7 @@ public synchronized ObjectID pullStreamChunkID(final ObjectID id) throws Vineyar
@Override
public synchronized ObjectMeta pullStreamChunkMeta(final ObjectID id) throws VineyardException {
val chunk = this.pullStreamChunkID(id);
return this.getMetaData(chunk, false);
return this.getMetaData(chunk);
}

@Override
Expand Down
34 changes: 34 additions & 0 deletions java/core/src/main/java/io/v6d/core/common/util/Protocol.java
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,40 @@ public void get(JsonNode root) throws VineyardException {
}
}

public static class MigrateObjectRequest extends Request {
public static void put(
ObjectNode root,
ObjectID id,
boolean local,
boolean isStream,
String peer,
String peerRpcEndpoint) {
root.put("type", "migrate_object_request");
root.put("object_id", id.value());
root.put("local", local);
root.put("is_stream", isStream);
root.put("peer", peer);
root.put("peer_rpc_endpoint", peerRpcEndpoint);
}

public static void put(ObjectNode root, ObjectID id) {
root.put("type", "migrate_object_request");
root.put("object_id", id.value());
}
}

@Data
@EqualsAndHashCode(callSuper = false)
public static class MigrateObjectReply extends Reply {
private ObjectID objectID;

@Override
public void get(JsonNode root) throws VineyardException {
check(root, "migrate_object_reply");
objectID = new ObjectID(JSON.getLong(root, "object_id"));
}
}

public static class PutNameRequest extends Request {
public static void put(ObjectNode root, ObjectID id, String name) {
root.put("type", "put_name_request");
Expand Down
100 changes: 98 additions & 2 deletions java/hive/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ Using docker to launch the hive server:

.. code:: bash
docker-compose up -d --force-recreate --remove-orphans
docker-compose -f ./docker/docker-compose.yaml up -d --force-recreate --remove-orphans
If the result query is large, you may need to increase the memory of the hive server (e.g. Set max memory to 8G):

.. code:: bash
docker-compose up -d -e SERVICE_OPTS="-Xmx8G" --force-recreate --remove-orphans
docker-compose -f ./docker/docker-compose.yaml up -d -e SERVICE_OPTS="-Xmx8G" --force-recreate --remove-orphans
Connecting to the hive server:

Expand Down Expand Up @@ -290,3 +290,99 @@ import org.apache.spark.sql.SparkSession
sc.stop()
Refer to `Spark/Hive <https://spark.apache.org/docs/latest/sql-data-sources-hive-tables.html>`_ for detailed documentation.
Build Hive Docker Image with Hadoop
-----------------
### Prepare vineyard jars
```bash
# Currently, the vineyard jar cannot run directly on hive because of
# dependency conflicts. You can run it temporarily by reverting to an
# older version of guava (such as 14.0.1) dependent by vineyard.
# This problem will be fixed in the future.
cd v6d/java
mvn clean package
```

### Build docker images
```bash
cd v6d/java/hive/distributed
./build.sh
```

### Create network
```bash
docker network create hadoop-network
```

### Start sql server for hive metastore
```bash
cd v6d/java/hive/docker/dependency/mysql
docker-compose -f mysql-compose.yaml up -d
# You can change the password in mysql-compose.yaml and hive-site.xml
```

### Run hadoop & hive docker images
```bash
cd v6d/java/hive/docker
docker-compose -f docker-compose-distributed.yaml up -d
```

### Create table
```bash
docker exec -it hive-hiveserver2 beeline -u "jdbc:hive2://hive-hiveserver2:10000" -n root
```

```sql
-- in beeline
drop table test_hive1;
create table test_hive1(field int);
insert into table test_hive1 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from test_hive1;
```

Using vineyard as storage
-----------------

### Run vineyardd
```bash
cd v6d/build
# at terminal 1
./bin/vineyardd --socket=~/vineyard_sock/0/vineyard.sock -rpc_socket_port=9601 --etcd_endpoint="0.0.0.0:2382"
# at terminal 2
./bin/vineyardd --socket=~/vineyard_sock/1/vineyard.sock -rpc_socket_port=9602 --etcd_endpoint="0.0.0.0:2382"
# at terminal 3
./bin/vineyardd --socket=~/vineyard_sock/2/vineyard.sock -rpc_socket_port=9603 --etcd_endpoint="0.0.0.0:2382"
# at terminal 4
./bin/vineyardd --socket=~/vineyard_sock/metastore/vineyard.sock -rpc_socket_port=9604 --etcd_endpoint="0.0.0.0:2382"
# at terminal 5
./bin/vineyardd --socket=~/vineyard_sock/hiveserver/vineyard.sock -rpc_socket_port=9605 --etcd_endpoint="0.0.0.0:2382"
```

### Copy vineyard jars to share dir
```bash
mkdir -p ~/share
cd v6d/java/hive
# you can change share dir in docker-compose.yaml
cp target/vineyard-hive-0.1-SNAPSHOT.jar ~/share
```

### Create table with vineyard
```bash
docker exec -it hive-hiveserver2 beeline -u "jdbc:hive2://hive-hiveserver2:10000" -n root
```

```sql
-- in beeline
drop table test_vineyard;
create table test_vineyard(field int)
stored as Vineyard
location "vineyard:///user/hive_remote/warehouse/test_vineyard";
insert into table test_vineyard values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
select * from test_vineyard;
```
7 changes: 7 additions & 0 deletions java/hive/docker/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
HADOOP_YARN_RM_PORT=8088
HADOOP_YARN_NM_PORT=8042
HADOOP_YARN_PROXYSERVER_PORT=9111
HADOOP_MR_HISTORYSERVER_PORT=19888
HIVE_METASTORE_PORT=9083
HIVE_HIVESERVER2_PORT=10000
HIVE_VERSION=2.3.9
71 changes: 0 additions & 71 deletions java/hive/docker/Dockerfile

This file was deleted.

Loading

0 comments on commit 2955b60

Please sign in to comment.