Skip to content

Commit

Permalink
[core] Introduce AsyncPositionOutputStream (apache#3875)
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi authored Aug 6, 2024
1 parent 261393c commit 84732cd
Show file tree
Hide file tree
Showing 19 changed files with 609 additions and 21 deletions.
6 changes: 6 additions & 0 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
</tr>
</thead>
<tbody>
<tr>
<td><h5>async-file-write</h5></td>
<td style="word-wrap: break-word;">true</td>
<td>Boolean</td>
<td>Whether to enable asynchronous IO writing when writing files.</td>
</tr>
<tr>
<td><h5>auto-create</h5></td>
<td style="word-wrap: break-word;">false</td>
Expand Down
11 changes: 11 additions & 0 deletions paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,13 @@ public class CoreOptions implements Serializable {
"When a batch job queries from a table, if a partition does not exist in the current branch, "
+ "the reader will try to get this partition from this fallback branch.");

/**
 * Boolean option stored under the key {@code async-file-write}. It defaults to {@code true} and
 * controls whether asynchronous IO writing is enabled when writing files.
 */
public static final ConfigOption<Boolean> ASYNC_FILE_WRITE =
key("async-file-write")
.booleanType()
.defaultValue(true)
.withDescription(
"Whether to enable asynchronous IO writing when writing files.");

private final Options options;

public CoreOptions(Map<String, String> options) {
Expand Down Expand Up @@ -2020,6 +2027,10 @@ public boolean prepareCommitWaitCompaction() {
return options.get(LOOKUP_WAIT);
}

/**
 * Looks up {@link #ASYNC_FILE_WRITE} in the options held by this instance.
 *
 * @return the value resolved for the {@code async-file-write} option
 */
public boolean asyncFileWrite() {
    return this.options.get(ASYNC_FILE_WRITE);
}

/**
 * Looks up {@link #METADATA_ICEBERG_COMPATIBLE} in the options held by this instance.
 *
 * @return the value resolved for that option
 */
public boolean metadataIcebergCompatible() {
    return this.options.get(METADATA_ICEBERG_COMPATIBLE);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.fs;

import org.apache.paimon.annotation.VisibleForTesting;
import org.apache.paimon.utils.FixLenByteArrayOutputStream;

import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import static org.apache.paimon.utils.ThreadUtils.newDaemonThreadFactory;

/**
 * A {@link PositionOutputStream} which uses an async thread to write data.
 *
 * <p>Bytes handed to this stream are collected in a fixed-size buffer of {@link #BUFFER_SIZE}
 * bytes. Filled buffers are passed through an event queue to a task running on {@link
 * #EXECUTOR_SERVICE}, which writes them to the wrapped stream and returns the drained arrays for
 * reuse. A failure raised by that task is recorded and rethrown by later calls on this stream.
 */
public class AsyncPositionOutputStream extends PositionOutputStream {

    public static final ExecutorService EXECUTOR_SERVICE =
            Executors.newCachedThreadPool(newDaemonThreadFactory("AsyncOutputStream"));

    public static final int AWAIT_TIMEOUT_SECONDS = 10;
    public static final int BUFFER_SIZE = 1024 * 32;

    private final PositionOutputStream out;
    private final FixLenByteArrayOutputStream buffer;
    private final LinkedBlockingQueue<byte[]> bufferQueue;
    private final LinkedBlockingQueue<AsyncEvent> eventQueue;
    private final AtomicReference<Throwable> exception;
    private final Future<?> future;

    private long position;

    /**
     * Wraps {@code out} and starts the background task that performs the actual writes.
     *
     * @param out the stream that receives the data; it is closed by the background task when that
     *     task terminates
     */
    public AsyncPositionOutputStream(PositionOutputStream out) {
        this.out = out;
        this.bufferQueue = new LinkedBlockingQueue<>();
        this.eventQueue = new LinkedBlockingQueue<>();
        this.exception = new AtomicReference<>();
        this.position = 0;
        this.buffer = new FixLenByteArrayOutputStream();
        this.buffer.setBuffer(new byte[BUFFER_SIZE]);
        // Start the worker last so that it never observes a partially initialised instance.
        this.future = EXECUTOR_SERVICE.submit(this::execute);
    }

    @VisibleForTesting
    LinkedBlockingQueue<byte[]> getBufferQueue() {
        return bufferQueue;
    }

    /** Entry point of the background task: runs the event loop and records any failure. */
    private void execute() {
        try {
            doWork();
        } catch (Throwable e) {
            // Record the cause before failing the task so that callers who see the task fail
            // can always find the original exception in checkException().
            exception.set(e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Event loop of the background task. Handles events in queue order until an {@link EndEvent}
     * arrives or an operation on the wrapped stream fails.
     */
    private void doWork() throws InterruptedException, IOException {
        // try-with-resources closes the wrapped stream on every exit path. Unlike a plain finally
        // block, it keeps a write/flush failure as the primary exception and attaches a failure
        // of close() as suppressed instead of letting it replace the original one.
        try (PositionOutputStream target = out) {
            while (true) {
                AsyncEvent event = eventQueue.poll(AWAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                if (event == null) {
                    continue;
                }
                if (event instanceof EndEvent) {
                    return;
                } else if (event instanceof DataEvent) {
                    DataEvent dataEvent = (DataEvent) event;
                    target.write(dataEvent.data, 0, dataEvent.length);
                    // Hand the drained array back so flushBuffer() can reuse it.
                    bufferQueue.add(dataEvent.data);
                } else if (event instanceof FlushEvent) {
                    target.flush();
                    ((FlushEvent) event).latch.countDown();
                }
            }
        }
    }

    /**
     * Returns the number of bytes accepted by this stream so far, including bytes that are still
     * buffered or queued.
     *
     * @throws IOException if the background task has recorded a failure
     */
    @Override
    public long getPos() throws IOException {
        checkException();
        return position;
    }

    /** Queues the buffered bytes, if any, for writing and installs an empty buffer. */
    private void flushBuffer() {
        if (buffer.getCount() == 0) {
            return;
        }
        putEvent(new DataEvent(buffer.getBuffer(), buffer.getCount()));
        // Prefer an array the worker has already drained over allocating a new one.
        byte[] byteArray = bufferQueue.poll();
        if (byteArray == null) {
            byteArray = new byte[BUFFER_SIZE];
        }
        buffer.setBuffer(byteArray);
        buffer.setCount(0);
    }

    /**
     * Buffers a single byte.
     *
     * @throws IOException if the background task has recorded a failure
     */
    @Override
    public void write(int b) throws IOException {
        checkException();
        while (buffer.write((byte) b) != 1) {
            flushBuffer();
        }
        position++;
    }

    @Override
    public void write(byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Buffers {@code len} bytes of {@code b} starting at {@code off}, queueing every buffer that
     * fills up along the way.
     *
     * @throws IOException if the background task has recorded a failure
     * @throws IndexOutOfBoundsException if {@code off} and {@code len} do not describe a valid
     *     range of {@code b}
     */
    @Override
    public void write(byte[] b, int off, int len) throws IOException {
        checkException();
        while (true) {
            int written = buffer.write(b, off, len);
            // Advance the position only by what was really accepted, so a rejected call
            // (invalid range) leaves getPos() untouched.
            position += written;
            off += written;
            len -= written;
            if (len == 0) {
                return;
            }
            flushBuffer();
        }
    }

    /**
     * Queues all buffered bytes and blocks until the background task has flushed the wrapped
     * stream.
     *
     * @throws IOException if the background task has recorded a failure, or if it has already
     *     terminated (for example because this stream was closed)
     */
    @Override
    public void flush() throws IOException {
        // Read the completion flag before looking for a recorded failure: execute() stores the
        // failure before the task completes, so a failed task is always reported with its cause.
        boolean workerFinished = future.isDone();
        checkException();
        if (workerFinished) {
            // Nobody would ever count the latch down; fail instead of waiting forever.
            throw new IOException("Stream is already closed.");
        }
        flushBuffer();
        FlushEvent event = new FlushEvent();
        putEvent(event);
        while (true) {
            try {
                boolean await = event.latch.await(AWAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                if (await) {
                    return;
                }
                // Wake up periodically to notice a worker that died before serving the event.
                checkException();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Queues the remaining bytes, asks the background task to stop and waits for it to finish. The
     * background task closes the wrapped stream.
     *
     * @throws IOException if the background task failed; the original failure is rethrown in the
     *     same way as by {@link #flush()} and the write methods
     */
    @Override
    public void close() throws IOException {
        checkException();
        flushBuffer();
        putEvent(new EndEvent());
        try {
            this.future.get();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            // Surface the recorded root cause rather than a chain of wrappers around it.
            checkException();
            // Defensive fallback: execute() always records the failure before the task fails.
            throw new RuntimeException(e);
        }
    }

    /** Enqueues {@code event} for the background task. */
    private void putEvent(AsyncEvent event) {
        try {
            eventQueue.put(event);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    /**
     * Rethrows the failure recorded by the background task, if there is one. {@link IOException}
     * and {@link RuntimeException} are rethrown as they are, anything else is wrapped in an {@link
     * IOException}.
     */
    private void checkException() throws IOException {
        Throwable throwable = exception.get();
        if (throwable != null) {
            if (throwable instanceof IOException) {
                throw (IOException) throwable;
            }
            if (throwable instanceof RuntimeException) {
                throw (RuntimeException) throwable;
            }
            throw new IOException(throwable);
        }
    }

    /** Marker for the messages sent to the background task. */
    private interface AsyncEvent {}

    /** Carries a buffer and the number of valid bytes at its start. */
    private static class DataEvent implements AsyncEvent {

        private final byte[] data;
        private final int length;

        public DataEvent(byte[] data, int length) {
            this.data = data;
            this.length = length;
        }
    }

    /** Requests a flush of the wrapped stream; the latch is released once it has been done. */
    private static class FlushEvent implements AsyncEvent {
        private final CountDownLatch latch = new CountDownLatch(1);
    }

    /** Tells the background task to leave its event loop. */
    private static class EndEvent implements AsyncEvent {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.utils;

import java.io.ByteArrayOutputStream;

/**
 * A fixed-capacity byte sink modeled after {@link ByteArrayOutputStream}, whose backing array is
 * supplied by the caller and never grows.
 *
 * <p>The write methods copy as many bytes as still fit into the current array and report how many
 * were accepted, so a caller can install another array with {@link #setBuffer(byte[])}, reset the
 * fill level with {@link #setCount(int)} and carry on.
 */
public class FixLenByteArrayOutputStream {

    private byte[] buf;
    private int count;

    /** Returns the array currently used as backing storage; no copy is made. */
    public byte[] getBuffer() {
        return buf;
    }

    /** Installs {@code buffer} as backing storage; the fill level is left as it is. */
    public void setBuffer(byte[] buffer) {
        buf = buffer;
    }

    /** Returns the number of bytes currently held in the backing array. */
    public int getCount() {
        return count;
    }

    /** Overrides the fill level, i.e. the index at which the next byte is stored. */
    public void setCount(int count) {
        this.count = count;
    }

    /**
     * Appends a single byte if there is room for it.
     *
     * @return {@code 1} if the byte was stored, {@code 0} if the backing array is full
     */
    public int write(byte b) {
        if (count >= buf.length) {
            return 0;
        }
        buf[count] = b;
        count++;
        return 1;
    }

    /**
     * Appends up to {@code len} bytes of {@code b}, starting at {@code off}, limited by the space
     * left in the backing array.
     *
     * @return the number of bytes actually copied, possibly {@code 0}
     * @throws IndexOutOfBoundsException if {@code off} and {@code len} do not describe a valid
     *     range of {@code b}
     */
    public int write(byte[] b, int off, int len) {
        boolean invalidRange =
                off < 0 || off > b.length || len < 0 || (off + len) - b.length > 0;
        if (invalidRange) {
            throw new IndexOutOfBoundsException();
        }
        int remaining = buf.length - count;
        int accepted = remaining < len ? remaining : len;
        System.arraycopy(b, off, buf, count, accepted);
        count += accepted;
        return accepted;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.utils;

import java.io.ByteArrayOutputStream;

/**
 * A {@link ByteArrayOutputStream} that exposes its backing array, so callers can read it without
 * copying and replace it with an array of their own.
 */
public class ReuseByteArrayOutputStream extends ByteArrayOutputStream {

    /** Creates a stream whose initial backing array has a capacity of {@code size} bytes. */
    public ReuseByteArrayOutputStream(int size) {
        super(size);
    }

    /** Returns the array currently backing this stream; no copy is made. */
    public byte[] getBuffer() {
        return this.buf;
    }

    /** Replaces the backing array with {@code buffer}; the byte count is left as it is. */
    public void setBuffer(byte[] buffer) {
        buf = buffer;
    }
}
Loading

0 comments on commit 84732cd

Please sign in to comment.