HBASE-26353 Support loadable dictionaries in hbase-compression-zstd (apache#3787)

ZStandard supports initializing compressors and decompressors with a precomputed dictionary, which can dramatically improve compression ratio and speed for tables with small values. For details, see "The Case For Small Data Compression": https://github.com/facebook/zstd#the-case-for-small-data-compression

Signed-off-by: Duo Zhang <[email protected]>
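To make the idea concrete, here is a minimal sketch of what "initialization with a precomputed dictionary" looks like against the zstd-jni binding that hbase-compression-zstd builds on. The dictionary file name and sample value are illustrative only, and the dictionary itself is assumed to have been trained offline (for example with the zstd command-line tool's --train mode):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import com.github.luben.zstd.ZstdCompressCtx;
import com.github.luben.zstd.ZstdDecompressCtx;

public class ZstdDictDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical pre-trained dictionary; "values.dict" is an assumed file name.
    byte[] dict = Files.readAllBytes(Paths.get("values.dict"));
    byte[] value = "row-00042:cf:q:small-payload".getBytes(StandardCharsets.UTF_8);

    try (ZstdCompressCtx cctx = new ZstdCompressCtx();
         ZstdDecompressCtx dctx = new ZstdDecompressCtx()) {
      cctx.setLevel(3);
      cctx.loadDict(dict); // compressor initialized with the dictionary
      byte[] compressed = cctx.compress(value);

      dctx.loadDict(dict); // decompression must load the same dictionary
      byte[] restored = dctx.decompress(compressed, value.length);
      System.out.printf("in=%d out=%d roundtrip=%b%n",
        value.length, compressed.length, Arrays.equals(value, restored));
    }
  }
}

For small inputs like the one above, most of the redundancy lies across values rather than within a single value, which is why a shared dictionary helps where plain per-value compression cannot.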
Showing 15 changed files with 600 additions and 47 deletions.
hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/DictionaryCache.java (164 additions, 0 deletions)
@@ -0,0 +1,164 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.hbase.io.compress;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder;
import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader;
import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache;

/**
 * A utility class for managing compressor/decompressor dictionary loading and caching of load
 * results. Useful for any codec that can support changing dictionaries at runtime,
 * such as ZStandard.
 */
@InterfaceAudience.Private
public final class DictionaryCache {

  public static final String DICTIONARY_MAX_SIZE_KEY = "hbase.io.compress.dictionary.max.size";
  public static final int DEFAULT_DICTIONARY_MAX_SIZE = 10 * 1024 * 1024;
  public static final String RESOURCE_SCHEME = "resource://";

  private static final Logger LOG = LoggerFactory.getLogger(DictionaryCache.class);
  // Volatile so the double-checked locking in getDictionary() is safe.
  private static volatile LoadingCache<String, byte[]> CACHE;

  private DictionaryCache() { }

  /**
   * Load a dictionary or return a previously cached load.
   * @param conf configuration
   * @param path the hadoop Path where the dictionary is located, as a String
   * @return the dictionary bytes if successful, null if path is null or empty
   */
  public static byte[] getDictionary(final Configuration conf, final String path)
      throws IOException {
    if (path == null || path.isEmpty()) {
      return null;
    }
    // Create the dictionary loading cache if we haven't already
    if (CACHE == null) {
      synchronized (DictionaryCache.class) {
        if (CACHE == null) {
          final int maxSize = conf.getInt(DICTIONARY_MAX_SIZE_KEY, DEFAULT_DICTIONARY_MAX_SIZE);
          CACHE = CacheBuilder.newBuilder()
            .maximumSize(100)
            .expireAfterAccess(10, TimeUnit.MINUTES)
            .build(
              new CacheLoader<String, byte[]>() {
                @Override
                public byte[] load(String s) throws Exception {
                  // Load by the cache key, not by the path captured when the cache
                  // was created, so entries for different dictionaries stay distinct.
                  final byte[] bytes;
                  if (s.startsWith(RESOURCE_SCHEME)) {
                    bytes = loadFromResource(conf, s, maxSize);
                  } else {
                    bytes = loadFromHadoopFs(conf, s, maxSize);
                  }
                  LOG.info("Loaded dictionary from {} (size {})", s, bytes.length);
                  return bytes;
                }
              });
        }
      }
    }

    // Get or load the dictionary for the given path
    try {
      return CACHE.get(path);
    } catch (ExecutionException e) {
      throw new IOException(e);
    }
  }

  // Visible for testing
  public static byte[] loadFromResource(final Configuration conf, final String s,
      final int maxSize) throws IOException {
    if (!s.startsWith(RESOURCE_SCHEME)) {
      throw new IOException("Path does not start with " + RESOURCE_SCHEME);
    }
    final String path = s.substring(RESOURCE_SCHEME.length());
    LOG.info("Loading resource {}", path);
    final InputStream in = DictionaryCache.class.getClassLoader().getResourceAsStream(path);
    if (in == null) {
      throw new FileNotFoundException("Resource " + path + " not found");
    }
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
      final byte[] buffer = new byte[8192];
      int n, len = 0;
      do {
        n = in.read(buffer);
        if (n > 0) {
          len += n;
          if (len > maxSize) {
            throw new IOException("Dictionary " + s + " is too large, limit=" + maxSize);
          }
          baos.write(buffer, 0, n);
        }
      } while (n > 0);
    } finally {
      in.close();
    }
    return baos.toByteArray();
  }

  private static byte[] loadFromHadoopFs(final Configuration conf, final String s,
      final int maxSize) throws IOException {
    final Path path = new Path(s);
    final FileSystem fs = FileSystem.get(path.toUri(), conf);
    LOG.info("Loading file {}", path);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final FSDataInputStream in = fs.open(path);
    try {
      final byte[] buffer = new byte[8192];
      int n, len = 0;
      do {
        n = in.read(buffer);
        if (n > 0) {
          len += n;
          if (len > maxSize) {
            throw new IOException("Dictionary " + s + " is too large, limit=" + maxSize);
          }
          baos.write(buffer, 0, n);
        }
      } while (n > 0);
    } finally {
      in.close();
    }
    return baos.toByteArray();
  }

  // Visible for testing
  public static boolean contains(String dictionaryPath) {
    if (CACHE != null) {
      return CACHE.asMap().containsKey(dictionaryPath);
    }
    return false;
  }

}
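As a usage illustration (not part of the commit), a caller such as a dictionary-aware codec could fetch and cache dictionary bytes like this; the resource name zstd.test.dict is hypothetical:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.compress.DictionaryCache;

public class DictionaryCacheDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // "resource://" selects classpath loading; any other string is treated as a
    // Hadoop filesystem path (hdfs://..., file:///..., etc.).
    byte[] dict = DictionaryCache.getDictionary(conf, "resource://zstd.test.dict");
    System.out.println("Loaded dictionary of " + dict.length + " bytes");
    // Repeat lookups for the same path are served from the loading cache.
    System.out.println("Cached: " + DictionaryCache.contains("resource://zstd.test.dict"));
  }
}

Note the design choice visible in getDictionary(): loads are funneled through a bounded, expiring LoadingCache, so many compressor instances can share one copy of the dictionary bytes instead of re-reading them from HDFS or the classpath on every codec initialization.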