HBASE-18201 add UT and docs for DataBlockEncodingTool
Signed-off-by: Reid Chan <reidchan@apache.org>
commit 28635d6101 (parent c6ff1de7e2)
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java
@@ -23,7 +23,9 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.hadoop.hbase.Cell;
@@ -57,6 +59,16 @@ public class EncodedDataBlock {
   private final HFileBlockEncodingContext encodingCtx;
   private HFileContext meta;
 
+  private final DataBlockEncoding encoding;
+
+  // This is for the situation where some cells include tags and others do not.
+  // isTagsLenZero stores whether a cell's tags length was zero before encoding, since we
+  // need to check whether the tags length is zero after decoding.
+  // The ROW_INDEX_V1 encoder abandons the tags segment when tagsLen is 0 after decoding
+  // cells to a byte array; other encoders do not. So we have to find a way to add a zero
+  // tagsLen back into the decoded byte array.
+  private List<Boolean> isTagsLenZero = new ArrayList<>();
+
   /**
    * Create a buffer which will be encoded using dataBlockEncoder.
    * @param dataBlockEncoder Algorithm used for compression.
@@ -69,6 +81,7 @@ public class EncodedDataBlock {
     Preconditions.checkNotNull(encoding,
         "Cannot create encoded data block with null encoder");
     this.dataBlockEncoder = dataBlockEncoder;
+    this.encoding = encoding;
     encodingCtx = dataBlockEncoder.newDataBlockEncodingContext(encoding,
         HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
     this.rawKVs = rawKVs;
@@ -90,6 +103,7 @@ public class EncodedDataBlock {
 
     return new Iterator<Cell>() {
       private ByteBuffer decompressedData = null;
+      private Iterator<Boolean> it = isTagsLenZero.iterator();
 
       @Override
       public boolean hasNext() {
@@ -116,11 +130,19 @@ public class EncodedDataBlock {
         int vlen = decompressedData.getInt();
         int tagsLen = 0;
         ByteBufferUtils.skip(decompressedData, klen + vlen);
-        // Read the tag length in case when steam contain tags
+        // Read the tag length in case the stream contains tags
         if (meta.isIncludesTags()) {
-          tagsLen = ((decompressedData.get() & 0xff) << 8) ^ (decompressedData.get() & 0xff);
-          ByteBufferUtils.skip(decompressedData, tagsLen);
+          boolean noTags = true;
+          if (it.hasNext()) {
+            noTags = it.next();
+          }
+          // ROW_INDEX_V1 will not put tagsLen back in the cell if it is zero, so there is
+          // no need to read a short here.
+          if (!(encoding.equals(DataBlockEncoding.ROW_INDEX_V1) && noTags)) {
+            tagsLen = ((decompressedData.get() & 0xff) << 8) ^ (decompressedData.get() & 0xff);
+            ByteBufferUtils.skip(decompressedData, tagsLen);
+          }
         }
         KeyValue kv = new KeyValue(decompressedData.array(), offset,
             (int) KeyValue.getKeyValueDataStructureSize(klen, vlen, tagsLen));
         if (meta.isIncludesMvcc()) {
@@ -247,6 +269,7 @@ public class EncodedDataBlock {
       if (this.meta.isIncludesTags()) {
         tagsLength = ((in.get() & 0xff) << 8) ^ (in.get() & 0xff);
         ByteBufferUtils.skip(in, tagsLength);
+        this.isTagsLenZero.add(tagsLength == 0);
       }
       if (this.meta.isIncludesMvcc()) {
         memstoreTS = ByteBufferUtils.readVLong(in);
@@ -260,6 +283,16 @@ public class EncodedDataBlock {
       baos.flush();
       baosBytes = baos.toByteArray();
       this.dataBlockEncoder.endBlockEncoding(encodingCtx, out, baosBytes);
+      // In endBlockEncoding(encodingCtx, out, baosBytes), the ROW_INDEX_V1 encoder writes an
+      // integer to out while the other encoders write the integer into baosBytes (the byte
+      // array). We need to call baos.toByteArray() again after endBlockEncoding to make sure
+      // the integer ROW_INDEX_V1 wrote to the output stream is dumped into baosBytes.
+      // The if branch is necessary because encoders except ROW_INDEX_V1 write the integer
+      // into baosBytes directly; without the branch and the second toByteArray(), baosBytes
+      // would not contain the integer written in endBlockEncoding.
+      if (this.encoding.equals(DataBlockEncoding.ROW_INDEX_V1)) {
+        baosBytes = baos.toByteArray();
+      }
     } catch (IOException e) {
       throw new RuntimeException(String.format(
           "Bug in encoding part of algorithm %s. " +
@@ -271,6 +304,6 @@ public class EncodedDataBlock {
 
   @Override
   public String toString() {
-    return dataBlockEncoder.toString();
+    return encoding.name();
   }
 }
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java
@@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.regionserver;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.text.DecimalFormat;
@@ -44,6 +45,7 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.io.compress.CompressionOutputStream;
 import org.apache.hadoop.io.compress.Compressor;
 import org.apache.hadoop.io.compress.Decompressor;
@@ -91,8 +93,8 @@ public class DataBlockEncodingTool {
   /** If this is specified, no correctness testing will be done */
   private static final String OPT_OMIT_CORRECTNESS_TEST = "c";
 
-  /** What encoding algorithm to test */
-  private static final String OPT_ENCODING_ALGORITHM = "a";
+  /** What compression algorithm to test */
+  private static final String OPT_COMPRESSION_ALGORITHM = "a";
 
   /** Number of times to run each benchmark */
   private static final String OPT_BENCHMARK_N_TIMES = "t";
@@ -132,7 +134,10 @@ public class DataBlockEncodingTool {
   private final Compressor compressor;
   private final Decompressor decompressor;
 
-  private static enum Manipulation {
+  // Check if the HFile uses tags.
+  private static boolean USE_TAG = false;
+
+  private enum Manipulation {
     ENCODING,
     DECODING,
     COMPRESSION,
@@ -192,8 +197,25 @@ public class DataBlockEncodingTool {
         }
       }
 
+      // Add a zero tagsLen to cells that do not include tags, since the scanner's conversion
+      // of the byte array to a KV abandons the tagsLen part if tagsLen is zero. But we still
+      // need the tagsLen part to check whether the current cell includes tags. If USE_TAG is
+      // true, the HFile contains cells with tags; a cell whose tagsLen equals 0 only means
+      // that other cells may have tags.
+      if (USE_TAG && currentKV.getTagsLength() == 0) {
         uncompressedOutputStream.write(currentKV.getBuffer(),
             currentKV.getOffset(), currentKV.getLength());
+        // write tagsLen = 0.
+        uncompressedOutputStream.write(Bytes.toBytes((short) 0));
+      } else {
+        uncompressedOutputStream.write(currentKV.getBuffer(),
+            currentKV.getOffset(), currentKV.getLength());
+      }
+
+      if (includesMemstoreTS) {
+        WritableUtils.writeVLong(
+            new DataOutputStream(uncompressedOutputStream), currentKV.getSequenceId());
+      }
 
       previousKey = currentKey;
 
@@ -209,16 +231,16 @@ public class DataBlockEncodingTool {
     }
 
     rawKVs = uncompressedOutputStream.toByteArray();
-    boolean useTag = (currentKV.getTagsLength() > 0);
     for (DataBlockEncoding encoding : encodings) {
       if (encoding == DataBlockEncoding.NONE) {
         continue;
       }
       DataBlockEncoder d = encoding.getEncoder();
       HFileContext meta = new HFileContextBuilder()
+          .withDataBlockEncoding(encoding)
           .withCompression(Compression.Algorithm.NONE)
           .withIncludesMvcc(includesMemstoreTS)
-          .withIncludesTags(useTag).build();
+          .withIncludesTags(USE_TAG).build();
       codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta));
     }
   }
@@ -396,8 +418,10 @@ public class DataBlockEncodingTool {
     try {
       for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
         final long startTime = System.nanoTime();
-        compressingStream.resetState();
+        // The compressedStream should be reset before compressingStream.resetState(), since
+        // for GZ resetState() writes a header into the output stream.
         compressedStream.reset();
+        compressingStream.resetState();
        compressingStream.write(buffer, offset, length);
         compressingStream.flush();
         compressedStream.toByteArray();
@@ -438,12 +462,6 @@ public class DataBlockEncodingTool {
       }
       decompressedStream.close();
 
-      // iterate over KeyValues
-      KeyValue kv;
-      for (int pos = 0; pos < length; pos += kv.getLength()) {
-        kv = new KeyValue(newBuf, pos);
-      }
-
     } catch (IOException e) {
       throw new RuntimeException(String.format(
           "Decoding path in '%s' algorithm cause exception ", name), e);
@@ -596,8 +614,9 @@ public class DataBlockEncodingTool {
     hsf.initReader();
     StoreFileReader reader = hsf.getReader();
     reader.loadFileInfo();
-    KeyValueScanner scanner = reader.getStoreFileScanner(true, true, false, 0, 0, false);
+    KeyValueScanner scanner = reader.getStoreFileScanner(true, true,
+        false, hsf.getMaxMemStoreTS(), 0, false);
+    USE_TAG = reader.getHFileReader().getFileContext().isIncludesTags();
     // run the utilities
     DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
     int majorVersion = reader.getHFileVersion();
@@ -654,7 +673,7 @@ public class DataBlockEncodingTool {
         "Measure read throughput");
     options.addOption(OPT_OMIT_CORRECTNESS_TEST, false,
         "Omit corectness tests.");
-    options.addOption(OPT_ENCODING_ALGORITHM, true,
+    options.addOption(OPT_COMPRESSION_ALGORITHM, true,
         "What kind of compression algorithm use for comparison.");
     options.addOption(OPT_BENCHMARK_N_TIMES,
         true, "Number of times to run each benchmark. Default value: " +
@@ -680,6 +699,9 @@ public class DataBlockEncodingTool {
     int kvLimit = Integer.MAX_VALUE;
     if (cmd.hasOption(OPT_KV_LIMIT)) {
       kvLimit = Integer.parseInt(cmd.getOptionValue(OPT_KV_LIMIT));
+      if (kvLimit <= 0) {
+        LOG.error("KV_LIMIT should not be less than 1.");
+      }
     }
 
     // basic argument sanity checks
@@ -692,9 +714,9 @@ public class DataBlockEncodingTool {
 
     String pathName = cmd.getOptionValue(OPT_HFILE_NAME);
     String compressionName = DEFAULT_COMPRESSION.getName();
-    if (cmd.hasOption(OPT_ENCODING_ALGORITHM)) {
+    if (cmd.hasOption(OPT_COMPRESSION_ALGORITHM)) {
       compressionName =
-          cmd.getOptionValue(OPT_ENCODING_ALGORITHM).toLowerCase(Locale.ROOT);
+          cmd.getOptionValue(OPT_COMPRESSION_ALGORITHM).toLowerCase(Locale.ROOT);
     }
     boolean doBenchmark = cmd.hasOption(OPT_MEASURE_THROUGHPUT);
     boolean doVerify = !cmd.hasOption(OPT_OMIT_CORRECTNESS_TEST);
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataBlockEncodingTool.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ArrayBackedTag;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.io.compress.Compression;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test DataBlockEncodingTool.
+ */
+@Category({MiscTests.class, SmallTests.class})
+public class TestDataBlockEncodingTool {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestDataBlockEncodingTool.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final String ROOT_DIR =
+      TEST_UTIL.getDataTestDir("TestDataBlockEncodingTool").toString();
+  private static final Configuration conf = TEST_UTIL.getConfiguration();
+  private static FileSystem fs;
+  private static StoreFileWriter sfw;
+
+  @Before
+  public void setUp() throws IOException {
+    fs = TEST_UTIL.getTestFileSystem();
+  }
+
+  private void testHFile(String fileName, boolean useTags, boolean allTags) throws IOException {
+    Path path = new Path(ROOT_DIR, fileName);
+    try {
+      createHFileWithTags(path, useTags, allTags);
+      testDataBlockingTool(path);
+    } finally {
+      if (fs.exists(path)) {
+        fs.delete(path, false);
+      }
+    }
+  }
+
+  private void createHFileWithTags(Path path, boolean useTags, boolean allTags) throws IOException {
+    HFileContext meta = new HFileContextBuilder()
+        .withBlockSize(64 * 1024)
+        .withIncludesTags(useTags).build();
+    sfw =
+        new StoreFileWriter.Builder(conf, fs)
+            .withFilePath(path)
+            .withFileContext(meta).build();
+    long now = System.currentTimeMillis();
+    byte[] FAMILY = Bytes.toBytes("cf");
+    byte[] QUALIFIER = Bytes.toBytes("q");
+    try {
+      for (char d = 'a'; d <= 'z'; d++) {
+        for (char e = 'a'; e <= 'z'; e++) {
+          byte[] b = new byte[] { (byte) d, (byte) e };
+          KeyValue kv;
+          if (useTags) {
+            if (allTags) {
+              // Write cells with tags to HFile.
+              Tag[] tags = new Tag[]{
+                  new ArrayBackedTag((byte) 0, Bytes.toString(b)),
+                  new ArrayBackedTag((byte) 0, Bytes.toString(b))};
+              kv = new KeyValue(b, FAMILY, QUALIFIER, now, b, tags);
+            } else {
+              // Write half cells with tags and half without tags to HFile.
+              if ((e - 'a') % 2 == 0) {
+                kv = new KeyValue(b, FAMILY, QUALIFIER, now, b);
+              } else {
+                Tag[] tags = new Tag[]{
+                    new ArrayBackedTag((byte) 0, Bytes.toString(b)),
+                    new ArrayBackedTag((byte) 0, Bytes.toString(b))};
+                kv = new KeyValue(b, FAMILY, QUALIFIER, now, b, tags);
+              }
+            }
+          } else {
+            // Write cells without tags to HFile.
+            kv = new KeyValue(b, FAMILY, QUALIFIER, now, b);
+          }
+          sfw.append(kv);
+        }
+      }
+      sfw.appendMetadata(0, false);
+    } finally {
+      sfw.close();
+    }
+  }
+
+  private static void testDataBlockingTool(Path path) throws IOException {
+    Configuration conf = HBaseConfiguration.create();
+    int maxKV = Integer.MAX_VALUE;
+    boolean doVerify = true;
+    boolean doBenchmark = true;
+    String testHFilePath = path.toString();
+    DataBlockEncodingTool.testCodecs(conf, maxKV, testHFilePath,
+        Compression.Algorithm.GZ.getName(), doBenchmark, doVerify);
+  }
+
+  @Test
+  public void testHFileAllCellsWithTags() throws IOException {
+    testHFile("1234567890", true, true);
+  }
+
+  @Test
+  public void testHFileAllCellsWithoutTags() throws IOException {
+    testHFile("1234567089", false, false);
+  }
+
+  @Test
+  public void testHFileHalfCellsWithTags() throws IOException {
+    testHFile("1234560789", true, false);
+  }
+}
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -1005,6 +1005,30 @@ drop_namespace 'pre_upgrade_cleanup'
 For further information, please refer to
 link:https://issues.apache.org/jira/browse/HBASE-20649?focusedCommentId=16535476#comment-16535476[HBASE-20649].
 
+=== Data Block Encoding Tool
+
+Tests various compression algorithms with different data block encoders for key compression on an existing HFile.
+Useful for testing, debugging and benchmarking.
+
+You must specify `-f`, which is the full path of the HFile.
+
+The result shows both the performance (MB/s) of compression/decompression and encoding/decoding, and the data savings on the HFile.
+
+----
+
+$ bin/hbase org.apache.hadoop.hbase.regionserver.DataBlockEncodingTool
+Usages: hbase org.apache.hadoop.hbase.regionserver.DataBlockEncodingTool
+Options:
+        -f HFile to analyse (REQUIRED)
+        -n Maximum number of key/value pairs to process in a single benchmark run.
+        -b Whether to run a benchmark to measure read throughput.
+        -c If this is specified, no correctness testing will be done.
+        -a What kind of compression algorithm use for test. Default value: GZ.
+        -t Number of times to run each benchmark. Default value: 12.
+        -omit Number of first runs of every benchmark to omit from statistics. Default value: 2.
+
+----
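+
+For example, to benchmark and verify every data block encoding against GZ compression on a store file (the HFile path below is illustrative), you might run:
+
+----
+$ bin/hbase org.apache.hadoop.hbase.regionserver.DataBlockEncodingTool \
+  -f /path/to/hfile -a GZ -b -t 12
+----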
+
 [[ops.regionmgt]]
 == Region Management
 