version 2
parent 6906fc5e89
commit d70022e144
@@ -33,7 +33,7 @@ public enum IndexBlockEncoding {
   /** Disable index block encoding. */
   NONE(0, null),
   // id 1 is reserved for the PREFIX_TREE algorithm to be added later
-  PREFIX_TREE(1, "org.apache.hadoop.hbase.io.encoding.PrefixTreeIndexBlockEncoder");
+  PREFIX_TREE(1, "org.apache.hadoop.hbase.io.encoding.PrefixTreeIndexBlockEncoderV2");

   private final short id;
   private final byte[] idInBytes;
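Each enum value pairs a stable on-disk id with the fully qualified class name of the encoder implementation, so a reader can recreate the right encoder from the id it finds in the HFile; this hunk simply points id 1 at the new V2 encoder class. The sketch below shows how such an id-to-encoder lookup is typically resolved; the accessor and field names (getId, encoderCls, createEncoderById) are assumptions for illustration, not the actual IndexBlockEncoding API.

// Illustrative sketch only: how an id-to-encoder mapping like this enum is usually
// resolved when a reader hits an encoded index block. The accessor and field names
// (getId, encoderCls, createEncoderById) are assumptions, not the real API.
public static IndexBlockEncoder createEncoderById(short id) throws IOException {
  for (IndexBlockEncoding encoding : IndexBlockEncoding.values()) {
    if (encoding.getId() == id) {            // assumed accessor for the on-disk id
      String clazz = encoding.encoderCls;    // assumed field holding the class name
      if (clazz == null) {
        return null;                         // NONE: index blocks are left unencoded
      }
      try {
        return (IndexBlockEncoder) Class.forName(clazz).getDeclaredConstructor().newInstance();
      } catch (ReflectiveOperationException e) {
        throw new IOException("Cannot instantiate index block encoder " + clazz, e);
      }
    }
  }
  throw new IOException("Unknown index block encoding id " + id);
}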
New file: PrefixTreeIndexBlockEncoderV2.java (package org.apache.hadoop.hbase.io.encoding)
@@ -0,0 +1,239 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode;
import org.apache.hadoop.hbase.io.util.UFIntTool;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.ObjectIntPair;
import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Private
public class PrefixTreeIndexBlockEncoderV2 implements IndexBlockEncoder {
  private static byte VERSION = 0;

  @Override
  public void startBlockEncoding(boolean rootIndexBlock, DataOutput out) throws IOException {
  }

  @Override
  public void encode(List<byte[]> blockKeys, List<Long> blockOffsets, List<Integer> onDiskDataSizes,
    DataOutput out) throws IOException {
    List<KeyValue.KeyOnlyKeyValue> rowKeys = new ArrayList<>(blockKeys.size());
    for (int i = 0; i < blockKeys.size(); i++) {
      byte[] key = blockKeys.get(i);
      KeyValue.KeyOnlyKeyValue rowKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      rowKeys.add(rowKey);
    }

    TokenizerNode node = PrefixTreeUtilV2.buildPrefixTree(rowKeys);
    PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    PrefixTreeUtilV2.serializePrefixTree(node, dataWidth, outputStream);
    byte[] data = outputStream.toByteArray();

    out.write(VERSION);
    PrefixTreeUtilV2.serialize(out, dataWidth);
    out.writeInt(blockKeys.size());
    out.writeInt(data.length);
    out.write(data);

    long minBlockOffset = blockOffsets.get(0);
    long maxBlockOffset = blockOffsets.get(blockOffsets.size() - 1);
    int minOnDiskDataSize = Integer.MAX_VALUE;
    int maxOnDiskDataSize = Integer.MIN_VALUE;
    for (int i = 0; i < onDiskDataSizes.size(); ++i) {
      if (minOnDiskDataSize > onDiskDataSizes.get(i)) {
        minOnDiskDataSize = onDiskDataSizes.get(i);
      }
      if (maxOnDiskDataSize < onDiskDataSizes.get(i)) {
        maxOnDiskDataSize = onDiskDataSizes.get(i);
      }
    }

    int blockOffsetWidth = UFIntTool.numBytes(maxBlockOffset - minBlockOffset);
    int onDiskDataSizeWidth = UFIntTool.numBytes(maxOnDiskDataSize - minOnDiskDataSize);

    out.write(blockOffsetWidth);
    out.write(onDiskDataSizeWidth);
    out.writeLong(minBlockOffset);
    out.writeInt(minOnDiskDataSize);

    outputStream.reset();
    for (int i = 0; i < blockOffsets.size(); ++i) {
      UFIntTool.writeBytes(blockOffsetWidth, (blockOffsets.get(i) - minBlockOffset), outputStream);
      UFIntTool.writeBytes(onDiskDataSizeWidth, (onDiskDataSizes.get(i) - minOnDiskDataSize),
        outputStream);
    }
    data = outputStream.toByteArray();
    out.write(data);
  }

  @Override
  public void endBlockEncoding(DataOutput out) throws IOException {
  }

  @Override
  public IndexEncodedSeeker createSeeker() {
    return new PrefixTreeIndexBlockEncodedSeeker();
  }

  static class PrefixTreeIndexBlockEncodedSeeker implements IndexEncodedSeeker {

    private PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth();
    private ByteBuffer prefixTreeNodeData = null;
    private ByteBuffer blockOffsetAndSizeData = null;
    private int blockOffsetWidth;
    private int onDiskDataSizeWidth;
    private long minBlockOffset;
    private int minOnDiskDataSize;

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      if (prefixTreeNodeData != null) {
        heapSize += ClassSize.align(ClassSize.BYTE_BUFFER + prefixTreeNodeData.capacity());
      }
      if (blockOffsetAndSizeData != null) {
        heapSize += ClassSize.align(ClassSize.BYTE_BUFFER + blockOffsetAndSizeData.capacity());
      }

      // dataWidth
      heapSize += ClassSize.REFERENCE;
      // blockOffsetWidth onDiskDataSizeWidth minOnDiskDataSize
      heapSize += 3 * Bytes.SIZEOF_INT;
      // PrefixTreeDataWidth's data.
      heapSize += 5 * Bytes.SIZEOF_INT;
      // minBlockOffset
      heapSize += Bytes.SIZEOF_LONG;
      return ClassSize.align(heapSize);
    }

    @Override
    public void initRootIndex(ByteBuff data, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      byte version = data.get();
      if (version != VERSION) {
        throw new IOException("Corrupted data, version should be 0, but it is " + version);
      }
      PrefixTreeUtilV2.deserialize(data, dataWidth);
      int numEntry = data.getInt();
      int prefixNodeLength = data.getInt();

      ObjectIntPair<ByteBuffer> tmpPair = new ObjectIntPair<>();
      data.asSubByteBuffer(data.position(), prefixNodeLength, tmpPair);
      ByteBuffer dup = tmpPair.getFirst().duplicate();
      dup.position(tmpPair.getSecond());
      dup.limit(tmpPair.getSecond() + prefixNodeLength);
      prefixTreeNodeData = dup.slice();

      data.skip(prefixNodeLength);
      blockOffsetWidth = data.get();
      onDiskDataSizeWidth = data.get();
      minBlockOffset = data.getLong();
      minOnDiskDataSize = data.getInt();
      int blockOffsetsAndonDiskDataSize = numEntry * (blockOffsetWidth + onDiskDataSizeWidth);

      data.asSubByteBuffer(data.position(), blockOffsetsAndonDiskDataSize, tmpPair);
      dup = tmpPair.getFirst().duplicate();
      dup.position(tmpPair.getSecond());
      dup.limit(tmpPair.getSecond() + blockOffsetsAndonDiskDataSize);
      blockOffsetAndSizeData = dup.slice();
    }

    @Override
    public Cell getRootBlockKey(int i) {
      byte[] row = PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, i);
      return PrivateCellUtil.createFirstOnRow(row);
    }

    @Override
    public int rootBlockContainingKey(Cell key) {
      return PrefixTreeUtilV2.search(prefixTreeNodeData, 0, key, 0, dataWidth);
    }

    @Override
    public long rootBlockBlockOffsets(int rootLevelIndex) {
      int pos = rootLevelIndex * (blockOffsetWidth + onDiskDataSizeWidth);
      return UFIntTool.fromBytes(blockOffsetAndSizeData, pos, blockOffsetWidth) + minBlockOffset;
    }

    @Override
    public int rootBlockOnDiskDataSizes(int rootLevelIndex) {
      int pos = rootLevelIndex * (blockOffsetWidth + onDiskDataSizeWidth);
      int currentOnDiskSize = (int) UFIntTool.fromBytes(blockOffsetAndSizeData,
        pos + blockOffsetWidth, onDiskDataSizeWidth) + minOnDiskDataSize;
      return currentOnDiskSize;
    }

    @Override
    public SearchResult locateNonRootIndexEntry(ByteBuff nonRootBlock, Cell key)
      throws IOException {
      PrefixTreeDataWidth meta = new PrefixTreeDataWidth();
      byte version = nonRootBlock.get();
      if (version != VERSION) {
        throw new IOException("Corrupted data, version should be 0, but it is " + version);
      }
      PrefixTreeUtilV2.deserialize(nonRootBlock, meta);
      int numEntry = nonRootBlock.getInt();
      int prefixNodeLength = nonRootBlock.getInt();

      ObjectIntPair<ByteBuffer> tmpPair = new ObjectIntPair<>();
      nonRootBlock.asSubByteBuffer(nonRootBlock.position(), prefixNodeLength, tmpPair);
      ByteBuffer dup = tmpPair.getFirst().duplicate();
      dup.position(tmpPair.getSecond());
      dup.limit(tmpPair.getSecond() + prefixNodeLength);
      ByteBuffer prefixTreeNodeData = dup.slice();

      nonRootBlock.skip(prefixNodeLength);

      int entryIndex = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, key, 0, meta);
      SearchResult result = new SearchResult();
      result.entryIndex = entryIndex;

      if (entryIndex >= 0 && entryIndex < numEntry) {
        int blockOffsetWidth = nonRootBlock.get();
        int onDiskDataSizeWidth = nonRootBlock.get();
        long minBlockOffset = nonRootBlock.getLong();
        int minOnDiskDataSize = nonRootBlock.getInt();

        int pos = nonRootBlock.position() + entryIndex * (blockOffsetWidth + onDiskDataSizeWidth);
        result.offset = UFIntTool.fromBytes(nonRootBlock, pos, blockOffsetWidth) + minBlockOffset;
        result.onDiskSize =
          (int) UFIntTool.fromBytes(nonRootBlock, pos + blockOffsetWidth, onDiskDataSizeWidth)
            + minOnDiskDataSize;
      }

      return result;
    }
  }
}
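The encode method above lays the root index block out as: one version byte, the six PrefixTreeDataWidth width bytes, the entry count, the tree length, the serialized prefix-tree bytes, then the two delta widths, the minimum offset and size, and finally the per-entry (block offset, on-disk size) pairs stored as fixed-width deltas against minBlockOffset and minOnDiskDataSize, so each pair only needs UFIntTool.numBytes(max - min) bytes. Below is a minimal standalone sketch of that delta plus fixed-width trick; the helpers are illustrative stand-ins for UFIntTool, not the HBase API.

import java.io.ByteArrayOutputStream;

// Standalone sketch of the delta + fixed-width unsigned int idea used above.
public class FixedWidthDeltaDemo {
  // Smallest number of bytes able to hold the unsigned value.
  static int numBytes(long value) {
    int n = 1;
    while ((value >>>= 8) != 0) {
      n++;
    }
    return n;
  }

  static void writeBytes(int width, long value, ByteArrayOutputStream out) {
    for (int i = width - 1; i >= 0; i--) {
      out.write((int) (value >>> (8 * i)) & 0xff);  // big-endian, fixed width
    }
  }

  public static void main(String[] args) {
    long[] blockOffsets = { 1_048_576L, 1_114_112L, 1_179_648L };
    long min = blockOffsets[0];
    long max = blockOffsets[blockOffsets.length - 1];
    int width = numBytes(max - min);                // 3 bytes instead of 8 per offset
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (long offset : blockOffsets) {
      writeBytes(width, offset - min, out);         // store only the delta
    }
    System.out.println("width=" + width + ", encoded bytes=" + out.size());
  }
}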
@@ -172,8 +172,8 @@ public class PrefixTreeUtil {
     meta.totalNodeDataLength += node.nodeData.length;
     meta.countNodeDataNum++;

-    if (node.children.size() > meta.maxChildNum) {
-      meta.maxChildNum = node.children.size();
+    if (node.children.size() > meta.maxFanOut) {
+      meta.maxFanOut = node.children.size();
     }
     meta.totalChildNum += node.children.size();
     meta.countChildNum++;
@@ -191,7 +191,7 @@ public class PrefixTreeUtil {
     }
     if (node.children.isEmpty()) {
       meta.leafNodes.add(node);
-      meta.countIndexNum++;
+      meta.totalIndexNum++;
     } else {
       meta.nonLeafNodes.add(node);
     }
@@ -209,7 +209,7 @@ public class PrefixTreeUtil {
     totalLength += meta.totalNodeDataLength;
     totalLength += dataWidth.nodeDataLengthWidth * meta.countNodeDataNum;

-    dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxChildNum);
+    dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxFanOut);
     // fan Out
     totalLength += dataWidth.fanOutWidth * meta.countChildNum;
     // fan Byte
@@ -222,7 +222,7 @@ public class PrefixTreeUtil {
     totalLength += dataWidth.occurrencesWidth * meta.countNumOccurrences;

     dataWidth.indexWidth = UFIntTool.numBytes(meta.maxIndex);
-    totalLength += dataWidth.indexWidth * meta.countIndexNum;
+    totalLength += dataWidth.indexWidth * meta.totalIndexNum;

     dataWidth.childNodeOffsetWidth = UFIntTool.numBytes(totalLength);

@@ -525,6 +525,9 @@ public class PrefixTreeUtil {
       throw new IllegalStateException("Unexpected unable to find index=" + index);
   }

+  /**
+   * Used only when serialize for build the prefix tree.
+   */
   public static class TokenizerNode {

     public byte[] nodeData = null;
@@ -549,6 +552,9 @@ public class PrefixTreeUtil {

     public List<KeyValue.KeyOnlyKeyValue> keys = null;

+    public int qualifierLength = 0;
+    public int qualifierNum = 0;
+
     /*
      * A positive value indicating how many bytes before the end of the block this node will start.
      * If the section is 55 bytes and negativeOffset is 9, then the node will start at 46.
@@ -564,7 +570,7 @@ public class PrefixTreeUtil {
     public int totalNodeDataLength = 0;
     public int countNodeDataNum = 0;

-    public int maxChildNum = 0;
+    public int maxFanOut = 0;
     public int totalChildNum = 0;
     public int countChildNum = 0;

@@ -572,7 +578,11 @@ public class PrefixTreeUtil {
     public int countNumOccurrences = 0;

     public int maxIndex = 0;
-    public int countIndexNum = 0;
+    public int totalIndexNum = 0;

+    public int maxQualifierLength = 0;
+    public int countQualifierNum = 0;
+    public int totalQualifierLength = 0;
+
     public ArrayList<TokenizerNode> nonLeafNodes = new ArrayList<>();

@@ -589,5 +599,7 @@ public class PrefixTreeUtil {
     public int indexWidth = 0;

     public int childNodeOffsetWidth = 0;
+
+    public int qualifierLengthWidth = 0;
   }
 }
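The renames in these hunks (maxChildNum to maxFanOut, countIndexNum to totalIndexNum) line the statistics up with how they are used: each maximum decides the byte width of a field, each total decides how many of those fields get written. A small worked example of that accounting, with made-up numbers:

public class IndexWidthAccounting {
  public static void main(String[] args) {
    // Hypothetical numbers, only to show how the accounting above adds up.
    int totalIndexNum = 128;   // leaf index entries written for this block
    int maxIndex = 127;        // largest entry index that must be representable
    int indexWidth = 1;        // UFIntTool.numBytes(127) fits in a single byte
    int totalLength = 0;
    totalLength += indexWidth * totalIndexNum;  // 128 bytes for all index slots
    // childNodeOffsetWidth is then sized from the running totalLength, so child
    // offsets are only as wide as the serialized tree actually needs.
    System.out.println("maxIndex=" + maxIndex + " -> indexWidth=" + indexWidth
      + ", index bytes=" + totalLength);
  }
}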
New file: PrefixTreeUtilV2.java (package org.apache.hadoop.hbase.io.encoding)
@@ -0,0 +1,848 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.io.encoding;
|
||||||
|
|
||||||
|
import java.io.DataOutput;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.hadoop.hbase.ByteBufferExtendedCell;
|
||||||
|
import org.apache.hadoop.hbase.Cell;
|
||||||
|
import org.apache.hadoop.hbase.KeyValue;
|
||||||
|
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
|
||||||
|
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth;
|
||||||
|
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode;
|
||||||
|
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNodeMeta;
|
||||||
|
import org.apache.hadoop.hbase.io.util.StreamUtils;
|
||||||
|
import org.apache.hadoop.hbase.io.util.UFIntTool;
|
||||||
|
import org.apache.hadoop.hbase.nio.ByteBuff;
|
||||||
|
import org.apache.hadoop.hbase.util.ByteBufferUtils;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class PrefixTreeUtilV2 {
|
||||||
|
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(PrefixTreeUtilV2.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build tree from begin
|
||||||
|
* @return the tree
|
||||||
|
*/
|
||||||
|
public static TokenizerNode buildPrefixTree(List<KeyValue.KeyOnlyKeyValue> rowKeys) {
|
||||||
|
// root node.
|
||||||
|
TokenizerNode node = new TokenizerNode();
|
||||||
|
int start = 0;
|
||||||
|
// Get max common prefix
|
||||||
|
int common = maxCommonPrefix(rowKeys, 0, rowKeys.size() - 1, 0);
|
||||||
|
if (common > 0) {
|
||||||
|
byte[] commonB =
|
||||||
|
Bytes.copy(rowKeys.get(0).getRowArray(), rowKeys.get(0).getRowOffset(), common);
|
||||||
|
node.nodeData = commonB;
|
||||||
|
for (int i = 0; i < rowKeys.size(); i++) {
|
||||||
|
if (rowKeys.get(i).getRowLength() == common) {
|
||||||
|
node.numOccurrences++;
|
||||||
|
if (node.index == null) {
|
||||||
|
node.index = new ArrayList<>(1);
|
||||||
|
}
|
||||||
|
node.index.add(i);
|
||||||
|
if (node.keys == null) {
|
||||||
|
node.keys = new ArrayList<>(1);
|
||||||
|
}
|
||||||
|
node.keys.add(rowKeys.get(i));
|
||||||
|
start = i + 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Only root node data can be empty.
|
||||||
|
node.nodeData = new byte[0];
|
||||||
|
}
|
||||||
|
if (start <= rowKeys.size() - 1) {
|
||||||
|
constructAndSplitChild(node, rowKeys, start, rowKeys.size() - 1, common);
|
||||||
|
}
|
||||||
|
return node;
|
||||||
|
}
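buildPrefixTree above turns the sorted list of block row keys into a trie: every node keeps the longest prefix shared by its slice of keys, keys that end exactly at that prefix are recorded as occurrences together with their original index, and the remaining keys are split into children by their next byte. A minimal standalone sketch of the same idea over plain strings; it is illustrative only and uses none of the HBase types.

import java.util.ArrayList;
import java.util.List;

class Node {
  String prefix = "";
  List<Integer> indexes = new ArrayList<>();  // keys that end exactly at this node
  List<Node> children = new ArrayList<>();
}

public class MiniPrefixTree {
  // keys must be sorted, as the block index keys are.
  static Node build(List<String> keys, int start, int end, int depth) {
    Node node = new Node();
    int common = commonPrefixLen(keys, start, end, depth);
    node.prefix = keys.get(start).substring(depth, depth + common);
    int next = start;
    while (next <= end && keys.get(next).length() == depth + common) {
      node.indexes.add(next);                 // key equals the accumulated prefix
      next++;
    }
    while (next <= end) {                     // group the rest by their next char
      char branch = keys.get(next).charAt(depth + common);
      int groupEnd = next;
      while (groupEnd < end && keys.get(groupEnd + 1).charAt(depth + common) == branch) {
        groupEnd++;
      }
      node.children.add(build(keys, next, groupEnd, depth + common));
      next = groupEnd + 1;
    }
    return node;
  }

  static int commonPrefixLen(List<String> keys, int start, int end, int depth) {
    int len = keys.get(start).length() - depth;
    for (int i = start + 1; i <= end; i++) {
      String a = keys.get(start), b = keys.get(i);
      int j = 0;
      while (depth + j < a.length() && depth + j < b.length()
        && a.charAt(depth + j) == b.charAt(depth + j)) {
        j++;
      }
      len = Math.min(len, j);
    }
    return len;
  }

  public static void main(String[] args) {
    Node root = build(List.of("row-1", "row-12", "row-2"), 0, 2, 0);
    System.out.println(root.prefix + " children=" + root.children.size());  // "row-" children=2
  }
}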
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate max common prefix
|
||||||
|
* @return the max common prefix num bytes
|
||||||
|
*/
|
||||||
|
static int maxCommonPrefix(List<KeyValue.KeyOnlyKeyValue> rowKeys, int start, int end,
|
||||||
|
int startPos) {
|
||||||
|
// only one entry.
|
||||||
|
if (start == end) {
|
||||||
|
return rowKeys.get(start).getRowLength() - startPos;
|
||||||
|
}
|
||||||
|
int common = 0;
|
||||||
|
KeyValue.KeyOnlyKeyValue startRowKey = rowKeys.get(start);
|
||||||
|
for (int round = 0; round <= startRowKey.getRowLength() - startPos - 1; round++) {
|
||||||
|
boolean same = true;
|
||||||
|
for (int i = start + 1; i <= end; i++) {
|
||||||
|
KeyValue.KeyOnlyKeyValue rowKey = rowKeys.get(i);
|
||||||
|
if (startPos + common > rowKey.getRowLength() - 1) {
|
||||||
|
same = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (
|
||||||
|
startRowKey.getRowArray()[startRowKey.getRowOffset() + startPos + common]
|
||||||
|
!= rowKey.getRowArray()[rowKey.getRowOffset() + startPos + common]
|
||||||
|
) {
|
||||||
|
same = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (same) {
|
||||||
|
common++;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return common;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* No common prefix split it.
|
||||||
|
*/
|
||||||
|
static void constructAndSplitChild(TokenizerNode node, List<KeyValue.KeyOnlyKeyValue> rowKeys,
|
||||||
|
int start, int end, int startPos) {
|
||||||
|
int middle = start;
|
||||||
|
KeyValue.KeyOnlyKeyValue startRowKey = rowKeys.get(start);
|
||||||
|
for (int i = start + 1; i <= end; i++) {
|
||||||
|
if (startPos > rowKeys.get(i).getRowLength() - 1) {
|
||||||
|
middle = i - 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
KeyValue.KeyOnlyKeyValue rowKey = rowKeys.get(i);
|
||||||
|
if (
|
||||||
|
startRowKey.getRowArray()[startRowKey.getRowOffset() + startPos]
|
||||||
|
!= rowKey.getRowArray()[rowKey.getRowOffset() + startPos]
|
||||||
|
) {
|
||||||
|
middle = i - 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (i == end) {
|
||||||
|
middle = end;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
constructCommonNodeAndChild(node, rowKeys, start, middle, startPos);
|
||||||
|
if (middle + 1 <= end) {
|
||||||
|
// right
|
||||||
|
constructCommonNodeAndChild(node, rowKeys, middle + 1, end, startPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get max common prefix as node and build children.
|
||||||
|
*/
|
||||||
|
static TokenizerNode constructCommonNodeAndChild(TokenizerNode node,
|
||||||
|
List<KeyValue.KeyOnlyKeyValue> rowKeys, int start, int end, int startPos) {
|
||||||
|
int common = maxCommonPrefix(rowKeys, start, end, startPos);
|
||||||
|
if (common > 0) {
|
||||||
|
TokenizerNode child = new TokenizerNode();
|
||||||
|
child.parent = node;
|
||||||
|
node.children.add(child);
|
||||||
|
byte[] commonB = Bytes.copy(rowKeys.get(start).getRowArray(),
|
||||||
|
rowKeys.get(start).getRowOffset() + startPos, common);
|
||||||
|
child.nodeData = commonB;
|
||||||
|
int newStart = start;
|
||||||
|
for (int i = start; i <= end; i++) {
|
||||||
|
if (rowKeys.get(i).getRowLength() == (startPos + common)) {
|
||||||
|
child.numOccurrences++;
|
||||||
|
if (child.index == null) {
|
||||||
|
child.index = new ArrayList<>(1);
|
||||||
|
}
|
||||||
|
child.index.add(i);
|
||||||
|
if (child.keys == null) {
|
||||||
|
child.keys = new ArrayList<>(1);
|
||||||
|
}
|
||||||
|
child.keys.add(rowKeys.get(i));
|
||||||
|
newStart = i + 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (start != end && newStart <= end) {
|
||||||
|
if (newStart == start) {
|
||||||
|
// no common prefix.
|
||||||
|
constructAndSplitChild(child, rowKeys, newStart, end, startPos + common);
|
||||||
|
} else {
|
||||||
|
// can have common prefix.
|
||||||
|
constructCommonNodeAndChild(child, rowKeys, newStart, end, startPos + common);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// no common prefix, split
|
||||||
|
constructAndSplitChild(node, rowKeys, start, end, startPos);
|
||||||
|
}
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void getNodeMetaInfo(TokenizerNode node, TokenizerNodeMeta meta) {
|
||||||
|
if (node.nodeData.length > meta.maxNodeDataLength) {
|
||||||
|
meta.maxNodeDataLength = node.nodeData.length;
|
||||||
|
}
|
||||||
|
meta.totalNodeDataLength += node.nodeData.length;
|
||||||
|
meta.countNodeDataNum++;
|
||||||
|
|
||||||
|
if (node.children.size() > meta.maxFanOut) {
|
||||||
|
meta.maxFanOut = node.children.size();
|
||||||
|
}
|
||||||
|
meta.totalChildNum += node.children.size();
|
||||||
|
meta.countChildNum++;
|
||||||
|
|
||||||
|
if (node.numOccurrences > meta.maxNumOccurrences) {
|
||||||
|
meta.maxNumOccurrences = node.numOccurrences;
|
||||||
|
}
|
||||||
|
meta.totalIndexNum += node.numOccurrences;
|
||||||
|
meta.countNumOccurrences++;
|
||||||
|
|
||||||
|
if (node.index != null) {
|
||||||
|
for (Integer entry : node.index) {
|
||||||
|
if (entry > meta.maxIndex) {
|
||||||
|
meta.maxIndex = entry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (node.keys != null) {
|
||||||
|
for (KeyValue.KeyOnlyKeyValue keyValue : node.keys) {
|
||||||
|
int qualifierLength = keyValue.getQualifierLength();
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
meta.countQualifierNum++;
|
||||||
|
if (qualifierLength > meta.maxQualifierLength) {
|
||||||
|
meta.maxQualifierLength = qualifierLength;
|
||||||
|
}
|
||||||
|
meta.totalQualifierLength += qualifierLength;
|
||||||
|
node.qualifierNum++;
|
||||||
|
node.qualifierLength += qualifierLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (node.children.isEmpty()) {
|
||||||
|
meta.leafNodes.add(node);
|
||||||
|
} else {
|
||||||
|
meta.nonLeafNodes.add(node);
|
||||||
|
}
|
||||||
|
for (TokenizerNode child : node.children) {
|
||||||
|
getNodeMetaInfo(child, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void serializePrefixTree(TokenizerNode node, PrefixTreeDataWidth dataWidth,
|
||||||
|
ByteArrayOutputStream os) throws IOException {
|
||||||
|
TokenizerNodeMeta meta = new TokenizerNodeMeta();
|
||||||
|
getNodeMetaInfo(node, meta);
|
||||||
|
|
||||||
|
dataWidth.nodeDataLengthWidth = UFIntTool.numBytes(meta.maxNodeDataLength);
|
||||||
|
dataWidth.fanOutWidth = UFIntTool.numBytes(meta.maxFanOut);
|
||||||
|
dataWidth.occurrencesWidth = UFIntTool.numBytes(meta.maxNumOccurrences * 2 + 1);
|
||||||
|
dataWidth.indexWidth = UFIntTool.numBytes(meta.maxIndex);
|
||||||
|
dataWidth.qualifierLengthWidth = UFIntTool.numBytes(meta.maxQualifierLength);
|
||||||
|
|
||||||
|
calculateSerializeInfo(meta, dataWidth);
|
||||||
|
|
||||||
|
serialize(meta, os, dataWidth);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void calculateSerializeInfo(TokenizerNodeMeta meta, PrefixTreeDataWidth dataWidth) {
|
||||||
|
int totalLength = 0;
|
||||||
|
int nextNodeOffsetNum = 0;
|
||||||
|
for (TokenizerNode leafNode : meta.leafNodes) {
|
||||||
|
totalLength += dataWidth.nodeDataLengthWidth;
|
||||||
|
totalLength += leafNode.nodeData.length;
|
||||||
|
if (leafNode.parent != null) {
|
||||||
|
// exclude child's first bytes, child's first byte stored in parent node.
|
||||||
|
totalLength = totalLength - 1;
|
||||||
|
}
|
||||||
|
// fan Out
|
||||||
|
totalLength += dataWidth.fanOutWidth;
|
||||||
|
// fan Byte
|
||||||
|
totalLength += leafNode.children.size();
|
||||||
|
nextNodeOffsetNum += leafNode.children.size();
|
||||||
|
|
||||||
|
totalLength += dataWidth.occurrencesWidth;
|
||||||
|
totalLength += (leafNode.numOccurrences * dataWidth.indexWidth);
|
||||||
|
|
||||||
|
if (leafNode.qualifierNum > 0) {
|
||||||
|
// qualifier
|
||||||
|
for (int i = 0; i < leafNode.numOccurrences; i++) {
|
||||||
|
int qualifierLength = leafNode.keys.get(i).getQualifierLength();
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
totalLength += dataWidth.qualifierLengthWidth;
|
||||||
|
totalLength += qualifierLength;
|
||||||
|
totalLength += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG);
|
||||||
|
} else {
|
||||||
|
totalLength += dataWidth.qualifierLengthWidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (TokenizerNode nonLeafNode : meta.nonLeafNodes) {
|
||||||
|
totalLength += dataWidth.nodeDataLengthWidth;
|
||||||
|
totalLength += nonLeafNode.nodeData.length;
|
||||||
|
if (nonLeafNode.parent != null) {
|
||||||
|
// exclude child's first bytes, child's first byte stored in parent node.
|
||||||
|
totalLength = totalLength - 1;
|
||||||
|
}
|
||||||
|
// fan Out
|
||||||
|
totalLength += dataWidth.fanOutWidth;
|
||||||
|
// fan Byte
|
||||||
|
totalLength += nonLeafNode.children.size();
|
||||||
|
nextNodeOffsetNum += nonLeafNode.children.size();
|
||||||
|
|
||||||
|
totalLength += dataWidth.occurrencesWidth;
|
||||||
|
totalLength += (nonLeafNode.numOccurrences * dataWidth.indexWidth);
|
||||||
|
|
||||||
|
if (nonLeafNode.qualifierNum > 0) {
|
||||||
|
// qualifier
|
||||||
|
for (int i = 0; i < nonLeafNode.numOccurrences; i++) {
|
||||||
|
int qualifierLength = nonLeafNode.keys.get(i).getQualifierLength();
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
totalLength += dataWidth.qualifierLengthWidth;
|
||||||
|
totalLength += qualifierLength;
|
||||||
|
totalLength += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG);
|
||||||
|
} else {
|
||||||
|
totalLength += dataWidth.qualifierLengthWidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int totalBytesWithoutOffsets = totalLength;
|
||||||
|
// figure out how wide our offset FInts are
|
||||||
|
int offsetWidth = 0;
|
||||||
|
while (true) {
|
||||||
|
++offsetWidth;
|
||||||
|
int numBytesFinder = totalBytesWithoutOffsets + (offsetWidth * nextNodeOffsetNum);
|
||||||
|
if (numBytesFinder < UFIntTool.maxValueForNumBytes(offsetWidth)) {
|
||||||
|
totalLength = numBytesFinder;
|
||||||
|
break;
|
||||||
|
} // it fits
|
||||||
|
}
|
||||||
|
dataWidth.childNodeOffsetWidth = offsetWidth;
|
||||||
|
|
||||||
|
// track the starting position of each node in final output
|
||||||
|
int negativeIndex = 0;
|
||||||
|
for (int i = meta.leafNodes.size() - 1; i >= 0; i--) {
|
||||||
|
TokenizerNode leaf = meta.leafNodes.get(i);
|
||||||
|
int leafNodeWidth = dataWidth.nodeDataLengthWidth + leaf.nodeData.length;
|
||||||
|
if (leaf.parent != null) {
|
||||||
|
// leaves store all but their first token byte
|
||||||
|
leafNodeWidth = leafNodeWidth - 1;
|
||||||
|
}
|
||||||
|
// leaf node, no children.
|
||||||
|
leafNodeWidth += dataWidth.fanOutWidth;
|
||||||
|
// no fanOut bytes and nextNodeOffset
|
||||||
|
// index
|
||||||
|
leafNodeWidth += dataWidth.occurrencesWidth + leaf.numOccurrences * dataWidth.indexWidth;
|
||||||
|
if (leaf.qualifierNum > 0) {
|
||||||
|
// qualifier
|
||||||
|
for (int j = 0; j < leaf.numOccurrences; j++) {
|
||||||
|
int qualifierLength = leaf.keys.get(j).getQualifierLength();
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
leafNodeWidth += dataWidth.qualifierLengthWidth;
|
||||||
|
leafNodeWidth += qualifierLength;
|
||||||
|
leafNodeWidth += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG);
|
||||||
|
} else {
|
||||||
|
leafNodeWidth += dataWidth.qualifierLengthWidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
negativeIndex += leafNodeWidth;
|
||||||
|
leaf.nodeWidth = leafNodeWidth;
|
||||||
|
leaf.negativeIndex = negativeIndex;
|
||||||
|
}
|
||||||
|
for (int i = meta.nonLeafNodes.size() - 1; i >= 0; i--) {
|
||||||
|
TokenizerNode nonLeaf = meta.nonLeafNodes.get(i);
|
||||||
|
int leafNodeWidth = dataWidth.nodeDataLengthWidth + nonLeaf.nodeData.length;
|
||||||
|
if (nonLeaf.parent != null) {
|
||||||
|
leafNodeWidth = leafNodeWidth - 1;
|
||||||
|
}
|
||||||
|
// fanOut, children's first byte, and children's offset.
|
||||||
|
leafNodeWidth += dataWidth.fanOutWidth + nonLeaf.children.size()
|
||||||
|
+ nonLeaf.children.size() * dataWidth.childNodeOffsetWidth;
|
||||||
|
// index
|
||||||
|
leafNodeWidth += dataWidth.occurrencesWidth + nonLeaf.numOccurrences * dataWidth.indexWidth;
|
||||||
|
if (nonLeaf.qualifierNum > 0) {
|
||||||
|
// qualifier
|
||||||
|
for (int j = 0; j < nonLeaf.numOccurrences; j++) {
|
||||||
|
int qualifierLength = nonLeaf.keys.get(j).getQualifierLength();
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
leafNodeWidth += dataWidth.qualifierLengthWidth;
|
||||||
|
leafNodeWidth += qualifierLength;
|
||||||
|
leafNodeWidth += (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG);
|
||||||
|
} else {
|
||||||
|
leafNodeWidth += dataWidth.qualifierLengthWidth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
negativeIndex += leafNodeWidth;
|
||||||
|
nonLeaf.nodeWidth = leafNodeWidth;
|
||||||
|
nonLeaf.negativeIndex = negativeIndex;
|
||||||
|
}
|
||||||
|
}
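The while (true) loop in calculateSerializeInfo resolves a circular dependency: child-offset fields are part of the serialized size, yet their width depends on that size, so it tries width 1, 2, 3, ... until the total, including the offsets at that width, still fits within the width's maximum value. A standalone sketch with made-up numbers; maxValueForNumBytes here is a local stand-in for the UFIntTool helper.

public class OffsetWidthSearch {
  static long maxValueForNumBytes(int numBytes) {
    return (1L << (8 * numBytes)) - 1;        // 1 byte -> 255, 2 bytes -> 65535, ...
  }

  public static void main(String[] args) {
    int totalBytesWithoutOffsets = 60_000;    // tree bytes before any child offsets
    int nextNodeOffsetNum = 3_000;            // number of child-offset fields to write
    int offsetWidth = 0;
    int totalLength;
    while (true) {
      ++offsetWidth;
      totalLength = totalBytesWithoutOffsets + offsetWidth * nextNodeOffsetNum;
      if (totalLength < maxValueForNumBytes(offsetWidth)) {
        break;  // width 3 here: at width 2 the 66,000-byte total no longer fits in 65,535
      }
    }
    System.out.println("offsetWidth=" + offsetWidth + " totalLength=" + totalLength);
  }
}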
|
||||||
|
|
||||||
|
static void serialize(TokenizerNodeMeta meta, ByteArrayOutputStream os,
|
||||||
|
PrefixTreeDataWidth dataWidth) throws IOException {
|
||||||
|
for (int i = 0; i < meta.nonLeafNodes.size(); i++) {
|
||||||
|
serialize(meta.nonLeafNodes.get(i), os, dataWidth);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < meta.leafNodes.size(); i++) {
|
||||||
|
serialize(meta.leafNodes.get(i), os, dataWidth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void serialize(TokenizerNode node, ByteArrayOutputStream os, PrefixTreeDataWidth dataWidth)
|
||||||
|
throws IOException {
|
||||||
|
if (node.parent != null) {
|
||||||
|
// The first byte do not need to store, it store in the parent.
|
||||||
|
if (node.nodeData.length - 1 > 0) {
|
||||||
|
UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, node.nodeData.length - 1, os);
|
||||||
|
os.write(node.nodeData, 1, node.nodeData.length - 1);
|
||||||
|
} else {
|
||||||
|
UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, 0, os);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
UFIntTool.writeBytes(dataWidth.nodeDataLengthWidth, node.nodeData.length, os);
|
||||||
|
os.write(node.nodeData, 0, node.nodeData.length);
|
||||||
|
}
|
||||||
|
UFIntTool.writeBytes(dataWidth.fanOutWidth, node.children.size(), os);
|
||||||
|
for (TokenizerNode child : node.children) {
|
||||||
|
// child's first byte.
|
||||||
|
os.write(child.nodeData[0]);
|
||||||
|
}
|
||||||
|
for (TokenizerNode child : node.children) {
|
||||||
|
UFIntTool.writeBytes(dataWidth.childNodeOffsetWidth, node.negativeIndex - child.negativeIndex,
|
||||||
|
os);
|
||||||
|
}
|
||||||
|
int occurrences = node.numOccurrences << 1;
|
||||||
|
if (node.qualifierNum > 0) {
|
||||||
|
occurrences = occurrences | 0x01;
|
||||||
|
}
|
||||||
|
UFIntTool.writeBytes(dataWidth.occurrencesWidth, occurrences, os);
|
||||||
|
for (int i = 0; i < node.numOccurrences; i++) {
|
||||||
|
UFIntTool.writeBytes(dataWidth.indexWidth, node.index.get(i), os);
|
||||||
|
}
|
||||||
|
if (node.qualifierNum > 0) {
|
||||||
|
for (int i = 0; i < node.numOccurrences; i++) {
|
||||||
|
KeyValue.KeyOnlyKeyValue keyOnlyKeyValue = node.keys.get(i);
|
||||||
|
if (keyOnlyKeyValue.getQualifierLength() > 0) {
|
||||||
|
UFIntTool.writeBytes(dataWidth.qualifierLengthWidth, keyOnlyKeyValue.getQualifierLength(),
|
||||||
|
os);
|
||||||
|
os.write(keyOnlyKeyValue.getQualifierArray(), keyOnlyKeyValue.getQualifierOffset(),
|
||||||
|
keyOnlyKeyValue.getQualifierLength());
|
||||||
|
// write timestamp
|
||||||
|
StreamUtils.writeLong(os, keyOnlyKeyValue.getTimestamp());
|
||||||
|
// write the type
|
||||||
|
os.write(keyOnlyKeyValue.getTypeByte());
|
||||||
|
} else {
|
||||||
|
UFIntTool.writeBytes(dataWidth.qualifierLengthWidth, 0, os);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
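In serialize above, the occurrence count and the has-qualifier flag share a single fixed-width field: the count is shifted left one bit and the low bit carries the flag, which is also why occurrencesWidth is sized from maxNumOccurrences * 2 + 1. The readers (search, getFirstLeafNode, getLastLeafNode) unpack it with >> 1 and & 0x01. A tiny sketch of the packing, with made-up values:

public class OccurrenceFlagPacking {
  public static void main(String[] args) {
    int numOccurrences = 3;
    boolean hasQualifier = true;

    // pack: count in the high bits, qualifier flag in bit 0, as serialize does
    int packed = (numOccurrences << 1) | (hasQualifier ? 0x01 : 0x00);

    // unpack: the same way search reads it back
    int count = packed >> 1;        // 3
    int flag = packed & 0x01;       // 1
    System.out.println("packed=" + packed + " count=" + count + " hasQualifier=" + flag);
  }
}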
|
||||||
|
|
||||||
|
public static void serialize(DataOutput out, PrefixTreeDataWidth dataWidth) throws IOException {
|
||||||
|
out.writeByte(dataWidth.nodeDataLengthWidth);
|
||||||
|
out.writeByte(dataWidth.fanOutWidth);
|
||||||
|
out.writeByte(dataWidth.occurrencesWidth);
|
||||||
|
out.writeByte(dataWidth.indexWidth);
|
||||||
|
out.writeByte(dataWidth.childNodeOffsetWidth);
|
||||||
|
out.writeByte(dataWidth.qualifierLengthWidth);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void deserialize(ByteBuff data, PrefixTreeDataWidth dataWidth) {
|
||||||
|
dataWidth.nodeDataLengthWidth = data.get();
|
||||||
|
dataWidth.fanOutWidth = data.get();
|
||||||
|
dataWidth.occurrencesWidth = data.get();
|
||||||
|
dataWidth.indexWidth = data.get();
|
||||||
|
dataWidth.childNodeOffsetWidth = data.get();
|
||||||
|
dataWidth.qualifierLengthWidth = data.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the node index, that search key >= index and search key < (index + 1)
|
||||||
|
*/
|
||||||
|
public static int search(ByteBuffer data, int bbStartPos, Cell skey, int keyStartPos,
|
||||||
|
PrefixTreeDataWidth meta) {
|
||||||
|
int nodeDataLength = getNodeDataLength(data, bbStartPos, meta);
|
||||||
|
int cs = 0;
|
||||||
|
if (nodeDataLength > 0) {
|
||||||
|
cs = compareTo(skey, keyStartPos, Math.min(skey.getRowLength() - keyStartPos, nodeDataLength),
|
||||||
|
data, bbStartPos + meta.nodeDataLengthWidth, nodeDataLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
int pos = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength;
|
||||||
|
int fanOut = getNodeFanOut(data, pos, meta);
|
||||||
|
pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth;
|
||||||
|
int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta);
|
||||||
|
int numOccurrences = tmpNumOccurrences >> 1;
|
||||||
|
int hasQualifier = tmpNumOccurrences & 0x01;
|
||||||
|
|
||||||
|
pos += meta.occurrencesWidth;
|
||||||
|
|
||||||
|
if (cs == 0) {
|
||||||
|
// continue search
|
||||||
|
if (fanOut == 0) {
|
||||||
|
// no children, should be numOccurrences > 0
|
||||||
|
if (skey.getRowLength() == keyStartPos + nodeDataLength) {
|
||||||
|
if (hasQualifier == 0) {
|
||||||
|
// == current node
|
||||||
|
return getNodeIndex(data, pos, 0, meta);
|
||||||
|
} else {
|
||||||
|
// compare qualifier
|
||||||
|
int qualifierPos = pos + numOccurrences * meta.indexWidth;
|
||||||
|
if (skey.getQualifierLength() == 0) {
|
||||||
|
int firstQualifierLength = getQualifierLength(data, qualifierPos, meta);
|
||||||
|
if (firstQualifierLength == 0) {
|
||||||
|
return getNodeIndex(data, pos, 0, meta);
|
||||||
|
} else {
|
||||||
|
// search key has no qualifier, but first index node has.
|
||||||
|
return getNodeIndex(data, pos, 0, meta) - 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < numOccurrences; i++) {
|
||||||
|
int qualifierLength = getQualifierLength(data, qualifierPos, meta);
|
||||||
|
qualifierPos += meta.qualifierLengthWidth;
|
||||||
|
int qualifierCR = compareQualifierTo(skey, data, qualifierPos, qualifierLength);
|
||||||
|
if (qualifierCR == 0) {
|
||||||
|
// the same qualifier.
|
||||||
|
int timestampPos = qualifierPos + qualifierLength;
|
||||||
|
long timestamp = ByteBufferUtils.toLong(data, timestampPos);
|
||||||
|
byte byteType = ByteBufferUtils.toByte(data, timestampPos + Bytes.SIZEOF_LONG);
|
||||||
|
// higher numbers sort before those of lesser numbers.
|
||||||
|
if (skey.getTimestamp() > timestamp) {
|
||||||
|
return getNodeIndex(data, pos, i, meta) - 1;
|
||||||
|
} else if (skey.getTimestamp() < timestamp) {
|
||||||
|
return getNodeIndex(data, pos, i, meta);
|
||||||
|
}
|
||||||
|
// higher numbers sort before those of lesser numbers.
|
||||||
|
if ((0xff & skey.getTypeByte() - (0xff & byteType)) > 0) {
|
||||||
|
return getNodeIndex(data, pos, i, meta) - 1;
|
||||||
|
} else {
|
||||||
|
return getNodeIndex(data, pos, i, meta);
|
||||||
|
}
|
||||||
|
} else if (qualifierCR < 0) {
|
||||||
|
return getNodeIndex(data, pos, i, meta) - 1;
|
||||||
|
}
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
qualifierPos += (qualifierLength + Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return getNodeIndex(data, pos, numOccurrences - 1, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// > current node.
|
||||||
|
return getNodeIndex(data, pos, numOccurrences - 1, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (skey.getRowLength() > keyStartPos + nodeDataLength) {
|
||||||
|
int fanOffset = bbStartPos + meta.nodeDataLengthWidth + nodeDataLength + meta.fanOutWidth;
|
||||||
|
byte searchForByte = getCellByte(skey, keyStartPos + nodeDataLength);
|
||||||
|
|
||||||
|
int fanIndexInBlock =
|
||||||
|
unsignedBinarySearch(data, fanOffset, fanOffset + fanOut, searchForByte);
|
||||||
|
int nodeOffsetStartPos = fanOffset + fanOut;
|
||||||
|
if (fanIndexInBlock >= 0) {
|
||||||
|
// found it, but need to adjust for position of fan in overall block
|
||||||
|
int fanIndex = fanIndexInBlock - fanOffset;
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, fanIndex, meta);
|
||||||
|
return search(data, bbStartPos + nodeOffset, skey, keyStartPos + nodeDataLength + 1,
|
||||||
|
meta);
|
||||||
|
} else {
|
||||||
|
int fanIndex = fanIndexInBlock + fanOffset;// didn't find it, so compensate in reverse
|
||||||
|
int insertionPoint = (-fanIndex - 1) - 1;
|
||||||
|
if (insertionPoint < 0) {
|
||||||
|
// < first children
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, 0, meta);
|
||||||
|
return getFirstLeafNode(data, bbStartPos + nodeOffset, meta) - 1;
|
||||||
|
} else {
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, insertionPoint, meta);
|
||||||
|
return getLastLeafNode(data, bbStartPos + nodeOffset, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// skey.length == keyStartPos + nodeDataLength
|
||||||
|
if (numOccurrences > 0) {
|
||||||
|
// == current node and current node is a leaf node.
|
||||||
|
if (hasQualifier == 0) {
|
||||||
|
// == current node
|
||||||
|
return getNodeIndex(data, pos, 0, meta);
|
||||||
|
} else {
|
||||||
|
// need compare qualifier
|
||||||
|
int qualifierPos = pos + numOccurrences * meta.indexWidth;
|
||||||
|
if (skey.getQualifierLength() == 0) {
|
||||||
|
int firstQualifierLength = getQualifierLength(data, qualifierPos, meta);
|
||||||
|
if (firstQualifierLength == 0) {
|
||||||
|
return getNodeIndex(data, pos, 0, meta);
|
||||||
|
} else {
|
||||||
|
// search key has no qualifier, but first index node has.
|
||||||
|
return getNodeIndex(data, pos, 0, meta) - 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < numOccurrences; i++) {
|
||||||
|
int qualifierLength = getQualifierLength(data, qualifierPos, meta);
|
||||||
|
qualifierPos += meta.qualifierLengthWidth;
|
||||||
|
int qualifierCR = compareQualifierTo(skey, data, qualifierPos, qualifierLength);
|
||||||
|
if (qualifierCR == 0) {
|
||||||
|
// the same qualifier.
|
||||||
|
int timestampPos = qualifierPos + qualifierLength;
|
||||||
|
long timestamp = ByteBufferUtils.toLong(data, timestampPos);
|
||||||
|
byte byteType = ByteBufferUtils.toByte(data, timestampPos + Bytes.SIZEOF_LONG);
|
||||||
|
// higher numbers sort before those of lesser numbers.
|
||||||
|
if (skey.getTimestamp() > timestamp) {
|
||||||
|
return getNodeIndex(data, pos, i, meta) - 1;
|
||||||
|
} else if (skey.getTimestamp() < timestamp) {
|
||||||
|
return getNodeIndex(data, pos, i, meta);
|
||||||
|
}
|
||||||
|
// higher numbers sort before those of lesser numbers.
|
||||||
|
if ((0xff & skey.getTypeByte() - (0xff & byteType)) > 0) {
|
||||||
|
return getNodeIndex(data, pos, i, meta) - 1;
|
||||||
|
} else {
|
||||||
|
return getNodeIndex(data, pos, i, meta);
|
||||||
|
}
|
||||||
|
} else if (qualifierCR < 0) {
|
||||||
|
return getNodeIndex(data, pos, i, meta) - 1;
|
||||||
|
}
|
||||||
|
if (qualifierLength > 0) {
|
||||||
|
qualifierPos += (qualifierLength + Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return getNodeIndex(data, pos, numOccurrences - 1, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// need -1, == current node and current node not a leaf node.
|
||||||
|
return getFirstLeafNode(data, bbStartPos, meta) - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (cs > 0) {
|
||||||
|
// search key bigger than (>) current node, get biggest
|
||||||
|
if (fanOut == 0) {
|
||||||
|
if (numOccurrences > 0) {
|
||||||
|
return getNodeIndex(data, pos, numOccurrences - 1, meta);
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"numOccurrences = " + numOccurrences + ", fanOut = " + fanOut + " not expected.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return getLastLeafNode(data, bbStartPos, meta);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// search key small than (<) current node, get smallest.
|
||||||
|
if (numOccurrences > 0) {
|
||||||
|
return getNodeIndex(data, pos, 0, meta) - 1;
|
||||||
|
} else {
|
||||||
|
return getFirstLeafNode(data, bbStartPos, meta) - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
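The contract of search is the one its doc comment states: return the entry index i with key(i) <= search key < key(i + 1), that is, the index of the block that could contain the key, with -1 meaning the key sorts before every entry. The analogy below shows the same contract with a plain sorted array standing in for the prefix tree; it is not the tree traversal itself, and the keys are made up.

import java.util.Arrays;

public class ContainingBlockDemo {
  // Find the last entry whose first key is <= the search key.
  static int containingBlock(String[] firstKeys, String searchKey) {
    int pos = Arrays.binarySearch(firstKeys, searchKey);
    if (pos >= 0) {
      return pos;               // exact match on a block's first key
    }
    return (-pos - 1) - 1;      // insertion point - 1; -1 means before the first block
  }

  public static void main(String[] args) {
    String[] firstKeys = { "bbb", "ddd", "fff" };           // first row key of each block
    System.out.println(containingBlock(firstKeys, "ccc"));  // 0
    System.out.println(containingBlock(firstKeys, "ddd"));  // 1
    System.out.println(containingBlock(firstKeys, "aaa"));  // -1 (before all blocks)
  }
}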
|
||||||
|
|
||||||
|
static int compareTo(Cell skey, int o1, int l1, ByteBuffer data, int o2, int l2) {
|
||||||
|
if (skey instanceof ByteBufferExtendedCell) {
|
||||||
|
ByteBufferExtendedCell byteBufferExtendedCell = ((ByteBufferExtendedCell) skey);
|
||||||
|
return ByteBufferUtils.compareTo(byteBufferExtendedCell.getRowByteBuffer(),
|
||||||
|
byteBufferExtendedCell.getRowPosition() + o1, l1, data, o2, l2);
|
||||||
|
}
|
||||||
|
return ByteBufferUtils.compareTo(skey.getRowArray(), skey.getRowOffset() + o1, l1, data, o2,
|
||||||
|
l2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int compareQualifierTo(Cell skey, ByteBuffer data, int o2, int l2) {
|
||||||
|
if (skey instanceof ByteBufferExtendedCell) {
|
||||||
|
ByteBufferExtendedCell byteBufferExtendedCell = ((ByteBufferExtendedCell) skey);
|
||||||
|
return ByteBufferUtils.compareTo(byteBufferExtendedCell.getQualifierByteBuffer(),
|
||||||
|
byteBufferExtendedCell.getQualifierPosition(), byteBufferExtendedCell.getQualifierLength(),
|
||||||
|
data, o2, l2);
|
||||||
|
}
|
||||||
|
return ByteBufferUtils.compareTo(skey.getQualifierArray(), skey.getQualifierOffset(),
|
||||||
|
skey.getQualifierLength(), data, o2, l2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte getCellByte(Cell skey, int position) {
|
||||||
|
if (skey instanceof ByteBufferExtendedCell) {
|
||||||
|
ByteBufferExtendedCell byteBufferExtendedCell = ((ByteBufferExtendedCell) skey);
|
||||||
|
return byteBufferExtendedCell.getRowByteBuffer()
|
||||||
|
.get(byteBufferExtendedCell.getRowPosition() + position);
|
||||||
|
}
|
||||||
|
return skey.getRowArray()[skey.getRowOffset() + position];
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getNodeDataLength(ByteBuffer data, int offset, PrefixTreeDataWidth meta) {
|
||||||
|
int dataLength = (int) UFIntTool.fromBytes(data, offset, meta.nodeDataLengthWidth);
|
||||||
|
return dataLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getNodeFanOut(ByteBuffer data, int offset, PrefixTreeDataWidth meta) {
|
||||||
|
int fanOut = (int) UFIntTool.fromBytes(data, offset, meta.fanOutWidth);
|
||||||
|
return fanOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getNodeNumOccurrences(ByteBuffer data, int offset, PrefixTreeDataWidth meta) {
|
||||||
|
int numOccurrences = (int) UFIntTool.fromBytes(data, offset, meta.occurrencesWidth);
|
||||||
|
return numOccurrences;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getNodeOffset(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) {
|
||||||
|
int nodeOffset = (int) UFIntTool.fromBytes(data, offset + (index * meta.childNodeOffsetWidth),
|
||||||
|
meta.childNodeOffsetWidth);
|
||||||
|
return nodeOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getNodeIndex(ByteBuffer data, int offset, int index, PrefixTreeDataWidth meta) {
|
||||||
|
int nodeIndex =
|
||||||
|
(int) UFIntTool.fromBytes(data, offset + (index * meta.indexWidth), meta.indexWidth);
|
||||||
|
return nodeIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getQualifierLength(ByteBuffer data, int offset, PrefixTreeDataWidth meta) {
|
||||||
|
int nodeIndex = (int) UFIntTool.fromBytes(data, offset, meta.qualifierLengthWidth);
|
||||||
|
return nodeIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the node's first leaf node
|
||||||
|
*/
|
||||||
|
static int getFirstLeafNode(ByteBuffer data, int nodeStartPos, PrefixTreeDataWidth meta) {
|
||||||
|
int dataLength = getNodeDataLength(data, nodeStartPos, meta);
|
||||||
|
int pos = nodeStartPos + meta.nodeDataLengthWidth + dataLength;
|
||||||
|
int fanOut = getNodeFanOut(data, pos, meta);
|
||||||
|
pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth;
|
||||||
|
int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta);
|
||||||
|
int numOccurrences = tmpNumOccurrences >> 1;
|
||||||
|
pos += meta.occurrencesWidth;
|
||||||
|
if (numOccurrences > 0 || fanOut == 0) {
|
||||||
|
// return current node.
|
||||||
|
return getNodeIndex(data, pos, 0, meta);
|
||||||
|
} else {
|
||||||
|
int nodeOffsetStartPos =
|
||||||
|
nodeStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut;
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, 0, meta);
|
||||||
|
return getFirstLeafNode(data, nodeStartPos + nodeOffset, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the node's last leaf node
|
||||||
|
*/
|
||||||
|
static int getLastLeafNode(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta) {
|
||||||
|
int dataLength = getNodeDataLength(data, bbStartPos, meta);
|
||||||
|
int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength;
|
||||||
|
int fanOut = getNodeFanOut(data, pos, meta);
|
||||||
|
pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth;
|
||||||
|
int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta);
|
||||||
|
int numOccurrences = tmpNumOccurrences >> 1;
|
||||||
|
pos += meta.occurrencesWidth;
|
||||||
|
if (fanOut == 0) {
|
||||||
|
return getNodeIndex(data, pos, numOccurrences - 1, meta);
|
||||||
|
} else {
|
||||||
|
int nodeOffsetStartPos =
|
||||||
|
bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut;
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, fanOut - 1, meta);
|
||||||
|
return getLastLeafNode(data, bbStartPos + nodeOffset, meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int unsignedBinarySearch(ByteBuffer a, int fromIndex, int toIndex, byte key) {
|
||||||
|
int unsignedKey = key & 0xff;
|
||||||
|
int low = fromIndex;
|
||||||
|
int high = toIndex - 1;
|
||||||
|
|
||||||
|
while (low <= high) {
|
||||||
|
int mid = low + ((high - low) >> 1);
|
||||||
|
int midVal = a.get(mid) & 0xff;
|
||||||
|
|
||||||
|
if (midVal < unsignedKey) {
|
||||||
|
low = mid + 1;
|
||||||
|
} else if (midVal > unsignedKey) {
|
||||||
|
high = mid - 1;
|
||||||
|
} else {
|
||||||
|
return mid; // key found
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -(low + 1); // key not found.
|
||||||
|
}
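unsignedBinarySearch compares bytes as unsigned values (& 0xff) and, like Arrays.binarySearch, encodes a miss as -(insertionPoint + 1); the caller in search then steps back one child so it descends into the subtree whose first byte is just below the searched byte. A small usage sketch; the method body is copied from above so the example runs without the HBase classes, and the fan-out bytes are hypothetical.

import java.nio.ByteBuffer;

public class FanByteLookupDemo {
  static int unsignedBinarySearch(ByteBuffer a, int fromIndex, int toIndex, byte key) {
    int unsignedKey = key & 0xff;
    int low = fromIndex, high = toIndex - 1;
    while (low <= high) {
      int mid = low + ((high - low) >> 1);
      int midVal = a.get(mid) & 0xff;
      if (midVal < unsignedKey) {
        low = mid + 1;
      } else if (midVal > unsignedKey) {
        high = mid - 1;
      } else {
        return mid;             // key found
      }
    }
    return -(low + 1);          // key not found
  }

  public static void main(String[] args) {
    // Hypothetical fan-out bytes of one node, sorted as unsigned values.
    ByteBuffer fan = ByteBuffer.wrap(new byte[] { 0x10, 0x7f, (byte) 0xc8 });
    System.out.println(unsignedBinarySearch(fan, 0, 3, (byte) 0xc8));  // 2: found, even though 0xc8 is negative as a signed byte
    int miss = unsignedBinarySearch(fan, 0, 3, (byte) 0x80);           // -3: not found
    System.out.println((-miss - 1) - 1);  // 1: child to descend into, the same extra -1 step search() takes
  }
}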
|
||||||
|
|
||||||
|
public static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth dataWidth,
|
||||||
|
int index) {
|
||||||
|
return get(data, bbStartPos, dataWidth, index, new byte[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte[] get(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index,
|
||||||
|
byte[] prefix) {
|
||||||
|
int dataLength = getNodeDataLength(data, bbStartPos, meta);
|
||||||
|
byte[] bdata = new byte[dataLength];
|
||||||
|
ByteBuffer dup = data.duplicate();
|
||||||
|
dup.position(bbStartPos + meta.nodeDataLengthWidth);
|
||||||
|
dup.get(bdata, 0, dataLength);
|
||||||
|
bdata = Bytes.add(prefix, bdata);
|
||||||
|
|
||||||
|
int pos = bbStartPos + meta.nodeDataLengthWidth + dataLength;
|
||||||
|
int fanOut = getNodeFanOut(data, pos, meta);
|
||||||
|
pos += meta.fanOutWidth + fanOut + fanOut * meta.childNodeOffsetWidth;
|
||||||
|
int tmpNumOccurrences = getNodeNumOccurrences(data, pos, meta);
|
||||||
|
int numOccurrences = tmpNumOccurrences >> 1;
|
||||||
|
// int hasQualifier = tmpNumOccurrences &= 0x01;
|
||||||
|
pos += meta.occurrencesWidth;
|
||||||
|
if (numOccurrences > 0) {
|
||||||
|
for (int i = 0; i < numOccurrences; i++) {
|
||||||
|
int currentNodeIndex = getNodeIndex(data, pos, i, meta);
|
||||||
|
if (currentNodeIndex == index) {
|
||||||
|
return bdata;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fanOut == 0) {
|
||||||
|
for (int i = 0; i < numOccurrences; i++) {
|
||||||
|
int currentNodeIndex = getNodeIndex(data, pos, i, meta);
|
||||||
|
if (currentNodeIndex == index) {
|
||||||
|
return bdata;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new IllegalStateException("Unexpected, not find index=" + index + " node's data.");
|
||||||
|
} else {
|
||||||
|
int nodeOffsetStartPos =
|
||||||
|
bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + fanOut;
|
||||||
|
int locateIndex = locateWhichChild(data, bbStartPos, meta, index, fanOut, nodeOffsetStartPos);
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, locateIndex, meta);
|
||||||
|
byte[] childBytes = new byte[1];
|
||||||
|
childBytes[0] = data
|
||||||
|
.get(bbStartPos + meta.nodeDataLengthWidth + dataLength + meta.fanOutWidth + locateIndex);
|
||||||
|
bdata = Bytes.add(bdata, childBytes);
|
||||||
|
return get(data, bbStartPos + nodeOffset, meta, index, bdata);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int locateWhichChild(ByteBuffer data, int bbStartPos, PrefixTreeDataWidth meta, int index,
|
||||||
|
int fanOut, int nodeOffsetStartPos) {
|
||||||
|
for (int i = 0; i < fanOut; i++) {
|
||||||
|
int nodeOffset = getNodeOffset(data, nodeOffsetStartPos, i, meta);
|
||||||
|
int lastLeafNode = getLastLeafNode(data, bbStartPos + nodeOffset, meta);
|
||||||
|
if (lastLeafNode >= index) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new IllegalStateException("Unexpected unable to find index=" + index);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,210 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.PrefixTreeDataWidth;
import org.apache.hadoop.hbase.io.encoding.PrefixTreeUtil.TokenizerNode;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category({ IOTests.class, SmallTests.class })
public class TestPrefixTreeUtilV2 {
  private static final Logger LOG = LoggerFactory.getLogger(TestPrefixTreeUtilV2.class);
  private static byte[] FAM = Bytes.toBytes("cf");

  @Test
  public void testSearchPrefixTree() throws IOException {
    List<KeyValue> rows = new ArrayList<>();

    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201519-wx0t"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201519-wx0zcldi7lnsiyas-N"), FAM,
      Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201520-wx0re"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521-wx05xfbtw2mopyhs-C"), FAM,
      Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521-wx08"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201521-wx0c"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c7-202206201522-wx0t"), FAM, Bytes.toBytes("qh")));
    rows.add(new KeyValue(Bytes.toBytes("00c8-202206200751-wx0ah4gnbwptdyna-F"), FAM,
      Bytes.toBytes("qh")));

    List<KeyValue.KeyOnlyKeyValue> childs = new ArrayList<>(15);
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(0)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(1)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(2)))));

    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(rows.get(3)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(
      PrivateCellUtil.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(
        new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qo"))))));

    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(4)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(
      PrivateCellUtil.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(
        new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qb"))))));
    childs.add(new KeyValue.KeyOnlyKeyValue(
      PrivateCellUtil.getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(
        new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qf"))))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRowCol(rows.get(4)))));

    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(5)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(6)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(7)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(8)))));
    childs.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil
      .getCellKeySerializedAsKeyValueKey(PrivateCellUtil.createFirstOnRow(rows.get(9)))));

    TokenizerNode node = PrefixTreeUtilV2.buildPrefixTree(childs);

    PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    PrefixTreeUtilV2.serializePrefixTree(node, dataWidth, outputStream);
    byte[] data = outputStream.toByteArray();
    ByteBuffer prefixTreeNodeData = ByteBuffer.wrap(data);
    for (int i = 0; i < childs.size(); i++) {
      byte[] result = PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, i);
      Assert.assertTrue(Bytes.compareTo(result, CellUtil.cloneRow(childs.get(i))) == 0);
    }

    for (int i = 0; i < childs.size(); i++) {
      int result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, childs.get(i), 0, dataWidth);
      Assert.assertEquals(i, result);
    }

    Cell skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201519"));
    int result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(-1, result);

    skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201520"));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(1, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qa")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(2, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qm")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(3, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201520-wx0x7"), FAM, Bytes.toBytes("qs")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(4, result);

    skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201520-wx0x7-"));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(4, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qa")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(5, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qe")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(6, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qg")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(7, result);

    skey = PrivateCellUtil.createFirstOnRowCol(
      new KeyValue(Bytes.toBytes("00c7-202206201521"), FAM, Bytes.toBytes("qu")));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(8, result);

    skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c7-202206201521-wx0"));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(8, result);

    skey = PrivateCellUtil.createFirstOnRow(Bytes.toBytes("00c8-202206200751-wx0ah4gnbwptdyna-F-"));
    result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, skey, 0, dataWidth);
    Assert.assertEquals(13, result);
  }

  @Test
  public void testSearchPrefixTreeWithTimeStampType() throws IOException {
    List<KeyValue> rows = new ArrayList<>();

    rows.add(new KeyValue(Bytes.toBytes("00073123012802202_121_9223370375843575807"), FAM,
      Bytes.toBytes("bg_id"), 1661023473524L, KeyValue.Type.Put));
    rows.add(new KeyValue(Bytes.toBytes("00073124151608102_121_9223370375238775807"), FAM,
      Bytes.toBytes("cur_run_date"), 1661713633365L, KeyValue.Type.Put));
    rows.add(new KeyValue(Bytes.toBytes("00073124151608102_121_9223370375670775807"), FAM,
      Bytes.toBytes("run"), Long.MAX_VALUE, KeyValue.Type.Maximum));

    List<KeyValue.KeyOnlyKeyValue> childs = new ArrayList<>(3);
    childs.add(
      new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(rows.get(0))));
    childs.add(
      new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(rows.get(1))));
    childs.add(
      new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(rows.get(2))));

    TokenizerNode node = PrefixTreeUtilV2.buildPrefixTree(childs);

    PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth();
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    PrefixTreeUtilV2.serializePrefixTree(node, dataWidth, outputStream);
    byte[] data = outputStream.toByteArray();
    ByteBuffer prefixTreeNodeData = ByteBuffer.wrap(data);

    for (int i = 0; i < childs.size(); i++) {
      byte[] result = PrefixTreeUtilV2.get(prefixTreeNodeData, 0, dataWidth, i);
      Assert.assertTrue(Bytes.compareTo(result, CellUtil.cloneRow(childs.get(i))) == 0);
    }

    for (int i = 0; i < childs.size(); i++) {
      int result = PrefixTreeUtilV2.search(prefixTreeNodeData, 0, childs.get(i), 0, dataWidth);
      Assert.assertEquals(i, result);
    }
  }
}
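Condensed, the roundtrip both tests exercise is: build a TokenizerNode from sorted key-only cells, serialize it with its PrefixTreeDataWidth, then read rows back by index with get() and locate a cell's position with search() (which, as the assertions above suggest, returns the index of the last entry not greater than the search key, or -1 when the key sorts before the first entry). A short sketch using the same helpers and argument order as the tests; the row values are made up and the snippet assumes the same imports as TestPrefixTreeUtilV2:

// Roundtrip sketch; keys "row-a"/"row-b" are placeholders, not real table data.
List<KeyValue.KeyOnlyKeyValue> keys = new ArrayList<>(2);
keys.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(
  PrivateCellUtil.createFirstOnRow(Bytes.toBytes("row-a")))));
keys.add(new KeyValue.KeyOnlyKeyValue(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(
  PrivateCellUtil.createFirstOnRow(Bytes.toBytes("row-b")))));

TokenizerNode root = PrefixTreeUtilV2.buildPrefixTree(keys);
PrefixTreeDataWidth dataWidth = new PrefixTreeDataWidth();
ByteArrayOutputStream out = new ByteArrayOutputStream();
PrefixTreeUtilV2.serializePrefixTree(root, dataWidth, out);
ByteBuffer block = ByteBuffer.wrap(out.toByteArray());

byte[] row0 = PrefixTreeUtilV2.get(block, 0, dataWidth, 0);                  // "row-a" bytes back
int idx = PrefixTreeUtilV2.search(block, 0,
  PrivateCellUtil.createFirstOnRow(Bytes.toBytes("row-b")), 0, dataWidth);   // -> 1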
@ -239,13 +239,8 @@ public class HFileWriterImpl implements HFile.Writer {
       throw new IOException("Key cannot be null or empty");
     }
     if (lastCell != null) {
-      int keyComp = 0;
-      if (hFileContext.getIndexBlockEncoding() == IndexBlockEncoding.PREFIX_TREE) {
-        keyComp = this.hFileContext.getCellComparator().compareRows(lastCell, cell);
-      } else {
-        keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(),
-          lastCell, cell);
-      }
+      int keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(this.hFileContext.getCellComparator(),
+        lastCell, cell);
       if (keyComp > 0) {
         String message = getLexicalErrorMessage(cell);
         throw new IOException(message);
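Net effect of this hunk: the PREFIX_TREE special case is removed, so the writer again computes keyComp with compareKeyIgnoresMvcc for every appended cell, independent of the configured index block encoding.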